1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 Google Inc.
6  * Copyright (c) 2016 The Khronos Group Inc.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief SPIR-V Assembly Tests for Instructions (special opcode/operand)
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktSpvAsmInstructionTests.hpp"
26 #include "vktAmberTestCase.hpp"
27 
28 #include "tcuCommandLine.hpp"
29 #include "tcuFormatUtil.hpp"
30 #include "tcuFloat.hpp"
31 #include "tcuFloatFormat.hpp"
32 #include "tcuRGBA.hpp"
33 #include "tcuStringTemplate.hpp"
34 #include "tcuTestLog.hpp"
35 #include "tcuVectorUtil.hpp"
36 #include "tcuInterval.hpp"
37 
38 #include "vkDefs.hpp"
39 #include "vkDeviceUtil.hpp"
40 #include "vkMemUtil.hpp"
41 #include "vkPlatform.hpp"
42 #include "vkPrograms.hpp"
43 #include "vkQueryUtil.hpp"
44 #include "vkRef.hpp"
45 #include "vkRefUtil.hpp"
46 #include "vkStrUtil.hpp"
47 #include "vkTypeUtil.hpp"
48 
49 #include "deStringUtil.hpp"
50 #include "deUniquePtr.hpp"
51 #include "deMath.h"
52 #include "deRandom.hpp"
53 #include "tcuStringTemplate.hpp"
54 
55 #include "vktSpvAsmCrossStageInterfaceTests.hpp"
56 #include "vktSpvAsm8bitStorageTests.hpp"
57 #include "vktSpvAsm16bitStorageTests.hpp"
58 #include "vktSpvAsmUboMatrixPaddingTests.hpp"
59 #include "vktSpvAsmConditionalBranchTests.hpp"
60 #include "vktSpvAsmIndexingTests.hpp"
61 #include "vktSpvAsmImageSamplerTests.hpp"
62 #include "vktSpvAsmComputeShaderCase.hpp"
63 #include "vktSpvAsmComputeShaderTestUtil.hpp"
64 #include "vktSpvAsmFloatControlsTests.hpp"
65 #include "vktSpvAsmFromHlslTests.hpp"
66 #include "vktSpvAsmEmptyStructTests.hpp"
67 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
68 #include "vktSpvAsmVariablePointersTests.hpp"
69 #include "vktSpvAsmVariableInitTests.hpp"
70 #include "vktSpvAsmPointerParameterTests.hpp"
71 #include "vktSpvAsmSpirvVersion1p4Tests.hpp"
72 #include "vktSpvAsmSpirvVersionTests.hpp"
73 #include "vktTestCaseUtil.hpp"
74 #include "vktSpvAsmLoopDepLenTests.hpp"
75 #include "vktSpvAsmLoopDepInfTests.hpp"
76 #include "vktSpvAsmCompositeInsertTests.hpp"
77 #include "vktSpvAsmVaryingNameTests.hpp"
78 #include "vktSpvAsmWorkgroupMemoryTests.hpp"
79 #include "vktSpvAsmSignedIntCompareTests.hpp"
80 #include "vktSpvAsmSignedOpTests.hpp"
81 #include "vktSpvAsmPtrAccessChainTests.hpp"
82 #include "vktSpvAsmVectorShuffleTests.hpp"
83 #include "vktSpvAsmFloatControlsExtensionlessTests.hpp"
84 #include "vktSpvAsmNonSemanticInfoTests.hpp"
85 #include "vktSpvAsm64bitCompareTests.hpp"
86 #include "vktSpvAsmTrinaryMinMaxTests.hpp"
87 #include "vktSpvAsmTerminateInvocationTests.hpp"
88 #include "vktSpvAsmMultipleShadersTests.hpp"
89 #ifndef CTS_USES_VULKANSC
90 #include "vktSpvAsmFloatControls2Tests.hpp"
91 #include "vktSpvAsmIntegerDotProductTests.hpp"
92 #endif // CTS_USES_VULKANSC
93 #include "vktSpvAsmPhysicalStorageBufferPointerTests.hpp"
94 
95 #include <cmath>
96 #include <limits>
97 #include <map>
98 #include <string>
99 #include <sstream>
100 #include <utility>
101 #include <stack>
102 #include <cassert>
103 
104 namespace vkt
105 {
106 namespace SpirVAssembly
107 {
108 
109 namespace
110 {
111 
112 using namespace vk;
113 using de::UniquePtr;
114 using std::map;
115 using std::string;
116 using std::vector;
117 using tcu::IVec3;
118 using tcu::IVec4;
119 using tcu::RGBA;
120 using tcu::StringTemplate;
121 using tcu::TestLog;
122 using tcu::TestStatus;
123 using tcu::Vec4;
124 
// Readable names for a boolean "include NaN" switch.
// NOTE(review): the meaning is inferred from the identifiers only; the code
// that consumes these flags is outside this part of the file -- confirm there.
const bool TEST_WITH_NAN{true};
const bool TEST_WITHOUT_NAN{false};
127 
// SPIR-V assembly template for the function %ld_arg_${var}: given an i32
// element index it returns one f16 read out of 32-bit storage.
//   - word index = index / 2 (unsigned divide of the bitcast index)
//   - lane       = index & 1
// The u32 at member 0, element [word index] of %${var} is loaded, bitcast to
// v2f16 and the selected lane is extracted dynamically.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string loadScalarF16FromUint = R"(%ld_arg_${var} = OpFunction %f16 None %f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_conv = OpBitcast %u32 %ld_arg_${var}_param
%ld_arg_${var}_div = OpUDiv %u32 %ld_arg_${var}_conv %c_u32_2
%ld_arg_${var}_and_low = OpBitwiseAnd %u32 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_div
%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep
%ld_arg_${var}_unpack = OpBitcast %v2f16 %ld_arg_${var}_ld
%ld_arg_${var}_ex = OpVectorExtractDynamic %f16 %ld_arg_${var}_unpack %ld_arg_${var}_and_low
OpReturnValue %ld_arg_${var}_ex
OpFunctionEnd
)";
141 
// SPIR-V assembly template for the function %ld_arg_${var}: returns a v2f16
// for an i32 index by loading the u32 at member 0, element [index] of
// %${var} and bitcasting that single word to two halves.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string loadV2F16FromUint = R"(%ld_arg_${var} = OpFunction %v2f16 None %v2f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param
%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep
%ld_arg_${var}_cast = OpBitcast %v2f16 %ld_arg_${var}_ld
OpReturnValue %ld_arg_${var}_cast
OpFunctionEnd
)";
150 
// SPIR-V assembly template for the function %ld_arg_${var}: returns a v3f16
// for an i32 index. A vec4 worth of values is allocated per element, so this
// is nearly the vec4 loader: words 0 and 1 of member 0, element [index] of
// %${var} are loaded, each bitcast to v2f16, and a shuffle keeps components
// 0, 1 and 2 (the fourth half is dropped).
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string loadV3F16FromUints = R"(%ld_arg_${var} = OpFunction %v3f16 None %v3f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0
%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0
%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1
%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1
%ld_arg_${var}_shuffle = OpVectorShuffle %v3f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2
OpReturnValue %ld_arg_${var}_shuffle
OpFunctionEnd
)";
166 
// SPIR-V assembly template for the function %ld_arg_${var}: returns a v4f16
// for an i32 index. Words 0 and 1 of member 0, element [index] of %${var}
// are loaded, each bitcast to v2f16, and shuffled together as components
// 0 1 2 3.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string loadV4F16FromUints = R"(%ld_arg_${var} = OpFunction %v4f16 None %v4f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0
%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0
%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1
%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1
%ld_arg_${var}_shuffle = OpVectorShuffle %v4f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2 3
OpReturnValue %ld_arg_${var}_shuffle
OpFunctionEnd
)";
180 
// SPIR-V assembly template for the function %ld_arg_${var}: returns an
// m2x2f16 for an i32 index. Each column is one u32 word (words 0 and 1 of
// member 0, element [index] of %${var}) bitcast to v2f16; the two columns
// are combined with OpCompositeConstruct.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string loadM2x2F16FromUints = R"(%ld_arg_${var} = OpFunction %m2x2f16 None %m2x2f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0
%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0
%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1
%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1
%ld_arg_${var}_cons = OpCompositeConstruct %m2x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1
OpReturnValue %ld_arg_${var}_cons
OpFunctionEnd
)";
194 
// SPIR-V assembly template for the function %ld_arg_${var}: returns an
// m2x3f16 for an i32 index. Four u32 words (0..3) of member 0,
// element [index] of %${var} are loaded and bitcast to v2f16. Column c is
// built from words 2c and 2c+1 with a shuffle that keeps components 0, 1, 2,
// and the two v3f16 columns are combined with OpCompositeConstruct.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string loadM2x3F16FromUints = R"(%ld_arg_${var} = OpFunction %m2x3f16 None %m2x3f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2
%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2
%ld_arg_${var}_mat = OpCompositeConstruct %m2x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
216 
// SPIR-V assembly template for the function %ld_arg_${var}: returns an
// m2x4f16 for an i32 index. Four u32 words (0..3) of member 0,
// element [index] of %${var} are loaded and bitcast to v2f16. Column c is
// the v4f16 shuffle (0 1 2 3) of words 2c and 2c+1, and the two columns are
// combined with OpCompositeConstruct.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string loadM2x4F16FromUints = R"(%ld_arg_${var} = OpFunction %m2x4f16 None %m2x4f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3
%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3
%ld_arg_${var}_mat = OpCompositeConstruct %m2x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
238 
// SPIR-V assembly template for the function %ld_arg_${var}: returns an
// m3x2f16 for an i32 index. Each of the three columns is one u32 word
// (words 0..2 of member 0, element [index] of %${var}) bitcast to v2f16;
// the columns are combined with OpCompositeConstruct.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string loadM3x2F16FromUints = R"(%ld_arg_${var} = OpFunction %m3x2f16 None %m3x2f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0
%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1
%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2
%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0
%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1
%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2
%ld_arg_${var}_mat = OpCompositeConstruct %m3x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
255 
// SPIR-V assembly template for the function %ld_arg_${var}: returns an
// m3x3f16 for an i32 index. Six u32 words (0..5) of member 0,
// element [index] of %${var} are loaded and bitcast to v2f16. Column c is
// built from words 2c and 2c+1 with a shuffle that keeps components 0, 1, 2,
// and the three v3f16 columns are combined with OpCompositeConstruct.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string loadM3x3F16FromUints = R"(%ld_arg_${var} = OpFunction %m3x3f16 None %m3x3f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4
%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20
%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20
%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21
%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2
%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2
%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2
%ld_arg_${var}_mat = OpCompositeConstruct %m3x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
284 
// SPIR-V assembly template for the function %ld_arg_${var}: returns an
// m3x4f16 for an i32 index. Six u32 words (0..5) of member 0,
// element [index] of %${var} are loaded and bitcast to v2f16. Column c is
// the v4f16 shuffle (0 1 2 3) of words 2c and 2c+1, and the three columns
// are combined with OpCompositeConstruct.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string loadM3x4F16FromUints = R"(%ld_arg_${var} = OpFunction %m3x4f16 None %m3x4f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4
%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20
%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20
%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21
%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3
%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3
%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3
%ld_arg_${var}_mat = OpCompositeConstruct %m3x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
313 
// SPIR-V assembly template for the function %ld_arg_${var}: returns an
// m4x2f16 for an i32 index. Each of the four columns is one u32 word
// (words 0..3 of member 0, element [index] of %${var}) bitcast to v2f16;
// the columns are combined with OpCompositeConstruct.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string loadM4x2F16FromUints = R"(%ld_arg_${var} = OpFunction %m4x2f16 None %m4x2f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0
%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1
%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2
%ld_arg_${var}_ld3 = OpLoad %u32 %ld_arg_${var}_gep3
%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0
%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1
%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2
%ld_arg_${var}_bc3 = OpBitcast %v2f16 %ld_arg_${var}_ld3
%ld_arg_${var}_mat = OpCompositeConstruct %m4x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2 %ld_arg_${var}_bc3
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
334 
// SPIR-V assembly template for the function %ld_arg_${var}: returns an
// m4x3f16 for an i32 index. Eight u32 words (0..7) of member 0,
// element [index] of %${var} are loaded and bitcast to v2f16. Column c is
// built from words 2c and 2c+1 with a shuffle that keeps components 0, 1, 2,
// and the four v3f16 columns are combined with OpCompositeConstruct.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string loadM4x3F16FromUints = R"(%ld_arg_${var} = OpFunction %m4x3f16 None %m4x3f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4
%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5
%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6
%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20
%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21
%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30
%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20
%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21
%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30
%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31
%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2
%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2
%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2
%ld_arg_${var}_vec3 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2
%ld_arg_${var}_mat = OpCompositeConstruct %m4x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
371 
// SPIR-V assembly template for the function %ld_arg_${var}: returns an
// m4x4f16 for an i32 index. Eight u32 words (0..7) of member 0,
// element [index] of %${var} are loaded and bitcast to v2f16. Column c is
// the v4f16 shuffle (0 1 2 3) of words 2c and 2c+1, and the four columns
// are combined with OpCompositeConstruct.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string loadM4x4F16FromUints = R"(%ld_arg_${var} = OpFunction %m4x4f16 None %m4x4f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4
%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5
%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6
%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20
%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21
%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30
%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20
%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21
%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30
%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31
%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3
%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3
%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3
%ld_arg_${var}_vec3 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2 3
%ld_arg_${var}_mat = OpCompositeConstruct %m4x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
408 
// SPIR-V assembly template for the function %st_fn_${var}: writes one f16
// (param1) at element index param2 (i32) of 32-bit storage.
//
// This version depends on the initial contents of the output buffer: the
// infrastructure sets every output bit to one before the shader runs, so the
// value is merged in with an atomic AND, which produces the required zeroes.
//
// Steps, as spelled out by the assembly:
//   - lane = param2 & 1; the f16 is inserted at that lane of an all-zero
//     v2f16.
//   - 16 bits of ones are OR-ed into the half that was not populated with the
//     result (%c_u32_low_ones when the lane is odd, %c_u32_high_ones
//     otherwise), so the atomic AND leaves that half of the word as it was.
//   - the word at member 0, element [param2 / 2] of %${var} is updated with
//     OpAtomicAnd, using %c_u32_1 as the scope operand and %c_u32_0 as the
//     memory-semantics operand.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string storeScalarF16AsUint = R"(%st_fn_${var} = OpFunction %void None %void_f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_and_low = OpBitwiseAnd %u32 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_zero_vec = OpBitcast %v2f16 %c_u32_0
%st_fn_${var}_insert = OpVectorInsertDynamic %v2f16 %st_fn_${var}_zero_vec %st_fn_${var}_param1 %st_fn_${var}_and_low
%st_fn_${var}_odd = OpIEqual %bool %st_fn_${var}_and_low %c_u32_1
%st_fn_${var}_sel = OpSelect %u32 %st_fn_${var}_odd %c_u32_low_ones %c_u32_high_ones
%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_insert
%st_fn_${var}_or = OpBitwiseOr %u32 %st_fn_${var}_cast %st_fn_${var}_sel
%st_fn_${var}_conv = OpBitcast %u32 %st_fn_${var}_param2
%st_fn_${var}_div = OpUDiv %u32 %st_fn_${var}_conv %c_u32_2
%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_div
%st_fn_${var}_and = OpAtomicAnd %u32 %st_fn_${var}_gep %c_u32_1 %c_u32_0 %st_fn_${var}_or
OpReturn
OpFunctionEnd
)";
433 
// SPIR-V assembly template for the function %st_fn_${var}: stores a v2f16
// (param1) by bitcasting it to a single u32 and writing that word with
// OpStore to member 0, element [param2] of %${var}.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string storeV2F16AsUint = R"(%st_fn_${var} = OpFunction %void None %void_v2f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %v2f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_param1
%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2
OpStore %st_fn_${var}_gep %st_fn_${var}_cast
OpReturn
OpFunctionEnd
)";
443 
// SPIR-V assembly template for the function %st_fn_${var}: stores a v3f16
// (param1) as two u32 words at member 0, element [param2] of %${var}.
// A vec4 worth of values is allocated per element, so this is handled almost
// like the vec4 case; the second word carries some extra data that should
// not be compared. The halves are formed by shuffling param1 with itself
// (components 0 1 and 2 3), bitcast to u32 and written to words 0 and 1.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string storeV3F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_v3f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %v3f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1
%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3
%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0
%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1
%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0
OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1
OpReturn
OpFunctionEnd
)";
462 
// SPIR-V assembly template for the function %st_fn_${var}: stores a v4f16
// (param1) as two u32 words at member 0, element [param2] of %${var}.
// Components 0 1 and 2 3 are shuffled out as v2f16 pairs, bitcast to u32 and
// written with OpStore to words 0 and 1 respectively.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string storeV4F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_v4f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %v4f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1
%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3
%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0
%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1
%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0
OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1
OpReturn
OpFunctionEnd
)";
478 
// SPIR-V assembly template for the function %st_fn_${var}: stores an
// m2x2f16 (param1) as two u32 words at member 0, element [param2] of
// %${var}. Each v2f16 column is extracted, bitcast to u32 and written with
// OpStore; column 0 goes to word 0 and column 1 to word 1.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string storeM2x2F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_m2x2f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m2x2f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1
%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0
%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1
%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0
OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1
OpReturn
OpFunctionEnd
)";
494 
// SPIR-V assembly template for the function %st_fn_${var}: stores an
// m2x3f16 (param1) as four u32 words at member 0, element [param2] of
// %${var}. Each v3f16 column is extracted and split, by shuffling it with
// itself, into two v2f16 pairs (components 0 1 and 2 3); the pairs are
// bitcast to u32 and written to words 0..3. In the 01 and 11 pairs the
// second element does not matter.
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string storeM2x3F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_m2x3f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m2x3f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1
%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1
%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3
%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1
%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3
%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00
%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01
%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10
%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11
%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2
%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3
OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00
OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01
OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10
OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11
OpReturn
OpFunctionEnd
)";
522 
// SPIR-V assembly template for the function %st_fn_${var}: stores an
// m2x4f16 (param1) as four u32 words at member 0, element [param2] of
// %${var}. Each v4f16 column is extracted and split into two v2f16 pairs
// (components 0 1 and 2 3); the pairs are bitcast to u32 and written with
// OpStore to words 0..3 (column 0 first, then column 1).
// ${var} is a placeholder (presumably expanded through tcu::StringTemplate).
const string storeM2x4F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_m2x4f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m2x4f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1
%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1
%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3
%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1
%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3
%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00
%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01
%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10
%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11
%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2
%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3
OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00
OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01
OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10
OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11
OpReturn
OpFunctionEnd
)";
548 
// SPIR-V assembly template for a helper function %st_fn_${var} that writes a %m3x2f16
// matrix out as packed 32-bit words. ${var} is a placeholder that must be substituted
// before assembling (presumably through tcu::StringTemplate -- confirm at the use site);
// it names both the generated function and the destination variable.
//
// Every column is already a %v2f16, so each one is bitcast to %u32 directly and the three
// resulting words are stored to %${var}[0][<%i32 parameter>][0..2].
const string storeM3x2F16AsUints =
    "%st_fn_${var} = OpFunction %void None %void_m3x2f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m3x2f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    // Pull out the three columns.
    "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
    // Reinterpret each column as one 32-bit word.
    "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
    "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
    "%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
    // Address the three destination words selected by the index parameter.
    "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    // Write the words out.
    "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
    "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
    "OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
568 
// SPIR-V assembly template for a helper function %st_fn_${var} that writes a %m3x3f16
// matrix out as packed 32-bit words. ${var} is a placeholder that must be substituted
// before assembling (presumably through tcu::StringTemplate -- confirm at the use site);
// it names both the generated function and the destination variable.
//
// Each of the three %v3f16 columns is broken into two %v2f16 pairs, every pair is bitcast
// to %u32, and the six resulting words are stored to %${var}[0][<%i32 parameter>][0..5].
const string storeM3x3F16AsUints =
    // Each vec3 column is broken down into a (0 1) pair and a (2 3) pair; the second
    // element of the (2 3) pair is only padding, so its value doesn't matter.
    "%st_fn_${var} = OpFunction %void None %void_m3x3f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m3x3f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    // Pull out the three columns.
    "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
    // Break every column into two 2-component pairs.
    "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
    "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
    "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
    "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
    "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
    "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
    // Reinterpret each pair of 16-bit floats as one 32-bit word.
    "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
    "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
    "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
    "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
    "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
    "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
    // Address the six destination words selected by the index parameter.
    "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
    "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
    "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
    // Write the words out.
    "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
    "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
    "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
    "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
    "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
    "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
604 
// SPIR-V assembly template for a helper function %st_fn_${var} that writes a %m3x4f16
// matrix out as packed 32-bit words. ${var} is a placeholder that must be substituted
// before assembling (presumably through tcu::StringTemplate -- confirm at the use site);
// it names both the generated function and the destination variable.
//
// Each of the three %v4f16 columns is split into two %v2f16 pairs, every pair is bitcast
// to %u32, and the six resulting words are stored to %${var}[0][<%i32 parameter>][0..5].
const string storeM3x4F16AsUints =
    "%st_fn_${var} = OpFunction %void None %void_m3x4f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m3x4f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    // Pull out the three columns.
    "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
    // Split every column into components (0, 1) and (2, 3).
    "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
    "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
    "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
    "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
    "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
    "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
    // Reinterpret each pair of 16-bit floats as one 32-bit word.
    "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
    "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
    "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
    "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
    "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
    "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
    // Address the six destination words selected by the index parameter.
    "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
    "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
    "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
    // Write the words out.
    "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
    "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
    "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
    "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
    "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
    "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
639 
// SPIR-V assembly template for a helper function %st_fn_${var} that writes a %m4x2f16
// matrix out as packed 32-bit words. ${var} is a placeholder that must be substituted
// before assembling (presumably through tcu::StringTemplate -- confirm at the use site);
// it names both the generated function and the destination variable.
//
// Every column is already a %v2f16, so each one is bitcast to %u32 directly and the four
// resulting words are stored to %${var}[0][<%i32 parameter>][0..3].
const string storeM4x2F16AsUints =
    "%st_fn_${var} = OpFunction %void None %void_m4x2f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m4x2f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    // Pull out the four columns.
    "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
    "%st_fn_${var}_ex3 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 3\n"
    // Reinterpret each column as one 32-bit word.
    "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
    "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
    "%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
    "%st_fn_${var}_bc3 = OpBitcast %u32 %st_fn_${var}_ex3\n"
    // Address the four destination words selected by the index parameter.
    "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    "%st_fn_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
    // Write the words out.
    "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
    "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
    "OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
    "OpStore %st_fn_${var}_gep3 %st_fn_${var}_bc3\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
663 
// SPIR-V assembly template for a helper function %st_fn_${var} that writes a %m4x3f16
// matrix out as packed 32-bit words. ${var} is a placeholder that must be substituted
// before assembling (presumably through tcu::StringTemplate -- confirm at the use site);
// it names both the generated function and the destination variable.
//
// Each of the four %v3f16 columns is broken into two %v2f16 pairs, every pair is bitcast
// to %u32, and the eight resulting words are stored to %${var}[0][<%i32 parameter>][0..7].
const string storeM4x3F16AsUints =
    // Each vec3 column is decomposed into a (0 1) pair and a (2 3) pair; the last element
    // of the decomposed column is only padding, so its value doesn't matter.
    "%st_fn_${var} = OpFunction %void None %void_m4x3f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m4x3f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    // Pull out the four columns.
    "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
    "%st_fn_${var}_ex3 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 3\n"
    // Break every column into two 2-component pairs.
    "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
    "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
    "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
    "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
    "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
    "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
    "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
    "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
    // Reinterpret each pair of 16-bit floats as one 32-bit word.
    "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
    "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
    "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
    "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
    "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
    "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
    "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
    "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
    // Address the eight destination words selected by the index parameter.
    "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
    "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
    "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
    "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
    "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
    // Write the words out.
    "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
    "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
    "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
    "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
    "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
    "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
    "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
    "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
708 
// SPIR-V assembly template for a helper function %st_fn_${var} that writes a %m4x4f16
// matrix out as packed 32-bit words. ${var} is a placeholder that must be substituted
// before assembling (presumably through tcu::StringTemplate -- confirm at the use site);
// it names both the generated function and the destination variable.
//
// Each of the four %v4f16 columns is split into two %v2f16 pairs, every pair is bitcast
// to %u32, and the eight resulting words are stored to %${var}[0][<%i32 parameter>][0..7].
const string storeM4x4F16AsUints =
    "%st_fn_${var} = OpFunction %void None %void_m4x4f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m4x4f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    // Pull out the four columns.
    "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
    "%st_fn_${var}_ex3 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 3\n"
    // Split every column into components (0, 1) and (2, 3).
    "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
    "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
    "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
    "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
    "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
    "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
    "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
    "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
    // Reinterpret each pair of 16-bit floats as one 32-bit word.
    "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
    "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
    "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
    "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
    "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
    "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
    "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
    "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
    // Address the eight destination words selected by the index parameter.
    "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
    "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
    "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
    "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
    "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
    // Write the words out.
    "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
    "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
    "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
    "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
    "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
    "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
    "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
    "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
752 
753 template <typename T>
fillRandomScalars(de::Random & rnd,T minValue,T maxValue,void * dst,int numValues,int offset=0)754 static void fillRandomScalars(de::Random &rnd, T minValue, T maxValue, void *dst, int numValues, int offset = 0)
755 {
756     T *const typedPtr = (T *)dst;
757     for (int ndx = 0; ndx < numValues; ndx++)
758         typedPtr[offset + ndx] = de::randomScalar<T>(rnd, minValue, maxValue);
759 }
760 
761 // Filter is a function that returns true if a value should pass, false otherwise.
762 template <typename T, typename FilterT>
fillRandomScalars(de::Random & rnd,T minValue,T maxValue,void * dst,int numValues,FilterT filter,int offset=0)763 static void fillRandomScalars(de::Random &rnd, T minValue, T maxValue, void *dst, int numValues, FilterT filter,
764                               int offset = 0)
765 {
766     T *const typedPtr = (T *)dst;
767     T value;
768     for (int ndx = 0; ndx < numValues; ndx++)
769     {
770         do
771             value = de::randomScalar<T>(rnd, minValue, maxValue);
772         while (!filter(value));
773 
774         typedPtr[offset + ndx] = value;
775     }
776 }
777 
778 // Gets a 64-bit integer with a more logarithmic distribution
randomInt64LogDistributed(de::Random & rnd)779 int64_t randomInt64LogDistributed(de::Random &rnd)
780 {
781     int64_t val = rnd.getUint64();
782     val &= (1ull << rnd.getInt(1, 63)) - 1;
783     if (rnd.getBool())
784         val = -val;
785     return val;
786 }
787 
fillRandomInt64sLogDistributed(de::Random & rnd,vector<int64_t> & dst,int numValues)788 static void fillRandomInt64sLogDistributed(de::Random &rnd, vector<int64_t> &dst, int numValues)
789 {
790     for (int ndx = 0; ndx < numValues; ndx++)
791         dst[ndx] = randomInt64LogDistributed(rnd);
792 }
793 
794 template <typename FilterT>
fillRandomInt64sLogDistributed(de::Random & rnd,vector<int64_t> & dst,int numValues,FilterT filter)795 static void fillRandomInt64sLogDistributed(de::Random &rnd, vector<int64_t> &dst, int numValues, FilterT filter)
796 {
797     for (int ndx = 0; ndx < numValues; ndx++)
798     {
799         int64_t value;
800         do
801         {
802             value = randomInt64LogDistributed(rnd);
803         } while (!filter(value));
804         dst[ndx] = value;
805     }
806 }
807 
// Filter predicate: accepts zero and every positive value, rejects negative ones.
inline bool filterNonNegative(const int64_t value)
{
    const bool isNegative = (value < 0);
    return !isNegative;
}
812 
// Filter predicate: accepts strictly positive values only (zero is rejected).
inline bool filterPositive(const int64_t value)
{
    const bool isZeroOrBelow = (value <= 0);
    return !isZeroOrBelow;
}
817 
// Filter predicate: accepts every value except zero.
inline bool filterNotZero(const int64_t value)
{
    const bool isZero = (value == 0);
    return !isZero;
}
822 
floorAll(vector<float> & values)823 static void floorAll(vector<float> &values)
824 {
825     for (size_t i = 0; i < values.size(); i++)
826         values[i] = deFloatFloor(values[i]);
827 }
828 
floorAll(vector<Vec4> & values)829 static void floorAll(vector<Vec4> &values)
830 {
831     for (size_t i = 0; i < values.size(); i++)
832         values[i] = floor(values[i]);
833 }
834 
// Pairs a test-case name with the string parameter associated with that case.
struct CaseParameter
{
    // Case name. Only the pointer is stored, so the pointed-to text must outlive this object.
    const char *name;
    // Parameter text; copied on construction.
    string param;

    CaseParameter(const char *caseName, const string &caseParam) : name(caseName), param(caseParam)
    {
    }
};
844 
845 // Assembly code used for testing LocalSize, OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
846 //
847 // #version 430
848 //
849 // layout(std140, set = 0, binding = 0) readonly buffer Input {
850 //   float elements[];
851 // } input_data;
852 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
853 //   float elements[];
854 // } output_data;
855 //
856 // layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
857 //
858 // void main() {
859 //   uint x = gl_GlobalInvocationID.x;
860 //   output_data.elements[x] = -input_data.elements[x];
861 // }
862 
// How a local-size value (execution mode or WorkgroupSize built-in) is supplied to the
// generated shader. The numeric values are spelled out because createLocalSizeGroup()
// obtains these enumerators by casting the loop counters 0..2.
enum LocalSizeValueType
{
    LSV_NONE       = 0, // Not specified through this mechanism at all
    LSV_LITERAL    = 1, // Given as literal / OpConstant values
    LSV_SPEC_CONST = 2  // Given as OpSpecConstant values decorated with SpecId
};
869 
getAsmForLocalSizeTest(bool useLocalSizeId,LocalSizeValueType execModeType,LocalSizeValueType workgroupSizeType,IVec3 workGroupSize,uint32_t ndx)870 static string getAsmForLocalSizeTest(bool useLocalSizeId, LocalSizeValueType execModeType,
871                                      LocalSizeValueType workgroupSizeType, IVec3 workGroupSize, uint32_t ndx)
872 {
873     std::ostringstream out;
874     out << "OpCapability Shader\n"
875            "OpMemoryModel Logical GLSL450\n";
876 
877     // LocalsizeId tests require SPIR-V 1.5, so the interface specification is different
878     if (useLocalSizeId)
879         out << "OpEntryPoint GLCompute %main \"main\" %id %indata %outdata\n";
880     else
881         out << "OpEntryPoint GLCompute %main \"main\" %id\n";
882 
883     // If using workgroup size then this overrides the execution mode, so use nonsense values.
884     IVec3 nonsense(9, 13, 106);
885     IVec3 execModeValue = (workgroupSizeType != LSV_NONE) ? nonsense : workGroupSize;
886 
887     if (execModeType != LSV_NONE)
888     {
889         if (useLocalSizeId)
890             out << "OpExecutionModeId %main LocalSizeId %emv_0 %emv_1 %emv_2\n";
891         else
892             out << "OpExecutionMode %main LocalSize " << execModeValue.x() << " " << execModeValue.y() << " "
893                 << execModeValue.z() << "\n";
894     }
895 
896     out << "OpSource GLSL 430\n"
897            "OpName %main           \"main\"\n"
898            "OpName %id             \"gl_GlobalInvocationID\"\n"
899            "OpDecorate %id BuiltIn GlobalInvocationId\n";
900 
901     if (execModeType == LSV_SPEC_CONST)
902     {
903         out << "OpDecorate %emv_0 SpecId 100\n"
904                "OpDecorate %emv_1 SpecId 101\n"
905                "OpDecorate %emv_2 SpecId 102\n";
906     }
907     if (workgroupSizeType == LSV_SPEC_CONST)
908     {
909         out << "OpDecorate %wgs_0 SpecId 200\n"
910                "OpDecorate %wgs_1 SpecId 201\n"
911                "OpDecorate %wgs_2 SpecId 202\n";
912     }
913 
914     if (workgroupSizeType != LSV_NONE)
915         out << "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n";
916 
917     // SPIR-V 1.0 uses Uniform/BufferBlock, 1.5 uses StorageBuffer/Block
918     string blockDec = useLocalSizeId ? "Block" : "BufferBlock";
919     string blockSC  = useLocalSizeId ? "StorageBuffer" : "Uniform";
920     out << getComputeAsmInputOutputBufferTraits(blockDec) << getComputeAsmCommonTypes(blockSC)
921         << getComputeAsmInputOutputBuffer(blockSC);
922 
923     assert(useLocalSizeId || execModeType != LSV_SPEC_CONST);
924     if (useLocalSizeId)
925     {
926         switch (execModeType)
927         {
928         case LSV_NONE: /* Do nothing */
929             break;
930         case LSV_LITERAL:
931             out << "%emv_0  = OpConstant %u32 " << execModeValue.x()
932                 << "\n"
933                    "%emv_1  = OpConstant %u32 "
934                 << execModeValue.y()
935                 << "\n"
936                    "%emv_2  = OpConstant %u32 "
937                 << execModeValue.z() << "\n";
938             break;
939         case LSV_SPEC_CONST:
940             out << "%emv_0  = OpSpecConstant %u32 " << execModeValue.x()
941                 << "\n"
942                    "%emv_1  = OpSpecConstant %u32 "
943                 << execModeValue.y()
944                 << "\n"
945                    "%emv_2  = OpSpecConstant %u32 "
946                 << execModeValue.z() << "\n";
947             break;
948         }
949     }
950 
951     out << "%id        = OpVariable %uvec3ptr Input\n"
952            "%zero      = OpConstant %i32 0 \n";
953 
954     switch (workgroupSizeType)
955     {
956     case LSV_NONE: /* Do nothing */
957         break;
958     case LSV_LITERAL:
959         out << "%wgs_0  = OpConstant %u32 " << workGroupSize.x()
960             << "\n"
961                "%wgs_1  = OpConstant %u32 "
962             << workGroupSize.y()
963             << "\n"
964                "%wgs_2  = OpConstant %u32 "
965             << workGroupSize.z()
966             << "\n"
967                "%gl_WorkGroupSize = OpConstantComposite %uvec3 %wgs_0 %wgs_1 %wgs_2\n";
968         break;
969     case LSV_SPEC_CONST:
970         out << "%wgs_0  = OpSpecConstant %u32 " << workGroupSize.x()
971             << "\n"
972                "%wgs_1  = OpSpecConstant %u32 "
973             << workGroupSize.y()
974             << "\n"
975                "%wgs_2  = OpSpecConstant %u32 "
976             << workGroupSize.z()
977             << "\n"
978                "%gl_WorkGroupSize = OpSpecConstantComposite %uvec3 %wgs_0 %wgs_1 %wgs_2\n";
979         break;
980     }
981 
982     out << "%main      = OpFunction %void None %voidf\n"
983            "%label     = OpLabel\n"
984            "%idval     = OpLoad %uvec3 %id\n"
985            "%ndx       = OpCompositeExtract %u32 %idval "
986         << ndx
987         << "\n"
988 
989            "%inloc     = OpAccessChain %f32ptr %indata %zero %ndx\n"
990            "%inval     = OpLoad %f32 %inloc\n"
991            "%neg       = OpFNegate %f32 %inval\n"
992            "%outloc    = OpAccessChain %f32ptr %outdata %zero %ndx\n"
993            "             OpStore %outloc %neg\n"
994            "             OpReturn\n"
995            "             OpFunctionEnd\n";
996 
997     return out.str();
998 }
999 
localSizeModeToString(LocalSizeValueType t)1000 static string localSizeModeToString(LocalSizeValueType t)
1001 {
1002     switch (t)
1003     {
1004     case LSV_NONE:
1005         return "none";
1006     case LSV_LITERAL:
1007         return "literal";
1008     case LSV_SPEC_CONST:
1009         return "specid";
1010     default:
1011         assert(0);
1012         return "INVALID";
1013     }
1014 }
1015 
createLocalSizeGroup(tcu::TestContext & testCtx,bool useLocalSizeId)1016 tcu::TestCaseGroup *createLocalSizeGroup(tcu::TestContext &testCtx, bool useLocalSizeId)
1017 {
1018     const char *groupName[]{"localsize", "localsize_id"};
1019 
1020     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, groupName[useLocalSizeId]));
1021     ComputeShaderSpec spec;
1022     de::Random rnd(deStringHash(group->getName()));
1023     const uint32_t numElements = 64u;
1024     vector<float> positiveFloats(numElements, 0);
1025     vector<float> negativeFloats(numElements, 0);
1026 
1027     fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1028 
1029     for (size_t ndx = 0; ndx < numElements; ++ndx)
1030         negativeFloats[ndx] = -positiveFloats[ndx];
1031 
1032     spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1033     spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1034 
1035     if (useLocalSizeId)
1036     {
1037         spec.spirvVersion = SPIRV_VERSION_1_5;
1038         spec.extensions.push_back("VK_KHR_maintenance4");
1039     }
1040 
1041     struct testCase
1042     {
1043         std::string nameSuffix;
1044         IVec3 numWorkGroups;
1045         IVec3 localSize;
1046         uint32_t ndx;
1047     } cases[] = {{"", IVec3(numElements, 1, 1), IVec3(1, 1, 1), 0u},
1048                  {"_x", IVec3(1, 1, 1), IVec3(numElements, 1, 1), 0u},
1049                  {"_y", IVec3(1, 1, 1), IVec3(1, numElements, 1), 1u},
1050                  {"_z", IVec3(1, 1, 1), IVec3(1, 1, numElements), 2u}};
1051 
1052     for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); i++)
1053     {
1054         for (int j = 0; j < 3; j++)
1055         {
1056             for (int k = 0; k < 3; k++)
1057             {
1058                 LocalSizeValueType execModeType = (LocalSizeValueType)j;
1059                 LocalSizeValueType wgSizeType   = (LocalSizeValueType)k;
1060 
1061                 // Something has to specify the local size.
1062                 if (execModeType == LSV_NONE && wgSizeType == LSV_NONE)
1063                     continue;
1064                 // Spec constants not allowed for LocalSize (must use the Id variant)
1065                 if (execModeType == LSV_SPEC_CONST && !useLocalSizeId)
1066                     continue;
1067 
1068                 string testName = localSizeModeToString(execModeType) + "_wgsize_" + localSizeModeToString(wgSizeType) +
1069                                   cases[i].nameSuffix;
1070 
1071                 spec.numWorkGroups = cases[i].numWorkGroups;
1072 
1073                 spec.assembly =
1074                     getAsmForLocalSizeTest(useLocalSizeId, execModeType, wgSizeType, cases[i].localSize, cases[i].ndx);
1075                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
1076             }
1077         }
1078     }
1079 
1080     return group.release();
1081 }
1082 
createOpNopGroup(tcu::TestContext & testCtx)1083 tcu::TestCaseGroup *createOpNopGroup(tcu::TestContext &testCtx)
1084 {
1085     // Test the OpNop instruction
1086     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnop"));
1087     ComputeShaderSpec spec;
1088     de::Random rnd(deStringHash(group->getName()));
1089     const int numElements = 100;
1090     vector<float> positiveFloats(numElements, 0);
1091     vector<float> negativeFloats(numElements, 0);
1092 
1093     fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1094 
1095     for (size_t ndx = 0; ndx < numElements; ++ndx)
1096         negativeFloats[ndx] = -positiveFloats[ndx];
1097 
1098     spec.assembly = string(getComputeAsmShaderPreamble()) +
1099 
1100                     "OpSource GLSL 430\n"
1101                     "OpName %main           \"main\"\n"
1102                     "OpName %id             \"gl_GlobalInvocationID\"\n"
1103 
1104                     "OpDecorate %id BuiltIn GlobalInvocationId\n"
1105 
1106                     + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1107 
1108                     + string(getComputeAsmInputOutputBuffer()) +
1109 
1110                     "%id        = OpVariable %uvec3ptr Input\n"
1111                     "%zero      = OpConstant %i32 0\n"
1112 
1113                     "%main      = OpFunction %void None %voidf\n"
1114                     "%label     = OpLabel\n"
1115                     "%idval     = OpLoad %uvec3 %id\n"
1116                     "%x         = OpCompositeExtract %u32 %idval 0\n"
1117 
1118                     "             OpNop\n" // Inside a function body
1119 
1120                     "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1121                     "%inval     = OpLoad %f32 %inloc\n"
1122                     "%neg       = OpFNegate %f32 %inval\n"
1123                     "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1124                     "             OpStore %outloc %neg\n"
1125                     "             OpReturn\n"
1126                     "             OpFunctionEnd\n";
1127     spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1128     spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1129     spec.numWorkGroups = IVec3(numElements, 1, 1);
1130 
1131     // OpNop appearing at different places
1132     group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
1133 
1134     return group.release();
1135 }
1136 
createUnusedVariableComputeTests(tcu::TestContext & testCtx)1137 tcu::TestCaseGroup *createUnusedVariableComputeTests(tcu::TestContext &testCtx)
1138 {
1139     // Compute shaders with unused variables
1140     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "unused_variables"));
1141     de::Random rnd(deStringHash(group->getName()));
1142     const int numElements = 100;
1143     vector<float> positiveFloats(numElements, 0);
1144     vector<float> negativeFloats(numElements, 0);
1145 
1146     fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1147 
1148     for (size_t ndx = 0; ndx < numElements; ++ndx)
1149         negativeFloats[ndx] = -positiveFloats[ndx];
1150 
1151     const VariableLocation testLocations[] = {
1152         // Set        Binding
1153         {0, 5},
1154         {5, 5},
1155     };
1156 
1157     for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
1158     {
1159         const VariableLocation &location = testLocations[locationNdx];
1160 
1161         // Unused variable.
1162         {
1163             ComputeShaderSpec spec;
1164 
1165             spec.assembly = string(getComputeAsmShaderPreamble()) +
1166 
1167                             "OpDecorate %id BuiltIn GlobalInvocationId\n"
1168 
1169                             + getUnusedDecorations(location)
1170 
1171                             + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1172 
1173                             + getUnusedTypesAndConstants()
1174 
1175                             + string(getComputeAsmInputOutputBuffer())
1176 
1177                             + getUnusedBuffer() +
1178 
1179                             "%id        = OpVariable %uvec3ptr Input\n"
1180                             "%zero      = OpConstant %i32 0\n"
1181 
1182                             "%main      = OpFunction %void None %voidf\n"
1183                             "%label     = OpLabel\n"
1184                             "%idval     = OpLoad %uvec3 %id\n"
1185                             "%x         = OpCompositeExtract %u32 %idval 0\n"
1186 
1187                             "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1188                             "%inval     = OpLoad %f32 %inloc\n"
1189                             "%neg       = OpFNegate %f32 %inval\n"
1190                             "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1191                             "             OpStore %outloc %neg\n"
1192                             "             OpReturn\n"
1193                             "             OpFunctionEnd\n";
1194             spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1195             spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1196             spec.numWorkGroups = IVec3(numElements, 1, 1);
1197 
1198             std::string testName = "variable_" + location.toString();
1199 
1200             group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
1201         }
1202 
1203         // Unused function.
1204         {
1205             ComputeShaderSpec spec;
1206 
1207             spec.assembly = string(getComputeAsmShaderPreamble("", "", "", getUnusedEntryPoint())) +
1208 
1209                             "OpDecorate %id BuiltIn GlobalInvocationId\n"
1210 
1211                             + getUnusedDecorations(location)
1212 
1213                             + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1214 
1215                             + getUnusedTypesAndConstants() +
1216 
1217                             "%c_i32_0 = OpConstant %i32 0\n"
1218                             "%c_i32_1 = OpConstant %i32 1\n"
1219 
1220                             + string(getComputeAsmInputOutputBuffer())
1221 
1222                             + getUnusedBuffer() +
1223 
1224                             "%id        = OpVariable %uvec3ptr Input\n"
1225                             "%zero      = OpConstant %i32 0\n"
1226 
1227                             "%main      = OpFunction %void None %voidf\n"
1228                             "%label     = OpLabel\n"
1229                             "%idval     = OpLoad %uvec3 %id\n"
1230                             "%x         = OpCompositeExtract %u32 %idval 0\n"
1231 
1232                             "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1233                             "%inval     = OpLoad %f32 %inloc\n"
1234                             "%neg       = OpFNegate %f32 %inval\n"
1235                             "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1236                             "             OpStore %outloc %neg\n"
1237                             "             OpReturn\n"
1238                             "             OpFunctionEnd\n"
1239 
1240                             + getUnusedFunctionBody();
1241 
1242             spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1243             spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1244             spec.numWorkGroups = IVec3(numElements, 1, 1);
1245 
1246             std::string testName = "function_" + location.toString();
1247 
1248             group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
1249         }
1250     }
1251 
1252     return group.release();
1253 }
1254 
1255 template <bool nanSupported>
compareFUnord(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)1256 bool compareFUnord(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
1257                    const std::vector<Resource> &expectedOutputs, TestLog &log)
1258 {
1259     if (outputAllocs.size() != 1)
1260         return false;
1261 
1262     vector<uint8_t> input1Bytes;
1263     vector<uint8_t> input2Bytes;
1264     vector<uint8_t> expectedBytes;
1265 
1266     inputs[0].getBytes(input1Bytes);
1267     inputs[1].getBytes(input2Bytes);
1268     expectedOutputs[0].getBytes(expectedBytes);
1269 
1270     const int32_t *const expectedOutputAsInt = reinterpret_cast<const int32_t *>(&expectedBytes.front());
1271     const int32_t *const outputAsInt         = static_cast<const int32_t *>(outputAllocs[0]->getHostPtr());
1272     const float *const input1AsFloat         = reinterpret_cast<const float *>(&input1Bytes.front());
1273     const float *const input2AsFloat         = reinterpret_cast<const float *>(&input2Bytes.front());
1274     bool returnValue                         = true;
1275 
1276     for (size_t idx = 0; idx < expectedBytes.size() / sizeof(int32_t); ++idx)
1277     {
1278         if (!nanSupported && (tcu::Float32(input1AsFloat[idx]).isNaN() || tcu::Float32(input2AsFloat[idx]).isNaN()))
1279             continue;
1280 
1281         if (outputAsInt[idx] != expectedOutputAsInt[idx])
1282         {
1283             log << TestLog::Message << "ERROR: Sub-case failed. inputs: " << input1AsFloat[idx] << ","
1284                 << input2AsFloat[idx] << " output: " << outputAsInt[idx]
1285                 << " expected output: " << expectedOutputAsInt[idx] << TestLog::EndMessage;
1286             returnValue = false;
1287         }
1288     }
1289     return returnValue;
1290 }
1291 
1292 typedef VkBool32 (*compareFuncType)(float, float);
1293 
1294 struct OpFUnordCase
1295 {
1296     const char *name;
1297     const char *opCode;
1298     compareFuncType compareFunc;
1299 
OpFUnordCasevkt::SpirVAssembly::__anon8834af5b0111::OpFUnordCase1300     OpFUnordCase(const char *_name, const char *_opCode, compareFuncType _compareFunc)
1301         : name(_name)
1302         , opCode(_opCode)
1303         , compareFunc(_compareFunc)
1304     {
1305     }
1306 };
1307 
// Appends an OpFUnordCase to the in-scope vector named `cases`.
//   NAME     - stringified into the case name and pasted into the name of a local helper struct.
//   OPCODE   - opcode string stored in the case.
//   OPERATOR - C++ comparison operator applied by the generated reference function.
#define ADD_OPFUNORD_CASE(NAME, OPCODE, OPERATOR)                              \
    do                                                                         \
    {                                                                          \
        struct compare_##NAME                                                  \
        {                                                                      \
            static VkBool32 compare(float lhs, float rhs)                      \
            {                                                                  \
                if (lhs OPERATOR rhs)                                          \
                    return VK_TRUE;                                            \
                return VK_FALSE;                                               \
            }                                                                  \
        };                                                                     \
        cases.push_back(OpFUnordCase(#NAME, OPCODE, compare_##NAME::compare)); \
    } while (false)
1320 
createOpFUnordGroup(tcu::TestContext & testCtx,const bool testWithNan)1321 tcu::TestCaseGroup *createOpFUnordGroup(tcu::TestContext &testCtx, const bool testWithNan)
1322 {
1323     const string nan       = testWithNan ? "_nan" : "";
1324     const string groupName = "opfunord" + nan;
1325     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, groupName.c_str()));
1326     de::Random rnd(deStringHash(group->getName()));
1327     const int numElements = 100;
1328     vector<OpFUnordCase> cases;
1329     string extensions   = testWithNan ? "OpExtension \"SPV_KHR_float_controls\"\n" : "";
1330     string capabilities = testWithNan ? "OpCapability SignedZeroInfNanPreserve\n" : "";
1331     string exeModes     = testWithNan ? "OpExecutionMode %main SignedZeroInfNanPreserve 32\n" : "";
1332     const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble(capabilities, extensions, exeModes)) +
1333                                         "OpSource GLSL 430\n"
1334                                         "OpName %main           \"main\"\n"
1335                                         "OpName %id             \"gl_GlobalInvocationID\"\n"
1336 
1337                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
1338 
1339                                         "OpDecorate %buf BufferBlock\n"
1340                                         "OpDecorate %buf2 BufferBlock\n"
1341                                         "OpDecorate %indata1 DescriptorSet 0\n"
1342                                         "OpDecorate %indata1 Binding 0\n"
1343                                         "OpDecorate %indata2 DescriptorSet 0\n"
1344                                         "OpDecorate %indata2 Binding 1\n"
1345                                         "OpDecorate %outdata DescriptorSet 0\n"
1346                                         "OpDecorate %outdata Binding 2\n"
1347                                         "OpDecorate %f32arr ArrayStride 4\n"
1348                                         "OpDecorate %i32arr ArrayStride 4\n"
1349                                         "OpMemberDecorate %buf 0 Offset 0\n"
1350                                         "OpMemberDecorate %buf2 0 Offset 0\n"
1351 
1352                                         + string(getComputeAsmCommonTypes()) +
1353 
1354                                         "%buf        = OpTypeStruct %f32arr\n"
1355                                         "%bufptr     = OpTypePointer Uniform %buf\n"
1356                                         "%indata1    = OpVariable %bufptr Uniform\n"
1357                                         "%indata2    = OpVariable %bufptr Uniform\n"
1358 
1359                                         "%buf2       = OpTypeStruct %i32arr\n"
1360                                         "%buf2ptr    = OpTypePointer Uniform %buf2\n"
1361                                         "%outdata    = OpVariable %buf2ptr Uniform\n"
1362 
1363                                         "%id        = OpVariable %uvec3ptr Input\n"
1364                                         "%zero      = OpConstant %i32 0\n"
1365                                         "%consti1   = OpConstant %i32 1\n"
1366                                         "%constf1   = OpConstant %f32 1.0\n"
1367 
1368                                         "%main      = OpFunction %void None %voidf\n"
1369                                         "%label     = OpLabel\n"
1370                                         "%idval     = OpLoad %uvec3 %id\n"
1371                                         "%x         = OpCompositeExtract %u32 %idval 0\n"
1372 
1373                                         "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
1374                                         "%inval1    = OpLoad %f32 %inloc1\n"
1375                                         "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
1376                                         "%inval2    = OpLoad %f32 %inloc2\n"
1377                                         "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
1378 
1379                                         "%result    = ${OPCODE} %bool %inval1 %inval2\n"
1380                                         "%int_res   = OpSelect %i32 %result %consti1 %zero\n"
1381                                         "             OpStore %outloc %int_res\n"
1382 
1383                                         "             OpReturn\n"
1384                                         "             OpFunctionEnd\n");
1385 
1386     ADD_OPFUNORD_CASE(equal, "OpFUnordEqual", ==);
1387     ADD_OPFUNORD_CASE(less, "OpFUnordLessThan", <);
1388     ADD_OPFUNORD_CASE(lessequal, "OpFUnordLessThanEqual", <=);
1389     ADD_OPFUNORD_CASE(greater, "OpFUnordGreaterThan", >);
1390     ADD_OPFUNORD_CASE(greaterequal, "OpFUnordGreaterThanEqual", >=);
1391     ADD_OPFUNORD_CASE(notequal, "OpFUnordNotEqual", !=);
1392 
1393     for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1394     {
1395         map<string, string> specializations;
1396         ComputeShaderSpec spec;
1397         const float NaN = std::numeric_limits<float>::quiet_NaN();
1398         vector<float> inputFloats1(numElements, 0);
1399         vector<float> inputFloats2(numElements, 0);
1400         vector<int32_t> expectedInts(numElements, 0);
1401 
1402         specializations["OPCODE"] = cases[caseNdx].opCode;
1403         spec.assembly             = shaderTemplate.specialize(specializations);
1404 
1405         fillRandomScalars(rnd, 1.f, 100.f, &inputFloats1[0], numElements);
1406         for (size_t ndx = 0; ndx < numElements; ++ndx)
1407         {
1408             switch (ndx % 6)
1409             {
1410             case 0:
1411                 inputFloats2[ndx] = inputFloats1[ndx] + 1.0f;
1412                 break;
1413             case 1:
1414                 inputFloats2[ndx] = inputFloats1[ndx] - 1.0f;
1415                 break;
1416             case 2:
1417                 inputFloats2[ndx] = inputFloats1[ndx];
1418                 break;
1419             case 3:
1420                 inputFloats2[ndx] = NaN;
1421                 break;
1422             case 4:
1423                 inputFloats2[ndx] = inputFloats1[ndx];
1424                 inputFloats1[ndx] = NaN;
1425                 break;
1426             case 5:
1427                 inputFloats2[ndx] = NaN;
1428                 inputFloats1[ndx] = NaN;
1429                 break;
1430             }
1431             expectedInts[ndx] = tcu::Float32(inputFloats1[ndx]).isNaN() || tcu::Float32(inputFloats2[ndx]).isNaN() ||
1432                                 cases[caseNdx].compareFunc(inputFloats1[ndx], inputFloats2[ndx]);
1433         }
1434 
1435         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1436         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1437         spec.outputs.push_back(BufferSp(new Int32Buffer(expectedInts)));
1438         spec.numWorkGroups = IVec3(numElements, 1, 1);
1439         spec.verifyIO      = testWithNan ? &compareFUnord<true> : &compareFUnord<false>;
1440 
1441         if (testWithNan)
1442         {
1443             spec.extensions.push_back("VK_KHR_shader_float_controls");
1444             spec.requestedVulkanFeatures.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat32 = true;
1445         }
1446 
1447         group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
1448     }
1449 
1450     return group.release();
1451 }
1452 
1453 struct OpAtomicCase
1454 {
1455     const char *name;
1456     const char *assembly;
1457     const char *retValAssembly;
1458     OpAtomicType opAtomic;
1459     int32_t numOutputElements;
1460 
OpAtomicCasevkt::SpirVAssembly::__anon8834af5b0111::OpAtomicCase1461     OpAtomicCase(const char *_name, const char *_assembly, const char *_retValAssembly, OpAtomicType _opAtomic,
1462                  int32_t _numOutputElements)
1463         : name(_name)
1464         , assembly(_assembly)
1465         , retValAssembly(_retValAssembly)
1466         , opAtomic(_opAtomic)
1467         , numOutputElements(_numOutputElements)
1468     {
1469     }
1470 };
1471 
createOpAtomicGroup(tcu::TestContext & testCtx,bool useStorageBuffer,int numElements=65535,bool verifyReturnValues=false,bool volatileAtomic=false)1472 tcu::TestCaseGroup *createOpAtomicGroup(tcu::TestContext &testCtx, bool useStorageBuffer, int numElements = 65535,
1473                                         bool verifyReturnValues = false, bool volatileAtomic = false)
1474 {
1475     std::string groupName("opatomic");
1476     if (useStorageBuffer)
1477         groupName += "_storage_buffer";
1478     if (verifyReturnValues)
1479         groupName += "_return_values";
1480     if (volatileAtomic)
1481         groupName += "_volatile";
1482     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, groupName.c_str()));
1483     vector<OpAtomicCase> cases;
1484 
1485     const StringTemplate shaderTemplate(
1486 
1487         string("OpCapability Shader\n") + (volatileAtomic ? "OpCapability VulkanMemoryModelKHR\n" : "") +
1488         (useStorageBuffer ? "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n" : "") +
1489         (volatileAtomic ? "OpExtension \"SPV_KHR_vulkan_memory_model\"\n" : "") +
1490         (volatileAtomic ? "OpMemoryModel Logical VulkanKHR\n" : "OpMemoryModel Logical GLSL450\n") +
1491         "OpEntryPoint GLCompute %main \"main\" %id\n"
1492         "OpExecutionMode %main LocalSize 1 1 1\n" +
1493 
1494         "OpSource GLSL 430\n"
1495         "OpName %main           \"main\"\n"
1496         "OpName %id             \"gl_GlobalInvocationID\"\n"
1497 
1498         "OpDecorate %id BuiltIn GlobalInvocationId\n"
1499 
1500         "OpDecorate %buf ${BLOCK_DECORATION}\n"
1501         "OpDecorate %indata DescriptorSet 0\n"
1502         "OpDecorate %indata Binding 0\n"
1503         "OpDecorate %i32arr ArrayStride 4\n"
1504         "OpMemberDecorate %buf 0 Offset 0\n"
1505 
1506         "OpDecorate %sumbuf ${BLOCK_DECORATION}\n"
1507         "OpDecorate %sum DescriptorSet 0\n"
1508         "OpDecorate %sum Binding 1\n"
1509         "OpMemberDecorate %sumbuf 0 Offset 0\n"
1510 
1511         "${RETVAL_BUF_DECORATE}"
1512 
1513         + getComputeAsmCommonTypes("${BLOCK_POINTER_TYPE}") +
1514 
1515         "%buf       = OpTypeStruct %i32arr\n"
1516         "%bufptr    = OpTypePointer ${BLOCK_POINTER_TYPE} %buf\n"
1517         "%indata    = OpVariable %bufptr ${BLOCK_POINTER_TYPE}\n"
1518 
1519         "%sumbuf    = OpTypeStruct %i32arr\n"
1520         "%sumbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %sumbuf\n"
1521         "%sum       = OpVariable %sumbufptr ${BLOCK_POINTER_TYPE}\n"
1522 
1523         "${RETVAL_BUF_DECL}"
1524 
1525         "%id        = OpVariable %uvec3ptr Input\n"
1526         "%minusone  = OpConstant %i32 -1\n"
1527         "%zero      = OpConstant %i32 0\n"
1528         "%one       = OpConstant %u32 1\n"
1529         "%two       = OpConstant %i32 2\n"
1530         "%five      = OpConstant %i32 5\n"
1531         "%volbit    = OpConstant %i32 32768\n"
1532 
1533         "%main      = OpFunction %void None %voidf\n"
1534         "%label     = OpLabel\n"
1535         "%idval     = OpLoad %uvec3 %id\n"
1536         "%x         = OpCompositeExtract %u32 %idval 0\n"
1537 
1538         "%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
1539         "%inval     = OpLoad %i32 %inloc\n"
1540 
1541         "%outloc    = OpAccessChain %i32ptr %sum %zero ${INDEX}\n"
1542         "${INSTRUCTION}"
1543         "${RETVAL_ASSEMBLY}"
1544 
1545         "             OpReturn\n"
1546         "             OpFunctionEnd\n");
1547 
1548 #define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS)               \
1549     do                                                                                                  \
1550     {                                                                                                   \
1551         cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS)); \
1552     } while (false)
1553 #define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) \
1554     ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, 1)
1555 #define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) \
1556     ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, numElements)
1557 
1558     ADD_OPATOMIC_CASE_1(iadd, "%retv      = OpAtomicIAdd %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1559                         "             OpStore %retloc %retv\n", OPATOMIC_IADD);
1560     ADD_OPATOMIC_CASE_1(isub, "%retv      = OpAtomicISub %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1561                         "             OpStore %retloc %retv\n", OPATOMIC_ISUB);
1562     ADD_OPATOMIC_CASE_1(iinc, "%retv      = OpAtomicIIncrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1563                         "             OpStore %retloc %retv\n", OPATOMIC_IINC);
1564     ADD_OPATOMIC_CASE_1(idec, "%retv      = OpAtomicIDecrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1565                         "             OpStore %retloc %retv\n", OPATOMIC_IDEC);
1566     if (!verifyReturnValues)
1567     {
1568         ADD_OPATOMIC_CASE_N(load,
1569                             "%inval2    = OpAtomicLoad %i32 %inloc ${SCOPE} ${SEMANTICS}\n"
1570                             "             OpStore %outloc %inval2\n",
1571                             "", OPATOMIC_LOAD);
1572         ADD_OPATOMIC_CASE_N(store, "             OpAtomicStore %outloc ${SCOPE} ${SEMANTICS} %inval\n", "",
1573                             OPATOMIC_STORE);
1574     }
1575 
1576     ADD_OPATOMIC_CASE_N(
1577         compex,
1578         "%even      = OpSMod %i32 %inval %two\n"
1579         "             OpStore %outloc %even\n"
1580         "%retv      = OpAtomicCompareExchange %i32 %outloc ${SCOPE} ${SEMANTICS} ${SEMANTICS} %minusone %zero\n",
1581         "              OpStore %retloc %retv\n", OPATOMIC_COMPEX);
1582 
1583 #undef ADD_OPATOMIC_CASE
1584 #undef ADD_OPATOMIC_CASE_1
1585 #undef ADD_OPATOMIC_CASE_N
1586 
1587     for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1588     {
1589         map<string, string> specializations;
1590         ComputeShaderSpec spec;
1591         vector<int32_t> inputInts(numElements, 0);
1592         vector<int32_t> expected(cases[caseNdx].numOutputElements, -1);
1593 
1594         if (volatileAtomic)
1595         {
1596             spec.extensions.push_back("VK_KHR_vulkan_memory_model");
1597             spec.requestedVulkanFeatures.extVulkanMemoryModel.vulkanMemoryModel = true;
1598 
1599             // volatile, queuefamily scope
1600             specializations["SEMANTICS"] = "%volbit";
1601             specializations["SCOPE"]     = "%five";
1602         }
1603         else
1604         {
1605             // non-volatile, device scope
1606             specializations["SEMANTICS"] = "%zero";
1607             specializations["SCOPE"]     = "%one";
1608         }
1609         specializations["INDEX"]              = (cases[caseNdx].numOutputElements == 1) ? "%zero" : "%x";
1610         specializations["INSTRUCTION"]        = cases[caseNdx].assembly;
1611         specializations["BLOCK_DECORATION"]   = useStorageBuffer ? "Block" : "BufferBlock";
1612         specializations["BLOCK_POINTER_TYPE"] = useStorageBuffer ? "StorageBuffer" : "Uniform";
1613 
1614         if (verifyReturnValues)
1615         {
1616             const StringTemplate blockDecoration("\n"
1617                                                  "OpDecorate %retbuf ${BLOCK_DECORATION}\n"
1618                                                  "OpDecorate %ret DescriptorSet 0\n"
1619                                                  "OpDecorate %ret Binding 2\n"
1620                                                  "OpMemberDecorate %retbuf 0 Offset 0\n\n");
1621 
1622             const StringTemplate blockDeclaration("\n"
1623                                                   "%retbuf    = OpTypeStruct %i32arr\n"
1624                                                   "%retbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %retbuf\n"
1625                                                   "%ret       = OpVariable %retbufptr ${BLOCK_POINTER_TYPE}\n\n");
1626 
1627             specializations["RETVAL_ASSEMBLY"] =
1628                 "%retloc    = OpAccessChain %i32ptr %ret %zero %x\n" + std::string(cases[caseNdx].retValAssembly);
1629 
1630             specializations["RETVAL_BUF_DECORATE"] = blockDecoration.specialize(specializations);
1631             specializations["RETVAL_BUF_DECL"]     = blockDeclaration.specialize(specializations);
1632         }
1633         else
1634         {
1635             specializations["RETVAL_ASSEMBLY"]     = "";
1636             specializations["RETVAL_BUF_DECORATE"] = "";
1637             specializations["RETVAL_BUF_DECL"]     = "";
1638         }
1639 
1640         spec.assembly = shaderTemplate.specialize(specializations);
1641 
1642         // Specialize one more time, to catch things that were in a template parameter
1643         const StringTemplate assemblyTemplate(spec.assembly);
1644         spec.assembly = assemblyTemplate.specialize(specializations);
1645 
1646         if (useStorageBuffer)
1647             spec.extensions.push_back("VK_KHR_storage_buffer_storage_class");
1648 
1649         spec.inputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements,
1650                                                           cases[caseNdx].opAtomic, BUFFERTYPE_INPUT)));
1651         spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements,
1652                                                            cases[caseNdx].opAtomic, BUFFERTYPE_EXPECTED)));
1653         if (verifyReturnValues)
1654             spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements,
1655                                                                cases[caseNdx].opAtomic, BUFFERTYPE_ATOMIC_RET)));
1656         spec.numWorkGroups = IVec3(numElements, 1, 1);
1657 
1658         if (verifyReturnValues)
1659         {
1660             switch (cases[caseNdx].opAtomic)
1661             {
1662             case OPATOMIC_IADD:
1663                 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IADD>;
1664                 break;
1665             case OPATOMIC_ISUB:
1666                 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_ISUB>;
1667                 break;
1668             case OPATOMIC_IINC:
1669                 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IINC>;
1670                 break;
1671             case OPATOMIC_IDEC:
1672                 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IDEC>;
1673                 break;
1674             case OPATOMIC_COMPEX:
1675                 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_COMPEX>;
1676                 break;
1677             default:
1678                 DE_FATAL("Unsupported OpAtomic type for return value verification");
1679             }
1680         }
1681         group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
1682     }
1683 
1684     return group.release();
1685 }
1686 
createOpLineGroup(tcu::TestContext & testCtx)1687 tcu::TestCaseGroup *createOpLineGroup(tcu::TestContext &testCtx)
1688 {
1689     // Test the OpLine instruction
1690     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opline"));
1691     ComputeShaderSpec spec;
1692     de::Random rnd(deStringHash(group->getName()));
1693     const int numElements = 100;
1694     vector<float> positiveFloats(numElements, 0);
1695     vector<float> negativeFloats(numElements, 0);
1696 
1697     fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1698 
1699     for (size_t ndx = 0; ndx < numElements; ++ndx)
1700         negativeFloats[ndx] = -positiveFloats[ndx];
1701 
1702     spec.assembly = string(getComputeAsmShaderPreamble()) +
1703 
1704                     "%fname1 = OpString \"negateInputs.comp\"\n"
1705                     "%fname2 = OpString \"negateInputs\"\n"
1706 
1707                     "OpSource GLSL 430\n"
1708                     "OpName %main           \"main\"\n"
1709                     "OpName %id             \"gl_GlobalInvocationID\"\n"
1710 
1711                     "OpDecorate %id BuiltIn GlobalInvocationId\n"
1712 
1713                     + string(getComputeAsmInputOutputBufferTraits()) +
1714 
1715                     "OpLine %fname1 0 0\n" // At the earliest possible position
1716 
1717                     + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1718 
1719                     "OpLine %fname1 0 1\n" // Multiple OpLines in sequence
1720                     "OpLine %fname2 1 0\n" // Different filenames
1721                     "OpLine %fname1 1000 100000\n"
1722 
1723                     "%id        = OpVariable %uvec3ptr Input\n"
1724                     "%zero      = OpConstant %i32 0\n"
1725 
1726                     "OpLine %fname1 1 1\n" // Before a function
1727 
1728                     "%main      = OpFunction %void None %voidf\n"
1729                     "%label     = OpLabel\n"
1730 
1731                     "OpLine %fname1 1 1\n" // In a function
1732 
1733                     "%idval     = OpLoad %uvec3 %id\n"
1734                     "%x         = OpCompositeExtract %u32 %idval 0\n"
1735                     "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1736                     "%inval     = OpLoad %f32 %inloc\n"
1737                     "%neg       = OpFNegate %f32 %inval\n"
1738                     "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1739                     "             OpStore %outloc %neg\n"
1740                     "             OpReturn\n"
1741                     "             OpFunctionEnd\n";
1742     spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1743     spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1744     spec.numWorkGroups = IVec3(numElements, 1, 1);
1745 
1746     group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
1747 
1748     return group.release();
1749 }
1750 
veryfiBinaryShader(const ProgramBinary & binary)1751 bool veryfiBinaryShader(const ProgramBinary &binary)
1752 {
1753     const size_t paternCount           = 3u;
1754     bool paternsCheck[paternCount]     = {false, false, false};
1755     const string patersns[paternCount] = {"VULKAN CTS", "Negative values", "Date: 2017/09/21"};
1756     size_t paternNdx                   = 0u;
1757 
1758     for (size_t ndx = 0u; ndx < binary.getSize(); ++ndx)
1759     {
1760         if (false == paternsCheck[paternNdx] && patersns[paternNdx][0] == static_cast<char>(binary.getBinary()[ndx]) &&
1761             deMemoryEqual((const char *)&binary.getBinary()[ndx], &patersns[paternNdx][0],
1762                           patersns[paternNdx].length()))
1763         {
1764             paternsCheck[paternNdx] = true;
1765             paternNdx++;
1766             if (paternNdx == paternCount)
1767                 break;
1768         }
1769     }
1770 
1771     for (size_t ndx = 0u; ndx < paternCount; ++ndx)
1772     {
1773         if (!paternsCheck[ndx])
1774             return false;
1775     }
1776 
1777     return true;
1778 }
1779 
createOpModuleProcessedGroup(tcu::TestContext & testCtx)1780 tcu::TestCaseGroup *createOpModuleProcessedGroup(tcu::TestContext &testCtx)
1781 {
1782     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opmoduleprocessed"));
1783     ComputeShaderSpec spec;
1784     de::Random rnd(deStringHash(group->getName()));
1785     const int numElements = 10;
1786     vector<float> positiveFloats(numElements, 0);
1787     vector<float> negativeFloats(numElements, 0);
1788 
1789     fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1790 
1791     for (size_t ndx = 0; ndx < numElements; ++ndx)
1792         negativeFloats[ndx] = -positiveFloats[ndx];
1793 
1794     spec.assembly = string(getComputeAsmShaderPreamble()) +
1795                     "%fname = OpString \"negateInputs.comp\"\n"
1796 
1797                     "OpSource GLSL 430\n"
1798                     "OpName %main           \"main\"\n"
1799                     "OpName %id             \"gl_GlobalInvocationID\"\n"
1800                     "OpModuleProcessed \"VULKAN CTS\"\n" //OpModuleProcessed;
1801                     "OpModuleProcessed \"Negative values\"\n"
1802                     "OpModuleProcessed \"Date: 2017/09/21\"\n"
1803                     "OpDecorate %id BuiltIn GlobalInvocationId\n"
1804 
1805                     + string(getComputeAsmInputOutputBufferTraits())
1806 
1807                     + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1808 
1809                     "OpLine %fname 0 1\n"
1810 
1811                     "OpLine %fname 1000 1\n"
1812 
1813                     "%id        = OpVariable %uvec3ptr Input\n"
1814                     "%zero      = OpConstant %i32 0\n"
1815                     "%main      = OpFunction %void None %voidf\n"
1816 
1817                     "%label     = OpLabel\n"
1818                     "%idval     = OpLoad %uvec3 %id\n"
1819                     "%x         = OpCompositeExtract %u32 %idval 0\n"
1820 
1821                     "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1822                     "%inval     = OpLoad %f32 %inloc\n"
1823                     "%neg       = OpFNegate %f32 %inval\n"
1824                     "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1825                     "             OpStore %outloc %neg\n"
1826                     "             OpReturn\n"
1827                     "             OpFunctionEnd\n";
1828     spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1829     spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1830     spec.numWorkGroups = IVec3(numElements, 1, 1);
1831     spec.verifyBinary  = veryfiBinaryShader;
1832     spec.spirvVersion  = SPIRV_VERSION_1_3;
1833 
1834     group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
1835 
1836     return group.release();
1837 }
1838 
createOpNoLineGroup(tcu::TestContext & testCtx)1839 tcu::TestCaseGroup *createOpNoLineGroup(tcu::TestContext &testCtx)
1840 {
1841     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnoline"));
1842     ComputeShaderSpec spec;
1843     de::Random rnd(deStringHash(group->getName()));
1844     const int numElements = 100;
1845     vector<float> positiveFloats(numElements, 0);
1846     vector<float> negativeFloats(numElements, 0);
1847 
1848     fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1849 
1850     for (size_t ndx = 0; ndx < numElements; ++ndx)
1851         negativeFloats[ndx] = -positiveFloats[ndx];
1852 
1853     spec.assembly = string(getComputeAsmShaderPreamble()) +
1854 
1855                     "%fname = OpString \"negateInputs.comp\"\n"
1856 
1857                     "OpSource GLSL 430\n"
1858                     "OpName %main           \"main\"\n"
1859                     "OpName %id             \"gl_GlobalInvocationID\"\n"
1860 
1861                     "OpDecorate %id BuiltIn GlobalInvocationId\n"
1862 
1863                     + string(getComputeAsmInputOutputBufferTraits()) +
1864 
1865                     "OpNoLine\n" // At the earliest possible position, without preceding OpLine
1866 
1867                     + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1868 
1869                     "OpLine %fname 0 1\n"
1870                     "OpNoLine\n" // Immediately following a preceding OpLine
1871 
1872                     "OpLine %fname 1000 1\n"
1873 
1874                     "%id        = OpVariable %uvec3ptr Input\n"
1875                     "%zero      = OpConstant %i32 0\n"
1876 
1877                     "OpNoLine\n" // Contents after the previous OpLine
1878 
1879                     "%main      = OpFunction %void None %voidf\n"
1880                     "%label     = OpLabel\n"
1881                     "%idval     = OpLoad %uvec3 %id\n"
1882                     "%x         = OpCompositeExtract %u32 %idval 0\n"
1883 
1884                     "OpNoLine\n" // Multiple OpNoLine
1885                     "OpNoLine\n"
1886                     "OpNoLine\n"
1887 
1888                     "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1889                     "%inval     = OpLoad %f32 %inloc\n"
1890                     "%neg       = OpFNegate %f32 %inval\n"
1891                     "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1892                     "             OpStore %outloc %neg\n"
1893                     "             OpReturn\n"
1894                     "             OpFunctionEnd\n";
1895     spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1896     spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1897     spec.numWorkGroups = IVec3(numElements, 1, 1);
1898 
1899     group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
1900 
1901     return group.release();
1902 }
1903 
1904 // Compare instruction for the contraction compute case.
1905 // Returns true if the output is what is expected from the test case.
compareNoContractCase(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)1906 bool compareNoContractCase(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
1907                            const std::vector<Resource> &expectedOutputs, TestLog &)
1908 {
1909     if (outputAllocs.size() != 1)
1910         return false;
1911 
1912     // Only size is needed because we are not comparing the exact values.
1913     size_t byteSize = expectedOutputs[0].getByteSize();
1914 
1915     const float *outputAsFloat = static_cast<const float *>(outputAllocs[0]->getHostPtr());
1916 
1917     for (size_t i = 0; i < byteSize / sizeof(float); ++i)
1918     {
1919         if (outputAsFloat[i] != 0.f && outputAsFloat[i] != -ldexp(1, -24))
1920         {
1921             return false;
1922         }
1923     }
1924 
1925     return true;
1926 }
1927 
createNoContractionGroup(tcu::TestContext & testCtx)1928 tcu::TestCaseGroup *createNoContractionGroup(tcu::TestContext &testCtx)
1929 {
1930     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "nocontraction"));
1931     vector<CaseParameter> cases;
1932     const int numElements = 100;
1933     vector<float> inputFloats1(numElements, 0);
1934     vector<float> inputFloats2(numElements, 0);
1935     vector<float> outputFloats(numElements, 0);
1936     const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
1937 
1938                                         "OpName %main           \"main\"\n"
1939                                         "OpName %id             \"gl_GlobalInvocationID\"\n"
1940 
1941                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
1942 
1943                                         "${DECORATION}\n"
1944 
1945                                         "OpDecorate %buf BufferBlock\n"
1946                                         "OpDecorate %indata1 DescriptorSet 0\n"
1947                                         "OpDecorate %indata1 Binding 0\n"
1948                                         "OpDecorate %indata2 DescriptorSet 0\n"
1949                                         "OpDecorate %indata2 Binding 1\n"
1950                                         "OpDecorate %outdata DescriptorSet 0\n"
1951                                         "OpDecorate %outdata Binding 2\n"
1952                                         "OpDecorate %f32arr ArrayStride 4\n"
1953                                         "OpMemberDecorate %buf 0 Offset 0\n"
1954 
1955                                         + string(getComputeAsmCommonTypes()) +
1956 
1957                                         "%buf        = OpTypeStruct %f32arr\n"
1958                                         "%bufptr     = OpTypePointer Uniform %buf\n"
1959                                         "%indata1    = OpVariable %bufptr Uniform\n"
1960                                         "%indata2    = OpVariable %bufptr Uniform\n"
1961                                         "%outdata    = OpVariable %bufptr Uniform\n"
1962 
1963                                         "%id         = OpVariable %uvec3ptr Input\n"
1964                                         "%zero       = OpConstant %i32 0\n"
1965                                         "%c_f_m1     = OpConstant %f32 -1.\n"
1966 
1967                                         "%main       = OpFunction %void None %voidf\n"
1968                                         "%label      = OpLabel\n"
1969                                         "%idval      = OpLoad %uvec3 %id\n"
1970                                         "%x          = OpCompositeExtract %u32 %idval 0\n"
1971                                         "%inloc1     = OpAccessChain %f32ptr %indata1 %zero %x\n"
1972                                         "%inval1     = OpLoad %f32 %inloc1\n"
1973                                         "%inloc2     = OpAccessChain %f32ptr %indata2 %zero %x\n"
1974                                         "%inval2     = OpLoad %f32 %inloc2\n"
1975                                         "%mul        = OpFMul %f32 %inval1 %inval2\n"
1976                                         "%add        = OpFAdd %f32 %mul %c_f_m1\n"
1977                                         "%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
1978                                         "              OpStore %outloc %add\n"
1979                                         "              OpReturn\n"
1980                                         "              OpFunctionEnd\n");
1981 
1982     cases.push_back(CaseParameter("multiplication", "OpDecorate %mul NoContraction"));
1983     cases.push_back(CaseParameter("addition", "OpDecorate %add NoContraction"));
1984     cases.push_back(CaseParameter("both", "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"));
1985 
1986     for (size_t ndx = 0; ndx < numElements; ++ndx)
1987     {
1988         inputFloats1[ndx] = 1.f + std::ldexp(1.f, -23); // 1 + 2^-23.
1989         inputFloats2[ndx] = 1.f - std::ldexp(1.f, -23); // 1 - 2^-23.
1990         // Result for (1 + 2^-23) * (1 - 2^-23) - 1. With NoContraction, the multiplication will be
1991         // conducted separately and the result is rounded to 1, or 0x1.fffffcp-1
1992         // So the final result will be 0.f or 0x1p-24.
1993         // If the operation is combined into a precise fused multiply-add, then the result would be
1994         // 2^-46 (0xa8800000).
1995         outputFloats[ndx] = 0.f;
1996     }
1997 
1998     for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1999     {
2000         map<string, string> specializations;
2001         ComputeShaderSpec spec;
2002 
2003         specializations["DECORATION"] = cases[caseNdx].param;
2004         spec.assembly                 = shaderTemplate.specialize(specializations);
2005         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2006         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2007         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2008         spec.numWorkGroups = IVec3(numElements, 1, 1);
2009         // Check against the two possible answers based on rounding mode.
2010         spec.verifyIO = &compareNoContractCase;
2011 
2012         group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
2013     }
2014     return group.release();
2015 }
2016 
compareFRem(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2017 bool compareFRem(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
2018                  const std::vector<Resource> &expectedOutputs, TestLog &)
2019 {
2020     if (outputAllocs.size() != 1)
2021         return false;
2022 
2023     vector<uint8_t> expectedBytes;
2024     expectedOutputs[0].getBytes(expectedBytes);
2025 
2026     const float *expectedOutputAsFloat = reinterpret_cast<const float *>(&expectedBytes.front());
2027     const float *outputAsFloat         = static_cast<const float *>(outputAllocs[0]->getHostPtr());
2028 
2029     for (size_t idx = 0; idx < expectedBytes.size() / sizeof(float); ++idx)
2030     {
2031         const float f0 = expectedOutputAsFloat[idx];
2032         const float f1 = outputAsFloat[idx];
2033         // \todo relative error needs to be fairly high because FRem may be implemented as
2034         // (roughly) frac(a/b)*b, so LSB errors can be magnified. But this should be fine for now.
2035         if (deFloatAbs((f1 - f0) / f0) > 0.02)
2036             return false;
2037     }
2038 
2039     return true;
2040 }
2041 
createOpFRemGroup(tcu::TestContext & testCtx)2042 tcu::TestCaseGroup *createOpFRemGroup(tcu::TestContext &testCtx)
2043 {
2044     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opfrem"));
2045     ComputeShaderSpec spec;
2046     de::Random rnd(deStringHash(group->getName()));
2047     const int numElements = 200;
2048     vector<float> inputFloats1(numElements, 0);
2049     vector<float> inputFloats2(numElements, 0);
2050     vector<float> outputFloats(numElements, 0);
2051 
2052     fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2053     fillRandomScalars(rnd, -100.f, 100.f, &inputFloats2[0], numElements);
2054 
2055     for (size_t ndx = 0; ndx < numElements; ++ndx)
2056     {
2057         // Guard against divisors near zero.
2058         if (std::fabs(inputFloats2[ndx]) < 1e-3)
2059             inputFloats2[ndx] = 8.f;
2060 
2061         // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2062         outputFloats[ndx] = std::fmod(inputFloats1[ndx], inputFloats2[ndx]);
2063     }
2064 
2065     spec.assembly = string(getComputeAsmShaderPreamble()) +
2066 
2067                     "OpName %main           \"main\"\n"
2068                     "OpName %id             \"gl_GlobalInvocationID\"\n"
2069 
2070                     "OpDecorate %id BuiltIn GlobalInvocationId\n"
2071 
2072                     "OpDecorate %buf BufferBlock\n"
2073                     "OpDecorate %indata1 DescriptorSet 0\n"
2074                     "OpDecorate %indata1 Binding 0\n"
2075                     "OpDecorate %indata2 DescriptorSet 0\n"
2076                     "OpDecorate %indata2 Binding 1\n"
2077                     "OpDecorate %outdata DescriptorSet 0\n"
2078                     "OpDecorate %outdata Binding 2\n"
2079                     "OpDecorate %f32arr ArrayStride 4\n"
2080                     "OpMemberDecorate %buf 0 Offset 0\n"
2081 
2082                     + string(getComputeAsmCommonTypes()) +
2083 
2084                     "%buf        = OpTypeStruct %f32arr\n"
2085                     "%bufptr     = OpTypePointer Uniform %buf\n"
2086                     "%indata1    = OpVariable %bufptr Uniform\n"
2087                     "%indata2    = OpVariable %bufptr Uniform\n"
2088                     "%outdata    = OpVariable %bufptr Uniform\n"
2089 
2090                     "%id        = OpVariable %uvec3ptr Input\n"
2091                     "%zero      = OpConstant %i32 0\n"
2092 
2093                     "%main      = OpFunction %void None %voidf\n"
2094                     "%label     = OpLabel\n"
2095                     "%idval     = OpLoad %uvec3 %id\n"
2096                     "%x         = OpCompositeExtract %u32 %idval 0\n"
2097                     "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2098                     "%inval1    = OpLoad %f32 %inloc1\n"
2099                     "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2100                     "%inval2    = OpLoad %f32 %inloc2\n"
2101                     "%rem       = OpFRem %f32 %inval1 %inval2\n"
2102                     "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2103                     "             OpStore %outloc %rem\n"
2104                     "             OpReturn\n"
2105                     "             OpFunctionEnd\n";
2106 
2107     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2108     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2109     spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2110     spec.numWorkGroups = IVec3(numElements, 1, 1);
2111     spec.verifyIO      = &compareFRem;
2112 
2113     group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
2114 
2115     return group.release();
2116 }
2117 
compareNMin(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2118 bool compareNMin(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
2119                  const std::vector<Resource> &expectedOutputs, TestLog &)
2120 {
2121     if (outputAllocs.size() != 1)
2122         return false;
2123 
2124     const BufferSp &expectedOutput(expectedOutputs[0].getBuffer());
2125     std::vector<uint8_t> data;
2126     expectedOutput->getBytes(data);
2127 
2128     const float *const expectedOutputAsFloat = reinterpret_cast<const float *>(&data.front());
2129     const float *const outputAsFloat         = static_cast<const float *>(outputAllocs[0]->getHostPtr());
2130 
2131     for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2132     {
2133         const float f0 = expectedOutputAsFloat[idx];
2134         const float f1 = outputAsFloat[idx];
2135 
2136         // For NMin, we accept NaN as output if both inputs were NaN.
2137         // Otherwise the NaN is the wrong choise, as on architectures that
2138         // do not handle NaN, those are huge values.
2139         if (!(tcu::Float32(f1).isNaN() && tcu::Float32(f0).isNaN()) && deFloatAbs(f1 - f0) > 0.00001f)
2140             return false;
2141     }
2142 
2143     return true;
2144 }
2145 
createOpNMinGroup(tcu::TestContext & testCtx)2146 tcu::TestCaseGroup *createOpNMinGroup(tcu::TestContext &testCtx)
2147 {
2148     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnmin"));
2149     ComputeShaderSpec spec;
2150     de::Random rnd(deStringHash(group->getName()));
2151     const int numElements = 200;
2152     vector<float> inputFloats1(numElements, 0);
2153     vector<float> inputFloats2(numElements, 0);
2154     vector<float> outputFloats(numElements, 0);
2155 
2156     fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2157     fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2158 
2159     // Make the first case a full-NAN case.
2160     inputFloats1[0] = TCU_NAN;
2161     inputFloats2[0] = TCU_NAN;
2162 
2163     for (size_t ndx = 0; ndx < numElements; ++ndx)
2164     {
2165         // By default, pick the smallest
2166         outputFloats[ndx] = std::min(inputFloats1[ndx], inputFloats2[ndx]);
2167 
2168         // Make half of the cases NaN cases
2169         if ((ndx & 1) == 0)
2170         {
2171             // Alternate between the NaN operand
2172             if ((ndx & 2) == 0)
2173             {
2174                 outputFloats[ndx] = inputFloats2[ndx];
2175                 inputFloats1[ndx] = TCU_NAN;
2176             }
2177             else
2178             {
2179                 outputFloats[ndx] = inputFloats1[ndx];
2180                 inputFloats2[ndx] = TCU_NAN;
2181             }
2182         }
2183     }
2184 
2185     spec.assembly = "OpCapability Shader\n"
2186                     "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2187                     "OpMemoryModel Logical GLSL450\n"
2188                     "OpEntryPoint GLCompute %main \"main\" %id\n"
2189                     "OpExecutionMode %main LocalSize 1 1 1\n"
2190 
2191                     "OpName %main           \"main\"\n"
2192                     "OpName %id             \"gl_GlobalInvocationID\"\n"
2193 
2194                     "OpDecorate %id BuiltIn GlobalInvocationId\n"
2195 
2196                     "OpDecorate %buf BufferBlock\n"
2197                     "OpDecorate %indata1 DescriptorSet 0\n"
2198                     "OpDecorate %indata1 Binding 0\n"
2199                     "OpDecorate %indata2 DescriptorSet 0\n"
2200                     "OpDecorate %indata2 Binding 1\n"
2201                     "OpDecorate %outdata DescriptorSet 0\n"
2202                     "OpDecorate %outdata Binding 2\n"
2203                     "OpDecorate %f32arr ArrayStride 4\n"
2204                     "OpMemberDecorate %buf 0 Offset 0\n"
2205 
2206                     + string(getComputeAsmCommonTypes()) +
2207 
2208                     "%buf        = OpTypeStruct %f32arr\n"
2209                     "%bufptr     = OpTypePointer Uniform %buf\n"
2210                     "%indata1    = OpVariable %bufptr Uniform\n"
2211                     "%indata2    = OpVariable %bufptr Uniform\n"
2212                     "%outdata    = OpVariable %bufptr Uniform\n"
2213 
2214                     "%id        = OpVariable %uvec3ptr Input\n"
2215                     "%zero      = OpConstant %i32 0\n"
2216 
2217                     "%main      = OpFunction %void None %voidf\n"
2218                     "%label     = OpLabel\n"
2219                     "%idval     = OpLoad %uvec3 %id\n"
2220                     "%x         = OpCompositeExtract %u32 %idval 0\n"
2221                     "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2222                     "%inval1    = OpLoad %f32 %inloc1\n"
2223                     "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2224                     "%inval2    = OpLoad %f32 %inloc2\n"
2225                     "%rem       = OpExtInst %f32 %std450 NMin %inval1 %inval2\n"
2226                     "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2227                     "             OpStore %outloc %rem\n"
2228                     "             OpReturn\n"
2229                     "             OpFunctionEnd\n";
2230 
2231     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2232     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2233     spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2234     spec.numWorkGroups = IVec3(numElements, 1, 1);
2235     spec.verifyIO      = &compareNMin;
2236 
2237     group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
2238 
2239     return group.release();
2240 }
2241 
compareNMax(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2242 bool compareNMax(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
2243                  const std::vector<Resource> &expectedOutputs, TestLog &)
2244 {
2245     if (outputAllocs.size() != 1)
2246         return false;
2247 
2248     const BufferSp &expectedOutput = expectedOutputs[0].getBuffer();
2249     std::vector<uint8_t> data;
2250     expectedOutput->getBytes(data);
2251 
2252     const float *const expectedOutputAsFloat = reinterpret_cast<const float *>(&data.front());
2253     const float *const outputAsFloat         = static_cast<const float *>(outputAllocs[0]->getHostPtr());
2254 
2255     for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2256     {
2257         const float f0 = expectedOutputAsFloat[idx];
2258         const float f1 = outputAsFloat[idx];
2259 
2260         // For NMax, NaN is considered acceptable result, since in
2261         // architectures that do not handle NaNs, those are huge values.
2262         if (!tcu::Float32(f1).isNaN() && deFloatAbs(f1 - f0) > 0.00001f)
2263             return false;
2264     }
2265 
2266     return true;
2267 }
2268 
createOpNMaxGroup(tcu::TestContext & testCtx)2269 tcu::TestCaseGroup *createOpNMaxGroup(tcu::TestContext &testCtx)
2270 {
2271     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnmax"));
2272     ComputeShaderSpec spec;
2273     de::Random rnd(deStringHash(group->getName()));
2274     const int numElements = 200;
2275     vector<float> inputFloats1(numElements, 0);
2276     vector<float> inputFloats2(numElements, 0);
2277     vector<float> outputFloats(numElements, 0);
2278 
2279     fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2280     fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2281 
2282     // Make the first case a full-NAN case.
2283     inputFloats1[0] = TCU_NAN;
2284     inputFloats2[0] = TCU_NAN;
2285 
2286     for (size_t ndx = 0; ndx < numElements; ++ndx)
2287     {
2288         // By default, pick the biggest
2289         outputFloats[ndx] = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2290 
2291         // Make half of the cases NaN cases
2292         if ((ndx & 1) == 0)
2293         {
2294             // Alternate between the NaN operand
2295             if ((ndx & 2) == 0)
2296             {
2297                 outputFloats[ndx] = inputFloats2[ndx];
2298                 inputFloats1[ndx] = TCU_NAN;
2299             }
2300             else
2301             {
2302                 outputFloats[ndx] = inputFloats1[ndx];
2303                 inputFloats2[ndx] = TCU_NAN;
2304             }
2305         }
2306     }
2307 
2308     spec.assembly = "OpCapability Shader\n"
2309                     "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2310                     "OpMemoryModel Logical GLSL450\n"
2311                     "OpEntryPoint GLCompute %main \"main\" %id\n"
2312                     "OpExecutionMode %main LocalSize 1 1 1\n"
2313 
2314                     "OpName %main           \"main\"\n"
2315                     "OpName %id             \"gl_GlobalInvocationID\"\n"
2316 
2317                     "OpDecorate %id BuiltIn GlobalInvocationId\n"
2318 
2319                     "OpDecorate %buf BufferBlock\n"
2320                     "OpDecorate %indata1 DescriptorSet 0\n"
2321                     "OpDecorate %indata1 Binding 0\n"
2322                     "OpDecorate %indata2 DescriptorSet 0\n"
2323                     "OpDecorate %indata2 Binding 1\n"
2324                     "OpDecorate %outdata DescriptorSet 0\n"
2325                     "OpDecorate %outdata Binding 2\n"
2326                     "OpDecorate %f32arr ArrayStride 4\n"
2327                     "OpMemberDecorate %buf 0 Offset 0\n"
2328 
2329                     + string(getComputeAsmCommonTypes()) +
2330 
2331                     "%buf        = OpTypeStruct %f32arr\n"
2332                     "%bufptr     = OpTypePointer Uniform %buf\n"
2333                     "%indata1    = OpVariable %bufptr Uniform\n"
2334                     "%indata2    = OpVariable %bufptr Uniform\n"
2335                     "%outdata    = OpVariable %bufptr Uniform\n"
2336 
2337                     "%id        = OpVariable %uvec3ptr Input\n"
2338                     "%zero      = OpConstant %i32 0\n"
2339 
2340                     "%main      = OpFunction %void None %voidf\n"
2341                     "%label     = OpLabel\n"
2342                     "%idval     = OpLoad %uvec3 %id\n"
2343                     "%x         = OpCompositeExtract %u32 %idval 0\n"
2344                     "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2345                     "%inval1    = OpLoad %f32 %inloc1\n"
2346                     "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2347                     "%inval2    = OpLoad %f32 %inloc2\n"
2348                     "%rem       = OpExtInst %f32 %std450 NMax %inval1 %inval2\n"
2349                     "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2350                     "             OpStore %outloc %rem\n"
2351                     "             OpReturn\n"
2352                     "             OpFunctionEnd\n";
2353 
2354     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2355     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2356     spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2357     spec.numWorkGroups = IVec3(numElements, 1, 1);
2358     spec.verifyIO      = &compareNMax;
2359 
2360     group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
2361 
2362     return group.release();
2363 }
2364 
compareNClamp(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2365 bool compareNClamp(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
2366                    const std::vector<Resource> &expectedOutputs, TestLog &)
2367 {
2368     if (outputAllocs.size() != 1)
2369         return false;
2370 
2371     const BufferSp &expectedOutput = expectedOutputs[0].getBuffer();
2372     std::vector<uint8_t> data;
2373     expectedOutput->getBytes(data);
2374 
2375     const float *const expectedOutputAsFloat = reinterpret_cast<const float *>(&data.front());
2376     const float *const outputAsFloat         = static_cast<const float *>(outputAllocs[0]->getHostPtr());
2377 
2378     for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float) / 2; ++idx)
2379     {
2380         const float e0  = expectedOutputAsFloat[idx * 2];
2381         const float e1  = expectedOutputAsFloat[idx * 2 + 1];
2382         const float res = outputAsFloat[idx];
2383 
2384         // For NClamp, we have two possible outcomes based on
2385         // whether NaNs are handled or not.
2386         // If either min or max value is NaN, the result is undefined,
2387         // so this test doesn't stress those. If the clamped value is
2388         // NaN, and NaNs are handled, the result is min; if NaNs are not
2389         // handled, they are big values that result in max.
2390         // If all three parameters are NaN, the result should be NaN.
2391         if (!((tcu::Float32(e0).isNaN() && tcu::Float32(res).isNaN()) || (deFloatAbs(e0 - res) < 0.00001f) ||
2392               (deFloatAbs(e1 - res) < 0.00001f)))
2393             return false;
2394     }
2395 
2396     return true;
2397 }
2398 
createOpNClampGroup(tcu::TestContext & testCtx)2399 tcu::TestCaseGroup *createOpNClampGroup(tcu::TestContext &testCtx)
2400 {
2401     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnclamp"));
2402     ComputeShaderSpec spec;
2403     de::Random rnd(deStringHash(group->getName()));
2404     const int numElements = 200;
2405     vector<float> inputFloats1(numElements, 0);
2406     vector<float> inputFloats2(numElements, 0);
2407     vector<float> inputFloats3(numElements, 0);
2408     vector<float> outputFloats(numElements * 2, 0);
2409 
2410     fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2411     fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2412     fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats3[0], numElements);
2413 
2414     for (size_t ndx = 0; ndx < numElements; ++ndx)
2415     {
2416         // Results are only defined if max value is bigger than min value.
2417         if (inputFloats2[ndx] > inputFloats3[ndx])
2418         {
2419             float t           = inputFloats2[ndx];
2420             inputFloats2[ndx] = inputFloats3[ndx];
2421             inputFloats3[ndx] = t;
2422         }
2423 
2424         // By default, do the clamp, setting both possible answers
2425         float defaultRes = std::min(std::max(inputFloats1[ndx], inputFloats2[ndx]), inputFloats3[ndx]);
2426 
2427         float maxResA = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2428         float maxResB = maxResA;
2429 
2430         // Alternate between the NaN cases
2431         if (ndx & 1)
2432         {
2433             inputFloats1[ndx] = TCU_NAN;
2434             // If NaN is handled, the result should be same as the clamp minimum.
2435             // If NaN is not handled, the result should clamp to the clamp maximum.
2436             maxResA = inputFloats2[ndx];
2437             maxResB = inputFloats3[ndx];
2438         }
2439         else
2440         {
2441             // Not a NaN case - only one legal result.
2442             maxResA = defaultRes;
2443             maxResB = defaultRes;
2444         }
2445 
2446         outputFloats[ndx * 2]     = maxResA;
2447         outputFloats[ndx * 2 + 1] = maxResB;
2448     }
2449 
2450     // Make the first case a full-NAN case.
2451     inputFloats1[0] = TCU_NAN;
2452     inputFloats2[0] = TCU_NAN;
2453     inputFloats3[0] = TCU_NAN;
2454     outputFloats[0] = TCU_NAN;
2455     outputFloats[1] = TCU_NAN;
2456 
2457     spec.assembly = "OpCapability Shader\n"
2458                     "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2459                     "OpMemoryModel Logical GLSL450\n"
2460                     "OpEntryPoint GLCompute %main \"main\" %id\n"
2461                     "OpExecutionMode %main LocalSize 1 1 1\n"
2462 
2463                     "OpName %main           \"main\"\n"
2464                     "OpName %id             \"gl_GlobalInvocationID\"\n"
2465 
2466                     "OpDecorate %id BuiltIn GlobalInvocationId\n"
2467 
2468                     "OpDecorate %buf BufferBlock\n"
2469                     "OpDecorate %indata1 DescriptorSet 0\n"
2470                     "OpDecorate %indata1 Binding 0\n"
2471                     "OpDecorate %indata2 DescriptorSet 0\n"
2472                     "OpDecorate %indata2 Binding 1\n"
2473                     "OpDecorate %indata3 DescriptorSet 0\n"
2474                     "OpDecorate %indata3 Binding 2\n"
2475                     "OpDecorate %outdata DescriptorSet 0\n"
2476                     "OpDecorate %outdata Binding 3\n"
2477                     "OpDecorate %f32arr ArrayStride 4\n"
2478                     "OpMemberDecorate %buf 0 Offset 0\n"
2479 
2480                     + string(getComputeAsmCommonTypes()) +
2481 
2482                     "%buf        = OpTypeStruct %f32arr\n"
2483                     "%bufptr     = OpTypePointer Uniform %buf\n"
2484                     "%indata1    = OpVariable %bufptr Uniform\n"
2485                     "%indata2    = OpVariable %bufptr Uniform\n"
2486                     "%indata3    = OpVariable %bufptr Uniform\n"
2487                     "%outdata    = OpVariable %bufptr Uniform\n"
2488 
2489                     "%id        = OpVariable %uvec3ptr Input\n"
2490                     "%zero      = OpConstant %i32 0\n"
2491 
2492                     "%main      = OpFunction %void None %voidf\n"
2493                     "%label     = OpLabel\n"
2494                     "%idval     = OpLoad %uvec3 %id\n"
2495                     "%x         = OpCompositeExtract %u32 %idval 0\n"
2496                     "%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2497                     "%inval1    = OpLoad %f32 %inloc1\n"
2498                     "%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2499                     "%inval2    = OpLoad %f32 %inloc2\n"
2500                     "%inloc3    = OpAccessChain %f32ptr %indata3 %zero %x\n"
2501                     "%inval3    = OpLoad %f32 %inloc3\n"
2502                     "%rem       = OpExtInst %f32 %std450 NClamp %inval1 %inval2 %inval3\n"
2503                     "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2504                     "             OpStore %outloc %rem\n"
2505                     "             OpReturn\n"
2506                     "             OpFunctionEnd\n";
2507 
2508     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2509     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2510     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
2511     spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2512     spec.numWorkGroups = IVec3(numElements, 1, 1);
2513     spec.verifyIO      = &compareNClamp;
2514 
2515     group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
2516 
2517     return group.release();
2518 }
2519 
createOpSRemComputeGroup(tcu::TestContext & testCtx,qpTestResult negFailResult)2520 tcu::TestCaseGroup *createOpSRemComputeGroup(tcu::TestContext &testCtx, qpTestResult negFailResult)
2521 {
2522     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsrem"));
2523     de::Random rnd(deStringHash(group->getName()));
2524     const int numElements = 200;
2525 
2526     const struct CaseParams
2527     {
2528         const char *name;
2529         const char *failMessage; // customized status message
2530         qpTestResult failResult; // override status on failure
2531         int op1Min, op1Max;      // operand ranges
2532         int op2Min, op2Max;
2533     } cases[] = {
2534         {"positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, 0, 65536, 0, 100},
2535         {"all", "Inconsistent results, but within specification", negFailResult, -65536, 65536, -100, 100}, // see below
2536     };
2537     // If either operand is negative the result is undefined. Some implementations may still return correct values.
2538 
2539     for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2540     {
2541         const CaseParams &params = cases[caseNdx];
2542         ComputeShaderSpec spec;
2543         vector<int32_t> inputInts1(numElements, 0);
2544         vector<int32_t> inputInts2(numElements, 0);
2545         vector<int32_t> outputInts(numElements, 0);
2546 
2547         fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2548         fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2549 
2550         for (int ndx = 0; ndx < numElements; ++ndx)
2551         {
2552             // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2553             outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2554         }
2555 
2556         spec.assembly = string(getComputeAsmShaderPreamble()) +
2557 
2558                         "OpName %main           \"main\"\n"
2559                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2560 
2561                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2562 
2563                         "OpDecorate %buf BufferBlock\n"
2564                         "OpDecorate %indata1 DescriptorSet 0\n"
2565                         "OpDecorate %indata1 Binding 0\n"
2566                         "OpDecorate %indata2 DescriptorSet 0\n"
2567                         "OpDecorate %indata2 Binding 1\n"
2568                         "OpDecorate %outdata DescriptorSet 0\n"
2569                         "OpDecorate %outdata Binding 2\n"
2570                         "OpDecorate %i32arr ArrayStride 4\n"
2571                         "OpMemberDecorate %buf 0 Offset 0\n"
2572 
2573                         + string(getComputeAsmCommonTypes()) +
2574 
2575                         "%buf        = OpTypeStruct %i32arr\n"
2576                         "%bufptr     = OpTypePointer Uniform %buf\n"
2577                         "%indata1    = OpVariable %bufptr Uniform\n"
2578                         "%indata2    = OpVariable %bufptr Uniform\n"
2579                         "%outdata    = OpVariable %bufptr Uniform\n"
2580 
2581                         "%id        = OpVariable %uvec3ptr Input\n"
2582                         "%zero      = OpConstant %i32 0\n"
2583 
2584                         "%main      = OpFunction %void None %voidf\n"
2585                         "%label     = OpLabel\n"
2586                         "%idval     = OpLoad %uvec3 %id\n"
2587                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2588                         "%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
2589                         "%inval1    = OpLoad %i32 %inloc1\n"
2590                         "%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
2591                         "%inval2    = OpLoad %i32 %inloc2\n"
2592                         "%rem       = OpSRem %i32 %inval1 %inval2\n"
2593                         "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
2594                         "             OpStore %outloc %rem\n"
2595                         "             OpReturn\n"
2596                         "             OpFunctionEnd\n";
2597 
2598         spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts1)));
2599         spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts2)));
2600         spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts)));
2601         spec.numWorkGroups = IVec3(numElements, 1, 1);
2602         spec.failResult    = params.failResult;
2603         spec.failMessage   = params.failMessage;
2604 
2605         group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, spec));
2606     }
2607 
2608     return group.release();
2609 }
2610 
createOpSRemComputeGroup64(tcu::TestContext & testCtx,qpTestResult negFailResult)2611 tcu::TestCaseGroup *createOpSRemComputeGroup64(tcu::TestContext &testCtx, qpTestResult negFailResult)
2612 {
2613     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsrem64"));
2614     de::Random rnd(deStringHash(group->getName()));
2615     const int numElements = 200;
2616 
2617     const struct CaseParams
2618     {
2619         const char *name;
2620         const char *failMessage; // customized status message
2621         qpTestResult failResult; // override status on failure
2622         bool positive;
2623     } cases[] = {
2624         {"positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, true},
2625         {"all", "Inconsistent results, but within specification", negFailResult, false}, // see below
2626     };
2627     // If either operand is negative the result is undefined. Some implementations may still return correct values.
2628 
2629     for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2630     {
2631         const CaseParams &params = cases[caseNdx];
2632         ComputeShaderSpec spec;
2633         vector<int64_t> inputInts1(numElements, 0);
2634         vector<int64_t> inputInts2(numElements, 0);
2635         vector<int64_t> outputInts(numElements, 0);
2636 
2637         if (params.positive)
2638         {
2639             fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2640             fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2641         }
2642         else
2643         {
2644             fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2645             fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2646         }
2647 
2648         for (int ndx = 0; ndx < numElements; ++ndx)
2649         {
2650             // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2651             outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2652         }
2653 
2654         spec.assembly = "OpCapability Int64\n"
2655 
2656                         + string(getComputeAsmShaderPreamble()) +
2657 
2658                         "OpName %main           \"main\"\n"
2659                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2660 
2661                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2662 
2663                         "OpDecorate %buf BufferBlock\n"
2664                         "OpDecorate %indata1 DescriptorSet 0\n"
2665                         "OpDecorate %indata1 Binding 0\n"
2666                         "OpDecorate %indata2 DescriptorSet 0\n"
2667                         "OpDecorate %indata2 Binding 1\n"
2668                         "OpDecorate %outdata DescriptorSet 0\n"
2669                         "OpDecorate %outdata Binding 2\n"
2670                         "OpDecorate %i64arr ArrayStride 8\n"
2671                         "OpMemberDecorate %buf 0 Offset 0\n"
2672 
2673                         + string(getComputeAsmCommonTypes()) + string(getComputeAsmCommonInt64Types()) +
2674 
2675                         "%buf        = OpTypeStruct %i64arr\n"
2676                         "%bufptr     = OpTypePointer Uniform %buf\n"
2677                         "%indata1    = OpVariable %bufptr Uniform\n"
2678                         "%indata2    = OpVariable %bufptr Uniform\n"
2679                         "%outdata    = OpVariable %bufptr Uniform\n"
2680 
2681                         "%id        = OpVariable %uvec3ptr Input\n"
2682                         "%zero      = OpConstant %i64 0\n"
2683 
2684                         "%main      = OpFunction %void None %voidf\n"
2685                         "%label     = OpLabel\n"
2686                         "%idval     = OpLoad %uvec3 %id\n"
2687                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2688                         "%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
2689                         "%inval1    = OpLoad %i64 %inloc1\n"
2690                         "%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
2691                         "%inval2    = OpLoad %i64 %inloc2\n"
2692                         "%rem       = OpSRem %i64 %inval1 %inval2\n"
2693                         "%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
2694                         "             OpStore %outloc %rem\n"
2695                         "             OpReturn\n"
2696                         "             OpFunctionEnd\n";
2697 
2698         spec.inputs.push_back(BufferSp(new Int64Buffer(inputInts1)));
2699         spec.inputs.push_back(BufferSp(new Int64Buffer(inputInts2)));
2700         spec.outputs.push_back(BufferSp(new Int64Buffer(outputInts)));
2701         spec.numWorkGroups = IVec3(numElements, 1, 1);
2702         spec.failResult    = params.failResult;
2703         spec.failMessage   = params.failMessage;
2704 
2705         spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2706 
2707         group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, spec));
2708     }
2709 
2710     return group.release();
2711 }
2712 
createOpSModComputeGroup(tcu::TestContext & testCtx,qpTestResult negFailResult)2713 tcu::TestCaseGroup *createOpSModComputeGroup(tcu::TestContext &testCtx, qpTestResult negFailResult)
2714 {
2715     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsmod"));
2716     de::Random rnd(deStringHash(group->getName()));
2717     const int numElements = 200;
2718 
2719     const struct CaseParams
2720     {
2721         const char *name;
2722         const char *failMessage; // customized status message
2723         qpTestResult failResult; // override status on failure
2724         int op1Min, op1Max;      // operand ranges
2725         int op2Min, op2Max;
2726     } cases[] = {
2727         {"positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, 0, 65536, 0, 100},
2728         {"all", "Inconsistent results, but within specification", negFailResult, -65536, 65536, -100, 100}, // see below
2729     };
2730     // If either operand is negative the result is undefined. Some implementations may still return correct values.
2731 
2732     for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2733     {
2734         const CaseParams &params = cases[caseNdx];
2735 
2736         ComputeShaderSpec spec;
2737         vector<int32_t> inputInts1(numElements, 0);
2738         vector<int32_t> inputInts2(numElements, 0);
2739         vector<int32_t> outputInts(numElements, 0);
2740 
2741         fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2742         fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2743 
2744         for (int ndx = 0; ndx < numElements; ++ndx)
2745         {
2746             int32_t rem = inputInts1[ndx] % inputInts2[ndx];
2747             if (rem == 0)
2748             {
2749                 outputInts[ndx] = 0;
2750             }
2751             else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2752             {
2753                 // They have the same sign
2754                 outputInts[ndx] = rem;
2755             }
2756             else
2757             {
2758                 // They have opposite sign.  The remainder operation takes the
2759                 // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2760                 // of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
2761                 // the result has the correct sign and that it is still
2762                 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2763                 //
2764                 // See also http://mathforum.org/library/drmath/view/52343.html
2765                 outputInts[ndx] = rem + inputInts2[ndx];
2766             }
2767         }
2768 
2769         spec.assembly = string(getComputeAsmShaderPreamble()) +
2770 
2771                         "OpName %main           \"main\"\n"
2772                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2773 
2774                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2775 
2776                         "OpDecorate %buf BufferBlock\n"
2777                         "OpDecorate %indata1 DescriptorSet 0\n"
2778                         "OpDecorate %indata1 Binding 0\n"
2779                         "OpDecorate %indata2 DescriptorSet 0\n"
2780                         "OpDecorate %indata2 Binding 1\n"
2781                         "OpDecorate %outdata DescriptorSet 0\n"
2782                         "OpDecorate %outdata Binding 2\n"
2783                         "OpDecorate %i32arr ArrayStride 4\n"
2784                         "OpMemberDecorate %buf 0 Offset 0\n"
2785 
2786                         + string(getComputeAsmCommonTypes()) +
2787 
2788                         "%buf        = OpTypeStruct %i32arr\n"
2789                         "%bufptr     = OpTypePointer Uniform %buf\n"
2790                         "%indata1    = OpVariable %bufptr Uniform\n"
2791                         "%indata2    = OpVariable %bufptr Uniform\n"
2792                         "%outdata    = OpVariable %bufptr Uniform\n"
2793 
2794                         "%id        = OpVariable %uvec3ptr Input\n"
2795                         "%zero      = OpConstant %i32 0\n"
2796 
2797                         "%main      = OpFunction %void None %voidf\n"
2798                         "%label     = OpLabel\n"
2799                         "%idval     = OpLoad %uvec3 %id\n"
2800                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2801                         "%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
2802                         "%inval1    = OpLoad %i32 %inloc1\n"
2803                         "%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
2804                         "%inval2    = OpLoad %i32 %inloc2\n"
2805                         "%rem       = OpSMod %i32 %inval1 %inval2\n"
2806                         "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
2807                         "             OpStore %outloc %rem\n"
2808                         "             OpReturn\n"
2809                         "             OpFunctionEnd\n";
2810 
2811         spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts1)));
2812         spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts2)));
2813         spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts)));
2814         spec.numWorkGroups = IVec3(numElements, 1, 1);
2815         spec.failResult    = params.failResult;
2816         spec.failMessage   = params.failMessage;
2817 
2818         group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, spec));
2819     }
2820 
2821     return group.release();
2822 }
2823 
createOpSModComputeGroup64(tcu::TestContext & testCtx,qpTestResult negFailResult)2824 tcu::TestCaseGroup *createOpSModComputeGroup64(tcu::TestContext &testCtx, qpTestResult negFailResult)
2825 {
2826     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsmod64"));
2827     de::Random rnd(deStringHash(group->getName()));
2828     const int numElements = 200;
2829 
2830     const struct CaseParams
2831     {
2832         const char *name;
2833         const char *failMessage; // customized status message
2834         qpTestResult failResult; // override status on failure
2835         bool positive;
2836     } cases[] = {
2837         {"positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, true},
2838         {"all", "Inconsistent results, but within specification", negFailResult, false}, // see below
2839     };
2840     // If either operand is negative the result is undefined. Some implementations may still return correct values.
2841 
2842     for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2843     {
2844         const CaseParams &params = cases[caseNdx];
2845 
2846         ComputeShaderSpec spec;
2847         vector<int64_t> inputInts1(numElements, 0);
2848         vector<int64_t> inputInts2(numElements, 0);
2849         vector<int64_t> outputInts(numElements, 0);
2850 
2851         if (params.positive)
2852         {
2853             fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2854             fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2855         }
2856         else
2857         {
2858             fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2859             fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2860         }
2861 
2862         for (int ndx = 0; ndx < numElements; ++ndx)
2863         {
2864             int64_t rem = inputInts1[ndx] % inputInts2[ndx];
2865             if (rem == 0)
2866             {
2867                 outputInts[ndx] = 0;
2868             }
2869             else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2870             {
2871                 // They have the same sign
2872                 outputInts[ndx] = rem;
2873             }
2874             else
2875             {
2876                 // They have opposite sign.  The remainder operation takes the
2877                 // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2878                 // of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
2879                 // the result has the correct sign and that it is still
2880                 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2881                 //
2882                 // See also http://mathforum.org/library/drmath/view/52343.html
2883                 outputInts[ndx] = rem + inputInts2[ndx];
2884             }
2885         }
2886 
2887         spec.assembly = "OpCapability Int64\n"
2888 
2889                         + string(getComputeAsmShaderPreamble()) +
2890 
2891                         "OpName %main           \"main\"\n"
2892                         "OpName %id             \"gl_GlobalInvocationID\"\n"
2893 
2894                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
2895 
2896                         "OpDecorate %buf BufferBlock\n"
2897                         "OpDecorate %indata1 DescriptorSet 0\n"
2898                         "OpDecorate %indata1 Binding 0\n"
2899                         "OpDecorate %indata2 DescriptorSet 0\n"
2900                         "OpDecorate %indata2 Binding 1\n"
2901                         "OpDecorate %outdata DescriptorSet 0\n"
2902                         "OpDecorate %outdata Binding 2\n"
2903                         "OpDecorate %i64arr ArrayStride 8\n"
2904                         "OpMemberDecorate %buf 0 Offset 0\n"
2905 
2906                         + string(getComputeAsmCommonTypes()) + string(getComputeAsmCommonInt64Types()) +
2907 
2908                         "%buf        = OpTypeStruct %i64arr\n"
2909                         "%bufptr     = OpTypePointer Uniform %buf\n"
2910                         "%indata1    = OpVariable %bufptr Uniform\n"
2911                         "%indata2    = OpVariable %bufptr Uniform\n"
2912                         "%outdata    = OpVariable %bufptr Uniform\n"
2913 
2914                         "%id        = OpVariable %uvec3ptr Input\n"
2915                         "%zero      = OpConstant %i64 0\n"
2916 
2917                         "%main      = OpFunction %void None %voidf\n"
2918                         "%label     = OpLabel\n"
2919                         "%idval     = OpLoad %uvec3 %id\n"
2920                         "%x         = OpCompositeExtract %u32 %idval 0\n"
2921                         "%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
2922                         "%inval1    = OpLoad %i64 %inloc1\n"
2923                         "%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
2924                         "%inval2    = OpLoad %i64 %inloc2\n"
2925                         "%rem       = OpSMod %i64 %inval1 %inval2\n"
2926                         "%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
2927                         "             OpStore %outloc %rem\n"
2928                         "             OpReturn\n"
2929                         "             OpFunctionEnd\n";
2930 
2931         spec.inputs.push_back(BufferSp(new Int64Buffer(inputInts1)));
2932         spec.inputs.push_back(BufferSp(new Int64Buffer(inputInts2)));
2933         spec.outputs.push_back(BufferSp(new Int64Buffer(outputInts)));
2934         spec.numWorkGroups = IVec3(numElements, 1, 1);
2935         spec.failResult    = params.failResult;
2936         spec.failMessage   = params.failMessage;
2937 
2938         spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2939 
2940         group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, spec));
2941     }
2942 
2943     return group.release();
2944 }
2945 
2946 // Copy contents in the input buffer to the output buffer.
createOpCopyMemoryGroup(tcu::TestContext & testCtx)2947 tcu::TestCaseGroup *createOpCopyMemoryGroup(tcu::TestContext &testCtx)
2948 {
2949     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opcopymemory"));
2950     de::Random rnd(deStringHash(group->getName()));
2951     const int numElements = 100;
2952 
2953     // The following case adds vec4(0., 0.5, 1.5, 2.5) to each of the elements in the input buffer and writes output to the output buffer.
2954     ComputeShaderSpec spec1;
2955     vector<Vec4> inputFloats1(numElements);
2956     vector<Vec4> outputFloats1(numElements);
2957 
2958     fillRandomScalars(rnd, -200.f, 200.f, &inputFloats1[0], numElements * 4);
2959 
2960     // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
2961     floorAll(inputFloats1);
2962 
2963     for (size_t ndx = 0; ndx < numElements; ++ndx)
2964         outputFloats1[ndx] = inputFloats1[ndx] + Vec4(0.f, 0.5f, 1.5f, 2.5f);
2965 
2966     spec1.assembly = string(getComputeAsmShaderPreamble()) +
2967 
2968                      "OpName %main           \"main\"\n"
2969                      "OpName %id             \"gl_GlobalInvocationID\"\n"
2970 
2971                      "OpDecorate %id BuiltIn GlobalInvocationId\n"
2972                      "OpDecorate %vec4arr ArrayStride 16\n"
2973 
2974                      + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2975 
2976                      "%vec4       = OpTypeVector %f32 4\n"
2977                      "%vec4ptr_u  = OpTypePointer Uniform %vec4\n"
2978                      "%vec4ptr_f  = OpTypePointer Function %vec4\n"
2979                      "%vec4arr    = OpTypeRuntimeArray %vec4\n"
2980                      "%buf        = OpTypeStruct %vec4arr\n"
2981                      "%bufptr     = OpTypePointer Uniform %buf\n"
2982                      "%indata     = OpVariable %bufptr Uniform\n"
2983                      "%outdata    = OpVariable %bufptr Uniform\n"
2984 
2985                      "%id         = OpVariable %uvec3ptr Input\n"
2986                      "%zero       = OpConstant %i32 0\n"
2987                      "%c_f_0      = OpConstant %f32 0.\n"
2988                      "%c_f_0_5    = OpConstant %f32 0.5\n"
2989                      "%c_f_1_5    = OpConstant %f32 1.5\n"
2990                      "%c_f_2_5    = OpConstant %f32 2.5\n"
2991                      "%c_vec4     = OpConstantComposite %vec4 %c_f_0 %c_f_0_5 %c_f_1_5 %c_f_2_5\n"
2992 
2993                      "%main       = OpFunction %void None %voidf\n"
2994                      "%label      = OpLabel\n"
2995                      "%v_vec4     = OpVariable %vec4ptr_f Function\n"
2996                      "%idval      = OpLoad %uvec3 %id\n"
2997                      "%x          = OpCompositeExtract %u32 %idval 0\n"
2998                      "%inloc      = OpAccessChain %vec4ptr_u %indata %zero %x\n"
2999                      "%outloc     = OpAccessChain %vec4ptr_u %outdata %zero %x\n"
3000                      "              OpCopyMemory %v_vec4 %inloc\n"
3001                      "%v_vec4_val = OpLoad %vec4 %v_vec4\n"
3002                      "%add        = OpFAdd %vec4 %v_vec4_val %c_vec4\n"
3003                      "              OpStore %outloc %add\n"
3004                      "              OpReturn\n"
3005                      "              OpFunctionEnd\n";
3006 
3007     spec1.inputs.push_back(BufferSp(new Vec4Buffer(inputFloats1)));
3008     spec1.outputs.push_back(BufferSp(new Vec4Buffer(outputFloats1)));
3009     spec1.numWorkGroups = IVec3(numElements, 1, 1);
3010 
3011     group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector", spec1));
3012 
3013     // The following case copies a float[100] variable from the input buffer to the output buffer.
3014     ComputeShaderSpec spec2;
3015     vector<float> inputFloats2(numElements);
3016     vector<float> outputFloats2(numElements);
3017 
3018     fillRandomScalars(rnd, -200.f, 200.f, &inputFloats2[0], numElements);
3019 
3020     for (size_t ndx = 0; ndx < numElements; ++ndx)
3021         outputFloats2[ndx] = inputFloats2[ndx];
3022 
3023     spec2.assembly = string(getComputeAsmShaderPreamble()) +
3024 
3025                      "OpName %main           \"main\"\n"
3026                      "OpName %id             \"gl_GlobalInvocationID\"\n"
3027 
3028                      "OpDecorate %id BuiltIn GlobalInvocationId\n"
3029                      "OpDecorate %f32arr100 ArrayStride 4\n"
3030 
3031                      + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3032 
3033                      "%hundred        = OpConstant %u32 100\n"
3034                      "%f32arr100      = OpTypeArray %f32 %hundred\n"
3035                      "%f32arr100ptr_f = OpTypePointer Function %f32arr100\n"
3036                      "%f32arr100ptr_u = OpTypePointer Uniform %f32arr100\n"
3037                      "%buf            = OpTypeStruct %f32arr100\n"
3038                      "%bufptr         = OpTypePointer Uniform %buf\n"
3039                      "%indata         = OpVariable %bufptr Uniform\n"
3040                      "%outdata        = OpVariable %bufptr Uniform\n"
3041 
3042                      "%id             = OpVariable %uvec3ptr Input\n"
3043                      "%zero           = OpConstant %i32 0\n"
3044 
3045                      "%main           = OpFunction %void None %voidf\n"
3046                      "%label          = OpLabel\n"
3047                      "%var            = OpVariable %f32arr100ptr_f Function\n"
3048                      "%inarr          = OpAccessChain %f32arr100ptr_u %indata %zero\n"
3049                      "%outarr         = OpAccessChain %f32arr100ptr_u %outdata %zero\n"
3050                      "                  OpCopyMemory %var %inarr\n"
3051                      "                  OpCopyMemory %outarr %var\n"
3052                      "                  OpReturn\n"
3053                      "                  OpFunctionEnd\n";
3054 
3055     spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
3056     spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
3057     spec2.numWorkGroups = IVec3(1, 1, 1);
3058 
3059     group->addChild(new SpvAsmComputeShaderCase(testCtx, "array", spec2));
3060 
3061     // The following case copies a struct{vec4, vec4, vec4, vec4} variable from the input buffer to the output buffer.
3062     ComputeShaderSpec spec3;
3063     vector<float> inputFloats3(16);
3064     vector<float> outputFloats3(16);
3065 
3066     fillRandomScalars(rnd, -200.f, 200.f, &inputFloats3[0], 16);
3067 
3068     for (size_t ndx = 0; ndx < 16; ++ndx)
3069         outputFloats3[ndx] = inputFloats3[ndx];
3070 
3071     spec3.assembly = string(getComputeAsmShaderPreamble()) +
3072 
3073                      "OpName %main           \"main\"\n"
3074                      "OpName %id             \"gl_GlobalInvocationID\"\n"
3075 
3076                      "OpDecorate %id BuiltIn GlobalInvocationId\n"
3077                      //"OpMemberDecorate %buf 0 Offset 0\n"  - exists in getComputeAsmInputOutputBufferTraits
3078                      "OpMemberDecorate %buf 1 Offset 16\n"
3079                      "OpMemberDecorate %buf 2 Offset 32\n"
3080                      "OpMemberDecorate %buf 3 Offset 48\n"
3081 
3082                      + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3083 
3084                      "%vec4      = OpTypeVector %f32 4\n"
3085                      "%buf       = OpTypeStruct %vec4 %vec4 %vec4 %vec4\n"
3086                      "%bufptr    = OpTypePointer Uniform %buf\n"
3087                      "%indata    = OpVariable %bufptr Uniform\n"
3088                      "%outdata   = OpVariable %bufptr Uniform\n"
3089                      "%vec4stptr = OpTypePointer Function %buf\n"
3090 
3091                      "%id        = OpVariable %uvec3ptr Input\n"
3092                      "%zero      = OpConstant %i32 0\n"
3093 
3094                      "%main      = OpFunction %void None %voidf\n"
3095                      "%label     = OpLabel\n"
3096                      "%var       = OpVariable %vec4stptr Function\n"
3097                      "             OpCopyMemory %var %indata\n"
3098                      "             OpCopyMemory %outdata %var\n"
3099                      "             OpReturn\n"
3100                      "             OpFunctionEnd\n";
3101 
3102     spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3103     spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
3104     spec3.numWorkGroups = IVec3(1, 1, 1);
3105 
3106     group->addChild(new SpvAsmComputeShaderCase(testCtx, "struct", spec3));
3107 
3108     // The following case negates multiple float variables from the input buffer and stores the results to the output buffer.
3109     ComputeShaderSpec spec4;
3110     vector<float> inputFloats4(numElements);
3111     vector<float> outputFloats4(numElements);
3112 
3113     fillRandomScalars(rnd, -200.f, 200.f, &inputFloats4[0], numElements);
3114 
3115     for (size_t ndx = 0; ndx < numElements; ++ndx)
3116         outputFloats4[ndx] = -inputFloats4[ndx];
3117 
3118     spec4.assembly = string(getComputeAsmShaderPreamble()) +
3119 
3120                      "OpName %main           \"main\"\n"
3121                      "OpName %id             \"gl_GlobalInvocationID\"\n"
3122 
3123                      "OpDecorate %id BuiltIn GlobalInvocationId\n"
3124 
3125                      + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3126                      string(getComputeAsmInputOutputBuffer()) +
3127 
3128                      "%f32ptr_f  = OpTypePointer Function %f32\n"
3129                      "%id        = OpVariable %uvec3ptr Input\n"
3130                      "%zero      = OpConstant %i32 0\n"
3131 
3132                      "%main      = OpFunction %void None %voidf\n"
3133                      "%label     = OpLabel\n"
3134                      "%var       = OpVariable %f32ptr_f Function\n"
3135                      "%idval     = OpLoad %uvec3 %id\n"
3136                      "%x         = OpCompositeExtract %u32 %idval 0\n"
3137                      "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
3138                      "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
3139                      "             OpCopyMemory %var %inloc\n"
3140                      "%val       = OpLoad %f32 %var\n"
3141                      "%neg       = OpFNegate %f32 %val\n"
3142                      "             OpStore %outloc %neg\n"
3143                      "             OpReturn\n"
3144                      "             OpFunctionEnd\n";
3145 
3146     spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3147     spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
3148     spec4.numWorkGroups = IVec3(numElements, 1, 1);
3149 
3150     group->addChild(new SpvAsmComputeShaderCase(testCtx, "float", spec4));
3151 
3152     return group.release();
3153 }
3154 
createOpCopyObjectGroup(tcu::TestContext & testCtx)3155 tcu::TestCaseGroup *createOpCopyObjectGroup(tcu::TestContext &testCtx)
3156 {
3157     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opcopyobject"));
3158     ComputeShaderSpec spec;
3159     de::Random rnd(deStringHash(group->getName()));
3160     const int numElements = 100;
3161     vector<float> inputFloats(numElements, 0);
3162     vector<float> outputFloats(numElements, 0);
3163 
3164     fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
3165 
3166     // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3167     floorAll(inputFloats);
3168 
3169     for (size_t ndx = 0; ndx < numElements; ++ndx)
3170         outputFloats[ndx] = inputFloats[ndx] + 7.5f;
3171 
3172     spec.assembly = string(getComputeAsmShaderPreamble()) +
3173 
3174                     "OpName %main           \"main\"\n"
3175                     "OpName %id             \"gl_GlobalInvocationID\"\n"
3176 
3177                     "OpDecorate %id BuiltIn GlobalInvocationId\n"
3178 
3179                     + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3180 
3181                     "%fmat     = OpTypeMatrix %fvec3 3\n"
3182                     "%three    = OpConstant %u32 3\n"
3183                     "%farr     = OpTypeArray %f32 %three\n"
3184                     "%fst      = OpTypeStruct %f32 %f32\n"
3185 
3186                     + string(getComputeAsmInputOutputBuffer()) +
3187 
3188                     "%id            = OpVariable %uvec3ptr Input\n"
3189                     "%zero          = OpConstant %i32 0\n"
3190                     "%c_f           = OpConstant %f32 1.5\n"
3191                     "%c_fvec3       = OpConstantComposite %fvec3 %c_f %c_f %c_f\n"
3192                     "%c_fmat        = OpConstantComposite %fmat %c_fvec3 %c_fvec3 %c_fvec3\n"
3193                     "%c_farr        = OpConstantComposite %farr %c_f %c_f %c_f\n"
3194                     "%c_fst         = OpConstantComposite %fst %c_f %c_f\n"
3195 
3196                     "%main          = OpFunction %void None %voidf\n"
3197                     "%label         = OpLabel\n"
3198                     "%c_f_copy      = OpCopyObject %f32   %c_f\n"
3199                     "%c_fvec3_copy  = OpCopyObject %fvec3 %c_fvec3\n"
3200                     "%c_fmat_copy   = OpCopyObject %fmat  %c_fmat\n"
3201                     "%c_farr_copy   = OpCopyObject %farr  %c_farr\n"
3202                     "%c_fst_copy    = OpCopyObject %fst   %c_fst\n"
3203                     "%fvec3_elem    = OpCompositeExtract %f32 %c_fvec3_copy 0\n"
3204                     "%fmat_elem     = OpCompositeExtract %f32 %c_fmat_copy 1 2\n"
3205                     "%farr_elem     = OpCompositeExtract %f32 %c_farr_copy 2\n"
3206                     "%fst_elem      = OpCompositeExtract %f32 %c_fst_copy 1\n"
3207                     // Add up. 1.5 * 5 = 7.5.
3208                     "%add1          = OpFAdd %f32 %c_f_copy %fvec3_elem\n"
3209                     "%add2          = OpFAdd %f32 %add1     %fmat_elem\n"
3210                     "%add3          = OpFAdd %f32 %add2     %farr_elem\n"
3211                     "%add4          = OpFAdd %f32 %add3     %fst_elem\n"
3212 
3213                     "%idval         = OpLoad %uvec3 %id\n"
3214                     "%x             = OpCompositeExtract %u32 %idval 0\n"
3215                     "%inloc         = OpAccessChain %f32ptr %indata %zero %x\n"
3216                     "%outloc        = OpAccessChain %f32ptr %outdata %zero %x\n"
3217                     "%inval         = OpLoad %f32 %inloc\n"
3218                     "%add           = OpFAdd %f32 %add4 %inval\n"
3219                     "                 OpStore %outloc %add\n"
3220                     "                 OpReturn\n"
3221                     "                 OpFunctionEnd\n";
3222     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3223     spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3224     spec.numWorkGroups = IVec3(numElements, 1, 1);
3225 
3226     group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", spec));
3227 
3228     return group.release();
3229 }
// Assembly code used by createOpUnreachableGroup() for testing OpUnreachable is based on the following GLSL source code:
3231 //
3232 // #version 430
3233 //
3234 // layout(std140, set = 0, binding = 0) readonly buffer Input {
3235 //   float elements[];
3236 // } input_data;
3237 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
3238 //   float elements[];
3239 // } output_data;
3240 //
3241 // void not_called_func() {
3242 //   // place OpUnreachable here
3243 // }
3244 //
3245 // uint modulo4(uint val) {
3246 //   switch (val % uint(4)) {
3247 //     case 0:  return 3;
3248 //     case 1:  return 2;
3249 //     case 2:  return 1;
3250 //     case 3:  return 0;
3251 //     default: return 100; // place OpUnreachable here
3252 //   }
3253 // }
3254 //
3255 // uint const5() {
3256 //   return 5;
3257 //   // place OpUnreachable here
3258 // }
3259 //
3260 // void main() {
3261 //   uint x = gl_GlobalInvocationID.x;
3262 //   if (const5() > modulo4(1000)) {
3263 //     output_data.elements[x] = -input_data.elements[x];
3264 //   } else {
3265 //     // place OpUnreachable here
3266 //     output_data.elements[x] = input_data.elements[x];
3267 //   }
3268 // }
3269 
addOpUnreachableAmberTests(tcu::TestCaseGroup & group,tcu::TestContext & testCtx)3270 void addOpUnreachableAmberTests(tcu::TestCaseGroup &group, tcu::TestContext &testCtx)
3271 {
3272 #ifndef CTS_USES_VULKANSC
3273     static const char dataDir[] = "spirv_assembly/instruction/compute/unreachable";
3274 
3275     struct Case
3276     {
3277         string name;
3278         string desc;
3279     };
3280 
3281     static const Case cases[] = {
3282         {"unreachable-switch-merge-in-loop",
3283          "Test containing an unreachable switch merge block inside an infinite loop"},
3284     };
3285 
3286     for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3287     {
3288         const string fileName = cases[i].name + ".amber";
3289         group.addChild(
3290             cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3291     }
3292 #else
3293     DE_UNREF(group);
3294     DE_UNREF(testCtx);
3295 #endif
3296 }
3297 
addOpSwitchAmberTests(tcu::TestCaseGroup & group,tcu::TestContext & testCtx)3298 void addOpSwitchAmberTests(tcu::TestCaseGroup &group, tcu::TestContext &testCtx)
3299 {
3300 #ifndef CTS_USES_VULKANSC
3301     static const char dataDir[] = "spirv_assembly/instruction/compute/switch";
3302 
3303     struct Case
3304     {
3305         string name;
3306         string desc;
3307     };
3308 
3309     static const Case cases[] = {
3310         {"switch-case-to-merge-block", "Test switch containing a case that jumps directly to the merge block"},
3311     };
3312 
3313     for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3314     {
3315         const string fileName = cases[i].name + ".amber";
3316         group.addChild(
3317             cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3318     }
3319 #else
3320     DE_UNREF(group);
3321     DE_UNREF(testCtx);
3322 #endif
3323 }
3324 
3325 #ifndef CTS_USES_VULKANSC
createOpArrayLengthComputeGroup(tcu::TestContext & testCtx)3326 tcu::TestCaseGroup *createOpArrayLengthComputeGroup(tcu::TestContext &testCtx)
3327 {
3328     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "oparraylength"));
3329     static const char dataDir[] = "spirv_assembly/instruction/compute/arraylength";
3330 
3331     struct Case
3332     {
3333         string name;
3334         string desc;
3335     };
3336 
3337     static const Case cases[] = {{"array-stride-larger-than-element-size",
3338                                   "Test using an unsized array with stride larger than the element size"}};
3339 
3340     for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3341     {
3342         const string fileName = cases[i].name + ".amber";
3343         group->addChild(
3344             cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3345     }
3346 
3347     return group.release();
3348 }
3349 #endif
3350 
createOpUnreachableGroup(tcu::TestContext & testCtx)3351 tcu::TestCaseGroup *createOpUnreachableGroup(tcu::TestContext &testCtx)
3352 {
3353     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opunreachable"));
3354     ComputeShaderSpec spec;
3355     de::Random rnd(deStringHash(group->getName()));
3356     const int numElements = 100;
3357     vector<float> positiveFloats(numElements, 0);
3358     vector<float> negativeFloats(numElements, 0);
3359 
3360     fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
3361 
3362     for (size_t ndx = 0; ndx < numElements; ++ndx)
3363         negativeFloats[ndx] = -positiveFloats[ndx];
3364 
3365     spec.assembly = string(getComputeAsmShaderPreamble()) +
3366 
3367                     "OpSource GLSL 430\n"
3368                     "OpName %main            \"main\"\n"
3369                     "OpName %func_not_called_func \"not_called_func(\"\n"
3370                     "OpName %func_modulo4         \"modulo4(u1;\"\n"
3371                     "OpName %func_const5          \"const5(\"\n"
3372                     "OpName %id                   \"gl_GlobalInvocationID\"\n"
3373 
3374                     "OpDecorate %id BuiltIn GlobalInvocationId\n"
3375 
3376                     + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3377 
3378                     "%u32ptr    = OpTypePointer Function %u32\n"
3379                     "%uintfuint = OpTypeFunction %u32 %u32ptr\n"
3380                     "%unitf     = OpTypeFunction %u32\n"
3381 
3382                     "%id        = OpVariable %uvec3ptr Input\n"
3383                     "%zero      = OpConstant %u32 0\n"
3384                     "%one       = OpConstant %u32 1\n"
3385                     "%two       = OpConstant %u32 2\n"
3386                     "%three     = OpConstant %u32 3\n"
3387                     "%four      = OpConstant %u32 4\n"
3388                     "%five      = OpConstant %u32 5\n"
3389                     "%hundred   = OpConstant %u32 100\n"
3390                     "%thousand  = OpConstant %u32 1000\n"
3391 
3392                     + string(getComputeAsmInputOutputBuffer()) +
3393 
3394                     // Main()
3395                     "%main   = OpFunction %void None %voidf\n"
3396                     "%main_entry  = OpLabel\n"
3397                     "%v_thousand  = OpVariable %u32ptr Function %thousand\n"
3398                     "%idval       = OpLoad %uvec3 %id\n"
3399                     "%x           = OpCompositeExtract %u32 %idval 0\n"
3400                     "%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
3401                     "%inval       = OpLoad %f32 %inloc\n"
3402                     "%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
3403                     "%ret_const5  = OpFunctionCall %u32 %func_const5\n"
3404                     "%ret_modulo4 = OpFunctionCall %u32 %func_modulo4 %v_thousand\n"
3405                     "%cmp_gt      = OpUGreaterThan %bool %ret_const5 %ret_modulo4\n"
3406                     "               OpSelectionMerge %if_end None\n"
3407                     "               OpBranchConditional %cmp_gt %if_true %if_false\n"
3408                     "%if_true     = OpLabel\n"
3409                     "%negate      = OpFNegate %f32 %inval\n"
3410                     "               OpStore %outloc %negate\n"
3411                     "               OpBranch %if_end\n"
3412                     "%if_false    = OpLabel\n"
3413                     "               OpUnreachable\n" // Unreachable else branch for if statement
3414                     "%if_end      = OpLabel\n"
3415                     "               OpReturn\n"
3416                     "               OpFunctionEnd\n"
3417 
3418                     // not_called_function()
3419                     "%func_not_called_func  = OpFunction %void None %voidf\n"
3420                     "%not_called_func_entry = OpLabel\n"
3421                     "                         OpUnreachable\n" // Unreachable entry block in not called static function
3422                     "                         OpFunctionEnd\n"
3423 
3424                     // modulo4()
3425                     "%func_modulo4  = OpFunction %u32 None %uintfuint\n"
3426                     "%valptr        = OpFunctionParameter %u32ptr\n"
3427                     "%modulo4_entry = OpLabel\n"
3428                     "%val           = OpLoad %u32 %valptr\n"
3429                     "%modulo        = OpUMod %u32 %val %four\n"
3430                     "                 OpSelectionMerge %switch_merge None\n"
3431                     "                 OpSwitch %modulo %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
3432                     "%case0         = OpLabel\n"
3433                     "                 OpReturnValue %three\n"
3434                     "%case1         = OpLabel\n"
3435                     "                 OpReturnValue %two\n"
3436                     "%case2         = OpLabel\n"
3437                     "                 OpReturnValue %one\n"
3438                     "%case3         = OpLabel\n"
3439                     "                 OpReturnValue %zero\n"
3440                     "%default       = OpLabel\n"
3441                     "                 OpUnreachable\n" // Unreachable default case for switch statement
3442                     "%switch_merge  = OpLabel\n"
3443                     "                 OpUnreachable\n" // Unreachable merge block for switch statement
3444                     "                 OpFunctionEnd\n"
3445 
3446                     // const5()
3447                     "%func_const5  = OpFunction %u32 None %unitf\n"
3448                     "%const5_entry = OpLabel\n"
3449                     "                OpReturnValue %five\n"
3450                     "%unreachable  = OpLabel\n"
3451                     "                OpUnreachable\n" // Unreachable block in function
3452                     "                OpFunctionEnd\n";
3453     spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
3454     spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
3455     spec.numWorkGroups = IVec3(numElements, 1, 1);
3456 
3457     group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
3458 
3459     addOpUnreachableAmberTests(*group, testCtx);
3460 
3461     return group.release();
3462 }
3463 
3464 // Assembly code used for testing decoration group is based on GLSL source code:
3465 //
3466 // #version 430
3467 //
3468 // layout(std140, set = 0, binding = 0) readonly buffer Input0 {
3469 //   float elements[];
3470 // } input_data0;
3471 // layout(std140, set = 0, binding = 1) readonly buffer Input1 {
3472 //   float elements[];
3473 // } input_data1;
3474 // layout(std140, set = 0, binding = 2) readonly buffer Input2 {
3475 //   float elements[];
3476 // } input_data2;
3477 // layout(std140, set = 0, binding = 3) readonly buffer Input3 {
3478 //   float elements[];
3479 // } input_data3;
3480 // layout(std140, set = 0, binding = 4) readonly buffer Input4 {
3481 //   float elements[];
3482 // } input_data4;
3483 // layout(std140, set = 0, binding = 5) writeonly buffer Output {
3484 //   float elements[];
3485 // } output_data;
3486 //
3487 // void main() {
3488 //   uint x = gl_GlobalInvocationID.x;
3489 //   output_data.elements[x] = input_data0.elements[x] + input_data1.elements[x] + input_data2.elements[x] + input_data3.elements[x] + input_data4.elements[x];
3490 // }
createDecorationGroupGroup(tcu::TestContext & testCtx)3491 tcu::TestCaseGroup *createDecorationGroupGroup(tcu::TestContext &testCtx)
3492 {
3493     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "decoration_group"));
3494     ComputeShaderSpec spec;
3495     de::Random rnd(deStringHash(group->getName()));
3496     const int numElements = 100;
3497     vector<float> inputFloats0(numElements, 0);
3498     vector<float> inputFloats1(numElements, 0);
3499     vector<float> inputFloats2(numElements, 0);
3500     vector<float> inputFloats3(numElements, 0);
3501     vector<float> inputFloats4(numElements, 0);
3502     vector<float> outputFloats(numElements, 0);
3503 
3504     fillRandomScalars(rnd, -300.f, 300.f, &inputFloats0[0], numElements);
3505     fillRandomScalars(rnd, -300.f, 300.f, &inputFloats1[0], numElements);
3506     fillRandomScalars(rnd, -300.f, 300.f, &inputFloats2[0], numElements);
3507     fillRandomScalars(rnd, -300.f, 300.f, &inputFloats3[0], numElements);
3508     fillRandomScalars(rnd, -300.f, 300.f, &inputFloats4[0], numElements);
3509 
3510     // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3511     floorAll(inputFloats0);
3512     floorAll(inputFloats1);
3513     floorAll(inputFloats2);
3514     floorAll(inputFloats3);
3515     floorAll(inputFloats4);
3516 
3517     for (size_t ndx = 0; ndx < numElements; ++ndx)
3518         outputFloats[ndx] =
3519             inputFloats0[ndx] + inputFloats1[ndx] + inputFloats2[ndx] + inputFloats3[ndx] + inputFloats4[ndx];
3520 
3521     spec.assembly = string(getComputeAsmShaderPreamble()) +
3522 
3523                     "OpSource GLSL 430\n"
3524                     "OpName %main \"main\"\n"
3525                     "OpName %id \"gl_GlobalInvocationID\"\n"
3526 
3527                     // Not using group decoration on variable.
3528                     "OpDecorate %id BuiltIn GlobalInvocationId\n"
3529                     // Not using group decoration on type.
3530                     "OpDecorate %f32arr ArrayStride 4\n"
3531 
3532                     "OpDecorate %groups BufferBlock\n"
3533                     "OpDecorate %groupm Offset 0\n"
3534                     "%groups = OpDecorationGroup\n"
3535                     "%groupm = OpDecorationGroup\n"
3536 
3537                     // Group decoration on multiple structs.
3538                     "OpGroupDecorate %groups %outbuf %inbuf0 %inbuf1 %inbuf2 %inbuf3 %inbuf4\n"
3539                     // Group decoration on multiple struct members.
3540                     "OpGroupMemberDecorate %groupm %outbuf 0 %inbuf0 0 %inbuf1 0 %inbuf2 0 %inbuf3 0 %inbuf4 0\n"
3541 
3542                     "OpDecorate %group1 DescriptorSet 0\n"
3543                     "OpDecorate %group3 DescriptorSet 0\n"
3544                     "OpDecorate %group3 NonWritable\n"
3545                     "OpDecorate %group3 Restrict\n"
3546                     "%group0 = OpDecorationGroup\n"
3547                     "%group1 = OpDecorationGroup\n"
3548                     "%group3 = OpDecorationGroup\n"
3549 
3550                     // Applying the same decoration group multiple times.
3551                     "OpGroupDecorate %group1 %outdata\n"
3552                     "OpGroupDecorate %group1 %outdata\n"
3553                     "OpGroupDecorate %group1 %outdata\n"
3554                     "OpDecorate %outdata DescriptorSet 0\n"
3555                     "OpDecorate %outdata Binding 5\n"
3556                     // Applying decoration group containing nothing.
3557                     "OpGroupDecorate %group0 %indata0\n"
3558                     "OpDecorate %indata0 DescriptorSet 0\n"
3559                     "OpDecorate %indata0 Binding 0\n"
3560                     // Applying decoration group containing one decoration.
3561                     "OpGroupDecorate %group1 %indata1\n"
3562                     "OpDecorate %indata1 Binding 1\n"
3563                     // Applying decoration group containing multiple decorations.
3564                     "OpGroupDecorate %group3 %indata2 %indata3\n"
3565                     "OpDecorate %indata2 Binding 2\n"
3566                     "OpDecorate %indata3 Binding 3\n"
3567                     // Applying multiple decoration groups (with overlapping).
3568                     "OpGroupDecorate %group0 %indata4\n"
3569                     "OpGroupDecorate %group1 %indata4\n"
3570                     "OpGroupDecorate %group3 %indata4\n"
3571                     "OpDecorate %indata4 Binding 4\n"
3572 
3573                     + string(getComputeAsmCommonTypes()) +
3574 
3575                     "%id   = OpVariable %uvec3ptr Input\n"
3576                     "%zero = OpConstant %i32 0\n"
3577 
3578                     "%outbuf    = OpTypeStruct %f32arr\n"
3579                     "%outbufptr = OpTypePointer Uniform %outbuf\n"
3580                     "%outdata   = OpVariable %outbufptr Uniform\n"
3581                     "%inbuf0    = OpTypeStruct %f32arr\n"
3582                     "%inbuf0ptr = OpTypePointer Uniform %inbuf0\n"
3583                     "%indata0   = OpVariable %inbuf0ptr Uniform\n"
3584                     "%inbuf1    = OpTypeStruct %f32arr\n"
3585                     "%inbuf1ptr = OpTypePointer Uniform %inbuf1\n"
3586                     "%indata1   = OpVariable %inbuf1ptr Uniform\n"
3587                     "%inbuf2    = OpTypeStruct %f32arr\n"
3588                     "%inbuf2ptr = OpTypePointer Uniform %inbuf2\n"
3589                     "%indata2   = OpVariable %inbuf2ptr Uniform\n"
3590                     "%inbuf3    = OpTypeStruct %f32arr\n"
3591                     "%inbuf3ptr = OpTypePointer Uniform %inbuf3\n"
3592                     "%indata3   = OpVariable %inbuf3ptr Uniform\n"
3593                     "%inbuf4    = OpTypeStruct %f32arr\n"
3594                     "%inbufptr  = OpTypePointer Uniform %inbuf4\n"
3595                     "%indata4   = OpVariable %inbufptr Uniform\n"
3596 
3597                     "%main   = OpFunction %void None %voidf\n"
3598                     "%label  = OpLabel\n"
3599                     "%idval  = OpLoad %uvec3 %id\n"
3600                     "%x      = OpCompositeExtract %u32 %idval 0\n"
3601                     "%inloc0 = OpAccessChain %f32ptr %indata0 %zero %x\n"
3602                     "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
3603                     "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
3604                     "%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
3605                     "%inloc4 = OpAccessChain %f32ptr %indata4 %zero %x\n"
3606                     "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3607                     "%inval0 = OpLoad %f32 %inloc0\n"
3608                     "%inval1 = OpLoad %f32 %inloc1\n"
3609                     "%inval2 = OpLoad %f32 %inloc2\n"
3610                     "%inval3 = OpLoad %f32 %inloc3\n"
3611                     "%inval4 = OpLoad %f32 %inloc4\n"
3612                     "%add0   = OpFAdd %f32 %inval0 %inval1\n"
3613                     "%add1   = OpFAdd %f32 %add0 %inval2\n"
3614                     "%add2   = OpFAdd %f32 %add1 %inval3\n"
3615                     "%add    = OpFAdd %f32 %add2 %inval4\n"
3616                     "          OpStore %outloc %add\n"
3617                     "          OpReturn\n"
3618                     "          OpFunctionEnd\n";
3619     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats0)));
3620     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
3621     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
3622     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3623     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3624     spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3625     spec.numWorkGroups = IVec3(numElements, 1, 1);
3626 
3627     group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
3628 
3629     return group.release();
3630 }
3631 
// Tag recording which scalar type a SpecConstantValue was constructed from; each
// SpecConstantValue constructor stores the enumerator matching its argument type.
enum SpecConstantType
{
    SC_INT8,    // int8_t
    SC_UINT8,   // uint8_t
    SC_INT16,   // int16_t
    SC_UINT16,  // uint16_t
    SC_INT32,   // int32_t
    SC_UINT32,  // uint32_t
    SC_INT64,   // int64_t
    SC_UINT64,  // uint64_t
    SC_FLOAT16, // tcu::Float16
    SC_FLOAT32, // tcu::Float32
    SC_FLOAT64, // tcu::Float64
};
3646 
3647 struct SpecConstantValue
3648 {
3649     SpecConstantType type;
3650     union ValueUnion
3651     {
3652         int8_t i8;
3653         uint8_t u8;
3654         int16_t i16;
3655         uint16_t u16;
3656         int32_t i32;
3657         uint32_t u32;
3658         int64_t i64;
3659         uint64_t u64;
3660         tcu::Float16 f16;
3661         tcu::Float32 f32;
3662         tcu::Float64 f64;
3663 
ValueUnion(int8_t v)3664         ValueUnion(int8_t v) : i8(v)
3665         {
3666         }
ValueUnion(uint8_t v)3667         ValueUnion(uint8_t v) : u8(v)
3668         {
3669         }
ValueUnion(int16_t v)3670         ValueUnion(int16_t v) : i16(v)
3671         {
3672         }
ValueUnion(uint16_t v)3673         ValueUnion(uint16_t v) : u16(v)
3674         {
3675         }
ValueUnion(int32_t v)3676         ValueUnion(int32_t v) : i32(v)
3677         {
3678         }
ValueUnion(uint32_t v)3679         ValueUnion(uint32_t v) : u32(v)
3680         {
3681         }
ValueUnion(int64_t v)3682         ValueUnion(int64_t v) : i64(v)
3683         {
3684         }
ValueUnion(uint64_t v)3685         ValueUnion(uint64_t v) : u64(v)
3686         {
3687         }
ValueUnion(tcu::Float16 v)3688         ValueUnion(tcu::Float16 v) : f16(v)
3689         {
3690         }
ValueUnion(tcu::Float32 v)3691         ValueUnion(tcu::Float32 v) : f32(v)
3692         {
3693         }
ValueUnion(tcu::Float64 v)3694         ValueUnion(tcu::Float64 v) : f64(v)
3695         {
3696         }
3697     } value;
3698 
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3699     SpecConstantValue(int8_t v) : type(SC_INT8), value(v)
3700     {
3701     }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3702     SpecConstantValue(uint8_t v) : type(SC_UINT8), value(v)
3703     {
3704     }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3705     SpecConstantValue(int16_t v) : type(SC_INT16), value(v)
3706     {
3707     }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3708     SpecConstantValue(uint16_t v) : type(SC_UINT16), value(v)
3709     {
3710     }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3711     SpecConstantValue(int32_t v) : type(SC_INT32), value(v)
3712     {
3713     }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3714     SpecConstantValue(uint32_t v) : type(SC_UINT32), value(v)
3715     {
3716     }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3717     SpecConstantValue(int64_t v) : type(SC_INT64), value(v)
3718     {
3719     }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3720     SpecConstantValue(uint64_t v) : type(SC_UINT64), value(v)
3721     {
3722     }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3723     SpecConstantValue(tcu::Float16 v) : type(SC_FLOAT16), value(v)
3724     {
3725     }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3726     SpecConstantValue(tcu::Float32 v) : type(SC_FLOAT32), value(v)
3727     {
3728     }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3729     SpecConstantValue(tcu::Float64 v) : type(SC_FLOAT64), value(v)
3730     {
3731     }
3732 
appendTovkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3733     void appendTo(vkt::SpirVAssembly::SpecConstants &specConstants)
3734     {
3735         switch (type)
3736         {
3737         case SC_INT8:
3738             specConstants.append(value.i8);
3739             break;
3740         case SC_UINT8:
3741             specConstants.append(value.u8);
3742             break;
3743         case SC_INT16:
3744             specConstants.append(value.i16);
3745             break;
3746         case SC_UINT16:
3747             specConstants.append(value.u16);
3748             break;
3749         case SC_INT32:
3750             specConstants.append(value.i32);
3751             break;
3752         case SC_UINT32:
3753             specConstants.append(value.u32);
3754             break;
3755         case SC_INT64:
3756             specConstants.append(value.i64);
3757             break;
3758         case SC_UINT64:
3759             specConstants.append(value.u64);
3760             break;
3761         case SC_FLOAT16:
3762             specConstants.append(value.f16);
3763             break;
3764         case SC_FLOAT32:
3765             specConstants.append(value.f32);
3766             break;
3767         case SC_FLOAT64:
3768             specConstants.append(value.f64);
3769             break;
3770         default:
3771             DE_ASSERT(false);
3772         }
3773     }
3774 };
3775 
// Bit flags describing what a spec-constant test case needs beyond the basic
// 32-bit integer shader. createSpecConstantGroup() tests these bits to add the
// matching capability, type definition and device feature request.
enum CaseFlagBits
{
    FLAG_NONE    = 0,        // Nothing extra is required.
    FLAG_CONVERT = (1 << 0), // Emit a conversion of %sc_final into the 32-bit integer %sc_final32.
    FLAG_I8      = (1 << 1), // Case uses 8-bit integers.
    FLAG_I16     = (1 << 2), // Case uses 16-bit integers.
    FLAG_I64     = (1 << 3), // Case uses 64-bit integers.
    FLAG_F16     = (1 << 4), // Case uses 16-bit floats.
    FLAG_F64     = (1 << 5), // Case uses 64-bit floats.
};

// Bitwise combination of CaseFlagBits values.
using CaseFlags = uint32_t;
3787 
3788 struct SpecConstantTwoValCase
3789 {
3790     const std::string caseName;
3791     const std::string scDefinition0;
3792     const std::string scDefinition1;
3793     const std::string scResultType;
3794     const std::string scOperation;
3795     SpecConstantValue scActualValue0;
3796     SpecConstantValue scActualValue1;
3797     const std::string resultOperation;
3798     vector<int32_t> expectedOutput;
3799     CaseFlags caseFlags;
3800 
SpecConstantTwoValCasevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantTwoValCase3801     SpecConstantTwoValCase(const std::string &name, const std::string &definition0, const std::string &definition1,
3802                            const std::string &resultType, const std::string &operation, SpecConstantValue value0,
3803                            SpecConstantValue value1, const std::string &resultOp, const vector<int32_t> &output,
3804                            CaseFlags flags = FLAG_NONE)
3805         : caseName(name)
3806         , scDefinition0(definition0)
3807         , scDefinition1(definition1)
3808         , scResultType(resultType)
3809         , scOperation(operation)
3810         , scActualValue0(value0)
3811         , scActualValue1(value1)
3812         , resultOperation(resultOp)
3813         , expectedOutput(output)
3814         , caseFlags(flags)
3815     {
3816     }
3817 };
3818 
// Returns SPIR-V assembly declaring the integer constants 0..3 and the composite
// types built from them: a 3-element %i32 array, a 3-element array of those
// arrays, and a struct wrapping the latter. The snippet refers to %i32, which
// must be declared by the shader it is pasted into.
std::string getSpecConstantOpStructConstantsAndTypes()
{
    std::string declarations;

    // Scalar constants.
    declarations += "%zero        = OpConstant %i32 0\n";
    declarations += "%one         = OpConstant %i32 1\n";
    declarations += "%two         = OpConstant %i32 2\n";
    declarations += "%three       = OpConstant %i32 3\n";

    // Nested composite types: array -> array of arrays -> struct.
    declarations += "%iarr3       = OpTypeArray %i32 %three\n";
    declarations += "%imat3       = OpTypeArray %iarr3 %three\n";
    declarations += "%struct      = OpTypeStruct %imat3\n";

    return declarations;
}
3829 
// Returns SPIR-V assembly defining all-zero constant composites for the
// %iarr3, %imat3 and %struct types. The snippet refers to those types and to
// %zero, which must already be declared where it is inserted.
std::string getSpecConstantOpStructComposites()
{
    // Each composite is assembled from the previous one, innermost first.
    const std::string zeroComposites = "%iarr3_0     = OpConstantComposite %iarr3 %zero %zero %zero\n"
                                       "%imat3_0     = OpConstantComposite %imat3 %iarr3_0 %iarr3_0 %iarr3_0\n"
                                       "%struct_0    = OpConstantComposite %struct %imat3_0\n";
    return zeroComposites;
}
3836 
// Returns SPIR-V assembly that exercises CompositeInsert/CompositeExtract inside
// OpSpecConstantOp on nested arrays and a struct.
//
// The snippet refers to %sc_0, %sc_1, %sc_2, the zero composites (%iarr3_0,
// %imat3_0, %struct_0), the types %iarr3/%imat3/%struct/%i32/%bool and the
// constants %zero/%one/%two; all of them must be declared before it.
//
// It defines %mustbe_0, %mustbe_1 and %mustbe_2, which evaluate to 0, 1 and 2
// respectively provided %sc_0 and %sc_1 are specialized to different values.
std::string getSpecConstantOpStructConstBlock()
{
    // Three arrays holding rotations of the spec constants:
    //   %iarr3_c = (sc_0, sc_1, sc_2)
    //   %iarr3_f = (sc_1, sc_2, sc_0)
    //   %iarr3_i = (sc_2, sc_0, sc_1)
    const char *const rotatedArrays =
        "%iarr3_a     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_0        %iarr3_0     0\n"
        "%iarr3_b     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_1        %iarr3_a     1\n"
        "%iarr3_c     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_2        %iarr3_b     2\n"
        "%iarr3_d     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_1        %iarr3_0     0\n"
        "%iarr3_e     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_2        %iarr3_d     1\n"
        "%iarr3_f     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_0        %iarr3_e     2\n"
        "%iarr3_g     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_2        %iarr3_0     0\n"
        "%iarr3_h     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_0        %iarr3_g     1\n"
        "%iarr3_i     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_1        %iarr3_h     2\n";

    // Stack the three arrays into %imat3_c, then store that as member 0 of %struct_a.
    const char *const matrixAndStruct =
        "%imat3_a     = OpSpecConstantOp %imat3  CompositeInsert  %iarr3_c     %imat3_0     0\n"
        "%imat3_b     = OpSpecConstantOp %imat3  CompositeInsert  %iarr3_f     %imat3_a     1\n"
        "%imat3_c     = OpSpecConstantOp %imat3  CompositeInsert  %iarr3_i     %imat3_b     2\n"
        "%struct_a    = OpSpecConstantOp %struct CompositeInsert  %imat3_c     %struct_0    0\n";

    // Pull out component pairs for comparison; %comp_R_C is row R, column C:
    //   (0,0)=sc_0 vs (1,0)=sc_1, (0,1)=sc_1 vs (2,2)=sc_1, (2,0)=sc_2 vs (1,1)=sc_2.
    const char *const extractedComponents =
        "%comp_0_0    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 0 0\n"
        "%comp_1_0    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 1 0\n"
        "%comp_0_1    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 0 1\n"
        "%comp_2_2    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 2 2\n"
        "%comp_2_0    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 2 0\n"
        "%comp_1_1    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 1 1\n";

    // Expected comparison results, in order: false, true, true.
    const char *const comparisons = "%cmpres_0    = OpSpecConstantOp %bool   IEqual %comp_0_0 %comp_1_0\n"
                                    "%cmpres_1    = OpSpecConstantOp %bool   IEqual %comp_0_1 %comp_2_2\n"
                                    "%cmpres_2    = OpSpecConstantOp %bool   IEqual %comp_2_0 %comp_1_1\n";

    // Turn the booleans into integers; expected selections, in order: 0, 1, 2.
    const char *const selections = "%mustbe_0    = OpSpecConstantOp %i32    Select %cmpres_0 %one %zero\n"
                                   "%mustbe_1    = OpSpecConstantOp %i32    Select %cmpres_1 %one %zero\n"
                                   "%mustbe_2    = OpSpecConstantOp %i32    Select %cmpres_2 %two %one\n";

    return std::string(rotatedArrays) + matrixAndStruct + extractedComponents + comparisons + selections;
}
3875 
// Returns SPIR-V instructions computing
//   %sc_final = (1 - %mustbe_0) * %mustbe_1 * (%mustbe_2 - 1) * %sc_factor
// When %mustbe_0/1/2 hold 0/1/2 the leading factor is 1, so %sc_final equals
// %sc_factor; any other combination changes the result. The snippet refers to
// %sc_factor, %one, %i32 and the %mustbe_* ids, none of which it defines.
std::string getSpecConstantOpStructInstructions()
{
    std::string instructions;

    // (1 - mustbe_0) * mustbe_1
    instructions += "%subf_a      = OpISub %i32 %one %mustbe_0\n";
    instructions += "%subf_b      = OpIMul %i32 %subf_a %mustbe_1\n";

    // (mustbe_2 - 1)
    instructions += "%subf_c      = OpISub %i32 %mustbe_2 %one\n";

    // Combine both terms and scale the value under test by the resulting factor.
    instructions += "%factor      = OpIMul %i32 %subf_b %subf_c\n";
    instructions += "%sc_final    = OpIMul %i32 %factor %sc_factor\n";

    return instructions;
}
3887 
createSpecConstantGroup(tcu::TestContext & testCtx)3888 tcu::TestCaseGroup *createSpecConstantGroup(tcu::TestContext &testCtx)
3889 {
3890     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opspecconstantop"));
3891     vector<SpecConstantTwoValCase> cases;
3892     de::Random rnd(deStringHash(group->getName()));
3893     const int numElements = 100;
3894     vector<int32_t> inputInts(numElements, 0);
3895     vector<int32_t> outputInts1(numElements, 0);
3896     vector<int32_t> outputInts2(numElements, 0);
3897     vector<int32_t> outputInts3(numElements, 0);
3898     vector<int32_t> outputInts4(numElements, 0);
3899     vector<int32_t> outputInts5(numElements, 0);
3900     const StringTemplate shaderTemplate("${CAPABILITIES:opt}" + string(getComputeAsmShaderPreamble()) +
3901 
3902                                         "OpName %main           \"main\"\n"
3903                                         "OpName %id             \"gl_GlobalInvocationID\"\n"
3904 
3905                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
3906                                         "OpDecorate %sc_0  SpecId 0\n"
3907                                         "OpDecorate %sc_1  SpecId 1\n"
3908                                         "OpDecorate %i32arr ArrayStride 4\n"
3909 
3910                                         + string(getComputeAsmInputOutputBufferTraits()) +
3911                                         string(getComputeAsmCommonTypes()) +
3912 
3913                                         "${OPTYPE_DEFINITIONS:opt}"
3914                                         "%buf     = OpTypeStruct %i32arr\n"
3915                                         "%bufptr  = OpTypePointer Uniform %buf\n"
3916                                         "%indata    = OpVariable %bufptr Uniform\n"
3917                                         "%outdata   = OpVariable %bufptr Uniform\n"
3918 
3919                                         "%id        = OpVariable %uvec3ptr Input\n"
3920                                         "%zero      = OpConstant %i32 0\n"
3921 
3922                                         "%sc_0      = OpSpecConstant${SC_DEF0}\n"
3923                                         "%sc_1      = OpSpecConstant${SC_DEF1}\n"
3924                                         "%sc_final  = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n"
3925 
3926                                         "%main      = OpFunction %void None %voidf\n"
3927                                         "%label     = OpLabel\n"
3928                                         "${TYPE_CONVERT:opt}"
3929                                         "%idval     = OpLoad %uvec3 %id\n"
3930                                         "%x         = OpCompositeExtract %u32 %idval 0\n"
3931                                         "%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
3932                                         "%inval     = OpLoad %i32 %inloc\n"
3933                                         "%final     = ${GEN_RESULT}\n"
3934                                         "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
3935                                         "             OpStore %outloc %final\n"
3936                                         "             OpReturn\n"
3937                                         "             OpFunctionEnd\n");
3938 
3939     fillRandomScalars(rnd, -65536, 65536, &inputInts[0], numElements);
3940 
3941     for (size_t ndx = 0; ndx < numElements; ++ndx)
3942     {
3943         outputInts1[ndx] = inputInts[ndx] + 42;
3944         outputInts2[ndx] = inputInts[ndx];
3945         outputInts3[ndx] = inputInts[ndx] - 11200;
3946         outputInts4[ndx] = inputInts[ndx] + 1;
3947         outputInts5[ndx] = inputInts[ndx] - 42;
3948     }
3949 
3950     const char addScToInput[]       = "OpIAdd %i32 %inval %sc_final";
3951     const char addSc32ToInput[]     = "OpIAdd %i32 %inval %sc_final32";
3952     const char selectTrueUsingSc[]  = "OpSelect %i32 %sc_final %inval %zero";
3953     const char selectFalseUsingSc[] = "OpSelect %i32 %sc_final %zero %inval";
3954 
3955     cases.push_back(SpecConstantTwoValCase("iadd", " %i32 0", " %i32 0", "%i32", "IAdd                 %sc_0 %sc_1", 62,
3956                                            -20, addScToInput, outputInts1));
3957     cases.push_back(SpecConstantTwoValCase("isub", " %i32 0", " %i32 0", "%i32", "ISub                 %sc_0 %sc_1",
3958                                            100, 58, addScToInput, outputInts1));
3959     cases.push_back(SpecConstantTwoValCase("imul", " %i32 0", " %i32 0", "%i32", "IMul                 %sc_0 %sc_1", -2,
3960                                            -21, addScToInput, outputInts1));
3961     cases.push_back(SpecConstantTwoValCase("sdiv", " %i32 0", " %i32 0", "%i32", "SDiv                 %sc_0 %sc_1",
3962                                            -126, -3, addScToInput, outputInts1));
3963     cases.push_back(SpecConstantTwoValCase("udiv", " %i32 0", " %i32 0", "%i32", "UDiv                 %sc_0 %sc_1",
3964                                            126, 3, addScToInput, outputInts1));
3965     cases.push_back(SpecConstantTwoValCase("srem", " %i32 0", " %i32 0", "%i32", "SRem                 %sc_0 %sc_1", 7,
3966                                            3, addScToInput, outputInts4));
3967     cases.push_back(SpecConstantTwoValCase("smod", " %i32 0", " %i32 0", "%i32", "SMod                 %sc_0 %sc_1", 7,
3968                                            3, addScToInput, outputInts4));
3969     cases.push_back(SpecConstantTwoValCase("umod", " %i32 0", " %i32 0", "%i32", "UMod                 %sc_0 %sc_1",
3970                                            342, 50, addScToInput, outputInts1));
3971     cases.push_back(SpecConstantTwoValCase("bitwiseand", " %i32 0", " %i32 0", "%i32",
3972                                            "BitwiseAnd           %sc_0 %sc_1", 42, 63, addScToInput, outputInts1));
3973     cases.push_back(SpecConstantTwoValCase("bitwiseor", " %i32 0", " %i32 0", "%i32",
3974                                            "BitwiseOr            %sc_0 %sc_1", 34, 8, addScToInput, outputInts1));
3975     cases.push_back(SpecConstantTwoValCase("bitwisexor", " %i32 0", " %i32 0", "%i32",
3976                                            "BitwiseXor           %sc_0 %sc_1", 18, 56, addScToInput, outputInts1));
3977     cases.push_back(SpecConstantTwoValCase("shiftrightlogical", " %i32 0", " %i32 0", "%i32",
3978                                            "ShiftRightLogical    %sc_0 %sc_1", 168, 2, addScToInput, outputInts1));
3979     cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic", " %i32 0", " %i32 0", "%i32",
3980                                            "ShiftRightArithmetic %sc_0 %sc_1", -168, 2, addScToInput, outputInts5));
3981     cases.push_back(SpecConstantTwoValCase("shiftleftlogical", " %i32 0", " %i32 0", "%i32",
3982                                            "ShiftLeftLogical     %sc_0 %sc_1", 21, 1, addScToInput, outputInts1));
3983 
3984     // Shifts for other integer sizes.
3985     cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i64", " %i64 0", " %i64 0", "%i64",
3986                                            "ShiftRightLogical    %sc_0 %sc_1", int64_t{168}, int64_t{2}, addSc32ToInput,
3987                                            outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3988     cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i64", " %i64 0", " %i64 0", "%i64",
3989                                            "ShiftRightArithmetic %sc_0 %sc_1", int64_t{-168}, int64_t{2},
3990                                            addSc32ToInput, outputInts5, (FLAG_I64 | FLAG_CONVERT)));
3991     cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i64", " %i64 0", " %i64 0", "%i64",
3992                                            "ShiftLeftLogical     %sc_0 %sc_1", int64_t{21}, int64_t{1}, addSc32ToInput,
3993                                            outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3994     cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i16", " %i16 0", " %i16 0", "%i16",
3995                                            "ShiftRightLogical    %sc_0 %sc_1", int16_t{168}, int16_t{2}, addSc32ToInput,
3996                                            outputInts1, (FLAG_I16 | FLAG_CONVERT)));
3997     cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i16", " %i16 0", " %i16 0", "%i16",
3998                                            "ShiftRightArithmetic %sc_0 %sc_1", int16_t{-168}, int16_t{2},
3999                                            addSc32ToInput, outputInts5, (FLAG_I16 | FLAG_CONVERT)));
4000     cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i16", " %i16 0", " %i16 0", "%i16",
4001                                            "ShiftLeftLogical     %sc_0 %sc_1", int16_t{21}, int16_t{1}, addSc32ToInput,
4002                                            outputInts1, (FLAG_I16 | FLAG_CONVERT)));
4003     cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i8", " %i8 0", " %i8 0", "%i8",
4004                                            "ShiftRightLogical    %sc_0 %sc_1", int8_t{84}, int8_t{1}, addSc32ToInput,
4005                                            outputInts1, (FLAG_I8 | FLAG_CONVERT)));
4006     cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i8", " %i8 0", " %i8 0", "%i8",
4007                                            "ShiftRightArithmetic %sc_0 %sc_1", int8_t{-84}, int8_t{1}, addSc32ToInput,
4008                                            outputInts5, (FLAG_I8 | FLAG_CONVERT)));
4009     cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i8", " %i8 0", " %i8 0", "%i8",
4010                                            "ShiftLeftLogical     %sc_0 %sc_1", int8_t{21}, int8_t{1}, addSc32ToInput,
4011                                            outputInts1, (FLAG_I8 | FLAG_CONVERT)));
4012 
4013     // Shifts for other integer sizes but only in the shift amount.
4014     cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i64", " %i32 0", " %i64 0", "%i32",
4015                                            "ShiftRightLogical    %sc_0 %sc_1", 168, int64_t{2}, addScToInput,
4016                                            outputInts1, (FLAG_I64)));
4017     cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i64", " %i32 0", " %i64 0", "%i32",
4018                                            "ShiftRightArithmetic %sc_0 %sc_1", -168, int64_t{2}, addScToInput,
4019                                            outputInts5, (FLAG_I64)));
4020     cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i64", " %i32 0", " %i64 0", "%i32",
4021                                            "ShiftLeftLogical     %sc_0 %sc_1", 21, int64_t{1}, addScToInput,
4022                                            outputInts1, (FLAG_I64)));
4023     cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i16", " %i32 0", " %i16 0", "%i32",
4024                                            "ShiftRightLogical    %sc_0 %sc_1", 168, int16_t{2}, addScToInput,
4025                                            outputInts1, (FLAG_I16)));
4026     cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i16", " %i32 0", " %i16 0", "%i32",
4027                                            "ShiftRightArithmetic %sc_0 %sc_1", -168, int16_t{2}, addScToInput,
4028                                            outputInts5, (FLAG_I16)));
4029     cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i16", " %i32 0", " %i16 0", "%i32",
4030                                            "ShiftLeftLogical     %sc_0 %sc_1", 21, int16_t{1}, addScToInput,
4031                                            outputInts1, (FLAG_I16)));
4032     cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i8", " %i32 0", " %i8 0", "%i32",
4033                                            "ShiftRightLogical    %sc_0 %sc_1", 84, int8_t{1}, addScToInput, outputInts1,
4034                                            (FLAG_I8)));
4035     cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i8", " %i32 0", " %i8 0", "%i32",
4036                                            "ShiftRightArithmetic %sc_0 %sc_1", -84, int8_t{1}, addScToInput,
4037                                            outputInts5, (FLAG_I8)));
4038     cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i8", " %i32 0", " %i8 0", "%i32",
4039                                            "ShiftLeftLogical     %sc_0 %sc_1", 21, int8_t{1}, addScToInput, outputInts1,
4040                                            (FLAG_I8)));
4041 
4042     cases.push_back(SpecConstantTwoValCase("slessthan", " %i32 0", " %i32 0", "%bool",
4043                                            "SLessThan            %sc_0 %sc_1", -20, -10, selectTrueUsingSc,
4044                                            outputInts2));
4045     cases.push_back(SpecConstantTwoValCase("ulessthan", " %i32 0", " %i32 0", "%bool",
4046                                            "ULessThan            %sc_0 %sc_1", 10, 20, selectTrueUsingSc, outputInts2));
4047     cases.push_back(SpecConstantTwoValCase("sgreaterthan", " %i32 0", " %i32 0", "%bool",
4048                                            "SGreaterThan         %sc_0 %sc_1", -1000, 50, selectFalseUsingSc,
4049                                            outputInts2));
4050     cases.push_back(SpecConstantTwoValCase("ugreaterthan", " %i32 0", " %i32 0", "%bool",
4051                                            "UGreaterThan         %sc_0 %sc_1", 10, 5, selectTrueUsingSc, outputInts2));
4052     cases.push_back(SpecConstantTwoValCase("slessthanequal", " %i32 0", " %i32 0", "%bool",
4053                                            "SLessThanEqual       %sc_0 %sc_1", -10, -10, selectTrueUsingSc,
4054                                            outputInts2));
4055     cases.push_back(SpecConstantTwoValCase("ulessthanequal", " %i32 0", " %i32 0", "%bool",
4056                                            "ULessThanEqual       %sc_0 %sc_1", 50, 100, selectTrueUsingSc,
4057                                            outputInts2));
4058     cases.push_back(SpecConstantTwoValCase("sgreaterthanequal", " %i32 0", " %i32 0", "%bool",
4059                                            "SGreaterThanEqual    %sc_0 %sc_1", -1000, 50, selectFalseUsingSc,
4060                                            outputInts2));
4061     cases.push_back(SpecConstantTwoValCase("ugreaterthanequal", " %i32 0", " %i32 0", "%bool",
4062                                            "UGreaterThanEqual    %sc_0 %sc_1", 10, 10, selectTrueUsingSc, outputInts2));
4063     cases.push_back(SpecConstantTwoValCase("iequal", " %i32 0", " %i32 0", "%bool", "IEqual               %sc_0 %sc_1",
4064                                            42, 24, selectFalseUsingSc, outputInts2));
4065     cases.push_back(SpecConstantTwoValCase("inotequal", " %i32 0", " %i32 0", "%bool",
4066                                            "INotEqual            %sc_0 %sc_1", 42, 24, selectTrueUsingSc, outputInts2));
4067     cases.push_back(SpecConstantTwoValCase("logicaland", "True %bool", "True %bool", "%bool",
4068                                            "LogicalAnd           %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputInts2));
4069     cases.push_back(SpecConstantTwoValCase("logicalor", "False %bool", "False %bool", "%bool",
4070                                            "LogicalOr            %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputInts2));
4071     cases.push_back(SpecConstantTwoValCase("logicalequal", "True %bool", "True %bool", "%bool",
4072                                            "LogicalEqual         %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputInts2));
4073     cases.push_back(SpecConstantTwoValCase("logicalnotequal", "False %bool", "False %bool", "%bool",
4074                                            "LogicalNotEqual      %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputInts2));
4075     cases.push_back(SpecConstantTwoValCase("snegate", " %i32 0", " %i32 0", "%i32", "SNegate              %sc_0", -42,
4076                                            0, addScToInput, outputInts1));
4077     cases.push_back(SpecConstantTwoValCase("not", " %i32 0", " %i32 0", "%i32", "Not                  %sc_0", -43, 0,
4078                                            addScToInput, outputInts1));
4079     cases.push_back(SpecConstantTwoValCase("logicalnot", "False %bool", "False %bool", "%bool",
4080                                            "LogicalNot           %sc_0", 1, 0, selectFalseUsingSc, outputInts2));
4081     cases.push_back(SpecConstantTwoValCase("select", "False %bool", " %i32 0", "%i32",
4082                                            "Select               %sc_0 %sc_1 %zero", 1, 42, addScToInput, outputInts1));
4083     cases.push_back(SpecConstantTwoValCase("sconvert", " %i32 0", " %i32 0", "%i16", "SConvert             %sc_0",
4084                                            -11200, 0, addSc32ToInput, outputInts3, (FLAG_I16 | FLAG_CONVERT)));
4085     cases.push_back(SpecConstantTwoValCase("fconvert", " %f32 0", " %f32 0", "%f64", "FConvert             %sc_0",
4086                                            tcu::Float32{-11200.0}, tcu::Float32{0.0}, addSc32ToInput, outputInts3,
4087                                            (FLAG_F64 | FLAG_CONVERT)));
4088     cases.push_back(SpecConstantTwoValCase("fconvert16", " %f16 0", " %f16 0", "%f32", "FConvert             %sc_0",
4089                                            tcu::Float16{1.0}, tcu::Float16{0.0}, addSc32ToInput, outputInts4,
4090                                            (FLAG_F16 | FLAG_CONVERT)));
4091 
4092     for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
4093     {
4094         map<string, string> specializations;
4095         ComputeShaderSpec spec;
4096 
4097         specializations["SC_DEF0"]        = cases[caseNdx].scDefinition0;
4098         specializations["SC_DEF1"]        = cases[caseNdx].scDefinition1;
4099         specializations["SC_RESULT_TYPE"] = cases[caseNdx].scResultType;
4100         specializations["SC_OP"]          = cases[caseNdx].scOperation;
4101         specializations["GEN_RESULT"]     = cases[caseNdx].resultOperation;
4102 
4103         // Special SPIR-V code when using 16-bit integers.
4104         if (cases[caseNdx].caseFlags & FLAG_I16)
4105         {
4106             spec.requestedVulkanFeatures.coreFeatures.shaderInt16 = VK_TRUE;
4107             specializations["CAPABILITIES"] += "OpCapability Int16\n";          // Adds 16-bit integer capability
4108             specializations["OPTYPE_DEFINITIONS"] += "%i16 = OpTypeInt 16 1\n"; // Adds 16-bit integer type
4109             if (cases[caseNdx].caseFlags & FLAG_CONVERT)
4110                 specializations["TYPE_CONVERT"] +=
4111                     "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 16-bit integer to 32-bit integer
4112         }
4113 
4114         // Special SPIR-V code when using 64-bit integers.
4115         if (cases[caseNdx].caseFlags & FLAG_I64)
4116         {
4117             spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
4118             specializations["CAPABILITIES"] += "OpCapability Int64\n";          // Adds 64-bit integer capability
4119             specializations["OPTYPE_DEFINITIONS"] += "%i64 = OpTypeInt 64 1\n"; // Adds 64-bit integer type
4120             if (cases[caseNdx].caseFlags & FLAG_CONVERT)
4121                 specializations["TYPE_CONVERT"] +=
4122                     "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 64-bit integer to 32-bit integer
4123         }
4124 
4125         // Special SPIR-V code when using 64-bit floats.
4126         if (cases[caseNdx].caseFlags & FLAG_F64)
4127         {
4128             spec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
4129             specializations["CAPABILITIES"] += "OpCapability Float64\n";        // Adds 64-bit float capability
4130             specializations["OPTYPE_DEFINITIONS"] += "%f64 = OpTypeFloat 64\n"; // Adds 64-bit float type
4131             if (cases[caseNdx].caseFlags & FLAG_CONVERT)
4132                 specializations["TYPE_CONVERT"] +=
4133                     "%sc_final32 = OpConvertFToS %i32 %sc_final\n"; // Converts 64-bit float to 32-bit integer
4134         }
4135 
4136         // Extension needed for float16 and int8.
4137         if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
4138             spec.extensions.push_back("VK_KHR_shader_float16_int8");
4139 
4140         // Special SPIR-V code when using 16-bit floats.
4141         if (cases[caseNdx].caseFlags & FLAG_F16)
4142         {
4143             spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
4144             specializations["CAPABILITIES"] += "OpCapability Float16\n";        // Adds 16-bit float capability
4145             specializations["OPTYPE_DEFINITIONS"] += "%f16 = OpTypeFloat 16\n"; // Adds 16-bit float type
4146             if (cases[caseNdx].caseFlags & FLAG_CONVERT)
4147                 specializations["TYPE_CONVERT"] +=
4148                     "%sc_final32 = OpConvertFToS %i32 %sc_final\n"; // Converts 16-bit float to 32-bit integer
4149         }
4150 
4151         // Special SPIR-V code when using 8-bit integers.
4152         if (cases[caseNdx].caseFlags & FLAG_I8)
4153         {
4154             spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
4155             specializations["CAPABILITIES"] += "OpCapability Int8\n";         // Adds 8-bit integer capability
4156             specializations["OPTYPE_DEFINITIONS"] += "%i8 = OpTypeInt 8 1\n"; // Adds 8-bit integer type
4157             if (cases[caseNdx].caseFlags & FLAG_CONVERT)
4158                 specializations["TYPE_CONVERT"] +=
4159                     "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 8-bit integer to 32-bit integer
4160         }
4161 
4162         spec.assembly = shaderTemplate.specialize(specializations);
4163         spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
4164         spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
4165         spec.numWorkGroups = IVec3(numElements, 1, 1);
4166         cases[caseNdx].scActualValue0.appendTo(spec.specConstants);
4167         cases[caseNdx].scActualValue1.appendTo(spec.specConstants);
4168 
4169         group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName.c_str(), spec));
4170     }
4171 
4172     ComputeShaderSpec spec;
4173 
4174     spec.assembly =
4175         string(getComputeAsmShaderPreamble()) +
4176 
4177         "OpName %main           \"main\"\n"
4178         "OpName %id             \"gl_GlobalInvocationID\"\n"
4179 
4180         "OpDecorate %id BuiltIn GlobalInvocationId\n"
4181         "OpDecorate %sc_0  SpecId 0\n"
4182         "OpDecorate %sc_1  SpecId 1\n"
4183         "OpDecorate %sc_2  SpecId 2\n"
4184         "OpDecorate %i32arr ArrayStride 4\n"
4185 
4186         + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4187 
4188         "%ivec3       = OpTypeVector %i32 3\n"
4189 
4190         + getSpecConstantOpStructConstantsAndTypes() +
4191 
4192         "%buf         = OpTypeStruct %i32arr\n"
4193         "%bufptr      = OpTypePointer Uniform %buf\n"
4194         "%indata      = OpVariable %bufptr Uniform\n"
4195         "%outdata     = OpVariable %bufptr Uniform\n"
4196 
4197         "%id          = OpVariable %uvec3ptr Input\n"
4198         "%ivec3_0     = OpConstantComposite %ivec3 %zero %zero %zero\n"
4199         "%vec3_undef  = OpUndef %ivec3\n"
4200 
4201         + getSpecConstantOpStructComposites() +
4202 
4203         "%sc_0        = OpSpecConstant %i32 0\n"
4204         "%sc_1        = OpSpecConstant %i32 0\n"
4205         "%sc_2        = OpSpecConstant %i32 0\n"
4206 
4207         + getSpecConstantOpStructConstBlock() +
4208 
4209         "%sc_vec3_0   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_0        %ivec3_0     0\n" // (sc_0, 0, 0)
4210         "%sc_vec3_1   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_1        %ivec3_0     1\n" // (0, sc_1, 0)
4211         "%sc_vec3_2   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_2        %ivec3_0     2\n" // (0, 0, sc_2)
4212         "%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0   %vec3_undef  0          "
4213         "0xFFFFFFFF 2\n" // (sc_0, ???,  0)
4214         "%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_1   %vec3_undef  0xFFFFFFFF 1    "
4215         "      0\n" // (???,  sc_1, 0)
4216         "%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle    %vec3_undef  %sc_vec3_2   5          "
4217         "0xFFFFFFFF 5\n" // (sc_2, ???,  sc_2)
4218         "%sc_vec3_01  = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n" // (0,    sc_0, sc_1)
4219         "%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_01  %sc_vec3_2_s 5 1 2\n" // (sc_2, sc_0, sc_1)
4220         "%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              0\n"     // sc_2
4221         "%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              1\n"     // sc_0
4222         "%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              2\n"     // sc_1
4223         "%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"          // (sc_2 - sc_0)
4224         "%sc_factor   = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n" // (sc_2 - sc_0) * sc_1
4225 
4226         "%main      = OpFunction %void None %voidf\n"
4227         "%label     = OpLabel\n"
4228 
4229         + getSpecConstantOpStructInstructions() +
4230 
4231         "%idval     = OpLoad %uvec3 %id\n"
4232         "%x         = OpCompositeExtract %u32 %idval 0\n"
4233         "%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
4234         "%inval     = OpLoad %i32 %inloc\n"
4235         "%final     = OpIAdd %i32 %inval %sc_final\n"
4236         "%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
4237         "             OpStore %outloc %final\n"
4238         "             OpReturn\n"
4239         "             OpFunctionEnd\n";
4240     spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
4241     spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts3)));
4242     spec.numWorkGroups = IVec3(numElements, 1, 1);
4243     spec.specConstants.append<int32_t>(123);
4244     spec.specConstants.append<int32_t>(56);
4245     spec.specConstants.append<int32_t>(-77);
4246 
4247     group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", spec));
4248 
4249     return group.release();
4250 }
4251 
createOpPhiVartypeTests(de::MovePtr<tcu::TestCaseGroup> & group,tcu::TestContext & testCtx)4252 void createOpPhiVartypeTests(de::MovePtr<tcu::TestCaseGroup> &group, tcu::TestContext &testCtx)
4253 {
4254     ComputeShaderSpec specInt;
4255     ComputeShaderSpec specFloat;
4256     ComputeShaderSpec specFloat16;
4257     ComputeShaderSpec specVec3;
4258     ComputeShaderSpec specMat4;
4259     ComputeShaderSpec specArray;
4260     ComputeShaderSpec specStruct;
4261     de::Random rnd(deStringHash(group->getName()));
4262     const int numElements = 100;
4263     vector<float> inputFloats(numElements, 0);
4264     vector<float> outputFloats(numElements, 0);
4265     vector<uint32_t> inputUints(numElements, 0);
4266     vector<uint32_t> outputUints(numElements, 0);
4267 
4268     fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4269 
4270     // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4271     floorAll(inputFloats);
4272 
4273     for (size_t ndx = 0; ndx < numElements; ++ndx)
4274     {
4275         // Just check if the value is positive or not
4276         outputFloats[ndx] = (inputFloats[ndx] > 0) ? 1.0f : -1.0f;
4277     }
4278 
4279     for (size_t ndx = 0; ndx < numElements; ++ndx)
4280     {
4281         inputUints[ndx]  = tcu::Float16(inputFloats[ndx]).bits();
4282         outputUints[ndx] = tcu::Float16(outputFloats[ndx]).bits();
4283     }
4284 
4285     // All of the tests are of the form:
4286     //
4287     // testtype r
4288     //
4289     // if (inputdata > 0)
4290     //   r = 1
4291     // else
4292     //   r = -1
4293     //
4294     // return (float)r
4295 
4296     specFloat.assembly = string(getComputeAsmShaderPreamble()) +
4297 
4298                          "OpSource GLSL 430\n"
4299                          "OpName %main \"main\"\n"
4300                          "OpName %id \"gl_GlobalInvocationID\"\n"
4301 
4302                          "OpDecorate %id BuiltIn GlobalInvocationId\n"
4303 
4304                          + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4305                          string(getComputeAsmInputOutputBuffer()) +
4306 
4307                          "%id = OpVariable %uvec3ptr Input\n"
4308                          "%zero       = OpConstant %i32 0\n"
4309                          "%float_0    = OpConstant %f32 0.0\n"
4310                          "%float_1    = OpConstant %f32 1.0\n"
4311                          "%float_n1   = OpConstant %f32 -1.0\n"
4312 
4313                          "%main     = OpFunction %void None %voidf\n"
4314                          "%entry    = OpLabel\n"
4315                          "%idval    = OpLoad %uvec3 %id\n"
4316                          "%x        = OpCompositeExtract %u32 %idval 0\n"
4317                          "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4318                          "%inval    = OpLoad %f32 %inloc\n"
4319 
4320                          "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4321                          "            OpSelectionMerge %cm None\n"
4322                          "            OpBranchConditional %comp %tb %fb\n"
4323                          "%tb       = OpLabel\n"
4324                          "            OpBranch %cm\n"
4325                          "%fb       = OpLabel\n"
4326                          "            OpBranch %cm\n"
4327                          "%cm       = OpLabel\n"
4328                          "%res      = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4329 
4330                          "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4331                          "            OpStore %outloc %res\n"
4332                          "            OpReturn\n"
4333 
4334                          "            OpFunctionEnd\n";
4335     specFloat.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4336     specFloat.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4337     specFloat.numWorkGroups = IVec3(numElements, 1, 1);
4338 
4339     specFloat16.assembly = "OpCapability Shader\n"
4340                            "OpCapability Float16\n"
4341                            "OpMemoryModel Logical GLSL450\n"
4342                            "OpEntryPoint GLCompute %main \"main\" %id\n"
4343                            "OpExecutionMode %main LocalSize 1 1 1\n"
4344 
4345                            "OpSource GLSL 430\n"
4346                            "OpName %main \"main\"\n"
4347                            "OpName %id \"gl_GlobalInvocationID\"\n"
4348 
4349                            "OpDecorate %id BuiltIn GlobalInvocationId\n"
4350 
4351                            "OpDecorate %buf BufferBlock\n"
4352                            "OpDecorate %indata DescriptorSet 0\n"
4353                            "OpDecorate %indata Binding 0\n"
4354                            "OpDecorate %outdata DescriptorSet 0\n"
4355                            "OpDecorate %outdata Binding 1\n"
4356                            "OpDecorate %u32arr ArrayStride 4\n"
4357                            "OpMemberDecorate %buf 0 Offset 0\n"
4358 
4359                            + string(getComputeAsmCommonTypes()) +
4360 
4361                            "%f16      = OpTypeFloat 16\n"
4362                            "%f16vec2  = OpTypeVector %f16 2\n"
4363                            "%fvec2    = OpTypeVector %f32 2\n"
4364                            "%u32ptr   = OpTypePointer Uniform %u32\n"
4365                            "%u32arr   = OpTypeRuntimeArray %u32\n"
4366                            "%f16_0    = OpConstant %f16 0.0\n"
4367 
4368                            "%buf      = OpTypeStruct %u32arr\n"
4369                            "%bufptr   = OpTypePointer Uniform %buf\n"
4370                            "%indata   = OpVariable %bufptr Uniform\n"
4371                            "%outdata  = OpVariable %bufptr Uniform\n"
4372 
4373                            "%id       = OpVariable %uvec3ptr Input\n"
4374                            "%zero     = OpConstant %i32 0\n"
4375                            "%float_0  = OpConstant %f32 0.0\n"
4376                            "%float_1  = OpConstant %f32 1.0\n"
4377                            "%float_n1 = OpConstant %f32 -1.0\n"
4378 
4379                            "%main     = OpFunction %void None %voidf\n"
4380                            "%entry    = OpLabel\n"
4381                            "%idval    = OpLoad %uvec3 %id\n"
4382                            "%x        = OpCompositeExtract %u32 %idval 0\n"
4383                            "%inloc    = OpAccessChain %u32ptr %indata %zero %x\n"
4384                            "%inval    = OpLoad %u32 %inloc\n"
4385                            "%f16_vec2_inval = OpBitcast %f16vec2 %inval\n"
4386                            "%f16_inval = OpCompositeExtract %f16 %f16_vec2_inval 0\n"
4387                            "%f32_inval = OpFConvert %f32 %f16_inval\n"
4388 
4389                            "%comp     = OpFOrdGreaterThan %bool %f32_inval %float_0\n"
4390                            "            OpSelectionMerge %cm None\n"
4391                            "            OpBranchConditional %comp %tb %fb\n"
4392                            "%tb       = OpLabel\n"
4393                            "            OpBranch %cm\n"
4394                            "%fb       = OpLabel\n"
4395                            "            OpBranch %cm\n"
4396                            "%cm       = OpLabel\n"
4397                            "%res      = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4398                            "%f16_res  = OpFConvert %f16 %res\n"
4399 
4400                            "%f16vec2_res = OpCompositeConstruct %f16vec2 %f16_res %f16_0\n"
4401                            "%u32_res  = OpBitcast %u32 %f16vec2_res\n"
4402 
4403                            "%outloc   = OpAccessChain %u32ptr %outdata %zero %x\n"
4404                            "            OpStore %outloc %u32_res\n"
4405                            "            OpReturn\n"
4406 
4407                            "            OpFunctionEnd\n";
4408 
4409     specFloat16.inputs.push_back(BufferSp(new Uint32Buffer(inputUints)));
4410     specFloat16.outputs.push_back(BufferSp(new Uint32Buffer(outputUints)));
4411     specFloat16.numWorkGroups                                        = IVec3(numElements, 1, 1);
4412     specFloat16.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
4413 
4414     specMat4.assembly = string(getComputeAsmShaderPreamble()) +
4415 
4416                         "OpSource GLSL 430\n"
4417                         "OpName %main \"main\"\n"
4418                         "OpName %id \"gl_GlobalInvocationID\"\n"
4419 
4420                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
4421 
4422                         + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4423                         string(getComputeAsmInputOutputBuffer()) +
4424 
4425                         "%id = OpVariable %uvec3ptr Input\n"
4426                         "%v4f32      = OpTypeVector %f32 4\n"
4427                         "%mat4v4f32  = OpTypeMatrix %v4f32 4\n"
4428                         "%zero       = OpConstant %i32 0\n"
4429                         "%float_0    = OpConstant %f32 0.0\n"
4430                         "%float_1    = OpConstant %f32 1.0\n"
4431                         "%float_n1   = OpConstant %f32 -1.0\n"
4432                         "%m11        = OpConstantComposite %v4f32 %float_1 %float_0 %float_0 %float_0\n"
4433                         "%m12        = OpConstantComposite %v4f32 %float_0 %float_1 %float_0 %float_0\n"
4434                         "%m13        = OpConstantComposite %v4f32 %float_0 %float_0 %float_1 %float_0\n"
4435                         "%m14        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_1\n"
4436                         "%m1         = OpConstantComposite %mat4v4f32 %m11 %m12 %m13 %m14\n"
4437                         "%m21        = OpConstantComposite %v4f32 %float_n1 %float_0 %float_0 %float_0\n"
4438                         "%m22        = OpConstantComposite %v4f32 %float_0 %float_n1 %float_0 %float_0\n"
4439                         "%m23        = OpConstantComposite %v4f32 %float_0 %float_0 %float_n1 %float_0\n"
4440                         "%m24        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_n1\n"
4441                         "%m2         = OpConstantComposite %mat4v4f32 %m21 %m22 %m23 %m24\n"
4442 
4443                         "%main     = OpFunction %void None %voidf\n"
4444                         "%entry    = OpLabel\n"
4445                         "%idval    = OpLoad %uvec3 %id\n"
4446                         "%x        = OpCompositeExtract %u32 %idval 0\n"
4447                         "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4448                         "%inval    = OpLoad %f32 %inloc\n"
4449 
4450                         "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4451                         "            OpSelectionMerge %cm None\n"
4452                         "            OpBranchConditional %comp %tb %fb\n"
4453                         "%tb       = OpLabel\n"
4454                         "            OpBranch %cm\n"
4455                         "%fb       = OpLabel\n"
4456                         "            OpBranch %cm\n"
4457                         "%cm       = OpLabel\n"
4458                         "%mres     = OpPhi %mat4v4f32 %m1 %tb %m2 %fb\n"
4459                         "%res      = OpCompositeExtract %f32 %mres 2 2\n"
4460 
4461                         "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4462                         "            OpStore %outloc %res\n"
4463                         "            OpReturn\n"
4464 
4465                         "            OpFunctionEnd\n";
4466     specMat4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4467     specMat4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4468     specMat4.numWorkGroups = IVec3(numElements, 1, 1);
4469 
4470     specVec3.assembly = string(getComputeAsmShaderPreamble()) +
4471 
4472                         "OpSource GLSL 430\n"
4473                         "OpName %main \"main\"\n"
4474                         "OpName %id \"gl_GlobalInvocationID\"\n"
4475 
4476                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
4477 
4478                         + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4479                         string(getComputeAsmInputOutputBuffer()) +
4480 
4481                         "%id = OpVariable %uvec3ptr Input\n"
4482                         "%zero       = OpConstant %i32 0\n"
4483                         "%float_0    = OpConstant %f32 0.0\n"
4484                         "%float_1    = OpConstant %f32 1.0\n"
4485                         "%float_n1   = OpConstant %f32 -1.0\n"
4486                         "%v1         = OpConstantComposite %fvec3 %float_1 %float_1 %float_1\n"
4487                         "%v2         = OpConstantComposite %fvec3 %float_n1 %float_n1 %float_n1\n"
4488 
4489                         "%main     = OpFunction %void None %voidf\n"
4490                         "%entry    = OpLabel\n"
4491                         "%idval    = OpLoad %uvec3 %id\n"
4492                         "%x        = OpCompositeExtract %u32 %idval 0\n"
4493                         "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4494                         "%inval    = OpLoad %f32 %inloc\n"
4495 
4496                         "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4497                         "            OpSelectionMerge %cm None\n"
4498                         "            OpBranchConditional %comp %tb %fb\n"
4499                         "%tb       = OpLabel\n"
4500                         "            OpBranch %cm\n"
4501                         "%fb       = OpLabel\n"
4502                         "            OpBranch %cm\n"
4503                         "%cm       = OpLabel\n"
4504                         "%vres     = OpPhi %fvec3 %v1 %tb %v2 %fb\n"
4505                         "%res      = OpCompositeExtract %f32 %vres 2\n"
4506 
4507                         "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4508                         "            OpStore %outloc %res\n"
4509                         "            OpReturn\n"
4510 
4511                         "            OpFunctionEnd\n";
4512     specVec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4513     specVec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4514     specVec3.numWorkGroups = IVec3(numElements, 1, 1);
4515 
4516     specInt.assembly = string(getComputeAsmShaderPreamble()) +
4517 
4518                        "OpSource GLSL 430\n"
4519                        "OpName %main \"main\"\n"
4520                        "OpName %id \"gl_GlobalInvocationID\"\n"
4521 
4522                        "OpDecorate %id BuiltIn GlobalInvocationId\n"
4523 
4524                        + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4525                        string(getComputeAsmInputOutputBuffer()) +
4526 
4527                        "%id = OpVariable %uvec3ptr Input\n"
4528                        "%zero       = OpConstant %i32 0\n"
4529                        "%float_0    = OpConstant %f32 0.0\n"
4530                        "%i1         = OpConstant %i32 1\n"
4531                        "%i2         = OpConstant %i32 -1\n"
4532 
4533                        "%main     = OpFunction %void None %voidf\n"
4534                        "%entry    = OpLabel\n"
4535                        "%idval    = OpLoad %uvec3 %id\n"
4536                        "%x        = OpCompositeExtract %u32 %idval 0\n"
4537                        "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4538                        "%inval    = OpLoad %f32 %inloc\n"
4539 
4540                        "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4541                        "            OpSelectionMerge %cm None\n"
4542                        "            OpBranchConditional %comp %tb %fb\n"
4543                        "%tb       = OpLabel\n"
4544                        "            OpBranch %cm\n"
4545                        "%fb       = OpLabel\n"
4546                        "            OpBranch %cm\n"
4547                        "%cm       = OpLabel\n"
4548                        "%ires     = OpPhi %i32 %i1 %tb %i2 %fb\n"
4549                        "%res      = OpConvertSToF %f32 %ires\n"
4550 
4551                        "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4552                        "            OpStore %outloc %res\n"
4553                        "            OpReturn\n"
4554 
4555                        "            OpFunctionEnd\n";
4556     specInt.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4557     specInt.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4558     specInt.numWorkGroups = IVec3(numElements, 1, 1);
4559 
4560     specArray.assembly =
4561         string(getComputeAsmShaderPreamble()) +
4562 
4563         "OpSource GLSL 430\n"
4564         "OpName %main \"main\"\n"
4565         "OpName %id \"gl_GlobalInvocationID\"\n"
4566 
4567         "OpDecorate %id BuiltIn GlobalInvocationId\n"
4568 
4569         + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4570         string(getComputeAsmInputOutputBuffer()) +
4571 
4572         "%id = OpVariable %uvec3ptr Input\n"
4573         "%zero       = OpConstant %i32 0\n"
4574         "%u7         = OpConstant %u32 7\n"
4575         "%float_0    = OpConstant %f32 0.0\n"
4576         "%float_1    = OpConstant %f32 1.0\n"
4577         "%float_n1   = OpConstant %f32 -1.0\n"
4578         "%f32a7      = OpTypeArray %f32 %u7\n"
4579         "%a1         = OpConstantComposite %f32a7 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1\n"
4580         "%a2         = OpConstantComposite %f32a7 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 "
4581         "%float_n1\n"
4582         "%main     = OpFunction %void None %voidf\n"
4583         "%entry    = OpLabel\n"
4584         "%idval    = OpLoad %uvec3 %id\n"
4585         "%x        = OpCompositeExtract %u32 %idval 0\n"
4586         "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4587         "%inval    = OpLoad %f32 %inloc\n"
4588 
4589         "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4590         "            OpSelectionMerge %cm None\n"
4591         "            OpBranchConditional %comp %tb %fb\n"
4592         "%tb       = OpLabel\n"
4593         "            OpBranch %cm\n"
4594         "%fb       = OpLabel\n"
4595         "            OpBranch %cm\n"
4596         "%cm       = OpLabel\n"
4597         "%ares     = OpPhi %f32a7 %a1 %tb %a2 %fb\n"
4598         "%res      = OpCompositeExtract %f32 %ares 5\n"
4599 
4600         "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4601         "            OpStore %outloc %res\n"
4602         "            OpReturn\n"
4603 
4604         "            OpFunctionEnd\n";
4605     specArray.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4606     specArray.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4607     specArray.numWorkGroups = IVec3(numElements, 1, 1);
4608 
4609     specStruct.assembly = string(getComputeAsmShaderPreamble()) +
4610 
4611                           "OpSource GLSL 430\n"
4612                           "OpName %main \"main\"\n"
4613                           "OpName %id \"gl_GlobalInvocationID\"\n"
4614 
4615                           "OpDecorate %id BuiltIn GlobalInvocationId\n"
4616 
4617                           + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4618                           string(getComputeAsmInputOutputBuffer()) +
4619 
4620                           "%id = OpVariable %uvec3ptr Input\n"
4621                           "%zero       = OpConstant %i32 0\n"
4622                           "%float_0    = OpConstant %f32 0.0\n"
4623                           "%float_1    = OpConstant %f32 1.0\n"
4624                           "%float_n1   = OpConstant %f32 -1.0\n"
4625 
4626                           "%v2f32      = OpTypeVector %f32 2\n"
4627                           "%Data2      = OpTypeStruct %f32 %v2f32\n"
4628                           "%Data       = OpTypeStruct %Data2 %f32\n"
4629 
4630                           "%in1a       = OpConstantComposite %v2f32 %float_1 %float_1\n"
4631                           "%in1b       = OpConstantComposite %Data2 %float_1 %in1a\n"
4632                           "%s1         = OpConstantComposite %Data %in1b %float_1\n"
4633                           "%in2a       = OpConstantComposite %v2f32 %float_n1 %float_n1\n"
4634                           "%in2b       = OpConstantComposite %Data2 %float_n1 %in2a\n"
4635                           "%s2         = OpConstantComposite %Data %in2b %float_n1\n"
4636 
4637                           "%main     = OpFunction %void None %voidf\n"
4638                           "%entry    = OpLabel\n"
4639                           "%idval    = OpLoad %uvec3 %id\n"
4640                           "%x        = OpCompositeExtract %u32 %idval 0\n"
4641                           "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4642                           "%inval    = OpLoad %f32 %inloc\n"
4643 
4644                           "%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4645                           "            OpSelectionMerge %cm None\n"
4646                           "            OpBranchConditional %comp %tb %fb\n"
4647                           "%tb       = OpLabel\n"
4648                           "            OpBranch %cm\n"
4649                           "%fb       = OpLabel\n"
4650                           "            OpBranch %cm\n"
4651                           "%cm       = OpLabel\n"
4652                           "%sres     = OpPhi %Data %s1 %tb %s2 %fb\n"
4653                           "%res      = OpCompositeExtract %f32 %sres 0 0\n"
4654 
4655                           "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4656                           "            OpStore %outloc %res\n"
4657                           "            OpReturn\n"
4658 
4659                           "            OpFunctionEnd\n";
4660     specStruct.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4661     specStruct.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4662     specStruct.numWorkGroups = IVec3(numElements, 1, 1);
4663 
4664     group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_int", specInt));
4665     group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float", specFloat));
4666     group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float16", specFloat16));
4667     group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_vec3", specVec3));
4668     group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_mat4", specMat4));
4669     group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_array", specArray));
4670     group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_struct", specStruct));
4671 }
4672 
// Returns SPIR-V assembly defining `count` %f32 constants named %cf5, %cf15,
// %cf25, ... whose values equal the number in their name (5.0, 15.0, ...).
// The definitions are followed by one empty line; a non-positive count
// yields just that empty line.
string generateConstantDefinitions(int count)
{
    std::ostringstream defs;
    for (int idx = 0; idx < count; ++idx)
    {
        const int value = idx * 10 + 5;
        defs << "%cf" << value << " = OpConstant %f32 " << value << ".0\n";
    }
    defs << "\n";
    return defs.str();
}
4681 
// Returns the "<literal> <label>" operand list for an OpSwitch with `count`
// cases: " 0 %case0 1 %case1 ..." terminated by a newline. A non-positive
// count yields only the newline.
string generateSwitchCases(int count)
{
    std::ostringstream operands;
    int literal = 0;
    while (literal < count)
    {
        operands << " " << literal << " %case" << literal;
        ++literal;
    }
    operands << "\n";
    return operands.str();
}
4690 
// Returns the target blocks for `count` switch cases: each block is a
// %case<N> label that branches straight to %phi. The blocks are followed by
// one empty line; a non-positive count yields just that empty line.
string generateSwitchTargets(int count)
{
    std::ostringstream blocks;
    int caseNdx = 0;
    while (caseNdx < count)
    {
        blocks << "%case" << caseNdx << " = OpLabel\n            OpBranch %phi\n";
        ++caseNdx;
    }
    blocks << "\n";
    return blocks.str();
}
4699 
// Returns the "<value> <parent block>" operand pairs for an OpPhi with
// `count` incoming edges: " %cf5 %case0 %cf15 %case1 ..." terminated by a
// newline. A non-positive count yields only the newline.
string generateOpPhiParams(int count)
{
    std::ostringstream operands;
    for (int edge = 0; edge < count; ++edge)
    {
        const int constantId = edge * 10 + 5;
        operands << " %cf" << constantId << " %case" << edge;
    }
    operands << "\n";
    return operands.str();
}
4708 
// Formats `value` as decimal text (no padding, no trailing newline).
string generateIntWidth(int value)
{
    std::ostringstream text;
    text << value;
    const string formatted = text.str();
    return formatted;
}
4715 
// Returns a copy of `s` with "ABC" injected after some of its characters.
//
// For every character copied, `add` is added to the running accumulator
// `acc`; whenever the accumulator then exceeds `treshold` (the threshold), the
// threshold is subtracted from it and "ABC" is appended. This skips some of
// the injections, making the result less uniform (and a lot shorter).
// `acc` is updated in place, so its state carries over to the next call.
// Processing stops at the first NUL character in `s`.
string expandOpPhiCase5(const string &s, int &acc, int add, int treshold)
{
    string expanded;

    for (const char *cursor = s.c_str(); *cursor; ++cursor)
    {
        expanded += *cursor;
        acc += add;
        if (acc <= treshold)
            continue; // not enough accumulated yet: no injection here
        acc -= treshold;
        expanded += "ABC";
    }
    return expanded;
}
4738 
4739 // Calculate expected result based on the code string
calcOpPhiCase5(float val,const string & s)4740 float calcOpPhiCase5(float val, const string &s)
4741 {
4742     const char *p = s.c_str();
4743     float x[8];
4744     bool b[8];
4745     const float tv[8] = {0.5f, 1.5f, 3.5f, 7.5f, 15.5f, 31.5f, 63.5f, 127.5f};
4746     const float v     = deFloatAbs(val);
4747     float res         = 0;
4748     int depth         = -1;
4749     int skip          = 0;
4750 
4751     for (int i = 7; i >= 0; --i)
4752         x[i] = std::fmod((float)v, (float)(2 << i));
4753     for (int i = 7; i >= 0; --i)
4754         b[i] = x[i] > tv[i];
4755 
4756     while (*p)
4757     {
4758         if (*p == 'A')
4759         {
4760             depth++;
4761             if (skip == 0 && b[depth])
4762             {
4763                 res++;
4764             }
4765             else
4766                 skip++;
4767         }
4768         if (*p == 'B')
4769         {
4770             if (skip)
4771                 skip--;
4772             if (b[depth] || skip)
4773                 skip++;
4774         }
4775         if (*p == 'C')
4776         {
4777             depth--;
4778             if (skip)
4779                 skip--;
4780         }
4781         p++;
4782     }
4783     return res;
4784 }
4785 
4786 // In the code string, the letters represent the following:
4787 //
4788 // A:
4789 //     if (certain bit is set)
4790 //     {
4791 //       result++;
4792 //
4793 // B:
4794 //     } else {
4795 //
4796 // C:
4797 //     }
4798 //
4799 // examples:
4800 // AABCBC leads to if(){r++;if(){r++;}else{}}else{}
4801 // ABABCC leads to if(){r++;}else{if(){r++;}else{}}
4802 // ABCABC leads to if(){r++;}else{}if(){r++;}else{}
4803 //
4804 // Code generation gets a bit complicated due to the else-branches,
4805 // which do not generate new values. Thus, the generator needs to
4806 // keep track of the previous variable change seen by the else
4807 // branch.
string generateOpPhiCase5(const string &s)
{
    // Translates the code string into SPIR-V assembly text. Each 'A' opens a selection
    // whose id is the position of that 'A' in the string; the id is used to name the
    // blocks %t<id>, %f<id>, %m<id> and the results %rt<id>, %rm<id>. The OpPhi emitted
    // for the final character of the string is named %res instead of %rm<id>.
    // The string is read up to its first NUL and is expected to be well formed: an
    // unmatched 'B' or 'C' reads from an empty stack.
    std::stack<int> openIds;             // ids of the constructs currently open; -1 marks "none"
    std::stack<std::string> latestValue; // most recent result id visible on each open level
    std::stack<std::string> latestPair;  // the same id followed by the label it arrives from
    std::stack<std::string> thenOperand; // OpPhi operand pair captured when a then-block ends
    std::stack<std::string> elseOperand; // OpPhi operand pair captured when an else-block ends
    std::ostringstream assembly;
    int nesting  = -1;
    int activeId = 0;
    int position = 0;

    openIds.push(-1);
    latestValue.push("%f32_0");
    latestPair.push("%f32_0 %entry");

    for (const char *cursor = s.c_str(); *cursor != '\0'; ++cursor, ++position)
    {
        switch (*cursor)
        {
        case 'A':
        {
            nesting++;
            activeId = position;
            openIds.push(activeId);

            assembly << "\tOpSelectionMerge %m" << activeId << " None\n"
                     << "\tOpBranchConditional %b" << nesting << " %t" << activeId << " %f" << activeId << "\n"
                     << "%t" << activeId << " = OpLabel\n"
                     << "%rt" << activeId << " = OpFAdd %f32 " << latestValue.top() << " %f32_1\n";

            // The incremented value becomes the current one inside the then-block.
            std::ostringstream name;
            name << "%rt" << activeId;
            latestValue.push(name.str());
            name << " %t" << activeId;
            latestPair.push(name.str());
            break;
        }

        case 'B':
        {
            // Remember what the then-block contributes to the merge, then drop back to
            // the value that was current before the 'A'.
            thenOperand.push(latestPair.top());
            latestValue.pop();
            latestPair.pop();

            assembly << "\tOpBranch %m" << activeId << "\n"
                     << "%f" << activeId << " = OpLabel\n";

            // The else-block produces no new value; it forwards the previous one under
            // its own label.
            std::ostringstream operand;
            operand << latestValue.top() << " %f" << activeId;
            latestPair.top() = operand.str();
            break;
        }

        case 'C':
        {
            elseOperand.push(latestPair.top());

            assembly << "\tOpBranch %m" << activeId << "\n"
                     << "%m" << activeId << " = OpLabel\n";
            if (cursor[1] == '\0')
                assembly << "%res"; // last result goes to %res
            else
                assembly << "%rm" << activeId;
            assembly << " = OpPhi %f32  " << thenOperand.top() << "  " << elseOperand.top() << "\n";

            // The merged value replaces the current value of the enclosing level.
            std::ostringstream name;
            name << "%rm" << activeId;
            latestValue.top() = name.str();
            name << " %m" << activeId;
            latestPair.top() = name.str();

            thenOperand.pop();
            elseOperand.pop();
            nesting--;
            openIds.pop();
            activeId = openIds.top();
            break;
        }

        default:
            break;
        }
    }

    return assembly.str();
}
4884 
createOpPhiGroup(tcu::TestContext & testCtx)4885 tcu::TestCaseGroup *createOpPhiGroup(tcu::TestContext &testCtx)
4886 {
4887     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opphi"));
4888     ComputeShaderSpec spec1;
4889     ComputeShaderSpec spec2;
4890     ComputeShaderSpec spec3;
4891     ComputeShaderSpec spec4;
4892     ComputeShaderSpec spec5;
4893     de::Random rnd(deStringHash(group->getName()));
4894     const int numElements = 100;
4895     vector<float> inputFloats(numElements, 0);
4896     vector<float> outputFloats1(numElements, 0);
4897     vector<float> outputFloats2(numElements, 0);
4898     vector<float> outputFloats3(numElements, 0);
4899     vector<float> outputFloats4(numElements, 0);
4900     vector<float> outputFloats5(numElements, 0);
4901     std::string codestring = "ABC";
4902     const int test4Width   = 512;
4903 
4904     // Build case 5 code string. Each iteration makes the hierarchy more complicated.
4905     // 9 iterations with (7, 24) parameters makes the hierarchy 8 deep with about 1500 lines of
4906     // shader code.
4907     for (int i = 0, acc = 0; i < 9; i++)
4908         codestring = expandOpPhiCase5(codestring, acc, 7, 24);
4909 
4910     fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4911 
4912     // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4913     floorAll(inputFloats);
4914 
4915     for (size_t ndx = 0; ndx < numElements; ++ndx)
4916     {
4917         switch (ndx % 3)
4918         {
4919         case 0:
4920             outputFloats1[ndx] = inputFloats[ndx] + 5.5f;
4921             break;
4922         case 1:
4923             outputFloats1[ndx] = inputFloats[ndx] + 20.5f;
4924             break;
4925         case 2:
4926             outputFloats1[ndx] = inputFloats[ndx] + 1.75f;
4927             break;
4928         default:
4929             break;
4930         }
4931         outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
4932         outputFloats3[ndx] = 8.5f - inputFloats[ndx];
4933 
4934         int index4         = (int)deFloor(deAbs((float)ndx * inputFloats[ndx]));
4935         outputFloats4[ndx] = (float)(index4 % test4Width) * 10.0f + 5.0f;
4936 
4937         outputFloats5[ndx] = calcOpPhiCase5(inputFloats[ndx], codestring);
4938     }
4939 
4940     spec1.assembly =
4941         string(getComputeAsmShaderPreamble()) +
4942 
4943         "OpSource GLSL 430\n"
4944         "OpName %main \"main\"\n"
4945         "OpName %id \"gl_GlobalInvocationID\"\n"
4946 
4947         "OpDecorate %id BuiltIn GlobalInvocationId\n"
4948 
4949         + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4950         string(getComputeAsmInputOutputBuffer()) +
4951 
4952         "%id = OpVariable %uvec3ptr Input\n"
4953         "%zero       = OpConstant %i32 0\n"
4954         "%three      = OpConstant %u32 3\n"
4955         "%constf5p5  = OpConstant %f32 5.5\n"
4956         "%constf20p5 = OpConstant %f32 20.5\n"
4957         "%constf1p75 = OpConstant %f32 1.75\n"
4958         "%constf8p5  = OpConstant %f32 8.5\n"
4959         "%constf6p5  = OpConstant %f32 6.5\n"
4960 
4961         "%main     = OpFunction %void None %voidf\n"
4962         "%entry    = OpLabel\n"
4963         "%idval    = OpLoad %uvec3 %id\n"
4964         "%x        = OpCompositeExtract %u32 %idval 0\n"
4965         "%selector = OpUMod %u32 %x %three\n"
4966         "            OpSelectionMerge %phi None\n"
4967         "            OpSwitch %selector %default 0 %case0 1 %case1 2 %case2\n"
4968 
4969         // Case 1 before OpPhi.
4970         "%case1    = OpLabel\n"
4971         "            OpBranch %phi\n"
4972 
4973         "%default  = OpLabel\n"
4974         "            OpUnreachable\n"
4975 
4976         "%phi      = OpLabel\n"
4977         "%operand  = OpPhi %f32   %constf1p75 %case2   %constf20p5 %case1   %constf5p5 %case0\n" // not in the order of blocks
4978         "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4979         "%inval    = OpLoad %f32 %inloc\n"
4980         "%add      = OpFAdd %f32 %inval %operand\n"
4981         "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4982         "            OpStore %outloc %add\n"
4983         "            OpReturn\n"
4984 
4985         // Case 0 after OpPhi.
4986         "%case0    = OpLabel\n"
4987         "            OpBranch %phi\n"
4988 
4989         // Case 2 after OpPhi.
4990         "%case2    = OpLabel\n"
4991         "            OpBranch %phi\n"
4992 
4993         "            OpFunctionEnd\n";
4994     spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4995     spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
4996     spec1.numWorkGroups = IVec3(numElements, 1, 1);
4997 
4998     group->addChild(new SpvAsmComputeShaderCase(testCtx, "block", spec1));
4999 
5000     spec2.assembly = string(getComputeAsmShaderPreamble()) +
5001 
5002                      "OpName %main \"main\"\n"
5003                      "OpName %id \"gl_GlobalInvocationID\"\n"
5004 
5005                      "OpDecorate %id BuiltIn GlobalInvocationId\n"
5006 
5007                      + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5008                      string(getComputeAsmInputOutputBuffer()) +
5009 
5010                      "%id         = OpVariable %uvec3ptr Input\n"
5011                      "%zero       = OpConstant %i32 0\n"
5012                      "%one        = OpConstant %i32 1\n"
5013                      "%three      = OpConstant %i32 3\n"
5014                      "%constf6p5  = OpConstant %f32 6.5\n"
5015 
5016                      "%main       = OpFunction %void None %voidf\n"
5017                      "%entry      = OpLabel\n"
5018                      "%idval      = OpLoad %uvec3 %id\n"
5019                      "%x          = OpCompositeExtract %u32 %idval 0\n"
5020                      "%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
5021                      "%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
5022                      "%inval      = OpLoad %f32 %inloc\n"
5023                      "              OpBranch %phi\n"
5024 
5025                      "%phi        = OpLabel\n"
5026                      "%step       = OpPhi %i32 %zero  %entry %step_next  %phi\n"
5027                      "%accum      = OpPhi %f32 %inval %entry %accum_next %phi\n"
5028                      "%step_next  = OpIAdd %i32 %step %one\n"
5029                      "%accum_next = OpFAdd %f32 %accum %constf6p5\n"
5030                      "%still_loop = OpSLessThan %bool %step %three\n"
5031                      "              OpLoopMerge %exit %phi None\n"
5032                      "              OpBranchConditional %still_loop %phi %exit\n"
5033 
5034                      "%exit       = OpLabel\n"
5035                      "              OpStore %outloc %accum\n"
5036                      "              OpReturn\n"
5037                      "              OpFunctionEnd\n";
5038     spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5039     spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
5040     spec2.numWorkGroups = IVec3(numElements, 1, 1);
5041 
5042     group->addChild(new SpvAsmComputeShaderCase(testCtx, "induction", spec2));
5043 
5044     spec3.assembly = string(getComputeAsmShaderPreamble()) +
5045 
5046                      "OpName %main \"main\"\n"
5047                      "OpName %id \"gl_GlobalInvocationID\"\n"
5048 
5049                      "OpDecorate %id BuiltIn GlobalInvocationId\n"
5050 
5051                      + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5052                      string(getComputeAsmInputOutputBuffer()) +
5053 
5054                      "%f32ptr_f   = OpTypePointer Function %f32\n"
5055                      "%id         = OpVariable %uvec3ptr Input\n"
5056                      "%true       = OpConstantTrue %bool\n"
5057                      "%false      = OpConstantFalse %bool\n"
5058                      "%zero       = OpConstant %i32 0\n"
5059                      "%constf8p5  = OpConstant %f32 8.5\n"
5060 
5061                      "%main       = OpFunction %void None %voidf\n"
5062                      "%entry      = OpLabel\n"
5063                      "%b          = OpVariable %f32ptr_f Function %constf8p5\n"
5064                      "%idval      = OpLoad %uvec3 %id\n"
5065                      "%x          = OpCompositeExtract %u32 %idval 0\n"
5066                      "%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
5067                      "%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
5068                      "%a_init     = OpLoad %f32 %inloc\n"
5069                      "%b_init     = OpLoad %f32 %b\n"
5070                      "              OpBranch %phi\n"
5071 
5072                      "%phi        = OpLabel\n"
5073                      "%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
5074                      "%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
5075                      "%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
5076                      "              OpLoopMerge %exit %phi None\n"
5077                      "              OpBranchConditional %still_loop %phi %exit\n"
5078 
5079                      "%exit       = OpLabel\n"
5080                      "%sub        = OpFSub %f32 %a_next %b_next\n"
5081                      "              OpStore %outloc %sub\n"
5082                      "              OpReturn\n"
5083                      "              OpFunctionEnd\n";
5084     spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5085     spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
5086     spec3.numWorkGroups = IVec3(numElements, 1, 1);
5087 
5088     group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", spec3));
5089 
5090     spec4.assembly = "OpCapability Shader\n"
5091                      "%ext = OpExtInstImport \"GLSL.std.450\"\n"
5092                      "OpMemoryModel Logical GLSL450\n"
5093                      "OpEntryPoint GLCompute %main \"main\" %id\n"
5094                      "OpExecutionMode %main LocalSize 1 1 1\n"
5095 
5096                      "OpSource GLSL 430\n"
5097                      "OpName %main \"main\"\n"
5098                      "OpName %id \"gl_GlobalInvocationID\"\n"
5099 
5100                      "OpDecorate %id BuiltIn GlobalInvocationId\n"
5101 
5102                      + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5103                      string(getComputeAsmInputOutputBuffer()) +
5104 
5105                      "%id       = OpVariable %uvec3ptr Input\n"
5106                      "%zero     = OpConstant %i32 0\n"
5107                      "%cimod    = OpConstant %u32 " +
5108                      generateIntWidth(test4Width) + "\n"
5109 
5110                      + generateConstantDefinitions(test4Width) +
5111 
5112                      "%main     = OpFunction %void None %voidf\n"
5113                      "%entry    = OpLabel\n"
5114                      "%idval    = OpLoad %uvec3 %id\n"
5115                      "%x        = OpCompositeExtract %u32 %idval 0\n"
5116                      "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
5117                      "%inval    = OpLoad %f32 %inloc\n"
5118                      "%xf       = OpConvertUToF %f32 %x\n"
5119                      "%xm       = OpFMul %f32 %xf %inval\n"
5120                      "%xa       = OpExtInst %f32 %ext FAbs %xm\n"
5121                      "%xi       = OpConvertFToU %u32 %xa\n"
5122                      "%selector = OpUMod %u32 %xi %cimod\n"
5123                      "            OpSelectionMerge %phi None\n"
5124                      "            OpSwitch %selector %default "
5125 
5126                      + generateSwitchCases(test4Width) +
5127 
5128                      "%default  = OpLabel\n"
5129                      "            OpUnreachable\n"
5130 
5131                      + generateSwitchTargets(test4Width) +
5132 
5133                      "%phi      = OpLabel\n"
5134                      "%result   = OpPhi %f32"
5135 
5136                      + generateOpPhiParams(test4Width) +
5137 
5138                      "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
5139                      "            OpStore %outloc %result\n"
5140                      "            OpReturn\n"
5141 
5142                      "            OpFunctionEnd\n";
5143     spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5144     spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
5145     spec4.numWorkGroups = IVec3(numElements, 1, 1);
5146 
5147     group->addChild(new SpvAsmComputeShaderCase(testCtx, "wide", spec4));
5148 
5149     spec5.assembly = "OpCapability Shader\n"
5150                      "%ext      = OpExtInstImport \"GLSL.std.450\"\n"
5151                      "OpMemoryModel Logical GLSL450\n"
5152                      "OpEntryPoint GLCompute %main \"main\" %id\n"
5153                      "OpExecutionMode %main LocalSize 1 1 1\n"
5154                      "%code     = OpString \"" +
5155                      codestring +
5156                      "\"\n"
5157 
5158                      "OpSource GLSL 430\n"
5159                      "OpName %main \"main\"\n"
5160                      "OpName %id \"gl_GlobalInvocationID\"\n"
5161 
5162                      "OpDecorate %id BuiltIn GlobalInvocationId\n"
5163 
5164                      + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5165                      string(getComputeAsmInputOutputBuffer()) +
5166 
5167                      "%id       = OpVariable %uvec3ptr Input\n"
5168                      "%zero     = OpConstant %i32 0\n"
5169                      "%f32_0    = OpConstant %f32 0.0\n"
5170                      "%f32_0_5  = OpConstant %f32 0.5\n"
5171                      "%f32_1    = OpConstant %f32 1.0\n"
5172                      "%f32_1_5  = OpConstant %f32 1.5\n"
5173                      "%f32_2    = OpConstant %f32 2.0\n"
5174                      "%f32_3_5  = OpConstant %f32 3.5\n"
5175                      "%f32_4    = OpConstant %f32 4.0\n"
5176                      "%f32_7_5  = OpConstant %f32 7.5\n"
5177                      "%f32_8    = OpConstant %f32 8.0\n"
5178                      "%f32_15_5 = OpConstant %f32 15.5\n"
5179                      "%f32_16   = OpConstant %f32 16.0\n"
5180                      "%f32_31_5 = OpConstant %f32 31.5\n"
5181                      "%f32_32   = OpConstant %f32 32.0\n"
5182                      "%f32_63_5 = OpConstant %f32 63.5\n"
5183                      "%f32_64   = OpConstant %f32 64.0\n"
5184                      "%f32_127_5 = OpConstant %f32 127.5\n"
5185                      "%f32_128  = OpConstant %f32 128.0\n"
5186                      "%f32_256  = OpConstant %f32 256.0\n"
5187 
5188                      "%main     = OpFunction %void None %voidf\n"
5189                      "%entry    = OpLabel\n"
5190                      "%idval    = OpLoad %uvec3 %id\n"
5191                      "%x        = OpCompositeExtract %u32 %idval 0\n"
5192                      "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
5193                      "%inval    = OpLoad %f32 %inloc\n"
5194 
5195                      "%xabs     = OpExtInst %f32 %ext FAbs %inval\n"
5196                      "%x8       = OpFMod %f32 %xabs %f32_256\n"
5197                      "%x7       = OpFMod %f32 %xabs %f32_128\n"
5198                      "%x6       = OpFMod %f32 %xabs %f32_64\n"
5199                      "%x5       = OpFMod %f32 %xabs %f32_32\n"
5200                      "%x4       = OpFMod %f32 %xabs %f32_16\n"
5201                      "%x3       = OpFMod %f32 %xabs %f32_8\n"
5202                      "%x2       = OpFMod %f32 %xabs %f32_4\n"
5203                      "%x1       = OpFMod %f32 %xabs %f32_2\n"
5204 
5205                      "%b7       = OpFOrdGreaterThanEqual %bool %x8 %f32_127_5\n"
5206                      "%b6       = OpFOrdGreaterThanEqual %bool %x7 %f32_63_5\n"
5207                      "%b5       = OpFOrdGreaterThanEqual %bool %x6 %f32_31_5\n"
5208                      "%b4       = OpFOrdGreaterThanEqual %bool %x5 %f32_15_5\n"
5209                      "%b3       = OpFOrdGreaterThanEqual %bool %x4 %f32_7_5\n"
5210                      "%b2       = OpFOrdGreaterThanEqual %bool %x3 %f32_3_5\n"
5211                      "%b1       = OpFOrdGreaterThanEqual %bool %x2 %f32_1_5\n"
5212                      "%b0       = OpFOrdGreaterThanEqual %bool %x1 %f32_0_5\n"
5213 
5214                      + generateOpPhiCase5(codestring) +
5215 
5216                      "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
5217                      "            OpStore %outloc %res\n"
5218                      "            OpReturn\n"
5219 
5220                      "            OpFunctionEnd\n";
5221     spec5.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5222     spec5.outputs.push_back(BufferSp(new Float32Buffer(outputFloats5)));
5223     spec5.numWorkGroups = IVec3(numElements, 1, 1);
5224 
5225     group->addChild(new SpvAsmComputeShaderCase(testCtx, "nested", spec5));
5226 
5227     createOpPhiVartypeTests(group, testCtx);
5228 
5229     return group.release();
5230 }
5231 
5232 // Assembly code used for testing block order is based on GLSL source code:
5233 //
5234 // #version 430
5235 //
5236 // layout(std140, set = 0, binding = 0) readonly buffer Input {
5237 //   float elements[];
5238 // } input_data;
5239 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
5240 //   float elements[];
5241 // } output_data;
5242 //
5243 // void main() {
5244 //   uint x = gl_GlobalInvocationID.x;
5245 //   output_data.elements[x] = input_data.elements[x];
5246 //   if (x > uint(50)) {
5247 //     switch (x % uint(3)) {
5248 //       case 0: output_data.elements[x] += 1.5f; break;
5249 //       case 1: output_data.elements[x] += 42.f; break;
5250 //       case 2: output_data.elements[x] -= 27.f; break;
5251 //       default: break;
5252 //     }
5253 //   } else {
5254 //     output_data.elements[x] = -input_data.elements[x];
5255 //   }
5256 // }
createBlockOrderGroup(tcu::TestContext & testCtx)5257 tcu::TestCaseGroup *createBlockOrderGroup(tcu::TestContext &testCtx)
5258 {
5259     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "block_order"));
5260     ComputeShaderSpec spec;
5261     de::Random rnd(deStringHash(group->getName()));
5262     const int numElements = 100;
5263     vector<float> inputFloats(numElements, 0);
5264     vector<float> outputFloats(numElements, 0);
5265 
5266     fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
5267 
5268     // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
5269     floorAll(inputFloats);
5270 
5271     for (size_t ndx = 0; ndx <= 50; ++ndx)
5272         outputFloats[ndx] = -inputFloats[ndx];
5273 
5274     for (size_t ndx = 51; ndx < numElements; ++ndx)
5275     {
5276         switch (ndx % 3)
5277         {
5278         case 0:
5279             outputFloats[ndx] = inputFloats[ndx] + 1.5f;
5280             break;
5281         case 1:
5282             outputFloats[ndx] = inputFloats[ndx] + 42.f;
5283             break;
5284         case 2:
5285             outputFloats[ndx] = inputFloats[ndx] - 27.f;
5286             break;
5287         default:
5288             break;
5289         }
5290     }
5291 
5292     spec.assembly = string(getComputeAsmShaderPreamble()) +
5293 
5294                     "OpSource GLSL 430\n"
5295                     "OpName %main \"main\"\n"
5296                     "OpName %id \"gl_GlobalInvocationID\"\n"
5297 
5298                     "OpDecorate %id BuiltIn GlobalInvocationId\n"
5299 
5300                     + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5301 
5302                     "%u32ptr       = OpTypePointer Function %u32\n"
5303                     "%u32ptr_input = OpTypePointer Input %u32\n"
5304 
5305                     + string(getComputeAsmInputOutputBuffer()) +
5306 
5307                     "%id        = OpVariable %uvec3ptr Input\n"
5308                     "%zero      = OpConstant %i32 0\n"
5309                     "%const3    = OpConstant %u32 3\n"
5310                     "%const50   = OpConstant %u32 50\n"
5311                     "%constf1p5 = OpConstant %f32 1.5\n"
5312                     "%constf27  = OpConstant %f32 27.0\n"
5313                     "%constf42  = OpConstant %f32 42.0\n"
5314 
5315                     "%main = OpFunction %void None %voidf\n"
5316 
5317                     // entry block.
5318                     "%entry    = OpLabel\n"
5319 
5320                     // Create a temporary variable to hold the value of gl_GlobalInvocationID.x.
5321                     "%xvar     = OpVariable %u32ptr Function\n"
5322                     "%xptr     = OpAccessChain %u32ptr_input %id %zero\n"
5323                     "%x        = OpLoad %u32 %xptr\n"
5324                     "            OpStore %xvar %x\n"
5325 
5326                     "%cmp      = OpUGreaterThan %bool %x %const50\n"
5327                     "            OpSelectionMerge %if_merge None\n"
5328                     "            OpBranchConditional %cmp %if_true %if_false\n"
5329 
5330                     // False branch for if-statement: placed in the middle of switch cases and before true branch.
5331                     "%if_false = OpLabel\n"
5332                     "%x_f      = OpLoad %u32 %xvar\n"
5333                     "%inloc_f  = OpAccessChain %f32ptr %indata %zero %x_f\n"
5334                     "%inval_f  = OpLoad %f32 %inloc_f\n"
5335                     "%negate   = OpFNegate %f32 %inval_f\n"
5336                     "%outloc_f = OpAccessChain %f32ptr %outdata %zero %x_f\n"
5337                     "            OpStore %outloc_f %negate\n"
5338                     "            OpBranch %if_merge\n"
5339 
5340                     // Merge block for if-statement: placed in the middle of true and false branch.
5341                     "%if_merge = OpLabel\n"
5342                     "            OpReturn\n"
5343 
5344                     // True branch for if-statement: placed in the middle of swtich cases and after the false branch.
5345                     "%if_true  = OpLabel\n"
5346                     "%xval_t   = OpLoad %u32 %xvar\n"
5347                     "%mod      = OpUMod %u32 %xval_t %const3\n"
5348                     "            OpSelectionMerge %switch_merge None\n"
5349                     "            OpSwitch %mod %default 0 %case0 1 %case1 2 %case2\n"
5350 
5351                     // Merge block for switch-statement: placed before the case
5352                     // bodies.  But it must follow OpSwitch which dominates it.
5353                     "%switch_merge = OpLabel\n"
5354                     "                OpBranch %if_merge\n"
5355 
5356                     // Case 1 for switch-statement: placed before case 0.
5357                     // It must follow the OpSwitch that dominates it.
5358                     "%case1    = OpLabel\n"
5359                     "%x_1      = OpLoad %u32 %xvar\n"
5360                     "%inloc_1  = OpAccessChain %f32ptr %indata %zero %x_1\n"
5361                     "%inval_1  = OpLoad %f32 %inloc_1\n"
5362                     "%addf42   = OpFAdd %f32 %inval_1 %constf42\n"
5363                     "%outloc_1 = OpAccessChain %f32ptr %outdata %zero %x_1\n"
5364                     "            OpStore %outloc_1 %addf42\n"
5365                     "            OpBranch %switch_merge\n"
5366 
5367                     // Case 2 for switch-statement.
5368                     "%case2    = OpLabel\n"
5369                     "%x_2      = OpLoad %u32 %xvar\n"
5370                     "%inloc_2  = OpAccessChain %f32ptr %indata %zero %x_2\n"
5371                     "%inval_2  = OpLoad %f32 %inloc_2\n"
5372                     "%subf27   = OpFSub %f32 %inval_2 %constf27\n"
5373                     "%outloc_2 = OpAccessChain %f32ptr %outdata %zero %x_2\n"
5374                     "            OpStore %outloc_2 %subf27\n"
5375                     "            OpBranch %switch_merge\n"
5376 
5377                     // Default case for switch-statement: placed in the middle of normal cases.
5378                     "%default = OpLabel\n"
5379                     "           OpBranch %switch_merge\n"
5380 
5381                     // Case 0 for switch-statement: out of order.
5382                     "%case0    = OpLabel\n"
5383                     "%x_0      = OpLoad %u32 %xvar\n"
5384                     "%inloc_0  = OpAccessChain %f32ptr %indata %zero %x_0\n"
5385                     "%inval_0  = OpLoad %f32 %inloc_0\n"
5386                     "%addf1p5  = OpFAdd %f32 %inval_0 %constf1p5\n"
5387                     "%outloc_0 = OpAccessChain %f32ptr %outdata %zero %x_0\n"
5388                     "            OpStore %outloc_0 %addf1p5\n"
5389                     "            OpBranch %switch_merge\n"
5390 
5391                     "            OpFunctionEnd\n";
5392     spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5393     spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5394     spec.numWorkGroups = IVec3(numElements, 1, 1);
5395 
5396     group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
5397 
5398     return group.release();
5399 }
5400 
createMultipleShaderGroup(tcu::TestContext & testCtx)5401 tcu::TestCaseGroup *createMultipleShaderGroup(tcu::TestContext &testCtx)
5402 {
5403     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "multiple_shaders"));
5404     ComputeShaderSpec spec1;
5405     ComputeShaderSpec spec2;
5406     de::Random rnd(deStringHash(group->getName()));
5407     const int numElements = 100;
5408     vector<float> inputFloats(numElements, 0);
5409     vector<float> outputFloats1(numElements, 0);
5410     vector<float> outputFloats2(numElements, 0);
5411     fillRandomScalars(rnd, -500.f, 500.f, &inputFloats[0], numElements);
5412 
5413     for (size_t ndx = 0; ndx < numElements; ++ndx)
5414     {
5415         outputFloats1[ndx] = inputFloats[ndx] + inputFloats[ndx];
5416         outputFloats2[ndx] = -inputFloats[ndx];
5417     }
5418 
5419     const string assembly(
5420         "OpCapability Shader\n"
5421         "OpMemoryModel Logical GLSL450\n"
5422         "OpEntryPoint GLCompute %comp_main1 \"entrypoint1\" %id\n"
5423         "OpEntryPoint GLCompute %comp_main2 \"entrypoint2\" %id\n"
5424         // A module cannot have two OpEntryPoint instructions with the same Execution Model and the same Name string.
5425         "OpEntryPoint Vertex    %vert_main  \"entrypoint2\" %vert_builtins %vertexIndex %instanceIndex\n"
5426         "OpExecutionMode %comp_main1 LocalSize 1 1 1\n"
5427         "OpExecutionMode %comp_main2 LocalSize 1 1 1\n"
5428 
5429         "OpName %comp_main1              \"entrypoint1\"\n"
5430         "OpName %comp_main2              \"entrypoint2\"\n"
5431         "OpName %vert_main               \"entrypoint2\"\n"
5432         "OpName %id                      \"gl_GlobalInvocationID\"\n"
5433         "OpName %vert_builtin_st         \"gl_PerVertex\"\n"
5434         "OpName %vertexIndex             \"gl_VertexIndex\"\n"
5435         "OpName %instanceIndex           \"gl_InstanceIndex\"\n"
5436         "OpMemberName %vert_builtin_st 0 \"gl_Position\"\n"
5437         "OpMemberName %vert_builtin_st 1 \"gl_PointSize\"\n"
5438         "OpMemberName %vert_builtin_st 2 \"gl_ClipDistance\"\n"
5439 
5440         "OpDecorate %id                      BuiltIn GlobalInvocationId\n"
5441         "OpDecorate %vertexIndex             BuiltIn VertexIndex\n"
5442         "OpDecorate %instanceIndex           BuiltIn InstanceIndex\n"
5443         "OpDecorate %vert_builtin_st         Block\n"
5444         "OpMemberDecorate %vert_builtin_st 0 BuiltIn Position\n"
5445         "OpMemberDecorate %vert_builtin_st 1 BuiltIn PointSize\n"
5446         "OpMemberDecorate %vert_builtin_st 2 BuiltIn ClipDistance\n"
5447 
5448         + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5449         string(getComputeAsmInputOutputBuffer()) +
5450 
5451         "%zero       = OpConstant %i32 0\n"
5452         "%one        = OpConstant %u32 1\n"
5453         "%c_f32_1    = OpConstant %f32 1\n"
5454 
5455         "%i32inputptr         = OpTypePointer Input %i32\n"
5456         "%vec4                = OpTypeVector %f32 4\n"
5457         "%vec4ptr             = OpTypePointer Output %vec4\n"
5458         "%f32arr1             = OpTypeArray %f32 %one\n"
5459         "%vert_builtin_st     = OpTypeStruct %vec4 %f32 %f32arr1\n"
5460         "%vert_builtin_st_ptr = OpTypePointer Output %vert_builtin_st\n"
5461         "%vert_builtins       = OpVariable %vert_builtin_st_ptr Output\n"
5462 
5463         "%id         = OpVariable %uvec3ptr Input\n"
5464         "%vertexIndex = OpVariable %i32inputptr Input\n"
5465         "%instanceIndex = OpVariable %i32inputptr Input\n"
5466         "%c_vec4_1   = OpConstantComposite %vec4 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
5467 
5468         // gl_Position = vec4(1.);
5469         "%vert_main  = OpFunction %void None %voidf\n"
5470         "%vert_entry = OpLabel\n"
5471         "%position   = OpAccessChain %vec4ptr %vert_builtins %zero\n"
5472         "              OpStore %position %c_vec4_1\n"
5473         "              OpReturn\n"
5474         "              OpFunctionEnd\n"
5475 
5476         // Double inputs.
5477         "%comp_main1  = OpFunction %void None %voidf\n"
5478         "%comp1_entry = OpLabel\n"
5479         "%idval1      = OpLoad %uvec3 %id\n"
5480         "%x1          = OpCompositeExtract %u32 %idval1 0\n"
5481         "%inloc1      = OpAccessChain %f32ptr %indata %zero %x1\n"
5482         "%inval1      = OpLoad %f32 %inloc1\n"
5483         "%add         = OpFAdd %f32 %inval1 %inval1\n"
5484         "%outloc1     = OpAccessChain %f32ptr %outdata %zero %x1\n"
5485         "               OpStore %outloc1 %add\n"
5486         "               OpReturn\n"
5487         "               OpFunctionEnd\n"
5488 
5489         // Negate inputs.
5490         "%comp_main2  = OpFunction %void None %voidf\n"
5491         "%comp2_entry = OpLabel\n"
5492         "%idval2      = OpLoad %uvec3 %id\n"
5493         "%x2          = OpCompositeExtract %u32 %idval2 0\n"
5494         "%inloc2      = OpAccessChain %f32ptr %indata %zero %x2\n"
5495         "%inval2      = OpLoad %f32 %inloc2\n"
5496         "%neg         = OpFNegate %f32 %inval2\n"
5497         "%outloc2     = OpAccessChain %f32ptr %outdata %zero %x2\n"
5498         "               OpStore %outloc2 %neg\n"
5499         "               OpReturn\n"
5500         "               OpFunctionEnd\n");
5501 
5502     spec1.assembly = assembly;
5503     spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5504     spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
5505     spec1.numWorkGroups = IVec3(numElements, 1, 1);
5506     spec1.entryPoint    = "entrypoint1";
5507 
5508     spec2.assembly = assembly;
5509     spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5510     spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
5511     spec2.numWorkGroups = IVec3(numElements, 1, 1);
5512     spec2.entryPoint    = "entrypoint2";
5513 
5514     group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader1", spec1));
5515     group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader2", spec2));
5516 
5517     return group.release();
5518 }
5519 
// Returns a string made of num4ByteChars back-to-back copies of one four-byte
// UTF-8 character (bytes F0 9F 8C 8D), i.e. exactly 4 * num4ByteChars bytes.
inline std::string makeLongUTF8String(size_t num4ByteChars)
{
    // Four bytes is the longest valid UTF-8 encoding. The bytes are kept in a
    // plain `char` array rather than a C++11 UTF-8 literal so that older
    // Microsoft compilers cannot interpret them as wide (16-bit) characters.
    const char earthAfrica[]   = "\xF0\x9F\x8C\x8D";
    const size_t encodedLength = sizeof(earthAfrica) - 1; // drop the terminating NUL

    std::string repeated;
    repeated.reserve(num4ByteChars * encodedLength);

    for (size_t remaining = num4ByteChars; remaining > 0; --remaining)
        repeated.append(earthAfrica, encodedLength);

    return repeated;
}
5536 
createOpSourceGroup(tcu::TestContext & testCtx)5537 tcu::TestCaseGroup *createOpSourceGroup(tcu::TestContext &testCtx)
5538 {
5539     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsource"));
5540     vector<CaseParameter> cases;
5541     de::Random rnd(deStringHash(group->getName()));
5542     const int numElements = 100;
5543     vector<float> positiveFloats(numElements, 0);
5544     vector<float> negativeFloats(numElements, 0);
5545     const StringTemplate shaderTemplate("OpCapability Shader\n"
5546                                         "OpMemoryModel Logical GLSL450\n"
5547 
5548                                         "OpEntryPoint GLCompute %main \"main\" %id\n"
5549                                         "OpExecutionMode %main LocalSize 1 1 1\n"
5550 
5551                                         "${SOURCE}\n"
5552 
5553                                         "OpName %main           \"main\"\n"
5554                                         "OpName %id             \"gl_GlobalInvocationID\"\n"
5555 
5556                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
5557 
5558                                         + string(getComputeAsmInputOutputBufferTraits()) +
5559                                         string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5560 
5561                                         "%id        = OpVariable %uvec3ptr Input\n"
5562                                         "%zero      = OpConstant %i32 0\n"
5563 
5564                                         "%main      = OpFunction %void None %voidf\n"
5565                                         "%label     = OpLabel\n"
5566                                         "%idval     = OpLoad %uvec3 %id\n"
5567                                         "%x         = OpCompositeExtract %u32 %idval 0\n"
5568                                         "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5569                                         "%inval     = OpLoad %f32 %inloc\n"
5570                                         "%neg       = OpFNegate %f32 %inval\n"
5571                                         "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5572                                         "             OpStore %outloc %neg\n"
5573                                         "             OpReturn\n"
5574                                         "             OpFunctionEnd\n");
5575 
5576     cases.push_back(CaseParameter("unknown_source", "OpSource Unknown 0"));
5577     cases.push_back(CaseParameter("wrong_source", "OpSource OpenCL_C 210"));
5578     cases.push_back(CaseParameter("normal_filename", "%fname = OpString \"filename\"\n"
5579                                                      "OpSource GLSL 430 %fname"));
5580     cases.push_back(CaseParameter("empty_filename", "%fname = OpString \"\"\n"
5581                                                     "OpSource GLSL 430 %fname"));
5582     cases.push_back(CaseParameter("normal_source_code", "%fname = OpString \"filename\"\n"
5583                                                         "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\""));
5584     cases.push_back(CaseParameter("empty_source_code", "%fname = OpString \"filename\"\n"
5585                                                        "OpSource GLSL 430 %fname \"\""));
5586     cases.push_back(CaseParameter("long_source_code", "%fname = OpString \"filename\"\n"
5587                                                       "OpSource GLSL 430 %fname \"" +
5588                                                           makeLongUTF8String(65530) + "ccc\"")); // word count: 65535
5589     cases.push_back(CaseParameter(
5590         "utf8_source_code", "%fname = OpString \"filename\"\n"
5591                             "OpSource GLSL 430 %fname \"\xE2\x98\x82\xE2\x98\x85\"")); // umbrella & black star symbol
5592     cases.push_back(CaseParameter("normal_sourcecontinued", "%fname = OpString \"filename\"\n"
5593                                                             "OpSource GLSL 430 %fname \"#version 430\nvo\"\n"
5594                                                             "OpSourceContinued \"id main() {}\""));
5595     cases.push_back(CaseParameter("empty_sourcecontinued", "%fname = OpString \"filename\"\n"
5596                                                            "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5597                                                            "OpSourceContinued \"\""));
5598     cases.push_back(CaseParameter("long_sourcecontinued", "%fname = OpString \"filename\"\n"
5599                                                           "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5600                                                           "OpSourceContinued \"" +
5601                                                               makeLongUTF8String(65533) +
5602                                                               "ccc\"")); // word count: 65535
5603     cases.push_back(
5604         CaseParameter("utf8_sourcecontinued",
5605                       "%fname = OpString \"filename\"\n"
5606                       "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5607                       "OpSourceContinued \"\xE2\x98\x8E\xE2\x9A\x91\"")); // white telephone & black flag symbol
5608     cases.push_back(CaseParameter("multi_sourcecontinued", "%fname = OpString \"filename\"\n"
5609                                                            "OpSource GLSL 430 %fname \"#version 430\n\"\n"
5610                                                            "OpSourceContinued \"void\"\n"
5611                                                            "OpSourceContinued \"main()\"\n"
5612                                                            "OpSourceContinued \"{}\""));
5613     cases.push_back(CaseParameter("empty_source_before_sourcecontinued",
5614                                   "%fname = OpString \"filename\"\n"
5615                                   "OpSource GLSL 430 %fname \"\"\n"
5616                                   "OpSourceContinued \"#version 430\nvoid main() {}\""));
5617 
5618     fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5619 
5620     for (size_t ndx = 0; ndx < numElements; ++ndx)
5621         negativeFloats[ndx] = -positiveFloats[ndx];
5622 
5623     for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5624     {
5625         map<string, string> specializations;
5626         ComputeShaderSpec spec;
5627 
5628         specializations["SOURCE"] = cases[caseNdx].param;
5629         spec.assembly             = shaderTemplate.specialize(specializations);
5630         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5631         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5632         spec.numWorkGroups = IVec3(numElements, 1, 1);
5633 
5634         group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
5635     }
5636 
5637     return group.release();
5638 }
5639 
createOpSourceExtensionGroup(tcu::TestContext & testCtx)5640 tcu::TestCaseGroup *createOpSourceExtensionGroup(tcu::TestContext &testCtx)
5641 {
5642     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsourceextension"));
5643     vector<CaseParameter> cases;
5644     de::Random rnd(deStringHash(group->getName()));
5645     const int numElements = 100;
5646     vector<float> inputFloats(numElements, 0);
5647     vector<float> outputFloats(numElements, 0);
5648     const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
5649 
5650                                         "OpSourceExtension \"${EXTENSION}\"\n"
5651 
5652                                         "OpName %main           \"main\"\n"
5653                                         "OpName %id             \"gl_GlobalInvocationID\"\n"
5654 
5655                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
5656 
5657                                         + string(getComputeAsmInputOutputBufferTraits()) +
5658                                         string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5659 
5660                                         "%id        = OpVariable %uvec3ptr Input\n"
5661                                         "%zero      = OpConstant %i32 0\n"
5662 
5663                                         "%main      = OpFunction %void None %voidf\n"
5664                                         "%label     = OpLabel\n"
5665                                         "%idval     = OpLoad %uvec3 %id\n"
5666                                         "%x         = OpCompositeExtract %u32 %idval 0\n"
5667                                         "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5668                                         "%inval     = OpLoad %f32 %inloc\n"
5669                                         "%neg       = OpFNegate %f32 %inval\n"
5670                                         "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5671                                         "             OpStore %outloc %neg\n"
5672                                         "             OpReturn\n"
5673                                         "             OpFunctionEnd\n");
5674 
5675     cases.push_back(CaseParameter("empty_extension", ""));
5676     cases.push_back(CaseParameter("real_extension", "GL_ARB_texture_rectangle"));
5677     cases.push_back(CaseParameter("fake_extension", "GL_ARB_im_the_ultimate_extension"));
5678     cases.push_back(CaseParameter("utf8_extension", "GL_ARB_\xE2\x98\x82\xE2\x98\x85"));
5679     cases.push_back(CaseParameter("long_extension", makeLongUTF8String(65533) + "ccc")); // word count: 65535
5680 
5681     fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
5682 
5683     for (size_t ndx = 0; ndx < numElements; ++ndx)
5684         outputFloats[ndx] = -inputFloats[ndx];
5685 
5686     for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5687     {
5688         map<string, string> specializations;
5689         ComputeShaderSpec spec;
5690 
5691         specializations["EXTENSION"] = cases[caseNdx].param;
5692         spec.assembly                = shaderTemplate.specialize(specializations);
5693         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5694         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5695         spec.numWorkGroups = IVec3(numElements, 1, 1);
5696 
5697         group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
5698     }
5699 
5700     return group.release();
5701 }
5702 
5703 // Checks that a compute shader can generate a constant null value of various types, without exercising a computation on it.
createOpConstantNullGroup(tcu::TestContext & testCtx)5704 tcu::TestCaseGroup *createOpConstantNullGroup(tcu::TestContext &testCtx)
5705 {
5706     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opconstantnull"));
5707     vector<CaseParameter> cases;
5708     de::Random rnd(deStringHash(group->getName()));
5709     const int numElements = 100;
5710     vector<float> positiveFloats(numElements, 0);
5711     vector<float> negativeFloats(numElements, 0);
5712     const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
5713 
5714                                         "OpSource GLSL 430\n"
5715                                         "OpName %main           \"main\"\n"
5716                                         "OpName %id             \"gl_GlobalInvocationID\"\n"
5717 
5718                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
5719 
5720                                         + string(getComputeAsmInputOutputBufferTraits()) +
5721                                         string(getComputeAsmCommonTypes()) +
5722                                         "%uvec2     = OpTypeVector %u32 2\n"
5723                                         "%bvec3     = OpTypeVector %bool 3\n"
5724                                         "%fvec4     = OpTypeVector %f32 4\n"
5725                                         "%fmat33    = OpTypeMatrix %fvec3 3\n"
5726                                         "%const100  = OpConstant %u32 100\n"
5727                                         "%uarr100   = OpTypeArray %i32 %const100\n"
5728                                         "%struct    = OpTypeStruct %f32 %i32 %u32\n"
5729                                         "%pointer   = OpTypePointer Function %i32\n" +
5730                                         string(getComputeAsmInputOutputBuffer()) +
5731 
5732                                         "%null      = OpConstantNull ${TYPE}\n"
5733 
5734                                         "%id        = OpVariable %uvec3ptr Input\n"
5735                                         "%zero      = OpConstant %i32 0\n"
5736 
5737                                         "%main      = OpFunction %void None %voidf\n"
5738                                         "%label     = OpLabel\n"
5739                                         "%idval     = OpLoad %uvec3 %id\n"
5740                                         "%x         = OpCompositeExtract %u32 %idval 0\n"
5741                                         "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5742                                         "%inval     = OpLoad %f32 %inloc\n"
5743                                         "%neg       = OpFNegate %f32 %inval\n"
5744                                         "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5745                                         "             OpStore %outloc %neg\n"
5746                                         "             OpReturn\n"
5747                                         "             OpFunctionEnd\n");
5748 
5749     cases.push_back(CaseParameter("bool", "%bool"));
5750     cases.push_back(CaseParameter("sint32", "%i32"));
5751     cases.push_back(CaseParameter("uint32", "%u32"));
5752     cases.push_back(CaseParameter("float32", "%f32"));
5753     cases.push_back(CaseParameter("vec4float32", "%fvec4"));
5754     cases.push_back(CaseParameter("vec3bool", "%bvec3"));
5755     cases.push_back(CaseParameter("vec2uint32", "%uvec2"));
5756     cases.push_back(CaseParameter("matrix", "%fmat33"));
5757     cases.push_back(CaseParameter("array", "%uarr100"));
5758     cases.push_back(CaseParameter("struct", "%struct"));
5759     cases.push_back(CaseParameter("pointer", "%pointer"));
5760 
5761     fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5762 
5763     for (size_t ndx = 0; ndx < numElements; ++ndx)
5764         negativeFloats[ndx] = -positiveFloats[ndx];
5765 
5766     for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5767     {
5768         map<string, string> specializations;
5769         ComputeShaderSpec spec;
5770 
5771         specializations["TYPE"] = cases[caseNdx].param;
5772         spec.assembly           = shaderTemplate.specialize(specializations);
5773         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5774         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5775         spec.numWorkGroups = IVec3(numElements, 1, 1);
5776 
5777         group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
5778     }
5779 
5780     return group.release();
5781 }
5782 
5783 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createOpConstantCompositeGroup(tcu::TestContext & testCtx)5784 tcu::TestCaseGroup *createOpConstantCompositeGroup(tcu::TestContext &testCtx)
5785 {
5786     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opconstantcomposite"));
5787     vector<CaseParameter> cases;
5788     de::Random rnd(deStringHash(group->getName()));
5789     const int numElements = 100;
5790     vector<float> positiveFloats(numElements, 0);
5791     vector<float> negativeFloats(numElements, 0);
5792     const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
5793 
5794                                         "OpSource GLSL 430\n"
5795                                         "OpName %main           \"main\"\n"
5796                                         "OpName %id             \"gl_GlobalInvocationID\"\n"
5797 
5798                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
5799 
5800                                         + string(getComputeAsmInputOutputBufferTraits()) +
5801                                         string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5802 
5803                                         "%id        = OpVariable %uvec3ptr Input\n"
5804                                         "%zero      = OpConstant %i32 0\n"
5805 
5806                                         "${CONSTANT}\n"
5807 
5808                                         "%main      = OpFunction %void None %voidf\n"
5809                                         "%label     = OpLabel\n"
5810                                         "%idval     = OpLoad %uvec3 %id\n"
5811                                         "%x         = OpCompositeExtract %u32 %idval 0\n"
5812                                         "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5813                                         "%inval     = OpLoad %f32 %inloc\n"
5814                                         "%neg       = OpFNegate %f32 %inval\n"
5815                                         "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5816                                         "             OpStore %outloc %neg\n"
5817                                         "             OpReturn\n"
5818                                         "             OpFunctionEnd\n");
5819 
5820     cases.push_back(CaseParameter("vector", "%five = OpConstant %i32 5\n"
5821                                             "%ivec3 = OpTypeVector %i32 3\n"
5822                                             "%const = OpConstantComposite %ivec3 %five %zero %five"));
5823     cases.push_back(CaseParameter("matrix", "%m3fvec3 = OpTypeMatrix %fvec3 3\n"
5824                                             "%ten = OpConstant %f32 10.\n"
5825                                             "%fzero = OpConstant %f32 0.\n"
5826                                             "%vec = OpConstantComposite %fvec3 %ten %fzero %ten\n"
5827                                             "%mat = OpConstantComposite %m3fvec3 %vec %vec %vec"));
5828     cases.push_back(CaseParameter("struct", "%m2vec3 = OpTypeMatrix %fvec3 2\n"
5829                                             "%struct = OpTypeStruct %i32 %f32 %fvec3 %m2vec3\n"
5830                                             "%fzero = OpConstant %f32 0.\n"
5831                                             "%one = OpConstant %f32 1.\n"
5832                                             "%point5 = OpConstant %f32 0.5\n"
5833                                             "%vec = OpConstantComposite %fvec3 %one %one %fzero\n"
5834                                             "%mat = OpConstantComposite %m2vec3 %vec %vec\n"
5835                                             "%const = OpConstantComposite %struct %zero %point5 %vec %mat"));
5836     cases.push_back(CaseParameter("nested_struct", "%st1 = OpTypeStruct %u32 %f32\n"
5837                                                    "%st2 = OpTypeStruct %i32 %i32\n"
5838                                                    "%struct = OpTypeStruct %st1 %st2\n"
5839                                                    "%point5 = OpConstant %f32 0.5\n"
5840                                                    "%one = OpConstant %u32 1\n"
5841                                                    "%ten = OpConstant %i32 10\n"
5842                                                    "%st1val = OpConstantComposite %st1 %one %point5\n"
5843                                                    "%st2val = OpConstantComposite %st2 %ten %ten\n"
5844                                                    "%const = OpConstantComposite %struct %st1val %st2val"));
5845 
5846     fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5847 
5848     for (size_t ndx = 0; ndx < numElements; ++ndx)
5849         negativeFloats[ndx] = -positiveFloats[ndx];
5850 
5851     for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5852     {
5853         map<string, string> specializations;
5854         ComputeShaderSpec spec;
5855 
5856         specializations["CONSTANT"] = cases[caseNdx].param;
5857         spec.assembly               = shaderTemplate.specialize(specializations);
5858         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5859         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5860         spec.numWorkGroups = IVec3(numElements, 1, 1);
5861 
5862         group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
5863     }
5864 
5865     return group.release();
5866 }
5867 
5868 // Creates a floating point number with the given exponent, and significand
5869 // bits set. It can only create normalized numbers. Only the least significant
5870 // 24 bits of the significand will be examined. The final bit of the
5871 // significand will also be ignored. This allows alignment to be written
5872 // similarly to C99 hex-floats.
5873 // For example if you wanted to write 0x1.7f34p-12 you would call
5874 // constructNormalizedFloat(-12, 0x7f3400)
constructNormalizedFloat(int32_t exponent,uint32_t significand)5875 float constructNormalizedFloat(int32_t exponent, uint32_t significand)
5876 {
5877     float f = 1.0f;
5878 
5879     for (int32_t idx = 0; idx < 23; ++idx)
5880     {
5881         f += ((significand & 0x800000) == 0) ? 0.f : std::ldexp(1.0f, -(idx + 1));
5882         significand <<= 1;
5883     }
5884 
5885     return std::ldexp(f, exponent);
5886 }
5887 
5888 // Compare instruction for the OpQuantizeF16 compute exact case.
5889 // Returns true if the output is what is expected from the test case.
compareOpQuantizeF16ComputeExactCase(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)5890 bool compareOpQuantizeF16ComputeExactCase(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
5891                                           const std::vector<Resource> &expectedOutputs, TestLog &)
5892 {
5893     assert(outputAllocs.size() == 1);
5894 
5895     // Only size is needed because we cannot compare Nans.
5896     size_t byteSize = expectedOutputs[0].getByteSize();
5897 
5898     const float *outputAsFloat = static_cast<const float *>(outputAllocs[0]->getHostPtr());
5899 
5900     if (byteSize != 4 * sizeof(float))
5901     {
5902         return false;
5903     }
5904 
5905     if (*outputAsFloat != constructNormalizedFloat(8, 0x304000) &&
5906         *outputAsFloat != constructNormalizedFloat(8, 0x300000))
5907     {
5908         return false;
5909     }
5910     outputAsFloat++;
5911 
5912     if (*outputAsFloat != -constructNormalizedFloat(-7, 0x600000) &&
5913         *outputAsFloat != -constructNormalizedFloat(-7, 0x604000))
5914     {
5915         return false;
5916     }
5917     outputAsFloat++;
5918 
5919     if (*outputAsFloat != constructNormalizedFloat(2, 0x01C000) &&
5920         *outputAsFloat != constructNormalizedFloat(2, 0x020000))
5921     {
5922         return false;
5923     }
5924     outputAsFloat++;
5925 
5926     if (*outputAsFloat != constructNormalizedFloat(1, 0xFFC000) &&
5927         *outputAsFloat != constructNormalizedFloat(2, 0x000000))
5928     {
5929         return false;
5930     }
5931 
5932     return true;
5933 }
5934 
5935 // Checks that every output from a test-case is a float NaN.
compareNan(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)5936 bool compareNan(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
5937                 const std::vector<Resource> &expectedOutputs, TestLog &)
5938 {
5939     assert(outputAllocs.size() == 1);
5940 
5941     // Only size is needed because we cannot compare Nans.
5942     size_t byteSize = expectedOutputs[0].getByteSize();
5943 
5944     const float *const output_as_float = static_cast<const float *>(outputAllocs[0]->getHostPtr());
5945 
5946     for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
5947     {
5948         if (!deFloatIsNaN(output_as_float[idx]))
5949         {
5950             return false;
5951         }
5952     }
5953 
5954     return true;
5955 }
5956 
5957 // Checks that every output from a test-case is either +0.0f or -0.0f
compareZeros(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)5958 bool compareZeros(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
5959                   const std::vector<Resource> &expectedOutputs, TestLog &)
5960 {
5961     assert(outputAllocs.size() == 1);
5962 
5963     // Only size is needed because all the results are supposed to be zero.
5964     size_t byteSize = expectedOutputs[0].getByteSize();
5965 
5966     const float *const output_as_float = static_cast<const float *>(outputAllocs[0]->getHostPtr());
5967 
5968     for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
5969     {
5970         if (output_as_float[idx] != 0)
5971             return false;
5972     }
5973 
5974     return true;
5975 }
5976 
// Checks that OpQuantizeToF16 in a compute shader produces the expected results for special inputs such as infinities and NaNs.
createOpQuantizeToF16Group(tcu::TestContext & testCtx)5978 tcu::TestCaseGroup *createOpQuantizeToF16Group(tcu::TestContext &testCtx)
5979 {
5980     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opquantize"));
5981 
5982     const std::string shader(string(getComputeAsmShaderPreamble()) +
5983 
5984                              "OpSource GLSL 430\n"
5985                              "OpName %main           \"main\"\n"
5986                              "OpName %id             \"gl_GlobalInvocationID\"\n"
5987 
5988                              "OpDecorate %id BuiltIn GlobalInvocationId\n"
5989 
5990                              + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5991                              string(getComputeAsmInputOutputBuffer()) +
5992 
5993                              "%id        = OpVariable %uvec3ptr Input\n"
5994                              "%zero      = OpConstant %i32 0\n"
5995 
5996                              "%main      = OpFunction %void None %voidf\n"
5997                              "%label     = OpLabel\n"
5998                              "%idval     = OpLoad %uvec3 %id\n"
5999                              "%x         = OpCompositeExtract %u32 %idval 0\n"
6000                              "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6001                              "%inval     = OpLoad %f32 %inloc\n"
6002                              "%quant     = OpQuantizeToF16 %f32 %inval\n"
6003                              "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6004                              "             OpStore %outloc %quant\n"
6005                              "             OpReturn\n"
6006                              "             OpFunctionEnd\n");
6007 
6008     {
6009         ComputeShaderSpec spec;
6010         const uint32_t numElements = 100;
6011         vector<float> infinities;
6012         vector<float> results;
6013 
6014         infinities.reserve(numElements);
6015         results.reserve(numElements);
6016 
6017         for (size_t idx = 0; idx < numElements; ++idx)
6018         {
6019             switch (idx % 4)
6020             {
6021             case 0:
6022                 infinities.push_back(std::numeric_limits<float>::infinity());
6023                 results.push_back(std::numeric_limits<float>::infinity());
6024                 break;
6025             case 1:
6026                 infinities.push_back(-std::numeric_limits<float>::infinity());
6027                 results.push_back(-std::numeric_limits<float>::infinity());
6028                 break;
6029             case 2:
6030                 infinities.push_back(std::ldexp(1.0f, 16));
6031                 results.push_back(std::numeric_limits<float>::infinity());
6032                 break;
6033             case 3:
6034                 infinities.push_back(std::ldexp(-1.0f, 32));
6035                 results.push_back(-std::numeric_limits<float>::infinity());
6036                 break;
6037             }
6038         }
6039 
6040         spec.assembly = shader;
6041         spec.inputs.push_back(BufferSp(new Float32Buffer(infinities)));
6042         spec.outputs.push_back(BufferSp(new Float32Buffer(results)));
6043         spec.numWorkGroups = IVec3(numElements, 1, 1);
6044 
6045         group->addChild(new SpvAsmComputeShaderCase(testCtx, "infinities", spec));
6046     }
6047 
6048     {
6049         ComputeShaderSpec spec;
6050         vector<float> nans;
6051         const uint32_t numElements = 100;
6052 
6053         nans.reserve(numElements);
6054 
6055         for (size_t idx = 0; idx < numElements; ++idx)
6056         {
6057             if (idx % 2 == 0)
6058             {
6059                 nans.push_back(std::numeric_limits<float>::quiet_NaN());
6060             }
6061             else
6062             {
6063                 nans.push_back(-std::numeric_limits<float>::quiet_NaN());
6064             }
6065         }
6066 
6067         spec.assembly = shader;
6068         spec.inputs.push_back(BufferSp(new Float32Buffer(nans)));
6069         spec.outputs.push_back(BufferSp(new Float32Buffer(nans)));
6070         spec.numWorkGroups = IVec3(numElements, 1, 1);
6071         spec.verifyIO      = &compareNan;
6072 
6073         group->addChild(new SpvAsmComputeShaderCase(testCtx, "propagated_nans", spec));
6074     }
6075 
6076     {
6077         ComputeShaderSpec spec;
6078         vector<float> small;
6079         vector<float> zeros;
6080         const uint32_t numElements = 100;
6081 
6082         small.reserve(numElements);
6083         zeros.reserve(numElements);
6084 
6085         for (size_t idx = 0; idx < numElements; ++idx)
6086         {
6087             switch (idx % 6)
6088             {
6089             case 0:
6090                 small.push_back(0.f);
6091                 break;
6092             case 1:
6093                 small.push_back(-0.f);
6094                 break;
6095             case 2:
6096                 small.push_back(std::ldexp(1.0f, -16));
6097                 break;
6098             case 3:
6099                 small.push_back(std::ldexp(-1.0f, -32));
6100                 break;
6101             case 4:
6102                 small.push_back(std::ldexp(1.0f, -127));
6103                 break;
6104             case 5:
6105                 small.push_back(-std::ldexp(1.0f, -128));
6106                 break;
6107             }
6108         }
6109 
6110         spec.assembly = shader;
6111         spec.inputs.push_back(BufferSp(new Float32Buffer(small)));
6112         // Only the size of outputs[0] will be used, actual expected values aren't needed.
6113         spec.outputs.push_back(BufferSp(new Float32Buffer(small)));
6114         spec.numWorkGroups = IVec3(numElements, 1, 1);
6115         spec.verifyIO      = &compareZeros;
6116 
6117         group->addChild(new SpvAsmComputeShaderCase(testCtx, "flush_to_zero", spec));
6118     }
6119 
6120     {
6121         ComputeShaderSpec spec;
6122         vector<float> exact;
6123         const uint32_t numElements = 200;
6124 
6125         exact.reserve(numElements);
6126 
6127         for (size_t idx = 0; idx < numElements; ++idx)
6128             exact.push_back(static_cast<float>(static_cast<int>(idx) - 100));
6129 
6130         spec.assembly = shader;
6131         spec.inputs.push_back(BufferSp(new Float32Buffer(exact)));
6132         spec.outputs.push_back(BufferSp(new Float32Buffer(exact)));
6133         spec.numWorkGroups = IVec3(numElements, 1, 1);
6134 
6135         group->addChild(new SpvAsmComputeShaderCase(testCtx, "exact", spec));
6136     }
6137 
6138     {
6139         ComputeShaderSpec spec;
6140         vector<float> inputs;
6141         const uint32_t numElements = 4;
6142 
6143         inputs.push_back(constructNormalizedFloat(8, 0x300300));
6144         inputs.push_back(-constructNormalizedFloat(-7, 0x600800));
6145         inputs.push_back(constructNormalizedFloat(2, 0x01E000));
6146         inputs.push_back(constructNormalizedFloat(1, 0xFFE000));
6147 
6148         spec.assembly = shader;
6149         spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
6150         spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6151         spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
6152         spec.numWorkGroups = IVec3(numElements, 1, 1);
6153 
6154         group->addChild(new SpvAsmComputeShaderCase(testCtx, "rounded", spec));
6155     }
6156 
6157     return group.release();
6158 }
6159 
createSpecConstantOpQuantizeToF16Group(tcu::TestContext & testCtx)6160 tcu::TestCaseGroup *createSpecConstantOpQuantizeToF16Group(tcu::TestContext &testCtx)
6161 {
6162     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opspecconstantop_opquantize"));
6163 
6164     const std::string shader(
6165         string(getComputeAsmShaderPreamble()) +
6166 
6167         "OpName %main           \"main\"\n"
6168         "OpName %id             \"gl_GlobalInvocationID\"\n"
6169 
6170         "OpDecorate %id BuiltIn GlobalInvocationId\n"
6171 
6172         "OpDecorate %sc_0  SpecId 0\n"
6173         "OpDecorate %sc_1  SpecId 1\n"
6174         "OpDecorate %sc_2  SpecId 2\n"
6175         "OpDecorate %sc_3  SpecId 3\n"
6176         "OpDecorate %sc_4  SpecId 4\n"
6177         "OpDecorate %sc_5  SpecId 5\n"
6178 
6179         + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6180         string(getComputeAsmInputOutputBuffer()) +
6181 
6182         "%id        = OpVariable %uvec3ptr Input\n"
6183         "%zero      = OpConstant %i32 0\n"
6184         "%c_u32_6   = OpConstant %u32 6\n"
6185 
6186         "%sc_0      = OpSpecConstant %f32 0.\n"
6187         "%sc_1      = OpSpecConstant %f32 0.\n"
6188         "%sc_2      = OpSpecConstant %f32 0.\n"
6189         "%sc_3      = OpSpecConstant %f32 0.\n"
6190         "%sc_4      = OpSpecConstant %f32 0.\n"
6191         "%sc_5      = OpSpecConstant %f32 0.\n"
6192 
6193         "%sc_0_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_0\n"
6194         "%sc_1_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_1\n"
6195         "%sc_2_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_2\n"
6196         "%sc_3_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_3\n"
6197         "%sc_4_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_4\n"
6198         "%sc_5_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_5\n"
6199 
6200         "%main      = OpFunction %void None %voidf\n"
6201         "%label     = OpLabel\n"
6202         "%idval     = OpLoad %uvec3 %id\n"
6203         "%x         = OpCompositeExtract %u32 %idval 0\n"
6204         "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6205         "%selector  = OpUMod %u32 %x %c_u32_6\n"
6206         "            OpSelectionMerge %exit None\n"
6207         "            OpSwitch %selector %exit 0 %case0 1 %case1 2 %case2 3 %case3 4 %case4 5 %case5\n"
6208 
6209         "%case0     = OpLabel\n"
6210         "             OpStore %outloc %sc_0_quant\n"
6211         "             OpBranch %exit\n"
6212 
6213         "%case1     = OpLabel\n"
6214         "             OpStore %outloc %sc_1_quant\n"
6215         "             OpBranch %exit\n"
6216 
6217         "%case2     = OpLabel\n"
6218         "             OpStore %outloc %sc_2_quant\n"
6219         "             OpBranch %exit\n"
6220 
6221         "%case3     = OpLabel\n"
6222         "             OpStore %outloc %sc_3_quant\n"
6223         "             OpBranch %exit\n"
6224 
6225         "%case4     = OpLabel\n"
6226         "             OpStore %outloc %sc_4_quant\n"
6227         "             OpBranch %exit\n"
6228 
6229         "%case5     = OpLabel\n"
6230         "             OpStore %outloc %sc_5_quant\n"
6231         "             OpBranch %exit\n"
6232 
6233         "%exit      = OpLabel\n"
6234         "             OpReturn\n"
6235 
6236         "             OpFunctionEnd\n");
6237 
6238     {
6239         ComputeShaderSpec spec;
6240         const uint8_t numCases = 4;
6241         vector<float> inputs(numCases, 0.f);
6242         vector<float> outputs;
6243 
6244         spec.assembly      = shader;
6245         spec.numWorkGroups = IVec3(numCases, 1, 1);
6246 
6247         spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(std::numeric_limits<float>::infinity()));
6248         spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(-std::numeric_limits<float>::infinity()));
6249         spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(std::ldexp(1.0f, 16)));
6250         spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(std::ldexp(-1.0f, 32)));
6251 
6252         outputs.push_back(std::numeric_limits<float>::infinity());
6253         outputs.push_back(-std::numeric_limits<float>::infinity());
6254         outputs.push_back(std::numeric_limits<float>::infinity());
6255         outputs.push_back(-std::numeric_limits<float>::infinity());
6256 
6257         spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6258         spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6259 
6260         group->addChild(new SpvAsmComputeShaderCase(testCtx, "infinities", spec));
6261     }
6262 
6263     {
6264         ComputeShaderSpec spec;
6265         const uint8_t numCases = 2;
6266         vector<float> inputs(numCases, 0.f);
6267         vector<float> outputs;
6268 
6269         spec.assembly      = shader;
6270         spec.numWorkGroups = IVec3(numCases, 1, 1);
6271         spec.verifyIO      = &compareNan;
6272 
6273         outputs.push_back(std::numeric_limits<float>::quiet_NaN());
6274         outputs.push_back(-std::numeric_limits<float>::quiet_NaN());
6275 
6276         for (uint8_t idx = 0; idx < numCases; ++idx)
6277             spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(outputs[idx]));
6278 
6279         spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6280         spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6281 
6282         group->addChild(new SpvAsmComputeShaderCase(testCtx, "propagated_nans", spec));
6283     }
6284 
6285     {
6286         ComputeShaderSpec spec;
6287         const uint8_t numCases = 6;
6288         vector<float> inputs(numCases, 0.f);
6289         vector<float> outputs;
6290 
6291         spec.assembly      = shader;
6292         spec.numWorkGroups = IVec3(numCases, 1, 1);
6293 
6294         spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(0.f));
6295         spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(-0.f));
6296         spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(std::ldexp(1.0f, -16)));
6297         spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(std::ldexp(-1.0f, -32)));
6298         spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(std::ldexp(1.0f, -127)));
6299         spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(-std::ldexp(1.0f, -128)));
6300 
6301         spec.verifyIO = &compareZeros;
6302 
6303         spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6304         // Only the size of outputs[0] will be used, actual expected values aren't needed.
6305         spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
6306 
6307         group->addChild(new SpvAsmComputeShaderCase(testCtx, "flush_to_zero", spec));
6308     }
6309 
6310     {
6311         ComputeShaderSpec spec;
6312         const uint8_t numCases = 6;
6313         vector<float> inputs(numCases, 0.f);
6314         vector<float> outputs;
6315 
6316         spec.assembly      = shader;
6317         spec.numWorkGroups = IVec3(numCases, 1, 1);
6318 
6319         for (uint8_t idx = 0; idx < 6; ++idx)
6320         {
6321             const float f = static_cast<float>(idx * 10 - 30) / 4.f;
6322             spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(f));
6323             outputs.push_back(f);
6324         }
6325 
6326         spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6327         spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6328 
6329         group->addChild(new SpvAsmComputeShaderCase(testCtx, "exact", spec));
6330     }
6331 
6332     {
6333         ComputeShaderSpec spec;
6334         const uint8_t numCases = 4;
6335         vector<float> inputs(numCases, 0.f);
6336         vector<float> outputs;
6337 
6338         spec.assembly      = shader;
6339         spec.numWorkGroups = IVec3(numCases, 1, 1);
6340         spec.verifyIO      = &compareOpQuantizeF16ComputeExactCase;
6341 
6342         outputs.push_back(constructNormalizedFloat(8, 0x300300));
6343         outputs.push_back(-constructNormalizedFloat(-7, 0x600800));
6344         outputs.push_back(constructNormalizedFloat(2, 0x01E000));
6345         outputs.push_back(constructNormalizedFloat(1, 0xFFE000));
6346 
6347         for (uint8_t idx = 0; idx < numCases; ++idx)
6348             spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(outputs[idx]));
6349 
6350         spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6351         spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6352 
6353         group->addChild(new SpvAsmComputeShaderCase(testCtx, "rounded", spec));
6354     }
6355 
6356     return group.release();
6357 }
6358 
6359 // Checks that constant null/composite values can be used in computation.
createOpConstantUsageGroup(tcu::TestContext & testCtx)6360 tcu::TestCaseGroup *createOpConstantUsageGroup(tcu::TestContext &testCtx)
6361 {
6362     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opconstantnullcomposite"));
6363     ComputeShaderSpec spec;
6364     de::Random rnd(deStringHash(group->getName()));
6365     const int numElements = 100;
6366     vector<float> positiveFloats(numElements, 0);
6367     vector<float> negativeFloats(numElements, 0);
6368 
6369     fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
6370 
6371     for (size_t ndx = 0; ndx < numElements; ++ndx)
6372         negativeFloats[ndx] = -positiveFloats[ndx];
6373 
6374     spec.assembly = "OpCapability Shader\n"
6375                     "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
6376                     "OpMemoryModel Logical GLSL450\n"
6377                     "OpEntryPoint GLCompute %main \"main\" %id\n"
6378                     "OpExecutionMode %main LocalSize 1 1 1\n"
6379 
6380                     "OpSource GLSL 430\n"
6381                     "OpName %main           \"main\"\n"
6382                     "OpName %id             \"gl_GlobalInvocationID\"\n"
6383 
6384                     "OpDecorate %id BuiltIn GlobalInvocationId\n"
6385 
6386                     + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6387 
6388                     "%fmat      = OpTypeMatrix %fvec3 3\n"
6389                     "%ten       = OpConstant %u32 10\n"
6390                     "%f32arr10  = OpTypeArray %f32 %ten\n"
6391                     "%fst       = OpTypeStruct %f32 %f32\n"
6392 
6393                     + string(getComputeAsmInputOutputBuffer()) +
6394 
6395                     "%id        = OpVariable %uvec3ptr Input\n"
6396                     "%zero      = OpConstant %i32 0\n"
6397 
6398                     // Create a bunch of null values
6399                     "%unull     = OpConstantNull %u32\n"
6400                     "%fnull     = OpConstantNull %f32\n"
6401                     "%vnull     = OpConstantNull %fvec3\n"
6402                     "%mnull     = OpConstantNull %fmat\n"
6403                     "%anull     = OpConstantNull %f32arr10\n"
6404                     "%snull     = OpConstantComposite %fst %fnull %fnull\n"
6405 
6406                     "%main      = OpFunction %void None %voidf\n"
6407                     "%label     = OpLabel\n"
6408                     "%idval     = OpLoad %uvec3 %id\n"
6409                     "%x         = OpCompositeExtract %u32 %idval 0\n"
6410                     "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6411                     "%inval     = OpLoad %f32 %inloc\n"
6412                     "%neg       = OpFNegate %f32 %inval\n"
6413 
6414                     // Get the abs() of (a certain element of) those null values
6415                     "%unull_cov = OpConvertUToF %f32 %unull\n"
6416                     "%unull_abs = OpExtInst %f32 %std450 FAbs %unull_cov\n"
6417                     "%fnull_abs = OpExtInst %f32 %std450 FAbs %fnull\n"
6418                     "%vnull_0   = OpCompositeExtract %f32 %vnull 0\n"
6419                     "%vnull_abs = OpExtInst %f32 %std450 FAbs %vnull_0\n"
6420                     "%mnull_12  = OpCompositeExtract %f32 %mnull 1 2\n"
6421                     "%mnull_abs = OpExtInst %f32 %std450 FAbs %mnull_12\n"
6422                     "%anull_3   = OpCompositeExtract %f32 %anull 3\n"
6423                     "%anull_abs = OpExtInst %f32 %std450 FAbs %anull_3\n"
6424                     "%snull_1   = OpCompositeExtract %f32 %snull 1\n"
6425                     "%snull_abs = OpExtInst %f32 %std450 FAbs %snull_1\n"
6426 
6427                     // Add them all
6428                     "%add1      = OpFAdd %f32 %neg  %unull_abs\n"
6429                     "%add2      = OpFAdd %f32 %add1 %fnull_abs\n"
6430                     "%add3      = OpFAdd %f32 %add2 %vnull_abs\n"
6431                     "%add4      = OpFAdd %f32 %add3 %mnull_abs\n"
6432                     "%add5      = OpFAdd %f32 %add4 %anull_abs\n"
6433                     "%final     = OpFAdd %f32 %add5 %snull_abs\n"
6434 
6435                     "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6436                     "             OpStore %outloc %final\n" // write to output
6437                     "             OpReturn\n"
6438                     "             OpFunctionEnd\n";
6439     spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
6440     spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
6441     spec.numWorkGroups = IVec3(numElements, 1, 1);
6442 
6443     group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", spec));
6444 
6445     return group.release();
6446 }
6447 
6448 // Assembly code used for testing loop control is based on GLSL source code:
6449 // #version 430
6450 //
6451 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6452 //   float elements[];
6453 // } input_data;
6454 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6455 //   float elements[];
6456 // } output_data;
6457 //
6458 // void main() {
6459 //   uint x = gl_GlobalInvocationID.x;
6460 //   output_data.elements[x] = input_data.elements[x];
6461 //   for (uint i = 0; i < 4; ++i)
6462 //     output_data.elements[x] += 1.f;
6463 // }
createLoopControlGroup(tcu::TestContext & testCtx)6464 tcu::TestCaseGroup *createLoopControlGroup(tcu::TestContext &testCtx)
6465 {
6466     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "loop_control"));
6467     vector<CaseParameter> cases;
6468     de::Random rnd(deStringHash(group->getName()));
6469     const int numElements = 100;
6470     vector<float> inputFloats(numElements, 0);
6471     vector<float> outputFloats(numElements, 0);
6472     const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
6473 
6474                                         "OpSource GLSL 430\n"
6475                                         "OpName %main \"main\"\n"
6476                                         "OpName %id \"gl_GlobalInvocationID\"\n"
6477 
6478                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
6479 
6480                                         + string(getComputeAsmInputOutputBufferTraits()) +
6481                                         string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6482 
6483                                         "%u32ptr      = OpTypePointer Function %u32\n"
6484 
6485                                         "%id          = OpVariable %uvec3ptr Input\n"
6486                                         "%zero        = OpConstant %i32 0\n"
6487                                         "%uzero       = OpConstant %u32 0\n"
6488                                         "%one         = OpConstant %i32 1\n"
6489                                         "%constf1     = OpConstant %f32 1.0\n"
6490                                         "%four        = OpConstant %u32 4\n"
6491 
6492                                         "%main        = OpFunction %void None %voidf\n"
6493                                         "%entry       = OpLabel\n"
6494                                         "%i           = OpVariable %u32ptr Function\n"
6495                                         "               OpStore %i %uzero\n"
6496 
6497                                         "%idval       = OpLoad %uvec3 %id\n"
6498                                         "%x           = OpCompositeExtract %u32 %idval 0\n"
6499                                         "%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
6500                                         "%inval       = OpLoad %f32 %inloc\n"
6501                                         "%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
6502                                         "               OpStore %outloc %inval\n"
6503                                         "               OpBranch %loop_entry\n"
6504 
6505                                         "%loop_entry  = OpLabel\n"
6506                                         "%i_val       = OpLoad %u32 %i\n"
6507                                         "%cmp_lt      = OpULessThan %bool %i_val %four\n"
6508                                         "               OpLoopMerge %loop_merge %loop_body ${CONTROL}\n"
6509                                         "               OpBranchConditional %cmp_lt %loop_body %loop_merge\n"
6510                                         "%loop_body   = OpLabel\n"
6511                                         "%outval      = OpLoad %f32 %outloc\n"
6512                                         "%addf1       = OpFAdd %f32 %outval %constf1\n"
6513                                         "               OpStore %outloc %addf1\n"
6514                                         "%new_i       = OpIAdd %u32 %i_val %one\n"
6515                                         "               OpStore %i %new_i\n"
6516                                         "               OpBranch %loop_entry\n"
6517                                         "%loop_merge  = OpLabel\n"
6518                                         "               OpReturn\n"
6519                                         "               OpFunctionEnd\n");
6520 
6521     cases.push_back(CaseParameter("none", "None"));
6522     cases.push_back(CaseParameter("unroll", "Unroll"));
6523     cases.push_back(CaseParameter("dont_unroll", "DontUnroll"));
6524 
6525     fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6526 
6527     for (size_t ndx = 0; ndx < numElements; ++ndx)
6528         outputFloats[ndx] = inputFloats[ndx] + 4.f;
6529 
6530     for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6531     {
6532         map<string, string> specializations;
6533         ComputeShaderSpec spec;
6534 
6535         specializations["CONTROL"] = cases[caseNdx].param;
6536         spec.assembly              = shaderTemplate.specialize(specializations);
6537         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6538         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6539         spec.numWorkGroups = IVec3(numElements, 1, 1);
6540 
6541         group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
6542     }
6543 
6544     group->addChild(new SpvAsmLoopControlDependencyLengthCase(testCtx, "dependency_length"));
6545     group->addChild(new SpvAsmLoopControlDependencyInfiniteCase(testCtx, "dependency_infinite"));
6546 
6547     return group.release();
6548 }
6549 
6550 // Assembly code used for testing selection control is based on GLSL source code:
6551 // #version 430
6552 //
6553 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6554 //   float elements[];
6555 // } input_data;
6556 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6557 //   float elements[];
6558 // } output_data;
6559 //
6560 // void main() {
6561 //   uint x = gl_GlobalInvocationID.x;
6562 //   float val = input_data.elements[x];
6563 //   if (val > 10.f)
6564 //     output_data.elements[x] = val + 1.f;
6565 //   else
6566 //     output_data.elements[x] = val - 1.f;
6567 // }
createSelectionControlGroup(tcu::TestContext & testCtx)6568 tcu::TestCaseGroup *createSelectionControlGroup(tcu::TestContext &testCtx)
6569 {
6570     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "selection_control"));
6571     vector<CaseParameter> cases;
6572     de::Random rnd(deStringHash(group->getName()));
6573     const int numElements = 100;
6574     vector<float> inputFloats(numElements, 0);
6575     vector<float> outputFloats(numElements, 0);
6576     const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
6577 
6578                                         "OpSource GLSL 430\n"
6579                                         "OpName %main \"main\"\n"
6580                                         "OpName %id \"gl_GlobalInvocationID\"\n"
6581 
6582                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
6583 
6584                                         + string(getComputeAsmInputOutputBufferTraits()) +
6585                                         string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6586 
6587                                         "%id       = OpVariable %uvec3ptr Input\n"
6588                                         "%zero     = OpConstant %i32 0\n"
6589                                         "%constf1  = OpConstant %f32 1.0\n"
6590                                         "%constf10 = OpConstant %f32 10.0\n"
6591 
6592                                         "%main     = OpFunction %void None %voidf\n"
6593                                         "%entry    = OpLabel\n"
6594                                         "%idval    = OpLoad %uvec3 %id\n"
6595                                         "%x        = OpCompositeExtract %u32 %idval 0\n"
6596                                         "%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
6597                                         "%inval    = OpLoad %f32 %inloc\n"
6598                                         "%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
6599                                         "%cmp_gt   = OpFOrdGreaterThan %bool %inval %constf10\n"
6600 
6601                                         "            OpSelectionMerge %if_end ${CONTROL}\n"
6602                                         "            OpBranchConditional %cmp_gt %if_true %if_false\n"
6603                                         "%if_true  = OpLabel\n"
6604                                         "%addf1    = OpFAdd %f32 %inval %constf1\n"
6605                                         "            OpStore %outloc %addf1\n"
6606                                         "            OpBranch %if_end\n"
6607                                         "%if_false = OpLabel\n"
6608                                         "%subf1    = OpFSub %f32 %inval %constf1\n"
6609                                         "            OpStore %outloc %subf1\n"
6610                                         "            OpBranch %if_end\n"
6611                                         "%if_end   = OpLabel\n"
6612                                         "            OpReturn\n"
6613                                         "            OpFunctionEnd\n");
6614 
6615     cases.push_back(CaseParameter("none", "None"));
6616     cases.push_back(CaseParameter("flatten", "Flatten"));
6617     cases.push_back(CaseParameter("dont_flatten", "DontFlatten"));
6618     cases.push_back(CaseParameter("flatten_dont_flatten", "DontFlatten|Flatten"));
6619 
6620     fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6621 
6622     // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6623     floorAll(inputFloats);
6624 
6625     for (size_t ndx = 0; ndx < numElements; ++ndx)
6626         outputFloats[ndx] = inputFloats[ndx] + (inputFloats[ndx] > 10.f ? 1.f : -1.f);
6627 
6628     for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6629     {
6630         map<string, string> specializations;
6631         ComputeShaderSpec spec;
6632 
6633         specializations["CONTROL"] = cases[caseNdx].param;
6634         spec.assembly              = shaderTemplate.specialize(specializations);
6635         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6636         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6637         spec.numWorkGroups = IVec3(numElements, 1, 1);
6638 
6639         group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
6640     }
6641 
6642     return group.release();
6643 }
6644 
getOpNameAbuseCases(vector<CaseParameter> & abuseCases)6645 void getOpNameAbuseCases(vector<CaseParameter> &abuseCases)
6646 {
6647     // Generate a long name.
6648     std::string longname;
6649     longname.resize(65535, 'k'); // max string literal, spir-v 2.17
6650 
6651     // Some bad names, abusing utf-8 encoding. This may also cause problems
6652     // with the logs.
6653     // 1. Various illegal code points in utf-8
6654     std::string utf8illegal = "Illegal bytes in UTF-8: "
6655                               "\xc0 \xc1 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff"
6656                               "illegal surrogates: \xed\xad\xbf \xed\xbe\x80";
6657 
6658     // 2. Zero encoded as overlong, not exactly legal but often supported to differentiate from terminating zero
6659     std::string utf8nul = "UTF-8 encoded nul \xC0\x80 (should not end name)";
6660 
6661     // 3. Some overlong encodings
6662     std::string utf8overlong = "UTF-8 overlong \xF0\x82\x82\xAC \xfc\x83\xbf\xbf\xbf\xbf \xf8\x87\xbf\xbf\xbf "
6663                                "\xf0\x8f\xbf\xbf";
6664 
6665     // 4. Internet "zalgo" meme "bleeding text"
6666     std::string utf8zalgo = "\x56\xcc\xb5\xcc\x85\xcc\x94\xcc\x88\xcd\x8a\xcc\x91\xcc\x88\xcd\x91\xcc\x83\xcd\x82"
6667                             "\xcc\x83\xcd\x90\xcc\x8a\xcc\x92\xcc\x92\xcd\x8b\xcc\x94\xcd\x9d\xcc\x98\xcc\xab\xcc"
6668                             "\xae\xcc\xa9\xcc\xad\xcc\x97\xcc\xb0\x75\xcc\xb6\xcc\xbe\xcc\x80\xcc\x82\xcc\x84\xcd"
6669                             "\x84\xcc\x90\xcd\x86\xcc\x9a\xcd\x84\xcc\x9b\xcd\x86\xcd\x92\xcc\x9a\xcd\x99\xcd\x99"
6670                             "\xcc\xbb\xcc\x98\xcd\x8e\xcd\x88\xcd\x9a\xcc\xa6\xcc\x9c\xcc\xab\xcc\x99\xcd\x94\xcd"
6671                             "\x99\xcd\x95\xcc\xa5\xcc\xab\xcd\x89\x6c\xcc\xb8\xcc\x8e\xcc\x8b\xcc\x8b\xcc\x9a\xcc"
6672                             "\x8e\xcd\x9d\xcc\x80\xcc\xa1\xcc\xad\xcd\x9c\xcc\xba\xcc\x96\xcc\xb3\xcc\xa2\xcd\x8e"
6673                             "\xcc\xa2\xcd\x96\x6b\xcc\xb8\xcc\x84\xcd\x81\xcc\xbf\xcc\x8d\xcc\x89\xcc\x85\xcc\x92"
6674                             "\xcc\x84\xcc\x90\xcd\x81\xcc\x93\xcd\x90\xcd\x92\xcd\x9d\xcc\x84\xcd\x98\xcd\x9d\xcd"
6675                             "\xa0\xcd\x91\xcc\x94\xcc\xb9\xcd\x93\xcc\xa5\xcd\x87\xcc\xad\xcc\xa7\xcd\x96\xcd\x99"
6676                             "\xcc\x9d\xcc\xbc\xcd\x96\xcd\x93\xcc\x9d\xcc\x99\xcc\xa8\xcc\xb1\xcd\x85\xcc\xba\xcc"
6677                             "\xa7\x61\xcc\xb8\xcc\x8e\xcc\x81\xcd\x90\xcd\x84\xcd\x8c\xcc\x8c\xcc\x85\xcd\x86\xcc"
6678                             "\x84\xcd\x84\xcc\x90\xcc\x84\xcc\x8d\xcd\x99\xcd\x8d\xcc\xb0\xcc\xa3\xcc\xa6\xcd\x89"
6679                             "\xcd\x8d\xcd\x87\xcc\x98\xcd\x8d\xcc\xa4\xcd\x9a\xcd\x8e\xcc\xab\xcc\xb9\xcc\xac\xcc"
6680                             "\xa2\xcd\x87\xcc\xa0\xcc\xb3\xcd\x89\xcc\xb9\xcc\xa7\xcc\xa6\xcd\x89\xcd\x95\x6e\xcc"
6681                             "\xb8\xcd\x8a\xcc\x8a\xcd\x82\xcc\x9b\xcd\x81\xcd\x90\xcc\x85\xcc\x9b\xcd\x80\xcd\x91"
6682                             "\xcd\x9b\xcc\x81\xcd\x81\xcc\x9a\xcc\xb3\xcd\x9c\xcc\x9e\xcc\x9d\xcd\x99\xcc\xa2\xcd"
6683                             "\x93\xcd\x96\xcc\x97\xff";
6684 
6685     // General name abuses
6686     abuseCases.push_back(CaseParameter("_has_very_long_name", longname));
6687     abuseCases.push_back(CaseParameter("_utf8_illegal", utf8illegal));
6688     abuseCases.push_back(CaseParameter("_utf8_nul", utf8nul));
6689     abuseCases.push_back(CaseParameter("_utf8_overlong", utf8overlong));
6690     abuseCases.push_back(CaseParameter("_utf8_zalgo", utf8zalgo));
6691 
6692     // GL keywords
6693     abuseCases.push_back(CaseParameter("_is_gl_Position", "gl_Position"));
6694     abuseCases.push_back(CaseParameter("_is_gl_InstanceID", "gl_InstanceID"));
6695     abuseCases.push_back(CaseParameter("_is_gl_PrimitiveID", "gl_PrimitiveID"));
6696     abuseCases.push_back(CaseParameter("_is_gl_TessCoord", "gl_TessCoord"));
6697     abuseCases.push_back(CaseParameter("_is_gl_PerVertex", "gl_PerVertex"));
6698     abuseCases.push_back(CaseParameter("_is_gl_InvocationID", "gl_InvocationID"));
6699     abuseCases.push_back(CaseParameter("_is_gl_PointSize", "gl_PointSize"));
6700     abuseCases.push_back(CaseParameter("_is_gl_PointCoord", "gl_PointCoord"));
6701     abuseCases.push_back(CaseParameter("_is_gl_Layer", "gl_Layer"));
6702     abuseCases.push_back(CaseParameter("_is_gl_FragDepth", "gl_FragDepth"));
6703     abuseCases.push_back(CaseParameter("_is_gl_NumWorkGroups", "gl_NumWorkGroups"));
6704     abuseCases.push_back(CaseParameter("_is_gl_WorkGroupID", "gl_WorkGroupID"));
6705     abuseCases.push_back(CaseParameter("_is_gl_LocalInvocationID", "gl_LocalInvocationID"));
6706     abuseCases.push_back(CaseParameter("_is_gl_GlobalInvocationID", "gl_GlobalInvocationID"));
6707     abuseCases.push_back(CaseParameter("_is_gl_MaxVertexAttribs", "gl_MaxVertexAttribs"));
6708     abuseCases.push_back(CaseParameter("_is_gl_MaxViewports", "gl_MaxViewports"));
6709     abuseCases.push_back(CaseParameter("_is_gl_MaxComputeWorkGroupCount", "gl_MaxComputeWorkGroupCount"));
6710     abuseCases.push_back(CaseParameter("_is_mat3", "mat3"));
6711     abuseCases.push_back(CaseParameter("_is_volatile", "volatile"));
6712     abuseCases.push_back(CaseParameter("_is_inout", "inout"));
6713     abuseCases.push_back(CaseParameter("_is_isampler3d", "isampler3d"));
6714 }
6715 
createOpNameGroup(tcu::TestContext & testCtx)6716 tcu::TestCaseGroup *createOpNameGroup(tcu::TestContext &testCtx)
6717 {
6718     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opname"));
6719     de::MovePtr<tcu::TestCaseGroup> entryMainGroup(new tcu::TestCaseGroup(testCtx, "entry_main"));
6720     de::MovePtr<tcu::TestCaseGroup> entryNotGroup(new tcu::TestCaseGroup(testCtx, "entry_rdc"));
6721     de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "abuse"));
6722     vector<CaseParameter> cases;
6723     vector<CaseParameter> abuseCases;
6724     vector<string> testFunc;
6725     de::Random rnd(deStringHash(group->getName()));
6726     const int numElements = 128;
6727     vector<float> inputFloats(numElements, 0);
6728     vector<float> outputFloats(numElements, 0);
6729 
6730     getOpNameAbuseCases(abuseCases);
6731 
6732     fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6733 
6734     for (size_t ndx = 0; ndx < numElements; ++ndx)
6735         outputFloats[ndx] = -inputFloats[ndx];
6736 
6737     const string commonShaderHeader = "OpCapability Shader\n"
6738                                       "OpMemoryModel Logical GLSL450\n"
6739                                       "OpEntryPoint GLCompute %main \"main\" %id\n"
6740                                       "OpExecutionMode %main LocalSize 1 1 1\n";
6741 
6742     const string commonShaderFooter = "OpDecorate %id BuiltIn GlobalInvocationId\n"
6743 
6744                                       + string(getComputeAsmInputOutputBufferTraits()) +
6745                                       string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6746 
6747                                       "%id        = OpVariable %uvec3ptr Input\n"
6748                                       "%zero      = OpConstant %i32 0\n"
6749 
6750                                       "%func      = OpFunction %void None %voidf\n"
6751                                       "%5         = OpLabel\n"
6752                                       "             OpReturn\n"
6753                                       "             OpFunctionEnd\n"
6754 
6755                                       "%main      = OpFunction %void None %voidf\n"
6756                                       "%entry     = OpLabel\n"
6757                                       "%7         = OpFunctionCall %void %func\n"
6758 
6759                                       "%idval     = OpLoad %uvec3 %id\n"
6760                                       "%x         = OpCompositeExtract %u32 %idval 0\n"
6761 
6762                                       "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6763                                       "%inval     = OpLoad %f32 %inloc\n"
6764                                       "%neg       = OpFNegate %f32 %inval\n"
6765                                       "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6766                                       "             OpStore %outloc %neg\n"
6767 
6768                                       "             OpReturn\n"
6769                                       "             OpFunctionEnd\n";
6770 
6771     const StringTemplate shaderTemplate("OpCapability Shader\n"
6772                                         "OpMemoryModel Logical GLSL450\n"
6773                                         "OpEntryPoint GLCompute %main \"${ENTRY}\" %id\n"
6774                                         "OpExecutionMode %main LocalSize 1 1 1\n"
6775                                         "OpName %${ID} \"${NAME}\"\n" +
6776                                         commonShaderFooter);
6777 
6778     const std::string multipleNames = commonShaderHeader +
6779                                       "OpName %main \"to_be\"\n"
6780                                       "OpName %id   \"or_not\"\n"
6781                                       "OpName %main \"to_be\"\n"
6782                                       "OpName %main \"makes_no\"\n"
6783                                       "OpName %func \"difference\"\n"
6784                                       "OpName %5    \"to_me\"\n" +
6785                                       commonShaderFooter;
6786 
6787     {
6788         ComputeShaderSpec spec;
6789 
6790         spec.assembly      = multipleNames;
6791         spec.numWorkGroups = IVec3(numElements, 1, 1);
6792         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6793         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6794 
6795         abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "main_has_multiple_names", spec));
6796     }
6797 
6798     const std::string everythingNamed = commonShaderHeader +
6799                                         "OpName %main   \"name1\"\n"
6800                                         "OpName %id     \"name2\"\n"
6801                                         "OpName %zero   \"name3\"\n"
6802                                         "OpName %entry  \"name4\"\n"
6803                                         "OpName %func   \"name5\"\n"
6804                                         "OpName %5      \"name6\"\n"
6805                                         "OpName %7      \"name7\"\n"
6806                                         "OpName %idval  \"name8\"\n"
6807                                         "OpName %inloc  \"name9\"\n"
6808                                         "OpName %inval  \"name10\"\n"
6809                                         "OpName %neg    \"name11\"\n"
6810                                         "OpName %outloc \"name12\"\n" +
6811                                         commonShaderFooter;
6812     {
6813         ComputeShaderSpec spec;
6814 
6815         spec.assembly      = everythingNamed;
6816         spec.numWorkGroups = IVec3(numElements, 1, 1);
6817         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6818         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6819 
6820         abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named", spec));
6821     }
6822 
6823     const std::string everythingNamedTheSame = commonShaderHeader +
6824                                                "OpName %main   \"the_same\"\n"
6825                                                "OpName %id     \"the_same\"\n"
6826                                                "OpName %zero   \"the_same\"\n"
6827                                                "OpName %entry  \"the_same\"\n"
6828                                                "OpName %func   \"the_same\"\n"
6829                                                "OpName %5      \"the_same\"\n"
6830                                                "OpName %7      \"the_same\"\n"
6831                                                "OpName %idval  \"the_same\"\n"
6832                                                "OpName %inloc  \"the_same\"\n"
6833                                                "OpName %inval  \"the_same\"\n"
6834                                                "OpName %neg    \"the_same\"\n"
6835                                                "OpName %outloc \"the_same\"\n" +
6836                                                commonShaderFooter;
6837     {
6838         ComputeShaderSpec spec;
6839 
6840         spec.assembly      = everythingNamedTheSame;
6841         spec.numWorkGroups = IVec3(numElements, 1, 1);
6842         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6843         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6844 
6845         abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", spec));
6846     }
6847 
6848     // main_is_...
6849     for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6850     {
6851         map<string, string> specializations;
6852         ComputeShaderSpec spec;
6853 
6854         specializations["ENTRY"] = "main";
6855         specializations["ID"]    = "main";
6856         specializations["NAME"]  = abuseCases[ndx].param;
6857         spec.assembly            = shaderTemplate.specialize(specializations);
6858         spec.numWorkGroups       = IVec3(numElements, 1, 1);
6859         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6860         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6861 
6862         abuseGroup->addChild(
6863             new SpvAsmComputeShaderCase(testCtx, (std::string("main") + abuseCases[ndx].name).c_str(), spec));
6864     }
6865 
6866     // x_is_....
6867     for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6868     {
6869         map<string, string> specializations;
6870         ComputeShaderSpec spec;
6871 
6872         specializations["ENTRY"] = "main";
6873         specializations["ID"]    = "x";
6874         specializations["NAME"]  = abuseCases[ndx].param;
6875         spec.assembly            = shaderTemplate.specialize(specializations);
6876         spec.numWorkGroups       = IVec3(numElements, 1, 1);
6877         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6878         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6879 
6880         abuseGroup->addChild(
6881             new SpvAsmComputeShaderCase(testCtx, (std::string("x") + abuseCases[ndx].name).c_str(), spec));
6882     }
6883 
6884     cases.push_back(CaseParameter("_is_main", "main"));
6885     cases.push_back(CaseParameter("_is_not_main", "not_main"));
6886     testFunc.push_back("main");
6887     testFunc.push_back("func");
6888 
6889     for (size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6890     {
6891         for (size_t ndx = 0; ndx < cases.size(); ++ndx)
6892         {
6893             map<string, string> specializations;
6894             ComputeShaderSpec spec;
6895 
6896             specializations["ENTRY"] = "main";
6897             specializations["ID"]    = testFunc[fNdx];
6898             specializations["NAME"]  = cases[ndx].param;
6899             spec.assembly            = shaderTemplate.specialize(specializations);
6900             spec.numWorkGroups       = IVec3(numElements, 1, 1);
6901             spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6902             spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6903 
6904             entryMainGroup->addChild(
6905                 new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), spec));
6906         }
6907     }
6908 
6909     cases.push_back(CaseParameter("_is_entry", "rdc"));
6910 
6911     for (size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6912     {
6913         for (size_t ndx = 0; ndx < cases.size(); ++ndx)
6914         {
6915             map<string, string> specializations;
6916             ComputeShaderSpec spec;
6917 
6918             specializations["ENTRY"] = "rdc";
6919             specializations["ID"]    = testFunc[fNdx];
6920             specializations["NAME"]  = cases[ndx].param;
6921             spec.assembly            = shaderTemplate.specialize(specializations);
6922             spec.numWorkGroups       = IVec3(numElements, 1, 1);
6923             spec.entryPoint          = "rdc";
6924             spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6925             spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6926 
6927             entryNotGroup->addChild(
6928                 new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), spec));
6929         }
6930     }
6931 
6932     group->addChild(entryMainGroup.release());
6933     group->addChild(entryNotGroup.release());
6934     group->addChild(abuseGroup.release());
6935 
6936     return group.release();
6937 }
6938 
createOpMemberNameGroup(tcu::TestContext & testCtx)6939 tcu::TestCaseGroup *createOpMemberNameGroup(tcu::TestContext &testCtx)
6940 {
6941     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opmembername"));
6942     de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "abuse"));
6943     vector<CaseParameter> abuseCases;
6944     vector<string> testFunc;
6945     de::Random rnd(deStringHash(group->getName()));
6946     const int numElements = 128;
6947     vector<float> inputFloats(numElements, 0);
6948     vector<float> outputFloats(numElements, 0);
6949 
6950     getOpNameAbuseCases(abuseCases);
6951 
6952     fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6953 
6954     for (size_t ndx = 0; ndx < numElements; ++ndx)
6955         outputFloats[ndx] = -inputFloats[ndx];
6956 
6957     const string commonShaderHeader = "OpCapability Shader\n"
6958                                       "OpMemoryModel Logical GLSL450\n"
6959                                       "OpEntryPoint GLCompute %main \"main\" %id\n"
6960                                       "OpExecutionMode %main LocalSize 1 1 1\n";
6961 
6962     const string commonShaderFooter = "OpDecorate %id BuiltIn GlobalInvocationId\n"
6963 
6964                                       + string(getComputeAsmInputOutputBufferTraits()) +
6965                                       string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6966 
6967                                       "%u3str     = OpTypeStruct %u32 %u32 %u32\n"
6968 
6969                                       "%id        = OpVariable %uvec3ptr Input\n"
6970                                       "%zero      = OpConstant %i32 0\n"
6971 
6972                                       "%main      = OpFunction %void None %voidf\n"
6973                                       "%entry     = OpLabel\n"
6974 
6975                                       "%idval     = OpLoad %uvec3 %id\n"
6976                                       "%x0        = OpCompositeExtract %u32 %idval 0\n"
6977 
6978                                       "%idstr     = OpCompositeConstruct %u3str %x0 %x0 %x0\n"
6979                                       "%x         = OpCompositeExtract %u32 %idstr 0\n"
6980 
6981                                       "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6982                                       "%inval     = OpLoad %f32 %inloc\n"
6983                                       "%neg       = OpFNegate %f32 %inval\n"
6984                                       "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6985                                       "             OpStore %outloc %neg\n"
6986 
6987                                       "             OpReturn\n"
6988                                       "             OpFunctionEnd\n";
6989 
6990     const StringTemplate shaderTemplate(commonShaderHeader + "OpMemberName %u3str 0 \"${NAME}\"\n" +
6991                                         commonShaderFooter);
6992 
6993     const std::string multipleNames = commonShaderHeader +
6994                                       "OpMemberName %u3str 0 \"to_be\"\n"
6995                                       "OpMemberName %u3str 1 \"or_not\"\n"
6996                                       "OpMemberName %u3str 0 \"to_be\"\n"
6997                                       "OpMemberName %u3str 2 \"makes_no\"\n"
6998                                       "OpMemberName %u3str 0 \"difference\"\n"
6999                                       "OpMemberName %u3str 0 \"to_me\"\n" +
7000                                       commonShaderFooter;
7001     {
7002         ComputeShaderSpec spec;
7003 
7004         spec.assembly      = multipleNames;
7005         spec.numWorkGroups = IVec3(numElements, 1, 1);
7006         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
7007         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
7008 
7009         abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "u3str_x_has_multiple_names", spec));
7010     }
7011 
7012     const std::string everythingNamedTheSame = commonShaderHeader +
7013                                                "OpMemberName %u3str 0 \"the_same\"\n"
7014                                                "OpMemberName %u3str 1 \"the_same\"\n"
7015                                                "OpMemberName %u3str 2 \"the_same\"\n" +
7016                                                commonShaderFooter;
7017 
7018     {
7019         ComputeShaderSpec spec;
7020 
7021         spec.assembly      = everythingNamedTheSame;
7022         spec.numWorkGroups = IVec3(numElements, 1, 1);
7023         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
7024         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
7025 
7026         abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", spec));
7027     }
7028 
7029     // u3str_x_is_....
7030     for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
7031     {
7032         map<string, string> specializations;
7033         ComputeShaderSpec spec;
7034 
7035         specializations["NAME"] = abuseCases[ndx].param;
7036         spec.assembly           = shaderTemplate.specialize(specializations);
7037         spec.numWorkGroups      = IVec3(numElements, 1, 1);
7038         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
7039         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
7040 
7041         abuseGroup->addChild(
7042             new SpvAsmComputeShaderCase(testCtx, (std::string("u3str_x") + abuseCases[ndx].name).c_str(), spec));
7043     }
7044 
7045     group->addChild(abuseGroup.release());
7046 
7047     return group.release();
7048 }
7049 
7050 // Assembly code used for testing function control is based on GLSL source code:
7051 //
7052 // #version 430
7053 //
7054 // layout(std140, set = 0, binding = 0) readonly buffer Input {
7055 //   float elements[];
7056 // } input_data;
7057 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
7058 //   float elements[];
7059 // } output_data;
7060 //
7061 // float const10() { return 10.f; }
7062 //
7063 // void main() {
7064 //   uint x = gl_GlobalInvocationID.x;
7065 //   output_data.elements[x] = input_data.elements[x] + const10();
7066 // }
createFunctionControlGroup(tcu::TestContext & testCtx)7067 tcu::TestCaseGroup *createFunctionControlGroup(tcu::TestContext &testCtx)
7068 {
7069     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "function_control"));
7070     vector<CaseParameter> cases;
7071     de::Random rnd(deStringHash(group->getName()));
7072     const int numElements = 100;
7073     vector<float> inputFloats(numElements, 0);
7074     vector<float> outputFloats(numElements, 0);
7075     const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
7076 
7077                                         "OpSource GLSL 430\n"
7078                                         "OpName %main \"main\"\n"
7079                                         "OpName %func_const10 \"const10(\"\n"
7080                                         "OpName %id \"gl_GlobalInvocationID\"\n"
7081 
7082                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
7083 
7084                                         + string(getComputeAsmInputOutputBufferTraits()) +
7085                                         string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
7086 
7087                                         "%f32f = OpTypeFunction %f32\n"
7088                                         "%id = OpVariable %uvec3ptr Input\n"
7089                                         "%zero = OpConstant %i32 0\n"
7090                                         "%constf10 = OpConstant %f32 10.0\n"
7091 
7092                                         "%main         = OpFunction %void None %voidf\n"
7093                                         "%entry        = OpLabel\n"
7094                                         "%idval        = OpLoad %uvec3 %id\n"
7095                                         "%x            = OpCompositeExtract %u32 %idval 0\n"
7096                                         "%inloc        = OpAccessChain %f32ptr %indata %zero %x\n"
7097                                         "%inval        = OpLoad %f32 %inloc\n"
7098                                         "%ret_10       = OpFunctionCall %f32 %func_const10\n"
7099                                         "%fadd         = OpFAdd %f32 %inval %ret_10\n"
7100                                         "%outloc       = OpAccessChain %f32ptr %outdata %zero %x\n"
7101                                         "                OpStore %outloc %fadd\n"
7102                                         "                OpReturn\n"
7103                                         "                OpFunctionEnd\n"
7104 
7105                                         "%func_const10 = OpFunction %f32 ${CONTROL} %f32f\n"
7106                                         "%label        = OpLabel\n"
7107                                         "                OpReturnValue %constf10\n"
7108                                         "                OpFunctionEnd\n");
7109 
7110     cases.push_back(CaseParameter("none", "None"));
7111     cases.push_back(CaseParameter("inline", "Inline"));
7112     cases.push_back(CaseParameter("dont_inline", "DontInline"));
7113     cases.push_back(CaseParameter("pure", "Pure"));
7114     cases.push_back(CaseParameter("const", "Const"));
7115     cases.push_back(CaseParameter("inline_pure", "Inline|Pure"));
7116     cases.push_back(CaseParameter("const_dont_inline", "Const|DontInline"));
7117     cases.push_back(CaseParameter("inline_dont_inline", "Inline|DontInline"));
7118     cases.push_back(CaseParameter("pure_inline_dont_inline", "Pure|Inline|DontInline"));
7119 
7120     fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
7121 
7122     // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
7123     floorAll(inputFloats);
7124 
7125     for (size_t ndx = 0; ndx < numElements; ++ndx)
7126         outputFloats[ndx] = inputFloats[ndx] + 10.f;
7127 
7128     for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7129     {
7130         map<string, string> specializations;
7131         ComputeShaderSpec spec;
7132 
7133         specializations["CONTROL"] = cases[caseNdx].param;
7134         spec.assembly              = shaderTemplate.specialize(specializations);
7135         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
7136         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
7137         spec.numWorkGroups = IVec3(numElements, 1, 1);
7138 
7139         group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
7140     }
7141 
7142     return group.release();
7143 }
7144 
createMemoryAccessGroup(tcu::TestContext & testCtx)7145 tcu::TestCaseGroup *createMemoryAccessGroup(tcu::TestContext &testCtx)
7146 {
7147     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "memory_access"));
7148     vector<CaseParameter> cases;
7149     de::Random rnd(deStringHash(group->getName()));
7150     const int numElements = 100;
7151     vector<float> inputFloats(numElements, 0);
7152     vector<float> outputFloats(numElements, 0);
7153     const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
7154 
7155                                         "OpSource GLSL 430\n"
7156                                         "OpName %main           \"main\"\n"
7157                                         "OpName %id             \"gl_GlobalInvocationID\"\n"
7158 
7159                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
7160 
7161                                         + string(getComputeAsmInputOutputBufferTraits()) +
7162                                         string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
7163 
7164                                         "%f32ptr_f  = OpTypePointer Function %f32\n"
7165 
7166                                         "%id        = OpVariable %uvec3ptr Input\n"
7167                                         "%zero      = OpConstant %i32 0\n"
7168                                         "%four      = OpConstant %i32 4\n"
7169 
7170                                         "%main      = OpFunction %void None %voidf\n"
7171                                         "%label     = OpLabel\n"
7172                                         "%copy      = OpVariable %f32ptr_f Function\n"
7173                                         "%idval     = OpLoad %uvec3 %id ${ACCESS}\n"
7174                                         "%x         = OpCompositeExtract %u32 %idval 0\n"
7175                                         "%inloc     = OpAccessChain %f32ptr %indata  %zero %x\n"
7176                                         "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
7177                                         "             OpCopyMemory %copy %inloc ${ACCESS}\n"
7178                                         "%val1      = OpLoad %f32 %copy\n"
7179                                         "%val2      = OpLoad %f32 %inloc\n"
7180                                         "%add       = OpFAdd %f32 %val1 %val2\n"
7181                                         "             OpStore %outloc %add ${ACCESS}\n"
7182                                         "             OpReturn\n"
7183                                         "             OpFunctionEnd\n");
7184 
7185     cases.push_back(CaseParameter("null", ""));
7186     cases.push_back(CaseParameter("none", "None"));
7187     cases.push_back(CaseParameter("volatile", "Volatile"));
7188     cases.push_back(CaseParameter("aligned", "Aligned 4"));
7189     cases.push_back(CaseParameter("nontemporal", "Nontemporal"));
7190     cases.push_back(CaseParameter("aligned_nontemporal", "Aligned|Nontemporal 4"));
7191     cases.push_back(CaseParameter("aligned_volatile", "Volatile|Aligned 4"));
7192 
7193     fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
7194 
7195     for (size_t ndx = 0; ndx < numElements; ++ndx)
7196         outputFloats[ndx] = inputFloats[ndx] + inputFloats[ndx];
7197 
7198     for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7199     {
7200         map<string, string> specializations;
7201         ComputeShaderSpec spec;
7202 
7203         specializations["ACCESS"] = cases[caseNdx].param;
7204         spec.assembly             = shaderTemplate.specialize(specializations);
7205         spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
7206         spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
7207         spec.numWorkGroups = IVec3(numElements, 1, 1);
7208 
7209         group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
7210     }
7211 
7212     return group.release();
7213 }
7214 
7215 // Checks that we can get undefined values for various types, without exercising a computation with it.
createOpUndefGroup(tcu::TestContext & testCtx)7216 tcu::TestCaseGroup *createOpUndefGroup(tcu::TestContext &testCtx)
7217 {
7218     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opundef"));
7219     vector<CaseParameter> cases;
7220     de::Random rnd(deStringHash(group->getName()));
7221     const int numElements = 100;
7222     vector<float> positiveFloats(numElements, 0);
7223     vector<float> negativeFloats(numElements, 0);
7224     const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
7225 
7226                                         "OpSource GLSL 430\n"
7227                                         "OpName %main           \"main\"\n"
7228                                         "OpName %id             \"gl_GlobalInvocationID\"\n"
7229 
7230                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
7231 
7232                                         + string(getComputeAsmInputOutputBufferTraits()) +
7233                                         string(getComputeAsmCommonTypes()) +
7234                                         "%uvec2     = OpTypeVector %u32 2\n"
7235                                         "%fvec4     = OpTypeVector %f32 4\n"
7236                                         "%fmat33    = OpTypeMatrix %fvec3 3\n"
7237                                         "%image     = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
7238                                         "%sampler   = OpTypeSampler\n"
7239                                         "%simage    = OpTypeSampledImage %image\n"
7240                                         "%const100  = OpConstant %u32 100\n"
7241                                         "%uarr100   = OpTypeArray %i32 %const100\n"
7242                                         "%struct    = OpTypeStruct %f32 %i32 %u32\n"
7243                                         "%pointer   = OpTypePointer Function %i32\n" +
7244                                         string(getComputeAsmInputOutputBuffer()) +
7245 
7246                                         "%id        = OpVariable %uvec3ptr Input\n"
7247                                         "%zero      = OpConstant %i32 0\n"
7248 
7249                                         "%main      = OpFunction %void None %voidf\n"
7250                                         "%label     = OpLabel\n"
7251 
7252                                         "%undef     = OpUndef ${TYPE}\n"
7253 
7254                                         "%idval     = OpLoad %uvec3 %id\n"
7255                                         "%x         = OpCompositeExtract %u32 %idval 0\n"
7256 
7257                                         "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
7258                                         "%inval     = OpLoad %f32 %inloc\n"
7259                                         "%neg       = OpFNegate %f32 %inval\n"
7260                                         "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
7261                                         "             OpStore %outloc %neg\n"
7262                                         "             OpReturn\n"
7263                                         "             OpFunctionEnd\n");
7264 
7265     cases.push_back(CaseParameter("bool", "%bool"));
7266     cases.push_back(CaseParameter("sint32", "%i32"));
7267     cases.push_back(CaseParameter("uint32", "%u32"));
7268     cases.push_back(CaseParameter("float32", "%f32"));
7269     cases.push_back(CaseParameter("vec4float32", "%fvec4"));
7270     cases.push_back(CaseParameter("vec2uint32", "%uvec2"));
7271     cases.push_back(CaseParameter("matrix", "%fmat33"));
7272     cases.push_back(CaseParameter("image", "%image"));
7273     cases.push_back(CaseParameter("sampler", "%sampler"));
7274     cases.push_back(CaseParameter("sampledimage", "%simage"));
7275     cases.push_back(CaseParameter("array", "%uarr100"));
7276     cases.push_back(CaseParameter("runtimearray", "%f32arr"));
7277     cases.push_back(CaseParameter("struct", "%struct"));
7278     cases.push_back(CaseParameter("pointer", "%pointer"));
7279 
7280     fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7281 
7282     for (size_t ndx = 0; ndx < numElements; ++ndx)
7283         negativeFloats[ndx] = -positiveFloats[ndx];
7284 
7285     for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7286     {
7287         map<string, string> specializations;
7288         ComputeShaderSpec spec;
7289 
7290         specializations["TYPE"] = cases[caseNdx].param;
7291         spec.assembly           = shaderTemplate.specialize(specializations);
7292         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7293         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7294         spec.numWorkGroups = IVec3(numElements, 1, 1);
7295 
7296         group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
7297     }
7298 
7299     // OpUndef with constants.
7300 #ifndef CTS_USES_VULKANSC
7301     {
7302         static const char data_dir[] = "spirv_assembly/instruction/compute/undef";
7303 
7304         static const struct
7305         {
7306             const std::string name;
7307             const std::string desc;
7308         } amberCases[] = {
7309             {"undefined_constant_composite", "OpUndef value in OpConstantComposite"},
7310             {"undefined_spec_constant_composite", "OpUndef value in OpSpecConstantComposite"},
7311         };
7312 
7313         for (int i = 0; i < DE_LENGTH_OF_ARRAY(amberCases); ++i)
7314         {
7315             cts_amber::AmberTestCase *testCase =
7316                 cts_amber::createAmberTestCase(testCtx, amberCases[i].name.c_str(), amberCases[i].desc.c_str(),
7317                                                data_dir, amberCases[i].name + ".amber");
7318             group->addChild(testCase);
7319         }
7320     }
7321 #endif
7322 
7323     return group.release();
7324 }
7325 
7326 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createFloat16OpConstantCompositeGroup(tcu::TestContext & testCtx)7327 tcu::TestCaseGroup *createFloat16OpConstantCompositeGroup(tcu::TestContext &testCtx)
7328 {
7329     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opconstantcomposite"));
7330     vector<CaseParameter> cases;
7331     de::Random rnd(deStringHash(group->getName()));
7332     const int numElements = 100;
7333     vector<float> positiveFloats(numElements, 0);
7334     vector<float> negativeFloats(numElements, 0);
7335     const StringTemplate shaderTemplate("OpCapability Shader\n"
7336                                         "OpCapability Float16\n"
7337                                         "OpMemoryModel Logical GLSL450\n"
7338                                         "OpEntryPoint GLCompute %main \"main\" %id\n"
7339                                         "OpExecutionMode %main LocalSize 1 1 1\n"
7340                                         "OpSource GLSL 430\n"
7341                                         "OpName %main           \"main\"\n"
7342                                         "OpName %id             \"gl_GlobalInvocationID\"\n"
7343 
7344                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
7345 
7346                                         + string(getComputeAsmInputOutputBufferTraits()) +
7347                                         string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
7348 
7349                                         "%id        = OpVariable %uvec3ptr Input\n"
7350                                         "%zero      = OpConstant %i32 0\n"
7351                                         "%f16       = OpTypeFloat 16\n"
7352                                         "%c_f16_0   = OpConstant %f16 0.0\n"
7353                                         "%c_f16_0_5 = OpConstant %f16 0.5\n"
7354                                         "%c_f16_1   = OpConstant %f16 1.0\n"
7355                                         "%v2f16     = OpTypeVector %f16 2\n"
7356                                         "%v3f16     = OpTypeVector %f16 3\n"
7357                                         "%v4f16     = OpTypeVector %f16 4\n"
7358 
7359                                         "${CONSTANT}\n"
7360 
7361                                         "%main      = OpFunction %void None %voidf\n"
7362                                         "%label     = OpLabel\n"
7363                                         "%idval     = OpLoad %uvec3 %id\n"
7364                                         "%x         = OpCompositeExtract %u32 %idval 0\n"
7365                                         "%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
7366                                         "%inval     = OpLoad %f32 %inloc\n"
7367                                         "%neg       = OpFNegate %f32 %inval\n"
7368                                         "%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
7369                                         "             OpStore %outloc %neg\n"
7370                                         "             OpReturn\n"
7371                                         "             OpFunctionEnd\n");
7372 
7373     cases.push_back(CaseParameter("vector", "%const = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"));
7374     cases.push_back(CaseParameter("matrix", "%m3v3f16 = OpTypeMatrix %v3f16 3\n"
7375                                             "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7376                                             "%mat = OpConstantComposite %m3v3f16 %vec %vec %vec"));
7377     cases.push_back(CaseParameter("struct", "%m2v3f16 = OpTypeMatrix %v3f16 2\n"
7378                                             "%struct = OpTypeStruct %i32 %f16 %v3f16 %m2v3f16\n"
7379                                             "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7380                                             "%mat = OpConstantComposite %m2v3f16 %vec %vec\n"
7381                                             "%const = OpConstantComposite %struct %zero %c_f16_0_5 %vec %mat\n"));
7382     cases.push_back(CaseParameter("nested_struct", "%st1 = OpTypeStruct %i32 %f16\n"
7383                                                    "%st2 = OpTypeStruct %i32 %i32\n"
7384                                                    "%struct = OpTypeStruct %st1 %st2\n"
7385                                                    "%st1val = OpConstantComposite %st1 %zero %c_f16_0_5\n"
7386                                                    "%st2val = OpConstantComposite %st2 %zero %zero\n"
7387                                                    "%const = OpConstantComposite %struct %st1val %st2val"));
7388 
7389     fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7390 
7391     for (size_t ndx = 0; ndx < numElements; ++ndx)
7392         negativeFloats[ndx] = -positiveFloats[ndx];
7393 
7394     for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7395     {
7396         map<string, string> specializations;
7397         ComputeShaderSpec spec;
7398 
7399         specializations["CONSTANT"] = cases[caseNdx].param;
7400         spec.assembly               = shaderTemplate.specialize(specializations);
7401         spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7402         spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7403         spec.numWorkGroups = IVec3(numElements, 1, 1);
7404 
7405         spec.extensions.push_back("VK_KHR_shader_float16_int8");
7406 
7407         spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
7408 
7409         group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
7410     }
7411 
7412     return group.release();
7413 }
7414 
// Expands inData (length N) into an N*N sequence so that two such sequences, one built with argNo 0 and
// one with argNo 1, enumerate every ordered pair of input values when read element by element.
//
//   argNo == 0: the whole input repeated N times          (a b c a b c a b c)
//   argNo == 1: every element repeated N times in a row   (a a a b b b c c c)
//
// Any other argNo yields an empty vector. An empty inData yields an empty vector.
const vector<deFloat16> squarize(const vector<deFloat16> &inData, const uint32_t argNo)
{
    const size_t inDataLength = inData.size();
    vector<deFloat16> result;

    result.reserve(inDataLength * inDataLength);

    if (argNo == 0)
    {
        for (size_t repetition = 0; repetition < inDataLength; ++repetition)
            result.insert(result.end(), inData.begin(), inData.end());
    }
    else if (argNo == 1)
    {
        // Fill-insert appends N copies directly instead of building a temporary vector per element.
        for (size_t numIdx = 0; numIdx < inDataLength; ++numIdx)
            result.insert(result.end(), inDataLength, inData[numIdx]);
    }

    return result;
}
7440 
// Two-component-vector counterpart of squarize(). First every ordered pair (x, y) of input values is
// formed as a 2-component vector (N*N vectors for N inputs). The result then lists M*M such vectors
// (M = N*N), laid out so that a sequence built with argNo 0 and one built with argNo 1 together
// enumerate every ordered pair of vectors:
//
//   argNo == 0: the full list of vectors repeated M times
//   argNo == 1: every vector repeated M times in a row
//
// Any other argNo yields an empty vector.
const vector<deFloat16> squarizeVector(const vector<deFloat16> &inData, const uint32_t argNo)
{
    const size_t coordsPerVector = 2;
    vector<deFloat16> pairs;
    vector<deFloat16> result;

    // Stage 1: all ordered pairs of scalars, stored as consecutive (x, y) components.
    {
        const size_t scalarCount = inData.size();

        DE_ASSERT(scalarCount <= 64);

        pairs.reserve(2 * scalarCount * scalarCount);

        for (const deFloat16 first : inData)
            for (const deFloat16 second : inData)
            {
                pairs.push_back(first);
                pairs.push_back(second);
            }
    }

    // Stage 2: all ordered pairs of vectors; argNo selects which member of the pair is emitted.
    {
        const size_t vectorsCount = pairs.size() / coordsPerVector;

        result.reserve(coordsPerVector * vectorsCount * vectorsCount);

        if (argNo == 0 || argNo == 1)
        {
            for (size_t outerIdx = 0; outerIdx < vectorsCount; ++outerIdx)
                for (size_t innerIdx = 0; innerIdx < vectorsCount; ++innerIdx)
                {
                    // Argument 0 varies fastest (inner index); argument 1 varies slowest (outer index).
                    const size_t sourceVector = (argNo == 0) ? innerIdx : outerIdx;
                    const size_t firstCoord   = coordsPerVector * sourceVector;

                    for (size_t coordNdx = 0; coordNdx < coordsPerVector; ++coordNdx)
                        result.push_back(pairs[firstCoord + coordNdx]);
                }
        }
    }

    return result;
}
7492 
7493 struct fp16isNan
7494 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isNan7495     bool operator()(const tcu::Float16 in1, const tcu::Float16)
7496     {
7497         return in1.isNaN();
7498     }
7499 };
7500 struct fp16isInf
7501 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isInf7502     bool operator()(const tcu::Float16 in1, const tcu::Float16)
7503     {
7504         return in1.isInf();
7505     }
7506 };
7507 struct fp16isEqual
7508 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isEqual7509     bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)
7510     {
7511         return in1.asFloat() == in2.asFloat();
7512     }
7513 };
7514 struct fp16isUnequal
7515 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isUnequal7516     bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)
7517     {
7518         return in1.asFloat() != in2.asFloat();
7519     }
7520 };
7521 struct fp16isLess
7522 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isLess7523     bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)
7524     {
7525         return in1.asFloat() < in2.asFloat();
7526     }
7527 };
7528 struct fp16isGreater
7529 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isGreater7530     bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)
7531     {
7532         return in1.asFloat() > in2.asFloat();
7533     }
7534 };
7535 struct fp16isLessOrEqual
7536 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isLessOrEqual7537     bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)
7538     {
7539         return in1.asFloat() <= in2.asFloat();
7540     }
7541 };
7542 struct fp16isGreaterOrEqual
7543 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isGreaterOrEqual7544     bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)
7545     {
7546         return in1.asFloat() >= in2.asFloat();
7547     }
7548 };
7549 
7550 template <class TestedLogicalFunction, bool onlyTestFunc, bool unationModeAnd, bool nanSupported>
compareFP16Logical(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)7551 bool compareFP16Logical(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
7552                         const std::vector<Resource> &, TestLog &log)
7553 {
7554     if (inputs.size() != 2 || outputAllocs.size() != 1)
7555         return false;
7556 
7557     vector<uint8_t> input1Bytes;
7558     vector<uint8_t> input2Bytes;
7559 
7560     inputs[0].getBytes(input1Bytes);
7561     inputs[1].getBytes(input2Bytes);
7562 
7563     const uint32_t denormModesCount     = 2;
7564     const deFloat16 float16one          = tcu::Float16(1.0f).bits();
7565     const deFloat16 float16zero         = tcu::Float16(0.0f).bits();
7566     const tcu::Float16 zero             = tcu::Float16::zero(1);
7567     const deFloat16 *const outputAsFP16 = static_cast<deFloat16 *>(outputAllocs[0]->getHostPtr());
7568     const deFloat16 *const input1AsFP16 = reinterpret_cast<deFloat16 *const>(&input1Bytes.front());
7569     const deFloat16 *const input2AsFP16 = reinterpret_cast<deFloat16 *const>(&input2Bytes.front());
7570     uint32_t successfulRuns             = denormModesCount;
7571     std::string results[denormModesCount];
7572     TestedLogicalFunction testedLogicalFunction;
7573 
7574     for (uint32_t denormMode = 0; denormMode < denormModesCount; denormMode++)
7575     {
7576         const bool flushToZero = (denormMode == 1);
7577 
7578         for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deFloat16); ++idx)
7579         {
7580             const tcu::Float16 f1pre = tcu::Float16(input1AsFP16[idx]);
7581             const tcu::Float16 f2pre = tcu::Float16(input2AsFP16[idx]);
7582             const tcu::Float16 f1    = (flushToZero && f1pre.isDenorm()) ? zero : f1pre;
7583             const tcu::Float16 f2    = (flushToZero && f2pre.isDenorm()) ? zero : f2pre;
7584             deFloat16 expectedOutput = float16zero;
7585 
7586             if (onlyTestFunc)
7587             {
7588                 if (testedLogicalFunction(f1, f2))
7589                     expectedOutput = float16one;
7590             }
7591             else
7592             {
7593                 const bool f1nan = f1.isNaN();
7594                 const bool f2nan = f2.isNaN();
7595 
7596                 // Skip NaN floats if not supported by implementation
7597                 if (!nanSupported && (f1nan || f2nan))
7598                     continue;
7599 
7600                 if (unationModeAnd)
7601                 {
7602                     const bool ordered = !f1nan && !f2nan;
7603 
7604                     if (ordered && testedLogicalFunction(f1, f2))
7605                         expectedOutput = float16one;
7606                 }
7607                 else
7608                 {
7609                     const bool unordered = f1nan || f2nan;
7610 
7611                     if (unordered || testedLogicalFunction(f1, f2))
7612                         expectedOutput = float16one;
7613                 }
7614             }
7615 
7616             if (outputAsFP16[idx] != expectedOutput)
7617             {
7618                 std::ostringstream str;
7619 
7620                 str << "ERROR: Sub-case #" << idx << " flushToZero:" << flushToZero << std::hex << " failed, inputs: 0x"
7621                     << f1.bits() << ";0x" << f2.bits() << " output: 0x" << outputAsFP16[idx] << " expected output: 0x"
7622                     << expectedOutput;
7623 
7624                 results[denormMode] = str.str();
7625 
7626                 successfulRuns--;
7627 
7628                 break;
7629             }
7630         }
7631     }
7632 
7633     if (successfulRuns == 0)
7634         for (uint32_t denormMode = 0; denormMode < denormModesCount; denormMode++)
7635             log << TestLog::Message << results[denormMode] << TestLog::EndMessage;
7636 
7637     return successfulRuns > 0;
7638 }
7639 
7640 } // namespace
7641 
createOpSourceTests(tcu::TestContext & testCtx)7642 tcu::TestCaseGroup *createOpSourceTests(tcu::TestContext &testCtx)
7643 {
7644     struct NameCodePair
7645     {
7646         string name, code;
7647     };
7648     RGBA defaultColors[4];
7649     de::MovePtr<tcu::TestCaseGroup> opSourceTests(new tcu::TestCaseGroup(testCtx, "opsource"));
7650     const std::string opsourceGLSLWithFile = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile ";
7651     map<string, string> fragments          = passthruFragments();
7652     const NameCodePair tests[]             = {{"unknown", "OpSource Unknown 321"},
7653                                               {"essl", "OpSource ESSL 310"},
7654                                               {"glsl", "OpSource GLSL 450"},
7655                                               {"opencl_cpp", "OpSource OpenCL_CPP 120"},
7656                                               {"opencl_c", "OpSource OpenCL_C 120"},
7657                                               {"multiple", "OpSource GLSL 450\nOpSource GLSL 450"},
7658                                               {"file", opsourceGLSLWithFile},
7659                                               {"source", opsourceGLSLWithFile + "\"void main(){}\""},
7660                                               // Longest possible source string: SPIR-V limits instructions to 65535
7661                                               // words, of which the first 4 are opsourceGLSLWithFile; the rest will
7662                                               // contain 65530 UTF8 characters (one word each) plus one last word
7663                                               // containing 3 ASCII characters and \0.
7664                                               {"longsource", opsourceGLSLWithFile + '"' + makeLongUTF8String(65530) + "ccc" + '"'}};
7665 
7666     getDefaultColors(defaultColors);
7667     for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7668     {
7669         fragments["debug"] = tests[testNdx].code;
7670         createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7671     }
7672 
7673     return opSourceTests.release();
7674 }
7675 
createOpSourceContinuedTests(tcu::TestContext & testCtx)7676 tcu::TestCaseGroup *createOpSourceContinuedTests(tcu::TestContext &testCtx)
7677 {
7678     struct NameCodePair
7679     {
7680         string name, code;
7681     };
7682     RGBA defaultColors[4];
7683     de::MovePtr<tcu::TestCaseGroup> opSourceTests(new tcu::TestCaseGroup(testCtx, "opsourcecontinued"));
7684     map<string, string> fragments = passthruFragments();
7685     const std::string opsource = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile \"void main(){}\"\n";
7686     const NameCodePair tests[] = {{"empty", opsource + "OpSourceContinued \"\""},
7687                                   {"short", opsource + "OpSourceContinued \"abcde\""},
7688                                   {"multiple", opsource + "OpSourceContinued \"abcde\"\nOpSourceContinued \"fghij\""},
7689                                   // Longest possible source string: SPIR-V limits instructions to 65535
7690                                   // words, of which the first one is OpSourceContinued/length; the rest
7691                                   // will contain 65533 UTF8 characters (one word each) plus one last word
7692                                   // containing 3 ASCII characters and \0.
7693                                   {"long", opsource + "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\""}};
7694 
7695     getDefaultColors(defaultColors);
7696     for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7697     {
7698         fragments["debug"] = tests[testNdx].code;
7699         createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7700     }
7701 
7702     return opSourceTests.release();
7703 }
createOpNoLineTests(tcu::TestContext & testCtx)7704 tcu::TestCaseGroup *createOpNoLineTests(tcu::TestContext &testCtx)
7705 {
7706     RGBA defaultColors[4];
7707     de::MovePtr<tcu::TestCaseGroup> opLineTests(new tcu::TestCaseGroup(testCtx, "opnoline"));
7708     map<string, string> fragments;
7709     getDefaultColors(defaultColors);
7710     fragments["debug"] = "%name = OpString \"name\"\n";
7711 
7712     fragments["pre_main"] = "OpNoLine\n"
7713                             "OpNoLine\n"
7714                             "OpLine %name 1 1\n"
7715                             "OpNoLine\n"
7716                             "OpLine %name 1 1\n"
7717                             "OpLine %name 1 1\n"
7718                             "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7719                             "OpNoLine\n"
7720                             "OpLine %name 1 1\n"
7721                             "OpNoLine\n"
7722                             "OpLine %name 1 1\n"
7723                             "OpLine %name 1 1\n"
7724                             "%second_param1 = OpFunctionParameter %v4f32\n"
7725                             "OpNoLine\n"
7726                             "OpNoLine\n"
7727                             "%label_secondfunction = OpLabel\n"
7728                             "OpNoLine\n"
7729                             "OpReturnValue %second_param1\n"
7730                             "OpFunctionEnd\n"
7731                             "OpNoLine\n"
7732                             "OpNoLine\n";
7733 
7734     fragments["testfun"] =
7735         // A %test_code function that returns its argument unchanged.
7736         "OpNoLine\n"
7737         "OpNoLine\n"
7738         "OpLine %name 1 1\n"
7739         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7740         "OpNoLine\n"
7741         "%param1 = OpFunctionParameter %v4f32\n"
7742         "OpNoLine\n"
7743         "OpNoLine\n"
7744         "%label_testfun = OpLabel\n"
7745         "OpNoLine\n"
7746         "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7747         "OpReturnValue %val1\n"
7748         "OpFunctionEnd\n"
7749         "OpLine %name 1 1\n"
7750         "OpNoLine\n";
7751 
7752     createTestsForAllStages("opnoline", defaultColors, defaultColors, fragments, opLineTests.get());
7753 
7754     return opLineTests.release();
7755 }
7756 
createOpModuleProcessedTests(tcu::TestContext & testCtx)7757 tcu::TestCaseGroup *createOpModuleProcessedTests(tcu::TestContext &testCtx)
7758 {
7759     RGBA defaultColors[4];
7760     de::MovePtr<tcu::TestCaseGroup> opModuleProcessedTests(new tcu::TestCaseGroup(testCtx, "opmoduleprocessed"));
7761     map<string, string> fragments;
7762     std::vector<std::string> noExtensions;
7763     GraphicsResources resources;
7764 
7765     getDefaultColors(defaultColors);
7766     resources.verifyBinary = veryfiBinaryShader;
7767     resources.spirvVersion = SPIRV_VERSION_1_3;
7768 
7769     fragments["moduleprocessed"] = "OpModuleProcessed \"VULKAN CTS\"\n"
7770                                    "OpModuleProcessed \"Negative values\"\n"
7771                                    "OpModuleProcessed \"Date: 2017/09/21\"\n";
7772 
7773     fragments["pre_main"] = "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7774                             "%second_param1 = OpFunctionParameter %v4f32\n"
7775                             "%label_secondfunction = OpLabel\n"
7776                             "OpReturnValue %second_param1\n"
7777                             "OpFunctionEnd\n";
7778 
7779     fragments["testfun"] =
7780         // A %test_code function that returns its argument unchanged.
7781         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7782         "%param1 = OpFunctionParameter %v4f32\n"
7783         "%label_testfun = OpLabel\n"
7784         "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7785         "OpReturnValue %val1\n"
7786         "OpFunctionEnd\n";
7787 
7788     createTestsForAllStages("opmoduleprocessed", defaultColors, defaultColors, fragments, resources, noExtensions,
7789                             opModuleProcessedTests.get());
7790 
7791     return opModuleProcessedTests.release();
7792 }
7793 
createOpLineTests(tcu::TestContext & testCtx)7794 tcu::TestCaseGroup *createOpLineTests(tcu::TestContext &testCtx)
7795 {
7796     RGBA defaultColors[4];
7797     de::MovePtr<tcu::TestCaseGroup> opLineTests(new tcu::TestCaseGroup(testCtx, "opline"));
7798     map<string, string> fragments;
7799     std::vector<std::pair<std::string, std::string>> problemStrings;
7800 
7801     problemStrings.push_back(std::make_pair<std::string, std::string>("empty_name", ""));
7802     problemStrings.push_back(std::make_pair<std::string, std::string>("short_name", "short_name"));
7803     problemStrings.push_back(std::make_pair<std::string, std::string>("long_name", makeLongUTF8String(65530) + "ccc"));
7804     getDefaultColors(defaultColors);
7805 
7806     fragments["debug"] = "%other_name = OpString \"other_name\"\n";
7807 
7808     fragments["pre_main"] = "OpLine %file_name 32 0\n"
7809                             "OpLine %file_name 32 32\n"
7810                             "OpLine %file_name 32 40\n"
7811                             "OpLine %other_name 32 40\n"
7812                             "OpLine %other_name 0 100\n"
7813                             "OpLine %other_name 0 4294967295\n"
7814                             "OpLine %other_name 4294967295 0\n"
7815                             "OpLine %other_name 32 40\n"
7816                             "OpLine %file_name 0 0\n"
7817                             "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7818                             "OpLine %file_name 1 0\n"
7819                             "%second_param1 = OpFunctionParameter %v4f32\n"
7820                             "OpLine %file_name 1 3\n"
7821                             "OpLine %file_name 1 2\n"
7822                             "%label_secondfunction = OpLabel\n"
7823                             "OpLine %file_name 0 2\n"
7824                             "OpReturnValue %second_param1\n"
7825                             "OpFunctionEnd\n"
7826                             "OpLine %file_name 0 2\n"
7827                             "OpLine %file_name 0 2\n";
7828 
7829     fragments["testfun"] =
7830         // A %test_code function that returns its argument unchanged.
7831         "OpLine %file_name 1 0\n"
7832         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7833         "OpLine %file_name 16 330\n"
7834         "%param1 = OpFunctionParameter %v4f32\n"
7835         "OpLine %file_name 14 442\n"
7836         "%label_testfun = OpLabel\n"
7837         "OpLine %file_name 11 1024\n"
7838         "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7839         "OpLine %file_name 2 97\n"
7840         "OpReturnValue %val1\n"
7841         "OpFunctionEnd\n"
7842         "OpLine %file_name 5 32\n";
7843 
7844     for (size_t i = 0; i < problemStrings.size(); ++i)
7845     {
7846         map<string, string> testFragments = fragments;
7847         testFragments["debug"] += "%file_name = OpString \"" + problemStrings[i].second + "\"\n";
7848         createTestsForAllStages(string("opline") + "_" + problemStrings[i].first, defaultColors, defaultColors,
7849                                 testFragments, opLineTests.get());
7850     }
7851 
7852     return opLineTests.release();
7853 }
7854 
createOpConstantNullTests(tcu::TestContext & testCtx)7855 tcu::TestCaseGroup *createOpConstantNullTests(tcu::TestContext &testCtx)
7856 {
7857     de::MovePtr<tcu::TestCaseGroup> opConstantNullTests(new tcu::TestCaseGroup(testCtx, "opconstantnull"));
7858     RGBA colors[4];
7859 
7860     const char functionStart[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7861                                  "%param1 = OpFunctionParameter %v4f32\n"
7862                                  "%lbl    = OpLabel\n";
7863 
7864     const char functionEnd[] = "OpReturnValue %transformed_param\n"
7865                                "OpFunctionEnd\n";
7866 
7867     struct NameConstantsCode
7868     {
7869         string name;
7870         string constants;
7871         string code;
7872     };
7873 
7874     NameConstantsCode tests[] = {
7875         {"vec4", "%cnull = OpConstantNull %v4f32\n", "%transformed_param = OpFAdd %v4f32 %param1 %cnull\n"},
7876         {"float", "%cnull = OpConstantNull %f32\n",
7877          "%vp = OpVariable %fp_v4f32 Function\n"
7878          "%v  = OpLoad %v4f32 %vp\n"
7879          "%v0 = OpVectorInsertDynamic %v4f32 %v %cnull %c_i32_0\n"
7880          "%v1 = OpVectorInsertDynamic %v4f32 %v0 %cnull %c_i32_1\n"
7881          "%v2 = OpVectorInsertDynamic %v4f32 %v1 %cnull %c_i32_2\n"
7882          "%v3 = OpVectorInsertDynamic %v4f32 %v2 %cnull %c_i32_3\n"
7883          "%transformed_param = OpFAdd %v4f32 %param1 %v3\n"},
7884         {"bool", "%cnull             = OpConstantNull %bool\n",
7885          "%v                 = OpVariable %fp_v4f32 Function\n"
7886          "                     OpStore %v %param1\n"
7887          "                     OpSelectionMerge %false_label None\n"
7888          "                     OpBranchConditional %cnull %true_label %false_label\n"
7889          "%true_label        = OpLabel\n"
7890          "                     OpStore %v %c_v4f32_0_5_0_5_0_5_0_5\n"
7891          "                     OpBranch %false_label\n"
7892          "%false_label       = OpLabel\n"
7893          "%transformed_param = OpLoad %v4f32 %v\n"},
7894         {"i32", "%cnull             = OpConstantNull %i32\n",
7895          "%v                 = OpVariable %fp_v4f32 Function %c_v4f32_0_5_0_5_0_5_0_5\n"
7896          "%b                 = OpIEqual %bool %cnull %c_i32_0\n"
7897          "                     OpSelectionMerge %false_label None\n"
7898          "                     OpBranchConditional %b %true_label %false_label\n"
7899          "%true_label        = OpLabel\n"
7900          "                     OpStore %v %param1\n"
7901          "                     OpBranch %false_label\n"
7902          "%false_label       = OpLabel\n"
7903          "%transformed_param = OpLoad %v4f32 %v\n"},
7904         {"struct",
7905          "%stype             = OpTypeStruct %f32 %v4f32\n"
7906          "%fp_stype          = OpTypePointer Function %stype\n"
7907          "%cnull             = OpConstantNull %stype\n",
7908          "%v                 = OpVariable %fp_stype Function %cnull\n"
7909          "%f                 = OpAccessChain %fp_v4f32 %v %c_i32_1\n"
7910          "%f_val             = OpLoad %v4f32 %f\n"
7911          "%transformed_param = OpFAdd %v4f32 %param1 %f_val\n"},
7912         {"array",
7913          "%a4_v4f32          = OpTypeArray %v4f32 %c_u32_4\n"
7914          "%fp_a4_v4f32       = OpTypePointer Function %a4_v4f32\n"
7915          "%cnull             = OpConstantNull %a4_v4f32\n",
7916          "%v                 = OpVariable %fp_a4_v4f32 Function %cnull\n"
7917          "%f                 = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7918          "%f1                = OpAccessChain %fp_v4f32 %v %c_u32_1\n"
7919          "%f2                = OpAccessChain %fp_v4f32 %v %c_u32_2\n"
7920          "%f3                = OpAccessChain %fp_v4f32 %v %c_u32_3\n"
7921          "%f_val             = OpLoad %v4f32 %f\n"
7922          "%f1_val            = OpLoad %v4f32 %f1\n"
7923          "%f2_val            = OpLoad %v4f32 %f2\n"
7924          "%f3_val            = OpLoad %v4f32 %f3\n"
7925          "%t0                = OpFAdd %v4f32 %param1 %f_val\n"
7926          "%t1                = OpFAdd %v4f32 %t0 %f1_val\n"
7927          "%t2                = OpFAdd %v4f32 %t1 %f2_val\n"
7928          "%transformed_param = OpFAdd %v4f32 %t2 %f3_val\n"},
7929         {"matrix",
7930          "%mat4x4_f32        = OpTypeMatrix %v4f32 4\n"
7931          "%cnull             = OpConstantNull %mat4x4_f32\n",
7932          // Our null matrix * any vector should result in a zero vector.
7933          "%v                 = OpVectorTimesMatrix %v4f32 %param1 %cnull\n"
7934          "%transformed_param = OpFAdd %v4f32 %param1 %v\n"}};
7935 
7936     getHalfColorsFullAlpha(colors);
7937 
7938     for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7939     {
7940         map<string, string> fragments;
7941         fragments["pre_main"] = tests[testNdx].constants;
7942         fragments["testfun"]  = string(functionStart) + tests[testNdx].code + functionEnd;
7943         createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, opConstantNullTests.get());
7944     }
7945     return opConstantNullTests.release();
7946 }
createOpConstantCompositeTests(tcu::TestContext & testCtx)7947 tcu::TestCaseGroup *createOpConstantCompositeTests(tcu::TestContext &testCtx)
7948 {
7949     de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests(new tcu::TestCaseGroup(testCtx, "opconstantcomposite"));
7950     RGBA inputColors[4];
7951     RGBA outputColors[4];
7952 
7953     const char functionStart[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7954                                  "%param1 = OpFunctionParameter %v4f32\n"
7955                                  "%lbl    = OpLabel\n";
7956 
7957     const char functionEnd[] = "OpReturnValue %transformed_param\n"
7958                                "OpFunctionEnd\n";
7959 
7960     struct NameConstantsCode
7961     {
7962         string name;
7963         string constants;
7964         string code;
7965     };
7966 
7967     NameConstantsCode tests[] = {
7968         {"vec4",
7969 
7970          "%cval              = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0\n",
7971          "%transformed_param = OpFAdd %v4f32 %param1 %cval\n"},
7972         {
7973             "struct",
7974 
7975             "%stype             = OpTypeStruct %v4f32 %f32\n"
7976             "%fp_stype          = OpTypePointer Function %stype\n"
7977             "%f32_n_1           = OpConstant %f32 -1.0\n"
7978             "%f32_1_5           = OpConstant %f32 !0x3fc00000\n" // +1.5
7979             "%cvec              = OpConstantComposite %v4f32 %f32_1_5 %f32_1_5 %f32_1_5 %c_f32_1\n"
7980             "%cval              = OpConstantComposite %stype %cvec %f32_n_1\n",
7981 
7982             "%v                 = OpVariable %fp_stype Function %cval\n"
7983             "%vec_ptr           = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7984             "%f32_ptr           = OpAccessChain %fp_f32 %v %c_u32_1\n"
7985             "%vec_val           = OpLoad %v4f32 %vec_ptr\n"
7986             "%f32_val           = OpLoad %f32 %f32_ptr\n"
7987             "%tmp1              = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_1 %f32_val\n" // vec4(-1)
7988             "%tmp2              = OpFAdd %v4f32 %tmp1 %param1\n"                          // param1 + vec4(-1)
7989             "%transformed_param = OpFAdd %v4f32 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
7990         },
7991         {// [1|0|0|0.5] [x] = x + 0.5
7992          // [0|1|0|0.5] [y] = y + 0.5
7993          // [0|0|1|0.5] [z] = z + 0.5
7994          // [0|0|0|1  ] [1] = 1
7995          "matrix",
7996 
7997          "%mat4x4_f32          = OpTypeMatrix %v4f32 4\n"
7998          "%v4f32_1_0_0_0       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_0 %c_f32_0 %c_f32_0\n"
7999          "%v4f32_0_1_0_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_1 %c_f32_0 %c_f32_0\n"
8000          "%v4f32_0_0_1_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_1 %c_f32_0\n"
8001          "%v4f32_0_5_0_5_0_5_1 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_1\n"
8002          "%cval                = OpConstantComposite %mat4x4_f32 %v4f32_1_0_0_0 %v4f32_0_1_0_0 %v4f32_0_0_1_0 "
8003          "%v4f32_0_5_0_5_0_5_1\n",
8004 
8005          "%transformed_param   = OpMatrixTimesVector %v4f32 %cval %param1\n"},
8006         {"array",
8007 
8008          "%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
8009          "%fp_a4f32            = OpTypePointer Function %a4f32\n"
8010          "%f32_n_1             = OpConstant %f32 -1.0\n"
8011          "%f32_1_5             = OpConstant %f32 !0x3fc00000\n" // +1.5
8012          "%carr                = OpConstantComposite %a4f32 %c_f32_0 %f32_n_1 %f32_1_5 %c_f32_0\n",
8013 
8014          "%v                   = OpVariable %fp_a4f32 Function %carr\n"
8015          "%f                   = OpAccessChain %fp_f32 %v %c_u32_0\n"
8016          "%f1                  = OpAccessChain %fp_f32 %v %c_u32_1\n"
8017          "%f2                  = OpAccessChain %fp_f32 %v %c_u32_2\n"
8018          "%f3                  = OpAccessChain %fp_f32 %v %c_u32_3\n"
8019          "%f_val               = OpLoad %f32 %f\n"
8020          "%f1_val              = OpLoad %f32 %f1\n"
8021          "%f2_val              = OpLoad %f32 %f2\n"
8022          "%f3_val              = OpLoad %f32 %f3\n"
8023          "%ftot1               = OpFAdd %f32 %f_val %f1_val\n"
8024          "%ftot2               = OpFAdd %f32 %ftot1 %f2_val\n"
8025          "%ftot3               = OpFAdd %f32 %ftot2 %f3_val\n" // 0 - 1 + 1.5 + 0
8026          "%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %ftot3\n"
8027          "%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"},
8028         {//
8029          // [
8030          //   {
8031          //      0.0,
8032          //      [ 1.0, 1.0, 1.0, 1.0]
8033          //   },
8034          //   {
8035          //      1.0,
8036          //      [ 0.0, 0.5, 0.0, 0.0]
8037          //   }, //     ^^^
8038          //   {
8039          //      0.0,
8040          //      [ 1.0, 1.0, 1.0, 1.0]
8041          //   }
8042          // ]
8043          "array_of_struct_of_array",
8044 
8045          "%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
8046          "%fp_a4f32            = OpTypePointer Function %a4f32\n"
8047          "%stype               = OpTypeStruct %f32 %a4f32\n"
8048          "%a3stype             = OpTypeArray %stype %c_u32_3\n"
8049          "%fp_a3stype          = OpTypePointer Function %a3stype\n"
8050          "%ca4f32_0            = OpConstantComposite %a4f32 %c_f32_0 %c_f32_0_5 %c_f32_0 %c_f32_0\n"
8051          "%ca4f32_1            = OpConstantComposite %a4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
8052          "%cstype1             = OpConstantComposite %stype %c_f32_0 %ca4f32_1\n"
8053          "%cstype2             = OpConstantComposite %stype %c_f32_1 %ca4f32_0\n"
8054          "%carr                = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
8055 
8056          "%v                   = OpVariable %fp_a3stype Function %carr\n"
8057          "%f                   = OpAccessChain %fp_f32 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
8058          "%f_l                 = OpLoad %f32 %f\n"
8059          "%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %f_l\n"
8060          "%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"}};
8061 
8062     getHalfColorsFullAlpha(inputColors);
8063     outputColors[0] = RGBA(255, 255, 255, 255);
8064     outputColors[1] = RGBA(255, 127, 127, 255);
8065     outputColors[2] = RGBA(127, 255, 127, 255);
8066     outputColors[3] = RGBA(127, 127, 255, 255);
8067 
8068     for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
8069     {
8070         map<string, string> fragments;
8071         fragments["pre_main"] = tests[testNdx].constants;
8072         fragments["testfun"]  = string(functionStart) + tests[testNdx].code + functionEnd;
8073         createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments,
8074                                 opConstantCompositeTests.get());
8075     }
8076     return opConstantCompositeTests.release();
8077 }
8078 
createSelectionBlockOrderTests(tcu::TestContext & testCtx)8079 tcu::TestCaseGroup *createSelectionBlockOrderTests(tcu::TestContext &testCtx)
8080 {
8081     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "selection_block_order"));
8082     RGBA inputColors[4];
8083     RGBA outputColors[4];
8084     map<string, string> fragments;
8085 
8086     // vec4 test_code(vec4 param) {
8087     //   vec4 result = param;
8088     //   for (int i = 0; i < 4; ++i) {
8089     //     if (i == 0) result[i] = 0.;
8090     //     else        result[i] = 1. - result[i];
8091     //   }
8092     //   return result;
8093     // }
8094     const char function[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8095                             "%param1    = OpFunctionParameter %v4f32\n"
8096                             "%lbl       = OpLabel\n"
8097                             "%iptr      = OpVariable %fp_i32 Function\n"
8098                             "%result    = OpVariable %fp_v4f32 Function\n"
8099                             "             OpStore %iptr %c_i32_0\n"
8100                             "             OpStore %result %param1\n"
8101                             "             OpBranch %loop\n"
8102 
8103                             // Loop entry block.
8104                             "%loop      = OpLabel\n"
8105                             "%ival      = OpLoad %i32 %iptr\n"
8106                             "%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
8107                             "             OpLoopMerge %exit %if_entry None\n"
8108                             "             OpBranchConditional %lt_4 %if_entry %exit\n"
8109 
8110                             // Merge block for loop.
8111                             "%exit      = OpLabel\n"
8112                             "%ret       = OpLoad %v4f32 %result\n"
8113                             "             OpReturnValue %ret\n"
8114 
8115                             // If-statement entry block.
8116                             "%if_entry  = OpLabel\n"
8117                             "%loc       = OpAccessChain %fp_f32 %result %ival\n"
8118                             "%eq_0      = OpIEqual %bool %ival %c_i32_0\n"
8119                             "             OpSelectionMerge %if_exit None\n"
8120                             "             OpBranchConditional %eq_0 %if_true %if_false\n"
8121 
8122                             // False branch for if-statement.
8123                             "%if_false  = OpLabel\n"
8124                             "%val       = OpLoad %f32 %loc\n"
8125                             "%sub       = OpFSub %f32 %c_f32_1 %val\n"
8126                             "             OpStore %loc %sub\n"
8127                             "             OpBranch %if_exit\n"
8128 
8129                             // Merge block for if-statement.
8130                             "%if_exit   = OpLabel\n"
8131                             "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
8132                             "             OpStore %iptr %ival_next\n"
8133                             "             OpBranch %loop\n"
8134 
8135                             // True branch for if-statement.
8136                             "%if_true   = OpLabel\n"
8137                             "             OpStore %loc %c_f32_0\n"
8138                             "             OpBranch %if_exit\n"
8139 
8140                             "             OpFunctionEnd\n";
8141 
8142     fragments["testfun"] = function;
8143 
8144     inputColors[0] = RGBA(127, 127, 127, 0);
8145     inputColors[1] = RGBA(127, 0, 0, 0);
8146     inputColors[2] = RGBA(0, 127, 0, 0);
8147     inputColors[3] = RGBA(0, 0, 127, 0);
8148 
8149     outputColors[0] = RGBA(0, 128, 128, 255);
8150     outputColors[1] = RGBA(0, 255, 255, 255);
8151     outputColors[2] = RGBA(0, 128, 255, 255);
8152     outputColors[3] = RGBA(0, 255, 128, 255);
8153 
8154     createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
8155 
8156     return group.release();
8157 }
8158 
createSwitchBlockOrderTests(tcu::TestContext & testCtx)8159 tcu::TestCaseGroup *createSwitchBlockOrderTests(tcu::TestContext &testCtx)
8160 {
8161     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "switch_block_order"));
8162     RGBA inputColors[4];
8163     RGBA outputColors[4];
8164     map<string, string> fragments;
8165 
8166     const char typesAndConstants[] = "%c_f32_p2  = OpConstant %f32 0.2\n"
8167                                      "%c_f32_p4  = OpConstant %f32 0.4\n"
8168                                      "%c_f32_p6  = OpConstant %f32 0.6\n"
8169                                      "%c_f32_p8  = OpConstant %f32 0.8\n";
8170 
8171     // vec4 test_code(vec4 param) {
8172     //   vec4 result = param;
8173     //   for (int i = 0; i < 4; ++i) {
8174     //     switch (i) {
8175     //       case 0: result[i] += .2; break;
8176     //       case 1: result[i] += .6; break;
8177     //       case 2: result[i] += .4; break;
8178     //       case 3: result[i] += .8; break;
8179     //       default: break; // unreachable
8180     //     }
8181     //   }
8182     //   return result;
8183     // }
8184     const char function[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8185                             "%param1    = OpFunctionParameter %v4f32\n"
8186                             "%lbl       = OpLabel\n"
8187                             "%iptr      = OpVariable %fp_i32 Function\n"
8188                             "%result    = OpVariable %fp_v4f32 Function\n"
8189                             "             OpStore %iptr %c_i32_0\n"
8190                             "             OpStore %result %param1\n"
8191                             "             OpBranch %loop\n"
8192 
8193                             // Loop entry block.
8194                             "%loop      = OpLabel\n"
8195                             "%ival      = OpLoad %i32 %iptr\n"
8196                             "%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
8197                             "             OpLoopMerge %exit %cont None\n"
8198                             "             OpBranchConditional %lt_4 %switch_entry %exit\n"
8199 
8200                             // Merge block for loop.
8201                             "%exit      = OpLabel\n"
8202                             "%ret       = OpLoad %v4f32 %result\n"
8203                             "             OpReturnValue %ret\n"
8204 
8205                             // Switch-statement entry block.
8206                             "%switch_entry   = OpLabel\n"
8207                             "%loc            = OpAccessChain %fp_f32 %result %ival\n"
8208                             "%val            = OpLoad %f32 %loc\n"
8209                             "                  OpSelectionMerge %switch_exit None\n"
8210                             "                  OpSwitch %ival %switch_default 0 %case0 1 %case1 2 %case2 3 %case3\n"
8211 
8212                             "%case2          = OpLabel\n"
8213                             "%addp4          = OpFAdd %f32 %val %c_f32_p4\n"
8214                             "                  OpStore %loc %addp4\n"
8215                             "                  OpBranch %switch_exit\n"
8216 
8217                             "%switch_default = OpLabel\n"
8218                             "                  OpUnreachable\n"
8219 
8220                             "%case3          = OpLabel\n"
8221                             "%addp8          = OpFAdd %f32 %val %c_f32_p8\n"
8222                             "                  OpStore %loc %addp8\n"
8223                             "                  OpBranch %switch_exit\n"
8224 
8225                             "%case0          = OpLabel\n"
8226                             "%addp2          = OpFAdd %f32 %val %c_f32_p2\n"
8227                             "                  OpStore %loc %addp2\n"
8228                             "                  OpBranch %switch_exit\n"
8229 
8230                             // Merge block for switch-statement.
8231                             "%switch_exit    = OpLabel\n"
8232                             "%ival_next      = OpIAdd %i32 %ival %c_i32_1\n"
8233                             "                  OpStore %iptr %ival_next\n"
8234                             "                  OpBranch %cont\n"
8235                             "%cont           = OpLabel\n"
8236                             "                  OpBranch %loop\n"
8237 
8238                             "%case1          = OpLabel\n"
8239                             "%addp6          = OpFAdd %f32 %val %c_f32_p6\n"
8240                             "                  OpStore %loc %addp6\n"
8241                             "                  OpBranch %switch_exit\n"
8242 
8243                             "                  OpFunctionEnd\n";
8244 
8245     fragments["pre_main"] = typesAndConstants;
8246     fragments["testfun"]  = function;
8247 
8248     inputColors[0] = RGBA(127, 27, 127, 51);
8249     inputColors[1] = RGBA(127, 0, 0, 51);
8250     inputColors[2] = RGBA(0, 27, 0, 51);
8251     inputColors[3] = RGBA(0, 0, 127, 51);
8252 
8253     outputColors[0] = RGBA(178, 180, 229, 255);
8254     outputColors[1] = RGBA(178, 153, 102, 255);
8255     outputColors[2] = RGBA(51, 180, 102, 255);
8256     outputColors[3] = RGBA(51, 153, 229, 255);
8257 
8258     createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
8259 
8260     addOpSwitchAmberTests(*group, testCtx);
8261 
8262     return group.release();
8263 }
8264 
createDecorationGroupTests(tcu::TestContext & testCtx)8265 tcu::TestCaseGroup *createDecorationGroupTests(tcu::TestContext &testCtx)
8266 {
8267     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "decoration_group"));
8268     RGBA inputColors[4];
8269     RGBA outputColors[4];
8270     map<string, string> fragments;
8271 
8272     const char decorations[] = "OpDecorate %array_group         ArrayStride 4\n"
8273                                "OpDecorate %struct_member_group Offset 0\n"
8274                                "%array_group         = OpDecorationGroup\n"
8275                                "%struct_member_group = OpDecorationGroup\n"
8276 
8277                                "OpDecorate %group1 RelaxedPrecision\n"
8278                                "OpDecorate %group3 RelaxedPrecision\n"
8279                                "OpDecorate %group3 Flat\n"
8280                                "OpDecorate %group3 Restrict\n"
8281                                "%group0 = OpDecorationGroup\n"
8282                                "%group1 = OpDecorationGroup\n"
8283                                "%group3 = OpDecorationGroup\n";
8284 
8285     const char typesAndConstants[] = "%a3f32     = OpTypeArray %f32 %c_u32_3\n"
8286                                      "%struct1   = OpTypeStruct %a3f32\n"
8287                                      "%struct2   = OpTypeStruct %a3f32\n"
8288                                      "%fp_struct1 = OpTypePointer Function %struct1\n"
8289                                      "%fp_struct2 = OpTypePointer Function %struct2\n"
8290                                      "%c_f32_2    = OpConstant %f32 2.\n"
8291                                      "%c_f32_n2   = OpConstant %f32 -2.\n"
8292 
8293                                      "%c_a3f32_1 = OpConstantComposite %a3f32 %c_f32_1 %c_f32_2 %c_f32_1\n"
8294                                      "%c_a3f32_2 = OpConstantComposite %a3f32 %c_f32_n1 %c_f32_n2 %c_f32_n1\n"
8295                                      "%c_struct1 = OpConstantComposite %struct1 %c_a3f32_1\n"
8296                                      "%c_struct2 = OpConstantComposite %struct2 %c_a3f32_2\n";
8297 
8298     const char function[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8299                             "%param     = OpFunctionParameter %v4f32\n"
8300                             "%entry     = OpLabel\n"
8301                             "%result    = OpVariable %fp_v4f32 Function\n"
8302                             "%v_struct1 = OpVariable %fp_struct1 Function\n"
8303                             "%v_struct2 = OpVariable %fp_struct2 Function\n"
8304                             "             OpStore %result %param\n"
8305                             "             OpStore %v_struct1 %c_struct1\n"
8306                             "             OpStore %v_struct2 %c_struct2\n"
8307                             "%ptr1      = OpAccessChain %fp_f32 %v_struct1 %c_i32_0 %c_i32_2\n"
8308                             "%val1      = OpLoad %f32 %ptr1\n"
8309                             "%ptr2      = OpAccessChain %fp_f32 %v_struct2 %c_i32_0 %c_i32_2\n"
8310                             "%val2      = OpLoad %f32 %ptr2\n"
8311                             "%addvalues = OpFAdd %f32 %val1 %val2\n"
8312                             "%ptr       = OpAccessChain %fp_f32 %result %c_i32_1\n"
8313                             "%val       = OpLoad %f32 %ptr\n"
8314                             "%addresult = OpFAdd %f32 %addvalues %val\n"
8315                             "             OpStore %ptr %addresult\n"
8316                             "%ret       = OpLoad %v4f32 %result\n"
8317                             "             OpReturnValue %ret\n"
8318                             "             OpFunctionEnd\n";
8319 
8320     struct CaseNameDecoration
8321     {
8322         string name;
8323         string decoration;
8324     };
8325 
8326     CaseNameDecoration tests[] = {
8327         {"same_decoration_group_on_multiple_types",
8328          "OpGroupMemberDecorate %struct_member_group %struct1 0 %struct2 0\n"},
8329         {"empty_decoration_group", "OpGroupDecorate %group0      %a3f32\n"
8330                                    "OpGroupDecorate %group0      %result\n"},
8331         {"one_element_decoration_group", "OpGroupDecorate %array_group %a3f32\n"},
8332         {"multiple_elements_decoration_group", "OpGroupDecorate %group3      %v_struct1\n"},
8333         {"multiple_decoration_groups_on_same_variable", "OpGroupDecorate %group0      %v_struct2\n"
8334                                                         "OpGroupDecorate %group1      %v_struct2\n"
8335                                                         "OpGroupDecorate %group3      %v_struct2\n"},
8336         {"same_decoration_group_multiple_times", "OpGroupDecorate %group1      %addvalues\n"
8337                                                  "OpGroupDecorate %group1      %addvalues\n"
8338                                                  "OpGroupDecorate %group1      %addvalues\n"},
8339 
8340     };
8341 
8342     getHalfColorsFullAlpha(inputColors);
8343     getHalfColorsFullAlpha(outputColors);
8344 
8345     for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
8346     {
8347         fragments["decoration"] = decorations + tests[idx].decoration;
8348         fragments["pre_main"]   = typesAndConstants;
8349         fragments["testfun"]    = function;
8350 
8351         createTestsForAllStages(tests[idx].name, inputColors, outputColors, fragments, group.get());
8352     }
8353 
8354     return group.release();
8355 }
8356 
8357 struct SpecConstantTwoValGraphicsCase
8358 {
8359     const std::string caseName;
8360     const std::string scDefinition0;
8361     const std::string scDefinition1;
8362     const std::string scResultType;
8363     const std::string scOperation;
8364     SpecConstantValue scActualValue0;
8365     SpecConstantValue scActualValue1;
8366     const std::string resultOperation;
8367     RGBA expectedColors[4];
8368     CaseFlags caseFlags;
8369 
SpecConstantTwoValGraphicsCasevkt::SpirVAssembly::SpecConstantTwoValGraphicsCase8370     SpecConstantTwoValGraphicsCase(const std::string &name, const std::string &definition0,
8371                                    const std::string &definition1, const std::string &resultType,
8372                                    const std::string &operation, const SpecConstantValue &value0,
8373                                    const SpecConstantValue &value1, const std::string &resultOp,
8374                                    const RGBA (&output)[4], CaseFlags flags = FLAG_NONE)
8375         : caseName(name)
8376         , scDefinition0(definition0)
8377         , scDefinition1(definition1)
8378         , scResultType(resultType)
8379         , scOperation(operation)
8380         , scActualValue0(value0)
8381         , scActualValue1(value1)
8382         , resultOperation(resultOp)
8383         , caseFlags(flags)
8384     {
8385         expectedColors[0] = output[0];
8386         expectedColors[1] = output[1];
8387         expectedColors[2] = output[2];
8388         expectedColors[3] = output[3];
8389     }
8390 };
8391 
createSpecConstantTests(tcu::TestContext & testCtx)8392 tcu::TestCaseGroup *createSpecConstantTests(tcu::TestContext &testCtx)
8393 {
8394     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opspecconstantop"));
8395     vector<SpecConstantTwoValGraphicsCase> cases;
8396     RGBA inputColors[4];
8397     RGBA outputColors0[4];
8398     RGBA outputColors1[4];
8399     RGBA outputColors2[4];
8400 
8401     const char decorations1[] = "OpDecorate %sc_0  SpecId 0\n"
8402                                 "OpDecorate %sc_1  SpecId 1\n";
8403 
8404     const char typesAndConstants1[] = "${OPTYPE_DEFINITIONS:opt}"
8405                                       "%sc_0      = OpSpecConstant${SC_DEF0}\n"
8406                                       "%sc_1      = OpSpecConstant${SC_DEF1}\n"
8407                                       "%sc_op     = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
8408 
8409     const char function1[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8410                              "%param     = OpFunctionParameter %v4f32\n"
8411                              "%label     = OpLabel\n"
8412                              "%result    = OpVariable %fp_v4f32 Function\n"
8413                              "${TYPE_CONVERT:opt}"
8414                              "             OpStore %result %param\n"
8415                              "%gen       = ${GEN_RESULT}\n"
8416                              "%index     = OpIAdd %i32 %gen %c_i32_1\n"
8417                              "%loc       = OpAccessChain %fp_f32 %result %index\n"
8418                              "%val       = OpLoad %f32 %loc\n"
8419                              "%add       = OpFAdd %f32 %val %c_f32_0_5\n"
8420                              "             OpStore %loc %add\n"
8421                              "%ret       = OpLoad %v4f32 %result\n"
8422                              "             OpReturnValue %ret\n"
8423                              "             OpFunctionEnd\n";
8424 
8425     inputColors[0] = RGBA(127, 127, 127, 255);
8426     inputColors[1] = RGBA(127, 0, 0, 255);
8427     inputColors[2] = RGBA(0, 127, 0, 255);
8428     inputColors[3] = RGBA(0, 0, 127, 255);
8429 
8430     // Derived from inputColors[x] by adding 128 to inputColors[x][0].
8431     outputColors0[0] = RGBA(255, 127, 127, 255);
8432     outputColors0[1] = RGBA(255, 0, 0, 255);
8433     outputColors0[2] = RGBA(128, 127, 0, 255);
8434     outputColors0[3] = RGBA(128, 0, 127, 255);
8435 
8436     // Derived from inputColors[x] by adding 128 to inputColors[x][1].
8437     outputColors1[0] = RGBA(127, 255, 127, 255);
8438     outputColors1[1] = RGBA(127, 128, 0, 255);
8439     outputColors1[2] = RGBA(0, 255, 0, 255);
8440     outputColors1[3] = RGBA(0, 128, 127, 255);
8441 
8442     // Derived from inputColors[x] by adding 128 to inputColors[x][2].
8443     outputColors2[0] = RGBA(127, 127, 255, 255);
8444     outputColors2[1] = RGBA(127, 0, 128, 255);
8445     outputColors2[2] = RGBA(0, 127, 128, 255);
8446     outputColors2[3] = RGBA(0, 0, 255, 255);
8447 
8448     const char addZeroToSc[]        = "OpIAdd %i32 %c_i32_0 %sc_op";
8449     const char addZeroToSc32[]      = "OpIAdd %i32 %c_i32_0 %sc_op32";
8450     const char selectTrueUsingSc[]  = "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
8451     const char selectFalseUsingSc[] = "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
8452 
8453     cases.push_back(SpecConstantTwoValGraphicsCase(
8454         "iadd", " %i32 0", " %i32 0", "%i32", "IAdd                 %sc_0 %sc_1", 19, -20, addZeroToSc, outputColors0));
8455     cases.push_back(SpecConstantTwoValGraphicsCase(
8456         "isub", " %i32 0", " %i32 0", "%i32", "ISub                 %sc_0 %sc_1", 19, 20, addZeroToSc, outputColors0));
8457     cases.push_back(SpecConstantTwoValGraphicsCase(
8458         "imul", " %i32 0", " %i32 0", "%i32", "IMul                 %sc_0 %sc_1", -1, -1, addZeroToSc, outputColors2));
8459     cases.push_back(SpecConstantTwoValGraphicsCase("sdiv", " %i32 0", " %i32 0", "%i32",
8460                                                    "SDiv                 %sc_0 %sc_1", -126, 126, addZeroToSc,
8461                                                    outputColors0));
8462     cases.push_back(SpecConstantTwoValGraphicsCase("udiv", " %i32 0", " %i32 0", "%i32",
8463                                                    "UDiv                 %sc_0 %sc_1", 126, 126, addZeroToSc,
8464                                                    outputColors2));
8465     cases.push_back(SpecConstantTwoValGraphicsCase(
8466         "srem", " %i32 0", " %i32 0", "%i32", "SRem                 %sc_0 %sc_1", 3, 2, addZeroToSc, outputColors2));
8467     cases.push_back(SpecConstantTwoValGraphicsCase(
8468         "smod", " %i32 0", " %i32 0", "%i32", "SMod                 %sc_0 %sc_1", 3, 2, addZeroToSc, outputColors2));
8469     cases.push_back(SpecConstantTwoValGraphicsCase("umod", " %i32 0", " %i32 0", "%i32",
8470                                                    "UMod                 %sc_0 %sc_1", 1001, 500, addZeroToSc,
8471                                                    outputColors2));
8472     cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseand", " %i32 0", " %i32 0", "%i32",
8473                                                    "BitwiseAnd           %sc_0 %sc_1", 0x33, 0x0d, addZeroToSc,
8474                                                    outputColors2));
8475     cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseor", " %i32 0", " %i32 0", "%i32",
8476                                                    "BitwiseOr            %sc_0 %sc_1", 0, 1, addZeroToSc,
8477                                                    outputColors2));
8478     cases.push_back(SpecConstantTwoValGraphicsCase("bitwisexor", " %i32 0", " %i32 0", "%i32",
8479                                                    "BitwiseXor           %sc_0 %sc_1", 0x2e, 0x2f, addZeroToSc,
8480                                                    outputColors2));
8481     cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical", " %i32 0", " %i32 0", "%i32",
8482                                                    "ShiftRightLogical    %sc_0 %sc_1", 2, 1, addZeroToSc,
8483                                                    outputColors2));
8484     cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic", " %i32 0", " %i32 0", "%i32",
8485                                                    "ShiftRightArithmetic %sc_0 %sc_1", -4, 2, addZeroToSc,
8486                                                    outputColors0));
8487     cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical", " %i32 0", " %i32 0", "%i32",
8488                                                    "ShiftLeftLogical     %sc_0 %sc_1", 1, 0, addZeroToSc,
8489                                                    outputColors2));
8490 
8491     // Shifts for other integer sizes.
8492     cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i64", " %i64 0", " %i64 0", "%i64",
8493                                                    "ShiftRightLogical    %sc_0 %sc_1", int64_t{2}, int64_t{1},
8494                                                    addZeroToSc32, outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8495     cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i64", " %i64 0", " %i64 0", "%i64",
8496                                                    "ShiftRightArithmetic %sc_0 %sc_1", int64_t{-4}, int64_t{2},
8497                                                    addZeroToSc32, outputColors0, (FLAG_I64 | FLAG_CONVERT)));
8498     cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i64", " %i64 0", " %i64 0", "%i64",
8499                                                    "ShiftLeftLogical     %sc_0 %sc_1", int64_t{1}, int64_t{0},
8500                                                    addZeroToSc32, outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8501     cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i16", " %i16 0", " %i16 0", "%i16",
8502                                                    "ShiftRightLogical    %sc_0 %sc_1", int16_t{2}, int16_t{1},
8503                                                    addZeroToSc32, outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8504     cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i16", " %i16 0", " %i16 0", "%i16",
8505                                                    "ShiftRightArithmetic %sc_0 %sc_1", int16_t{-4}, int16_t{2},
8506                                                    addZeroToSc32, outputColors0, (FLAG_I16 | FLAG_CONVERT)));
8507     cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i16", " %i16 0", " %i16 0", "%i16",
8508                                                    "ShiftLeftLogical     %sc_0 %sc_1", int16_t{1}, int16_t{0},
8509                                                    addZeroToSc32, outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8510     cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i8", " %i8 0", " %i8 0", "%i8",
8511                                                    "ShiftRightLogical    %sc_0 %sc_1", int8_t{2}, int8_t{1},
8512                                                    addZeroToSc32, outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8513     cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i8", " %i8 0", " %i8 0", "%i8",
8514                                                    "ShiftRightArithmetic %sc_0 %sc_1", int8_t{-4}, int8_t{2},
8515                                                    addZeroToSc32, outputColors0, (FLAG_I8 | FLAG_CONVERT)));
8516     cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i8", " %i8 0", " %i8 0", "%i8",
8517                                                    "ShiftLeftLogical     %sc_0 %sc_1", int8_t{1}, int8_t{0},
8518                                                    addZeroToSc32, outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8519 
8520     // Shifts for other integer sizes but only in the shift amount.
8521     cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i64", " %i32 0", " %i64 0", "%i32",
8522                                                    "ShiftRightLogical    %sc_0 %sc_1", 2, int64_t{1}, addZeroToSc,
8523                                                    outputColors2, (FLAG_I64)));
8524     cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i64", " %i32 0", " %i64 0", "%i32",
8525                                                    "ShiftRightArithmetic %sc_0 %sc_1", -4, int64_t{2}, addZeroToSc,
8526                                                    outputColors0, (FLAG_I64)));
8527     cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i64", " %i32 0", " %i64 0", "%i32",
8528                                                    "ShiftLeftLogical     %sc_0 %sc_1", 1, int64_t{0}, addZeroToSc,
8529                                                    outputColors2, (FLAG_I64)));
8530     cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i16", " %i32 0", " %i16 0", "%i32",
8531                                                    "ShiftRightLogical    %sc_0 %sc_1", 2, int16_t{1}, addZeroToSc,
8532                                                    outputColors2, (FLAG_I16)));
8533     cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i16", " %i32 0", " %i16 0", "%i32",
8534                                                    "ShiftRightArithmetic %sc_0 %sc_1", -4, int16_t{2}, addZeroToSc,
8535                                                    outputColors0, (FLAG_I16)));
8536     cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i16", " %i32 0", " %i16 0", "%i32",
8537                                                    "ShiftLeftLogical     %sc_0 %sc_1", 1, int16_t{0}, addZeroToSc,
8538                                                    outputColors2, (FLAG_I16)));
8539     cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i8", " %i32 0", " %i8 0", "%i32",
8540                                                    "ShiftRightLogical    %sc_0 %sc_1", 2, int8_t{1}, addZeroToSc,
8541                                                    outputColors2, (FLAG_I8)));
8542     cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i8", " %i32 0", " %i8 0", "%i32",
8543                                                    "ShiftRightArithmetic %sc_0 %sc_1", -4, int8_t{2}, addZeroToSc,
8544                                                    outputColors0, (FLAG_I8)));
8545     cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i8", " %i32 0", " %i8 0", "%i32",
8546                                                    "ShiftLeftLogical     %sc_0 %sc_1", 1, int8_t{0}, addZeroToSc,
8547                                                    outputColors2, (FLAG_I8)));
8548 
8549     cases.push_back(SpecConstantTwoValGraphicsCase("slessthan", " %i32 0", " %i32 0", "%bool",
8550                                                    "SLessThan            %sc_0 %sc_1", -20, -10, selectTrueUsingSc,
8551                                                    outputColors2));
8552     cases.push_back(SpecConstantTwoValGraphicsCase("ulessthan", " %i32 0", " %i32 0", "%bool",
8553                                                    "ULessThan            %sc_0 %sc_1", 10, 20, selectTrueUsingSc,
8554                                                    outputColors2));
8555     cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthan", " %i32 0", " %i32 0", "%bool",
8556                                                    "SGreaterThan         %sc_0 %sc_1", -1000, 50, selectFalseUsingSc,
8557                                                    outputColors2));
8558     cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthan", " %i32 0", " %i32 0", "%bool",
8559                                                    "UGreaterThan         %sc_0 %sc_1", 10, 5, selectTrueUsingSc,
8560                                                    outputColors2));
8561     cases.push_back(SpecConstantTwoValGraphicsCase("slessthanequal", " %i32 0", " %i32 0", "%bool",
8562                                                    "SLessThanEqual       %sc_0 %sc_1", -10, -10, selectTrueUsingSc,
8563                                                    outputColors2));
8564     cases.push_back(SpecConstantTwoValGraphicsCase("ulessthanequal", " %i32 0", " %i32 0", "%bool",
8565                                                    "ULessThanEqual       %sc_0 %sc_1", 50, 100, selectTrueUsingSc,
8566                                                    outputColors2));
8567     cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthanequal", " %i32 0", " %i32 0", "%bool",
8568                                                    "SGreaterThanEqual    %sc_0 %sc_1", -1000, 50, selectFalseUsingSc,
8569                                                    outputColors2));
8570     cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthanequal", " %i32 0", " %i32 0", "%bool",
8571                                                    "UGreaterThanEqual    %sc_0 %sc_1", 10, 10, selectTrueUsingSc,
8572                                                    outputColors2));
8573     cases.push_back(SpecConstantTwoValGraphicsCase("iequal", " %i32 0", " %i32 0", "%bool",
8574                                                    "IEqual               %sc_0 %sc_1", 42, 24, selectFalseUsingSc,
8575                                                    outputColors2));
8576     cases.push_back(SpecConstantTwoValGraphicsCase("inotequal", " %i32 0", " %i32 0", "%bool",
8577                                                    "INotEqual            %sc_0 %sc_1", 42, 24, selectTrueUsingSc,
8578                                                    outputColors2));
8579     cases.push_back(SpecConstantTwoValGraphicsCase("logicaland", "True %bool", "True %bool", "%bool",
8580                                                    "LogicalAnd           %sc_0 %sc_1", 0, 1, selectFalseUsingSc,
8581                                                    outputColors2));
8582     cases.push_back(SpecConstantTwoValGraphicsCase("logicalor", "False %bool", "False %bool", "%bool",
8583                                                    "LogicalOr            %sc_0 %sc_1", 1, 0, selectTrueUsingSc,
8584                                                    outputColors2));
8585     cases.push_back(SpecConstantTwoValGraphicsCase("logicalequal", "True %bool", "True %bool", "%bool",
8586                                                    "LogicalEqual         %sc_0 %sc_1", 0, 1, selectFalseUsingSc,
8587                                                    outputColors2));
8588     cases.push_back(SpecConstantTwoValGraphicsCase("logicalnotequal", "False %bool", "False %bool", "%bool",
8589                                                    "LogicalNotEqual      %sc_0 %sc_1", 1, 0, selectTrueUsingSc,
8590                                                    outputColors2));
8591     cases.push_back(SpecConstantTwoValGraphicsCase("snegate", " %i32 0", " %i32 0", "%i32",
8592                                                    "SNegate              %sc_0", -1, 0, addZeroToSc, outputColors2));
8593     cases.push_back(SpecConstantTwoValGraphicsCase("not", " %i32 0", " %i32 0", "%i32", "Not                  %sc_0",
8594                                                    -2, 0, addZeroToSc, outputColors2));
8595     cases.push_back(SpecConstantTwoValGraphicsCase("logicalnot", "False %bool", "False %bool", "%bool",
8596                                                    "LogicalNot           %sc_0", 1, 0, selectFalseUsingSc,
8597                                                    outputColors2));
8598     cases.push_back(SpecConstantTwoValGraphicsCase("select", "False %bool", " %i32 0", "%i32",
8599                                                    "Select               %sc_0 %sc_1 %c_i32_0", 1, 1, addZeroToSc,
8600                                                    outputColors2));
8601     cases.push_back(SpecConstantTwoValGraphicsCase("sconvert", " %i32 0", " %i32 0", "%i16",
8602                                                    "SConvert             %sc_0", -1, 0, addZeroToSc32, outputColors0,
8603                                                    (FLAG_I16 | FLAG_CONVERT)));
8604     cases.push_back(SpecConstantTwoValGraphicsCase("fconvert", " %f32 0", " %f32 0", "%f64",
8605                                                    "FConvert             %sc_0", tcu::Float32(-1.0), tcu::Float32(0.0),
8606                                                    addZeroToSc32, outputColors0, (FLAG_F64 | FLAG_CONVERT)));
8607     cases.push_back(SpecConstantTwoValGraphicsCase("fconvert16", " %f16 0", " %f16 0", "%f32",
8608                                                    "FConvert             %sc_0", tcu::Float16(-1.0), tcu::Float16(0.0),
8609                                                    addZeroToSc32, outputColors0, (FLAG_F16 | FLAG_CONVERT)));
8610     // \todo[2015-12-1 antiagainst] OpQuantizeToF16
8611 
8612     for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
8613     {
8614         map<string, string> specializations;
8615         map<string, string> fragments;
8616         SpecConstants specConstants;
8617         PushConstants noPushConstants;
8618         GraphicsResources noResources;
8619         GraphicsInterfaces noInterfaces;
8620         vector<string> extensions;
8621         VulkanFeatures requiredFeatures;
8622 
8623         // Special SPIR-V code when using 16-bit integers.
8624         if (cases[caseNdx].caseFlags & FLAG_I16)
8625         {
8626             requiredFeatures.coreFeatures.shaderInt16 = VK_TRUE;
8627             fragments["capability"] += "OpCapability Int16\n";                  // Adds 16-bit integer capability
8628             specializations["OPTYPE_DEFINITIONS"] += "%i16 = OpTypeInt 16 1\n"; // Adds 16-bit integer type
8629             if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8630                 specializations["TYPE_CONVERT"] +=
8631                     "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 16-bit integer to 32-bit integer
8632         }
8633 
8634         // Special SPIR-V code when using 64-bit integers.
8635         if (cases[caseNdx].caseFlags & FLAG_I64)
8636         {
8637             requiredFeatures.coreFeatures.shaderInt64 = VK_TRUE;
8638             fragments["capability"] += "OpCapability Int64\n";                  // Adds 64-bit integer capability
8639             specializations["OPTYPE_DEFINITIONS"] += "%i64 = OpTypeInt 64 1\n"; // Adds 64-bit integer type
8640             if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8641                 specializations["TYPE_CONVERT"] +=
8642                     "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 64-bit integer to 32-bit integer
8643         }
8644 
8645         // Special SPIR-V code when using 64-bit floats.
8646         if (cases[caseNdx].caseFlags & FLAG_F64)
8647         {
8648             requiredFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
8649             fragments["capability"] += "OpCapability Float64\n";                // Adds 64-bit float capability
8650             specializations["OPTYPE_DEFINITIONS"] += "%f64 = OpTypeFloat 64\n"; // Adds 64-bit float type
8651             if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8652                 specializations["TYPE_CONVERT"] +=
8653                     "%sc_op32 = OpConvertFToS %i32 %sc_op\n"; // Converts 64-bit float to 32-bit integer
8654         }
8655 
8656         // Extension needed for float16 and int8.
8657         if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
8658             extensions.push_back("VK_KHR_shader_float16_int8");
8659 
8660         // Special SPIR-V code when using 16-bit floats.
8661         if (cases[caseNdx].caseFlags & FLAG_F16)
8662         {
8663             requiredFeatures.extFloat16Int8.shaderFloat16 = true;
8664             fragments["capability"] += "OpCapability Float16\n";                // Adds 16-bit float capability
8665             specializations["OPTYPE_DEFINITIONS"] += "%f16 = OpTypeFloat 16\n"; // Adds 16-bit float type
8666             if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8667                 specializations["TYPE_CONVERT"] +=
8668                     "%sc_op32 = OpConvertFToS %i32 %sc_op\n"; // Converts 16-bit float to 32-bit integer
8669         }
8670 
8671         // Special SPIR-V code when using 8-bit integers.
8672         if (cases[caseNdx].caseFlags & FLAG_I8)
8673         {
8674             requiredFeatures.extFloat16Int8.shaderInt8 = true;
8675             fragments["capability"] += "OpCapability Int8\n";                 // Adds 8-bit integer capability
8676             specializations["OPTYPE_DEFINITIONS"] += "%i8 = OpTypeInt 8 1\n"; // Adds 8-bit integer type
8677             if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8678                 specializations["TYPE_CONVERT"] +=
8679                     "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 8-bit integer to 32-bit integer
8680         }
8681 
8682         specializations["SC_DEF0"]        = cases[caseNdx].scDefinition0;
8683         specializations["SC_DEF1"]        = cases[caseNdx].scDefinition1;
8684         specializations["SC_RESULT_TYPE"] = cases[caseNdx].scResultType;
8685         specializations["SC_OP"]          = cases[caseNdx].scOperation;
8686         specializations["GEN_RESULT"]     = cases[caseNdx].resultOperation;
8687 
8688         fragments["decoration"] = tcu::StringTemplate(decorations1).specialize(specializations);
8689         fragments["pre_main"]   = tcu::StringTemplate(typesAndConstants1).specialize(specializations);
8690         fragments["testfun"]    = tcu::StringTemplate(function1).specialize(specializations);
8691 
8692         cases[caseNdx].scActualValue0.appendTo(specConstants);
8693         cases[caseNdx].scActualValue1.appendTo(specConstants);
8694 
8695         createTestsForAllStages(cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments,
8696                                 specConstants, noPushConstants, noResources, noInterfaces, extensions, requiredFeatures,
8697                                 group.get());
8698     }
8699 
8700     const char decorations2[] = "OpDecorate %sc_0  SpecId 0\n"
8701                                 "OpDecorate %sc_1  SpecId 1\n"
8702                                 "OpDecorate %sc_2  SpecId 2\n";
8703 
8704     const std::string typesAndConstants2 = "%vec3_0      = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
8705                                            "%vec3_undef  = OpUndef %v3i32\n"
8706 
8707                                            + getSpecConstantOpStructConstantsAndTypes() +
8708                                            getSpecConstantOpStructComposites() +
8709 
8710                                            "%sc_0        = OpSpecConstant %i32 0\n"
8711                                            "%sc_1        = OpSpecConstant %i32 0\n"
8712                                            "%sc_2        = OpSpecConstant %i32 0\n"
8713 
8714                                            + getSpecConstantOpStructConstBlock() +
8715 
8716                                            "%sc_vec3_0   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_0        "
8717                                            "%vec3_0      0\n" // (sc_0, 0,    0)
8718                                            "%sc_vec3_1   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_1        "
8719                                            "%vec3_0      1\n" // (0,    sc_1, 0)
8720                                            "%sc_vec3_2   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_2        "
8721                                            "%vec3_0      2\n" // (0,    0,    sc_2)
8722                                            "%sc_vec3_0_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0   "
8723                                            "%vec3_undef  0          0xFFFFFFFF 2\n" // (sc_0, ???,  0)
8724                                            "%sc_vec3_1_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_1   "
8725                                            "%vec3_undef  0xFFFFFFFF 1          0\n" // (???,  sc_1, 0)
8726                                            "%sc_vec3_2_s = OpSpecConstantOp %v3i32 VectorShuffle    %vec3_undef  "
8727                                            "%sc_vec3_2   5          0xFFFFFFFF 5\n" // (sc_2, ???,  sc_2)
8728                                            "%sc_vec3_01  = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0_s "
8729                                            "%sc_vec3_1_s 1 0 4\n" // (???,  sc_0, sc_1)
8730                                            "%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_01  "
8731                                            "%sc_vec3_2_s 5 1 2\n" // (sc_2, sc_0, sc_1)
8732                                            "%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012       "
8733                                            "       0\n" // sc_2
8734                                            "%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012       "
8735                                            "       1\n" // sc_0
8736                                            "%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012       "
8737                                            "       2\n" // sc_1
8738                                            "%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    "
8739                                            "%sc_ext_1\n" // (sc_2 - sc_0)
8740                                            "%sc_factor   = OpSpecConstantOp %i32   IMul             %sc_sub      "
8741                                            "%sc_ext_2\n"; // (sc_2 - sc_0) * sc_1
8742 
8743     const std::string function2 = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8744                                   "%param     = OpFunctionParameter %v4f32\n"
8745                                   "%label     = OpLabel\n"
8746                                   "%result    = OpVariable %fp_v4f32 Function\n"
8747 
8748                                   + getSpecConstantOpStructInstructions() +
8749 
8750                                   "             OpStore %result %param\n"
8751                                   "%loc       = OpAccessChain %fp_f32 %result %sc_final\n"
8752                                   "%val       = OpLoad %f32 %loc\n"
8753                                   "%add       = OpFAdd %f32 %val %c_f32_0_5\n"
8754                                   "             OpStore %loc %add\n"
8755                                   "%ret       = OpLoad %v4f32 %result\n"
8756                                   "             OpReturnValue %ret\n"
8757                                   "             OpFunctionEnd\n";
8758 
8759     map<string, string> fragments;
8760     SpecConstants specConstants;
8761 
8762     fragments["decoration"] = decorations2;
8763     fragments["pre_main"]   = typesAndConstants2;
8764     fragments["testfun"]    = function2;
8765 
8766     specConstants.append<int32_t>(56789);
8767     specConstants.append<int32_t>(-2);
8768     specConstants.append<int32_t>(56788);
8769 
8770     createTestsForAllStages("vector_related", inputColors, outputColors2, fragments, specConstants, group.get());
8771 
8772     return group.release();
8773 }
8774 
createOpPhiTests(tcu::TestContext & testCtx)8775 tcu::TestCaseGroup *createOpPhiTests(tcu::TestContext &testCtx)
8776 {
8777     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opphi"));
8778     RGBA inputColors[4];
8779     RGBA outputColors1[4];
8780     RGBA outputColors2[4];
8781     RGBA outputColors3[4];
8782     RGBA outputColors4[4];
8783     map<string, string> fragments1;
8784     map<string, string> fragments2;
8785     map<string, string> fragments3;
8786     map<string, string> fragments4;
8787     std::vector<std::string> extensions4;
8788     GraphicsResources resources4;
8789     VulkanFeatures vulkanFeatures4;
8790 
8791     const char typesAndConstants1[] = "%c_f32_p2  = OpConstant %f32 0.2\n"
8792                                       "%c_f32_p4  = OpConstant %f32 0.4\n"
8793                                       "%c_f32_p5  = OpConstant %f32 0.5\n"
8794                                       "%c_f32_p8  = OpConstant %f32 0.8\n";
8795 
8796     // vec4 test_code(vec4 param) {
8797     //   vec4 result = param;
8798     //   for (int i = 0; i < 4; ++i) {
8799     //     float operand;
8800     //     switch (i) {
8801     //       case 0: operand = .2; break;
8802     //       case 1: operand = .5; break;
8803     //       case 2: operand = .4; break;
8804     //       case 3: operand = .0; break;
8805     //       default: break; // unreachable
8806     //     }
8807     //     result[i] += operand;
8808     //   }
8809     //   return result;
8810     // }
8811     const char function1[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8812                              "%param1    = OpFunctionParameter %v4f32\n"
8813                              "%lbl       = OpLabel\n"
8814                              "%iptr      = OpVariable %fp_i32 Function\n"
8815                              "%result    = OpVariable %fp_v4f32 Function\n"
8816                              "             OpStore %iptr %c_i32_0\n"
8817                              "             OpStore %result %param1\n"
8818                              "             OpBranch %loop\n"
8819 
8820                              "%loop      = OpLabel\n"
8821                              "%ival      = OpLoad %i32 %iptr\n"
8822                              "%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
8823                              "             OpLoopMerge %exit %cont None\n"
8824                              "             OpBranchConditional %lt_4 %entry %exit\n"
8825 
8826                              "%entry     = OpLabel\n"
8827                              "%loc       = OpAccessChain %fp_f32 %result %ival\n"
8828                              "%val       = OpLoad %f32 %loc\n"
8829                              "             OpSelectionMerge %phi None\n"
8830                              "             OpSwitch %ival %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
8831 
8832                              "%case0     = OpLabel\n"
8833                              "             OpBranch %phi\n"
8834                              "%case1     = OpLabel\n"
8835                              "             OpBranch %phi\n"
8836                              "%case2     = OpLabel\n"
8837                              "             OpBranch %phi\n"
8838                              "%case3     = OpLabel\n"
8839                              "             OpBranch %phi\n"
8840 
8841                              "%default   = OpLabel\n"
8842                              "             OpUnreachable\n"
8843 
8844                              "%phi       = OpLabel\n"
8845                              "%operand   = OpPhi %f32 %c_f32_p4 %case2 %c_f32_p5 %case1 %c_f32_p2 %case0 %c_f32_0 "
8846                              "%case3\n" // not in the order of blocks
8847                              "             OpBranch %cont\n"
8848                              "%cont      = OpLabel\n"
8849                              "%add       = OpFAdd %f32 %val %operand\n"
8850                              "             OpStore %loc %add\n"
8851                              "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
8852                              "             OpStore %iptr %ival_next\n"
8853                              "             OpBranch %loop\n"
8854 
8855                              "%exit      = OpLabel\n"
8856                              "%ret       = OpLoad %v4f32 %result\n"
8857                              "             OpReturnValue %ret\n"
8858 
8859                              "             OpFunctionEnd\n";
8860 
8861     fragments1["pre_main"] = typesAndConstants1;
8862     fragments1["testfun"]  = function1;
8863 
8864     getHalfColorsFullAlpha(inputColors);
8865 
8866     outputColors1[0] = RGBA(178, 255, 229, 255);
8867     outputColors1[1] = RGBA(178, 127, 102, 255);
8868     outputColors1[2] = RGBA(51, 255, 102, 255);
8869     outputColors1[3] = RGBA(51, 127, 229, 255);
8870 
8871     createTestsForAllStages("out_of_order", inputColors, outputColors1, fragments1, group.get());
8872 
8873     const char typesAndConstants2[] = "%c_f32_p2  = OpConstant %f32 0.2\n";
8874 
8875     // Add .4 to the second element of the given parameter.
8876     const char function2[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8877                              "%param     = OpFunctionParameter %v4f32\n"
8878                              "%entry     = OpLabel\n"
8879                              "%result    = OpVariable %fp_v4f32 Function\n"
8880                              "             OpStore %result %param\n"
8881                              "%loc       = OpAccessChain %fp_f32 %result %c_i32_1\n"
8882                              "%val       = OpLoad %f32 %loc\n"
8883                              "             OpBranch %phi\n"
8884 
8885                              "%phi        = OpLabel\n"
8886                              "%step       = OpPhi %i32 %c_i32_0  %entry %step_next  %phi\n"
8887                              "%accum      = OpPhi %f32 %val      %entry %accum_next %phi\n"
8888                              "%step_next  = OpIAdd %i32 %step  %c_i32_1\n"
8889                              "%accum_next = OpFAdd %f32 %accum %c_f32_p2\n"
8890                              "%still_loop = OpSLessThan %bool %step %c_i32_2\n"
8891                              "              OpLoopMerge %exit %phi None\n"
8892                              "              OpBranchConditional %still_loop %phi %exit\n"
8893 
8894                              "%exit       = OpLabel\n"
8895                              "              OpStore %loc %accum\n"
8896                              "%ret        = OpLoad %v4f32 %result\n"
8897                              "              OpReturnValue %ret\n"
8898 
8899                              "              OpFunctionEnd\n";
8900 
8901     fragments2["pre_main"] = typesAndConstants2;
8902     fragments2["testfun"]  = function2;
8903 
8904     outputColors2[0] = RGBA(127, 229, 127, 255);
8905     outputColors2[1] = RGBA(127, 102, 0, 255);
8906     outputColors2[2] = RGBA(0, 229, 0, 255);
8907     outputColors2[3] = RGBA(0, 102, 127, 255);
8908 
8909     createTestsForAllStages("induction", inputColors, outputColors2, fragments2, group.get());
8910 
8911     const char typesAndConstants3[] = "%true      = OpConstantTrue %bool\n"
8912                                       "%false     = OpConstantFalse %bool\n"
8913                                       "%c_f32_p2  = OpConstant %f32 0.2\n";
8914 
8915     // Swap the second and the third element of the given parameter.
8916     const char function3[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8917                              "%param     = OpFunctionParameter %v4f32\n"
8918                              "%entry     = OpLabel\n"
8919                              "%result    = OpVariable %fp_v4f32 Function\n"
8920                              "             OpStore %result %param\n"
8921                              "%a_loc     = OpAccessChain %fp_f32 %result %c_i32_1\n"
8922                              "%a_init    = OpLoad %f32 %a_loc\n"
8923                              "%b_loc     = OpAccessChain %fp_f32 %result %c_i32_2\n"
8924                              "%b_init    = OpLoad %f32 %b_loc\n"
8925                              "             OpBranch %phi\n"
8926 
8927                              "%phi        = OpLabel\n"
8928                              "%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
8929                              "%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
8930                              "%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
8931                              "              OpLoopMerge %exit %phi None\n"
8932                              "              OpBranchConditional %still_loop %phi %exit\n"
8933 
8934                              "%exit       = OpLabel\n"
8935                              "              OpStore %a_loc %a_next\n"
8936                              "              OpStore %b_loc %b_next\n"
8937                              "%ret        = OpLoad %v4f32 %result\n"
8938                              "              OpReturnValue %ret\n"
8939 
8940                              "              OpFunctionEnd\n";
8941 
8942     fragments3["pre_main"] = typesAndConstants3;
8943     fragments3["testfun"]  = function3;
8944 
8945     outputColors3[0] = RGBA(127, 127, 127, 255);
8946     outputColors3[1] = RGBA(127, 0, 0, 255);
8947     outputColors3[2] = RGBA(0, 0, 127, 255);
8948     outputColors3[3] = RGBA(0, 127, 0, 255);
8949 
8950     createTestsForAllStages("swap", inputColors, outputColors3, fragments3, group.get());
8951 
8952     const char typesAndConstants4[] = "%f16        = OpTypeFloat 16\n"
8953                                       "%v4f16      = OpTypeVector %f16 4\n"
8954                                       "%fp_f16     = OpTypePointer Function %f16\n"
8955                                       "%fp_v4f16   = OpTypePointer Function %v4f16\n"
8956                                       "%true       = OpConstantTrue %bool\n"
8957                                       "%false      = OpConstantFalse %bool\n"
8958                                       "%c_f32_p2   = OpConstant %f32 0.2\n";
8959 
8960     // Swap the second and the third element of the given parameter.
8961     const char function4[] = "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8962                              "%param      = OpFunctionParameter %v4f32\n"
8963                              "%entry      = OpLabel\n"
8964                              "%result     = OpVariable %fp_v4f16 Function\n"
8965                              "%param16    = OpFConvert %v4f16 %param\n"
8966                              "              OpStore %result %param16\n"
8967                              "%a_loc      = OpAccessChain %fp_f16 %result %c_i32_1\n"
8968                              "%a_init     = OpLoad %f16 %a_loc\n"
8969                              "%b_loc      = OpAccessChain %fp_f16 %result %c_i32_2\n"
8970                              "%b_init     = OpLoad %f16 %b_loc\n"
8971                              "              OpBranch %phi\n"
8972 
8973                              "%phi        = OpLabel\n"
8974                              "%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
8975                              "%a_next     = OpPhi %f16  %a_init %entry %b_next %phi\n"
8976                              "%b_next     = OpPhi %f16  %b_init %entry %a_next %phi\n"
8977                              "              OpLoopMerge %exit %phi None\n"
8978                              "              OpBranchConditional %still_loop %phi %exit\n"
8979 
8980                              "%exit       = OpLabel\n"
8981                              "              OpStore %a_loc %a_next\n"
8982                              "              OpStore %b_loc %b_next\n"
8983                              "%ret16      = OpLoad %v4f16 %result\n"
8984                              "%ret        = OpFConvert %v4f32 %ret16\n"
8985                              "              OpReturnValue %ret\n"
8986 
8987                              "              OpFunctionEnd\n";
8988 
8989     fragments4["pre_main"]   = typesAndConstants4;
8990     fragments4["testfun"]    = function4;
8991     fragments4["capability"] = "OpCapability Float16\n";
8992 
8993     extensions4.push_back("VK_KHR_shader_float16_int8");
8994 
8995     vulkanFeatures4.extFloat16Int8.shaderFloat16 = true;
8996 
8997     outputColors4[0] = RGBA(127, 127, 127, 255);
8998     outputColors4[1] = RGBA(127, 0, 0, 255);
8999     outputColors4[2] = RGBA(0, 0, 127, 255);
9000     outputColors4[3] = RGBA(0, 127, 0, 255);
9001 
9002     createTestsForAllStages("swap16", inputColors, outputColors4, fragments4, resources4, extensions4, group.get(),
9003                             vulkanFeatures4);
9004 
9005     return group.release();
9006 }
9007 
createNoContractionTests(tcu::TestContext & testCtx)9008 tcu::TestCaseGroup *createNoContractionTests(tcu::TestContext &testCtx)
9009 {
9010     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "nocontraction"));
9011     RGBA inputColors[4];
9012     RGBA outputColors[4];
9013 
9014     // With NoContraction, (1 + 2^-23) * (1 - 2^-23) - 1 should be conducted as a multiplication and an addition separately.
9015     // For the multiplication, the result is 1 - 2^-46, which is out of the precision range for 32-bit float. (32-bit float
9016     // only have 23-bit fraction.) So it will be rounded to 1. Or 0x1.fffffc. Then the final result is 0 or -0x1p-24.
9017     // On the contrary, the result will be 2^-46, which is a normalized number perfectly representable as 32-bit float.
9018     const char constantsAndTypes[] =
9019         "%c_vec4_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_1\n"
9020         "%c_vec4_1       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
9021         "%c_f32_1pl2_23  = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
9022         "%c_f32_1mi2_23  = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
9023         "%c_f32_n1pn24   = OpConstant %f32 -0x1p-24\n";
9024 
9025     const char function[] = "%test_code      = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9026                             "%param          = OpFunctionParameter %v4f32\n"
9027                             "%label          = OpLabel\n"
9028                             "%var1           = OpVariable %fp_f32 Function %c_f32_1pl2_23\n"
9029                             "%var2           = OpVariable %fp_f32 Function\n"
9030                             "%red            = OpCompositeExtract %f32 %param 0\n"
9031                             "%plus_red       = OpFAdd %f32 %c_f32_1mi2_23 %red\n"
9032                             "                  OpStore %var2 %plus_red\n"
9033                             "%val1           = OpLoad %f32 %var1\n"
9034                             "%val2           = OpLoad %f32 %var2\n"
9035                             "%mul            = OpFMul %f32 %val1 %val2\n"
9036                             "%add            = OpFAdd %f32 %mul %c_f32_n1\n"
9037                             "%is0            = OpFOrdEqual %bool %add %c_f32_0\n"
9038                             "%isn1n24         = OpFOrdEqual %bool %add %c_f32_n1pn24\n"
9039                             "%success        = OpLogicalOr %bool %is0 %isn1n24\n"
9040                             "%v4success      = OpCompositeConstruct %v4bool %success %success %success %success\n"
9041                             "%ret            = OpSelect %v4f32 %v4success %c_vec4_0 %c_vec4_1\n"
9042                             "                  OpReturnValue %ret\n"
9043                             "                  OpFunctionEnd\n";
9044 
9045     struct CaseNameDecoration
9046     {
9047         string name;
9048         string decoration;
9049     };
9050 
9051     CaseNameDecoration tests[] = {
9052         {"multiplication", "OpDecorate %mul NoContraction"},
9053         {"addition", "OpDecorate %add NoContraction"},
9054         {"both", "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"},
9055     };
9056 
9057     getHalfColorsFullAlpha(inputColors);
9058 
9059     for (uint8_t idx = 0; idx < 4; ++idx)
9060     {
9061         inputColors[idx].setRed(0);
9062         outputColors[idx] = RGBA(0, 0, 0, 255);
9063     }
9064 
9065     for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(CaseNameDecoration); ++testNdx)
9066     {
9067         map<string, string> fragments;
9068 
9069         fragments["decoration"] = tests[testNdx].decoration;
9070         fragments["pre_main"]   = constantsAndTypes;
9071         fragments["testfun"]    = function;
9072 
9073         createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, group.get());
9074     }
9075 
9076     return group.release();
9077 }
9078 
createMemoryAccessTests(tcu::TestContext & testCtx)9079 tcu::TestCaseGroup *createMemoryAccessTests(tcu::TestContext &testCtx)
9080 {
9081     de::MovePtr<tcu::TestCaseGroup> memoryAccessTests(new tcu::TestCaseGroup(testCtx, "opmemoryaccess"));
9082     RGBA colors[4];
9083 
9084     const char constantsAndTypes[] = "%c_a2f32_1         = OpConstantComposite %a2f32 %c_f32_1 %c_f32_1\n"
9085                                      "%fp_a2f32          = OpTypePointer Function %a2f32\n"
9086                                      "%stype             = OpTypeStruct  %v4f32 %a2f32 %f32\n"
9087                                      "%fp_stype          = OpTypePointer Function %stype\n";
9088 
9089     const char function[] = "%test_code         = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9090                             "%param1            = OpFunctionParameter %v4f32\n"
9091                             "%lbl               = OpLabel\n"
9092                             "%v1                = OpVariable %fp_v4f32 Function\n"
9093                             "%v2                = OpVariable %fp_a2f32 Function\n"
9094                             "%v3                = OpVariable %fp_f32 Function\n"
9095                             "%v                 = OpVariable %fp_stype Function\n"
9096                             "%vv                = OpVariable %fp_stype Function\n"
9097                             "%vvv               = OpVariable %fp_f32 Function\n"
9098 
9099                             "                     OpStore %v1 %c_v4f32_1_1_1_1\n"
9100                             "                     OpStore %v2 %c_a2f32_1\n"
9101                             "                     OpStore %v3 %c_f32_1\n"
9102 
9103                             "%p_v4f32          = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
9104                             "%p_a2f32          = OpAccessChain %fp_a2f32 %v %c_u32_1\n"
9105                             "%p_f32            = OpAccessChain %fp_f32 %v %c_u32_2\n"
9106                             "%v1_v             = OpLoad %v4f32 %v1 ${access_type}\n"
9107                             "%v2_v             = OpLoad %a2f32 %v2 ${access_type}\n"
9108                             "%v3_v             = OpLoad %f32 %v3 ${access_type}\n"
9109 
9110                             "                    OpStore %p_v4f32 %v1_v ${access_type}\n"
9111                             "                    OpStore %p_a2f32 %v2_v ${access_type}\n"
9112                             "                    OpStore %p_f32 %v3_v ${access_type}\n"
9113 
9114                             "                    OpCopyMemory %vv %v ${access_type}\n"
9115                             "                    OpCopyMemory %vvv %p_f32 ${access_type}\n"
9116 
9117                             "%p_f32_2          = OpAccessChain %fp_f32 %vv %c_u32_2\n"
9118                             "%v_f32_2          = OpLoad %f32 %p_f32_2\n"
9119                             "%v_f32_3          = OpLoad %f32 %vvv\n"
9120 
9121                             "%ret1             = OpVectorTimesScalar %v4f32 %param1 %v_f32_2\n"
9122                             "%ret2             = OpVectorTimesScalar %v4f32 %ret1 %v_f32_3\n"
9123                             "                    OpReturnValue %ret2\n"
9124                             "                    OpFunctionEnd\n";
9125 
9126     struct NameMemoryAccess
9127     {
9128         string name;
9129         string accessType;
9130     };
9131 
9132     NameMemoryAccess tests[] = {
9133         {"none", ""},
9134         {"volatile", "Volatile"},
9135         {"aligned", "Aligned 1"},
9136         {"volatile_aligned", "Volatile|Aligned 1"},
9137         {"nontemporal_aligned", "Nontemporal|Aligned 1"},
9138         {"volatile_nontemporal", "Volatile|Nontemporal"},
9139         {"volatile_nontermporal_aligned", "Volatile|Nontemporal|Aligned 1"},
9140     };
9141 
9142     getHalfColorsFullAlpha(colors);
9143 
9144     for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameMemoryAccess); ++testNdx)
9145     {
9146         map<string, string> fragments;
9147         map<string, string> memoryAccess;
9148         memoryAccess["access_type"] = tests[testNdx].accessType;
9149 
9150         fragments["pre_main"] = constantsAndTypes;
9151         fragments["testfun"]  = tcu::StringTemplate(function).specialize(memoryAccess);
9152         createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, memoryAccessTests.get());
9153     }
9154     return memoryAccessTests.release();
9155 }
createOpUndefTests(tcu::TestContext & testCtx)9156 tcu::TestCaseGroup *createOpUndefTests(tcu::TestContext &testCtx)
9157 {
9158     de::MovePtr<tcu::TestCaseGroup> opUndefTests(new tcu::TestCaseGroup(testCtx, "opundef"));
9159     RGBA defaultColors[4];
9160     map<string, string> fragments;
9161     getDefaultColors(defaultColors);
9162 
9163     // First, simple cases that don't do anything with the OpUndef result.
9164     struct NameCodePair
9165     {
9166         string name, decl, type;
9167     };
9168     const NameCodePair tests[] = {{"bool", "", "%bool"},
9169                                   {"vec2uint32", "", "%v2u32"},
9170                                   {"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown", "%type"},
9171                                   {"sampler", "%type = OpTypeSampler", "%type"},
9172                                   {"sampledimage",
9173                                    "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
9174                                    "%type = OpTypeSampledImage %img",
9175                                    "%type"},
9176                                   {"pointer", "", "%fp_i32"},
9177                                   {"runtimearray", "%type = OpTypeRuntimeArray %f32", "%type"},
9178                                   {"array",
9179                                    "%c_u32_100 = OpConstant %u32 100\n"
9180                                    "%type = OpTypeArray %i32 %c_u32_100",
9181                                    "%type"},
9182                                   {"struct", "%type = OpTypeStruct %f32 %i32 %u32", "%type"}};
9183     for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
9184     {
9185         fragments["undef_type"] = tests[testNdx].type;
9186         fragments["testfun"]    = StringTemplate("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9187                                                     "%param1 = OpFunctionParameter %v4f32\n"
9188                                                     "%label_testfun = OpLabel\n"
9189                                                     "%undef = OpUndef ${undef_type}\n"
9190                                                     "OpReturnValue %param1\n"
9191                                                     "OpFunctionEnd\n")
9192                                    .specialize(fragments);
9193         fragments["pre_main"] = tests[testNdx].decl;
9194         createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opUndefTests.get());
9195     }
9196     fragments.clear();
9197 
9198     fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9199                            "%param1 = OpFunctionParameter %v4f32\n"
9200                            "%label_testfun = OpLabel\n"
9201                            "%undef = OpUndef %f32\n"
9202                            "%zero = OpFMul %f32 %undef %c_f32_0\n"
9203                            "%is_nan = OpIsNan %bool %zero\n" //OpUndef may result in NaN which may turn %zero into Nan.
9204                            "%actually_zero = OpSelect %f32 %is_nan %c_f32_0 %zero\n"
9205                            "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9206                            "%b = OpFAdd %f32 %a %actually_zero\n"
9207                            "%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
9208                            "OpReturnValue %ret\n"
9209                            "OpFunctionEnd\n";
9210 
9211     createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
9212 
9213     fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9214                            "%param1 = OpFunctionParameter %v4f32\n"
9215                            "%label_testfun = OpLabel\n"
9216                            "%undef = OpUndef %i32\n"
9217                            "%zero = OpIMul %i32 %undef %c_i32_0\n"
9218                            "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
9219                            "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
9220                            "OpReturnValue %ret\n"
9221                            "OpFunctionEnd\n";
9222 
9223     createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
9224 
9225     fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9226                            "%param1 = OpFunctionParameter %v4f32\n"
9227                            "%label_testfun = OpLabel\n"
9228                            "%undef = OpUndef %u32\n"
9229                            "%zero = OpIMul %u32 %undef %c_i32_0\n"
9230                            "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
9231                            "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
9232                            "OpReturnValue %ret\n"
9233                            "OpFunctionEnd\n";
9234 
9235     createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
9236 
9237     fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9238                            "%param1 = OpFunctionParameter %v4f32\n"
9239                            "%label_testfun = OpLabel\n"
9240                            "%undef = OpUndef %v4f32\n"
9241                            "%vzero = OpVectorTimesScalar %v4f32 %undef %c_f32_0\n"
9242                            "%zero_0 = OpVectorExtractDynamic %f32 %vzero %c_i32_0\n"
9243                            "%zero_1 = OpVectorExtractDynamic %f32 %vzero %c_i32_1\n"
9244                            "%zero_2 = OpVectorExtractDynamic %f32 %vzero %c_i32_2\n"
9245                            "%zero_3 = OpVectorExtractDynamic %f32 %vzero %c_i32_3\n"
9246                            "%is_nan_0 = OpIsNan %bool %zero_0\n"
9247                            "%is_nan_1 = OpIsNan %bool %zero_1\n"
9248                            "%is_nan_2 = OpIsNan %bool %zero_2\n"
9249                            "%is_nan_3 = OpIsNan %bool %zero_3\n"
9250                            "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
9251                            "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
9252                            "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
9253                            "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
9254                            "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9255                            "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
9256                            "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
9257                            "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
9258                            "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
9259                            "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
9260                            "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
9261                            "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
9262                            "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
9263                            "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
9264                            "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
9265                            "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
9266                            "OpReturnValue %ret\n"
9267                            "OpFunctionEnd\n";
9268 
9269     createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
9270 
9271     fragments["pre_main"] = "%m2x2f32 = OpTypeMatrix %v2f32 2\n";
9272     fragments["testfun"]  = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9273                             "%param1 = OpFunctionParameter %v4f32\n"
9274                             "%label_testfun = OpLabel\n"
9275                             "%undef = OpUndef %m2x2f32\n"
9276                             "%mzero = OpMatrixTimesScalar %m2x2f32 %undef %c_f32_0\n"
9277                             "%zero_0 = OpCompositeExtract %f32 %mzero 0 0\n"
9278                             "%zero_1 = OpCompositeExtract %f32 %mzero 0 1\n"
9279                             "%zero_2 = OpCompositeExtract %f32 %mzero 1 0\n"
9280                             "%zero_3 = OpCompositeExtract %f32 %mzero 1 1\n"
9281                             "%is_nan_0 = OpIsNan %bool %zero_0\n"
9282                             "%is_nan_1 = OpIsNan %bool %zero_1\n"
9283                             "%is_nan_2 = OpIsNan %bool %zero_2\n"
9284                             "%is_nan_3 = OpIsNan %bool %zero_3\n"
9285                             "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
9286                             "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
9287                             "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
9288                             "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
9289                             "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9290                             "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
9291                             "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
9292                             "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
9293                             "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
9294                             "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
9295                             "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
9296                             "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
9297                             "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
9298                             "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
9299                             "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
9300                             "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
9301                             "OpReturnValue %ret\n"
9302                             "OpFunctionEnd\n";
9303 
9304     createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
9305 
9306     return opUndefTests.release();
9307 }
9308 
createOpQuantizeSingleOptionTests(tcu::TestCaseGroup * testCtx)9309 void createOpQuantizeSingleOptionTests(tcu::TestCaseGroup *testCtx)
9310 {
9311     const RGBA inputColors[4] = {RGBA(0, 0, 0, 255), RGBA(0, 0, 255, 255), RGBA(0, 255, 0, 255),
9312                                  RGBA(0, 255, 255, 255)};
9313 
9314     const RGBA expectedColors[4] = {RGBA(255, 0, 0, 255), RGBA(255, 0, 0, 255), RGBA(255, 0, 0, 255),
9315                                     RGBA(255, 0, 0, 255)};
9316 
9317     const struct SingleFP16Possibility
9318     {
9319         const char *name;
9320         const char *constant; // Value to assign to %test_constant.
9321         float valueAsFloat;
9322         const char *
9323             condition; // Must assign to %cond an expression that evaluates to true after %c = OpQuantizeToF16(%test_constant + 0).
9324         bool preserveNanInf;
9325     } tests[] = {
9326         {"negative", "-0x1.3p1\n", -constructNormalizedFloat(1, 0x300000),
9327          "%cond = OpFOrdEqual %bool %c %test_constant\n", false}, // -19
9328         {"positive", "0x1.0p7\n", constructNormalizedFloat(7, 0x000000),
9329          "%cond = OpFOrdEqual %bool %c %test_constant\n", false}, // +128
9330         // SPIR-V requires that OpQuantizeToF16 flushes
9331         // any numbers that would end up denormalized in F16 to zero.
9332         {"denorm", "0x0.0006p-126\n", std::ldexp(1.5f, -140), "%cond = OpFOrdEqual %bool %c %c_f32_0\n",
9333          false}, // denorm
9334         {"negative_denorm", "-0x0.0006p-126\n", -std::ldexp(1.5f, -140), "%cond = OpFOrdEqual %bool %c %c_f32_0\n",
9335          false}, // -denorm
9336         {"too_small", "0x1.0p-16\n", std::ldexp(1.0f, -16), "%cond = OpFOrdEqual %bool %c %c_f32_0\n",
9337          false}, // too small positive
9338         {"negative_too_small", "-0x1.0p-32\n", -std::ldexp(1.0f, -32), "%cond = OpFOrdEqual %bool %c %c_f32_0\n",
9339          false}, // too small negative
9340         {"negative_inf", "-0x1.0p128\n", -std::ldexp(1.0f, 128),
9341 
9342          "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9343          "%inf = OpIsInf %bool %c\n"
9344          "%cond = OpLogicalAnd %bool %gz %inf\n",
9345          true}, // -inf to -inf
9346         {"inf", "0x1.0p128\n", std::ldexp(1.0f, 128),
9347 
9348          "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9349          "%inf = OpIsInf %bool %c\n"
9350          "%cond = OpLogicalAnd %bool %gz %inf\n",
9351          true}, // +inf to +inf
9352         {"round_to_negative_inf", "-0x1.0p32\n", -std::ldexp(1.0f, 32),
9353 
9354          "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9355          "%inf = OpIsInf %bool %c\n"
9356          "%cond = OpLogicalAnd %bool %gz %inf\n",
9357          true}, // round to -inf
9358         {"round_to_inf", "0x1.0p16\n", std::ldexp(1.0f, 16),
9359 
9360          "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9361          "%inf = OpIsInf %bool %c\n"
9362          "%cond = OpLogicalAnd %bool %gz %inf\n",
9363          true}, // round to +inf
9364         {"nan", "0x1.1p128\n", std::numeric_limits<float>::quiet_NaN(),
9365 
9366          // Test for any NaN value, as NaNs are not preserved
9367          "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9368          "%cond = OpIsNan %bool %direct_quant\n",
9369          true}, // nan
9370         {"negative_nan", "-0x1.0001p128\n", std::numeric_limits<float>::quiet_NaN(),
9371 
9372          // Test for any NaN value, as NaNs are not preserved
9373          "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9374          "%cond = OpIsNan %bool %direct_quant\n",
9375          true} // -nan
9376     };
9377     const char *constants = "%test_constant = OpConstant %f32 "; // The value will be test.constant.
9378 
9379     StringTemplate function("%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9380                             "%param1        = OpFunctionParameter %v4f32\n"
9381                             "%label_testfun = OpLabel\n"
9382                             "%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9383                             "%b             = OpFAdd %f32 %test_constant %a\n"
9384                             "%c             = OpQuantizeToF16 %f32 %b\n"
9385                             "${condition}\n"
9386                             "%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9387                             "%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9388                             "                 OpReturnValue %retval\n"
9389                             "OpFunctionEnd\n");
9390 
9391     const char *specDecorations = "OpDecorate %test_constant SpecId 0\n";
9392     const char *specConstants   = "%test_constant = OpSpecConstant %f32 0.\n"
9393                                   "%c             = OpSpecConstantOp %f32 QuantizeToF16 %test_constant\n";
9394 
9395     StringTemplate specConstantFunction("%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9396                                         "%param1        = OpFunctionParameter %v4f32\n"
9397                                         "%label_testfun = OpLabel\n"
9398                                         "${condition}\n"
9399                                         "%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9400                                         "%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9401                                         "                 OpReturnValue %retval\n"
9402                                         "OpFunctionEnd\n");
9403 
9404     for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
9405     {
9406         std::vector<std::string> extensions;
9407         VulkanFeatures features;
9408         map<string, string> codeSpecialization;
9409         map<string, string> fragments;
9410         codeSpecialization["condition"] = tests[idx].condition;
9411         fragments["testfun"]            = function.specialize(codeSpecialization);
9412         fragments["pre_main"]           = string(constants) + tests[idx].constant + "\n";
9413 
9414         if (tests[idx].preserveNanInf)
9415         {
9416             fragments["capability"] = "OpCapability SignedZeroInfNanPreserve\n";
9417             fragments["extension"]  = "OpExtension \"SPV_KHR_float_controls\"\n";
9418             extensions.push_back("VK_KHR_shader_float_controls");
9419             features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = true;
9420         }
9421 
9422         createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, SpecConstants(),
9423                                 PushConstants(), GraphicsResources(), GraphicsInterfaces(), extensions, features,
9424                                 testCtx);
9425     }
9426 
9427     for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
9428     {
9429         std::vector<std::string> extensions;
9430         VulkanFeatures features;
9431         map<string, string> codeSpecialization;
9432         map<string, string> fragments;
9433         SpecConstants passConstants;
9434 
9435         codeSpecialization["condition"] = tests[idx].condition;
9436         fragments["testfun"]            = specConstantFunction.specialize(codeSpecialization);
9437         fragments["decoration"]         = specDecorations;
9438         fragments["pre_main"]           = specConstants;
9439 
9440         passConstants.append<float>(tests[idx].valueAsFloat);
9441 
9442         if (tests[idx].preserveNanInf)
9443         {
9444             fragments["capability"] = "OpCapability SignedZeroInfNanPreserve\n";
9445             fragments["extension"]  = "OpExtension \"SPV_KHR_float_controls\"\n";
9446             extensions.push_back("VK_KHR_shader_float_controls");
9447             features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = true;
9448         }
9449 
9450         createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments,
9451                                 passConstants, PushConstants(), GraphicsResources(), GraphicsInterfaces(), extensions,
9452                                 features, testCtx);
9453     }
9454 }
9455 
createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup * testCtx)9456 void createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup *testCtx)
9457 {
9458     RGBA inputColors[4] = {RGBA(0, 0, 0, 255), RGBA(0, 0, 255, 255), RGBA(0, 255, 0, 255), RGBA(0, 255, 255, 255)};
9459 
9460     RGBA expectedColors[4] = {RGBA(255, 0, 0, 255), RGBA(255, 0, 0, 255), RGBA(255, 0, 0, 255), RGBA(255, 0, 0, 255)};
9461 
9462     struct DualFP16Possibility
9463     {
9464         const char *name;
9465         const char *input;
9466         float inputAsFloat;
9467         const char *possibleOutput1;
9468         const char *possibleOutput2;
9469     } tests[] = {
9470         {"positive_round_up_or_round_down", "0x1.3003p8", constructNormalizedFloat(8, 0x300300), "0x1.304p8",
9471          "0x1.3p8"},
9472         {"negative_round_up_or_round_down", "-0x1.6008p-7", -constructNormalizedFloat(-7, 0x600800), "-0x1.6p-7",
9473          "-0x1.604p-7"},
9474         {"carry_bit", "0x1.01ep2", constructNormalizedFloat(2, 0x01e000), "0x1.01cp2", "0x1.02p2"},
9475         {"carry_to_exponent", "0x1.ffep1", constructNormalizedFloat(1, 0xffe000), "0x1.ffcp1", "0x1.0p2"},
9476     };
9477     StringTemplate constants("%input_const = OpConstant %f32 ${input}\n"
9478                              "%possible_solution1 = OpConstant %f32 ${output1}\n"
9479                              "%possible_solution2 = OpConstant %f32 ${output2}\n");
9480 
9481     StringTemplate specConstants("%input_const = OpSpecConstant %f32 0.\n"
9482                                  "%possible_solution1 = OpConstant %f32 ${output1}\n"
9483                                  "%possible_solution2 = OpConstant %f32 ${output2}\n");
9484 
9485     const char *specDecorations = "OpDecorate %input_const  SpecId 0\n";
9486 
9487     const char *function = "%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9488                            "%param1        = OpFunctionParameter %v4f32\n"
9489                            "%label_testfun = OpLabel\n"
9490                            "%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9491                            // For the purposes of this test we assume that 0.f will always get
9492                            // faithfully passed through the pipeline stages.
9493                            "%b             = OpFAdd %f32 %input_const %a\n"
9494                            "%c             = OpQuantizeToF16 %f32 %b\n"
9495                            "%eq_1          = OpFOrdEqual %bool %c %possible_solution1\n"
9496                            "%eq_2          = OpFOrdEqual %bool %c %possible_solution2\n"
9497                            "%cond          = OpLogicalOr %bool %eq_1 %eq_2\n"
9498                            "%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9499                            "%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1"
9500                            "                 OpReturnValue %retval\n"
9501                            "OpFunctionEnd\n";
9502 
9503     for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
9504     {
9505         map<string, string> fragments;
9506         map<string, string> constantSpecialization;
9507 
9508         constantSpecialization["input"]   = tests[idx].input;
9509         constantSpecialization["output1"] = tests[idx].possibleOutput1;
9510         constantSpecialization["output2"] = tests[idx].possibleOutput2;
9511         fragments["testfun"]              = function;
9512         fragments["pre_main"]             = constants.specialize(constantSpecialization);
9513         createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
9514     }
9515 
9516     for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
9517     {
9518         map<string, string> fragments;
9519         map<string, string> constantSpecialization;
9520         SpecConstants passConstants;
9521 
9522         constantSpecialization["output1"] = tests[idx].possibleOutput1;
9523         constantSpecialization["output2"] = tests[idx].possibleOutput2;
9524         fragments["testfun"]              = function;
9525         fragments["decoration"]           = specDecorations;
9526         fragments["pre_main"]             = specConstants.specialize(constantSpecialization);
9527 
9528         passConstants.append<float>(tests[idx].inputAsFloat);
9529 
9530         createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments,
9531                                 passConstants, testCtx);
9532     }
9533 }
9534 
createOpQuantizeTests(tcu::TestContext & testCtx)9535 tcu::TestCaseGroup *createOpQuantizeTests(tcu::TestContext &testCtx)
9536 {
9537     de::MovePtr<tcu::TestCaseGroup> opQuantizeTests(new tcu::TestCaseGroup(testCtx, "opquantize"));
9538     createOpQuantizeSingleOptionTests(opQuantizeTests.get());
9539     createOpQuantizeTwoPossibilityTests(opQuantizeTests.get());
9540     return opQuantizeTests.release();
9541 }
9542 
// Per-stage selector, each field being 0 or 1 when produced by getShaderPermutation().
struct ShaderPermutation
{
    uint8_t vertexPermutation;
    uint8_t geometryPermutation;
    uint8_t tesscPermutation;
    uint8_t tessePermutation;
    uint8_t fragmentPermutation;
};

// Splits the five low bits of inputValue into one 0/1 selector per shader stage:
// bit 4 -> vertex, bit 3 -> geometry, bit 2 -> tess. control, bit 1 -> tess. evaluation,
// bit 0 -> fragment. Higher bits are ignored.
ShaderPermutation getShaderPermutation(uint8_t inputValue)
{
    ShaderPermutation result;

    result.vertexPermutation   = static_cast<uint8_t>((inputValue >> 4) & 1u);
    result.geometryPermutation = static_cast<uint8_t>((inputValue >> 3) & 1u);
    result.tesscPermutation    = static_cast<uint8_t>((inputValue >> 2) & 1u);
    result.tessePermutation    = static_cast<uint8_t>((inputValue >> 1) & 1u);
    result.fragmentPermutation = static_cast<uint8_t>(inputValue & 1u);

    return result;
}
9560 
createModuleTests(tcu::TestContext & testCtx)9561 tcu::TestCaseGroup *createModuleTests(tcu::TestContext &testCtx)
9562 {
9563     RGBA defaultColors[4];
9564     RGBA invertedColors[4];
9565     de::MovePtr<tcu::TestCaseGroup> moduleTests(new tcu::TestCaseGroup(testCtx, "module"));
9566 
9567     getDefaultColors(defaultColors);
9568     getInvertedDefaultColors(invertedColors);
9569 
9570     // Combined module tests
9571     {
9572         // Shader stages: vertex and fragment
9573         {
9574             const ShaderElement combinedPipeline[] = {ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9575                                                       ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)};
9576 
9577             addFunctionCaseWithPrograms<InstanceContext>(
9578                 moduleTests.get(), "same_module", createCombinedModule, runAndVerifyDefaultPipeline,
9579                 createInstanceContext(combinedPipeline, map<string, string>()));
9580         }
9581 
9582         // Shader stages: vertex, geometry and fragment
9583         {
9584             const ShaderElement combinedPipeline[] = {ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9585                                                       ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9586                                                       ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)};
9587 
9588             addFunctionCaseWithPrograms<InstanceContext>(
9589                 moduleTests.get(), "same_module_geom", createCombinedModule, runAndVerifyDefaultPipeline,
9590                 createInstanceContext(combinedPipeline, map<string, string>()));
9591         }
9592 
9593         // Shader stages: vertex, tessellation control, tessellation evaluation and fragment
9594         {
9595             const ShaderElement combinedPipeline[] = {
9596                 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9597                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9598                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9599                 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)};
9600 
9601             addFunctionCaseWithPrograms<InstanceContext>(
9602                 moduleTests.get(), "same_module_tessc_tesse", createCombinedModule, runAndVerifyDefaultPipeline,
9603                 createInstanceContext(combinedPipeline, map<string, string>()));
9604         }
9605 
9606         // Shader stages: vertex, tessellation control, tessellation evaluation, geometry and fragment
9607         {
9608             const ShaderElement combinedPipeline[] = {
9609                 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9610                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9611                 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9612                 ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9613                 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)};
9614 
9615             addFunctionCaseWithPrograms<InstanceContext>(
9616                 moduleTests.get(), "same_module_tessc_tesse_geom", createCombinedModule, runAndVerifyDefaultPipeline,
9617                 createInstanceContext(combinedPipeline, map<string, string>()));
9618         }
9619     }
9620 
9621     const char *numbers[] = {"1", "2"};
9622 
9623     for (int8_t idx = 0; idx < 32; ++idx)
9624     {
9625         ShaderPermutation permutation = getShaderPermutation(idx);
9626         string name                   = string("vert") + numbers[permutation.vertexPermutation] + "_geom" +
9627                       numbers[permutation.geometryPermutation] + "_tessc" + numbers[permutation.tesscPermutation] +
9628                       "_tesse" + numbers[permutation.tessePermutation] + "_frag" +
9629                       numbers[permutation.fragmentPermutation];
9630         const ShaderElement pipeline[] = {
9631             ShaderElement("vert", string("vert") + numbers[permutation.vertexPermutation], VK_SHADER_STAGE_VERTEX_BIT),
9632             ShaderElement("geom", string("geom") + numbers[permutation.geometryPermutation],
9633                           VK_SHADER_STAGE_GEOMETRY_BIT),
9634             ShaderElement("tessc", string("tessc") + numbers[permutation.tesscPermutation],
9635                           VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9636             ShaderElement("tesse", string("tesse") + numbers[permutation.tessePermutation],
9637                           VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9638             ShaderElement("frag", string("frag") + numbers[permutation.fragmentPermutation],
9639                           VK_SHADER_STAGE_FRAGMENT_BIT)};
9640 
9641         // If there are an even number of swaps, then it should be no-op.
9642         // If there are an odd number, the color should be flipped.
9643         if ((permutation.vertexPermutation + permutation.geometryPermutation + permutation.tesscPermutation +
9644              permutation.tessePermutation + permutation.fragmentPermutation) %
9645                 2 ==
9646             0)
9647         {
9648             addFunctionCaseWithPrograms<InstanceContext>(
9649                 moduleTests.get(), name, createMultipleEntries, runAndVerifyDefaultPipeline,
9650                 createInstanceContext(pipeline, defaultColors, defaultColors, map<string, string>()));
9651         }
9652         else
9653         {
9654             addFunctionCaseWithPrograms<InstanceContext>(
9655                 moduleTests.get(), name, createMultipleEntries, runAndVerifyDefaultPipeline,
9656                 createInstanceContext(pipeline, defaultColors, invertedColors, map<string, string>()));
9657         }
9658     }
9659     return moduleTests.release();
9660 }
9661 
getUnusedVarTestNamePiece(const std::string & prefix,ShaderTask task)9662 std::string getUnusedVarTestNamePiece(const std::string &prefix, ShaderTask task)
9663 {
9664     switch (task)
9665     {
9666     case SHADER_TASK_NONE:
9667         return "";
9668     case SHADER_TASK_NORMAL:
9669         return prefix + "_normal";
9670     case SHADER_TASK_UNUSED_VAR:
9671         return prefix + "_unused_var";
9672     case SHADER_TASK_UNUSED_FUNC:
9673         return prefix + "_unused_func";
9674     default:
9675         DE_ASSERT(false);
9676     }
9677     // unreachable
9678     return "";
9679 }
9680 
getShaderTaskIndexName(ShaderTaskIndex index)9681 std::string getShaderTaskIndexName(ShaderTaskIndex index)
9682 {
9683     switch (index)
9684     {
9685     case SHADER_TASK_INDEX_VERTEX:
9686         return "vertex";
9687     case SHADER_TASK_INDEX_GEOMETRY:
9688         return "geom";
9689     case SHADER_TASK_INDEX_TESS_CONTROL:
9690         return "tessc";
9691     case SHADER_TASK_INDEX_TESS_EVAL:
9692         return "tesse";
9693     case SHADER_TASK_INDEX_FRAGMENT:
9694         return "frag";
9695     default:
9696         DE_ASSERT(false);
9697     }
9698     // unreachable
9699     return "";
9700 }
9701 
getUnusedVarTestName(const ShaderTaskArray & shaderTasks,const VariableLocation & location)9702 std::string getUnusedVarTestName(const ShaderTaskArray &shaderTasks, const VariableLocation &location)
9703 {
9704     std::string testName = location.toString();
9705 
9706     for (size_t i = 0; i < DE_LENGTH_OF_ARRAY(shaderTasks); ++i)
9707     {
9708         if (shaderTasks[i] != SHADER_TASK_NONE)
9709         {
9710             testName += "_" + getUnusedVarTestNamePiece(getShaderTaskIndexName((ShaderTaskIndex)i), shaderTasks[i]);
9711         }
9712     }
9713 
9714     return testName;
9715 }
9716 
createUnusedVariableTests(tcu::TestContext & testCtx)9717 tcu::TestCaseGroup *createUnusedVariableTests(tcu::TestContext &testCtx)
9718 {
9719     de::MovePtr<tcu::TestCaseGroup> moduleTests(new tcu::TestCaseGroup(testCtx, "unused_variables"));
9720 
9721     ShaderTaskArray shaderCombinations[] = {
9722         // Vertex                    Geometry                    Tess. Control                Tess. Evaluation            Fragment
9723         {SHADER_TASK_UNUSED_VAR, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL},
9724         {SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL},
9725         {SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_UNUSED_VAR},
9726         {SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_UNUSED_FUNC},
9727         {SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_VAR, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL},
9728         {SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL},
9729         {SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_UNUSED_VAR, SHADER_TASK_NORMAL, SHADER_TASK_NORMAL},
9730         {SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NORMAL, SHADER_TASK_NORMAL},
9731         {SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_VAR, SHADER_TASK_NORMAL},
9732         {SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NORMAL}};
9733 
9734     const VariableLocation testLocations[] = {
9735         // Set        Binding
9736         {0, 5},
9737         {5, 5},
9738     };
9739 
9740     for (size_t combNdx = 0; combNdx < DE_LENGTH_OF_ARRAY(shaderCombinations); ++combNdx)
9741     {
9742         for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
9743         {
9744             const ShaderTaskArray &shaderTasks = shaderCombinations[combNdx];
9745             const VariableLocation &location   = testLocations[locationNdx];
9746             std::string testName               = getUnusedVarTestName(shaderTasks, location);
9747 
9748             addFunctionCaseWithPrograms<UnusedVariableContext>(moduleTests.get(), testName, createUnusedVariableModules,
9749                                                                runAndVerifyUnusedVariablePipeline,
9750                                                                createUnusedVariableContext(shaderTasks, location));
9751         }
9752     }
9753 
9754     return moduleTests.release();
9755 }
9756 
createLoopTests(tcu::TestContext & testCtx)9757 tcu::TestCaseGroup *createLoopTests(tcu::TestContext &testCtx)
9758 {
9759     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "loop"));
9760     RGBA defaultColors[4];
9761     getDefaultColors(defaultColors);
9762     map<string, string> fragments;
9763     fragments["pre_main"] = "%c_f32_5 = OpConstant %f32 5.\n";
9764 
9765     // A loop with a single block. The Continue Target is the loop block
9766     // itself. In SPIR-V terms, the "loop construct" contains no blocks at all
9767     // -- the "continue construct" forms the entire loop.
9768     fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9769                            "%param1 = OpFunctionParameter %v4f32\n"
9770 
9771                            "%entry = OpLabel\n"
9772                            "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9773                            "OpBranch %loop\n"
9774 
9775                            ";adds and subtracts 1.0 to %val in alternate iterations\n"
9776                            "%loop = OpLabel\n"
9777                            "%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9778                            "%delta = OpPhi %f32 %c_f32_1 %entry %minus_delta %loop\n"
9779                            "%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9780                            "%val = OpFAdd %f32 %val1 %delta\n"
9781                            "%minus_delta = OpFSub %f32 %c_f32_0 %delta\n"
9782                            "%count__ = OpISub %i32 %count %c_i32_1\n"
9783                            "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9784                            "OpLoopMerge %exit %loop None\n"
9785                            "OpBranchConditional %again %loop %exit\n"
9786 
9787                            "%exit = OpLabel\n"
9788                            "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9789                            "OpReturnValue %result\n"
9790 
9791                            "OpFunctionEnd\n";
9792 
9793     createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());
9794 
9795     // Body comprised of multiple basic blocks.
9796     const StringTemplate multiBlock("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9797                                     "%param1 = OpFunctionParameter %v4f32\n"
9798 
9799                                     "%entry = OpLabel\n"
9800                                     "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9801                                     "OpBranch %loop\n"
9802 
9803                                     ";adds and subtracts 1.0 to %val in alternate iterations\n"
9804                                     "%loop = OpLabel\n"
9805                                     "%count = OpPhi %i32 %c_i32_4 %entry %count__ %cont\n"
9806                                     "%delta = OpPhi %f32 %c_f32_1 %entry %delta_next %cont\n"
9807                                     "%val1 = OpPhi %f32 %val0 %entry %val %cont\n"
9808                                     // There are several possibilities for the Continue Target below.  Each
9809                                     // will be specialized into a separate test case.
9810                                     "OpLoopMerge %exit ${continue_target} None\n"
9811                                     "OpBranch %if\n"
9812 
9813                                     "%if = OpLabel\n"
9814                                     ";delta_next = (delta > 0) ? -1 : 1;\n"
9815                                     "%gt0 = OpFOrdGreaterThan %bool %delta %c_f32_0\n"
9816                                     "OpSelectionMerge %gather DontFlatten\n"
9817                                     "OpBranchConditional %gt0 %even %odd ;tells us if %count is even or odd\n"
9818 
9819                                     "%odd = OpLabel\n"
9820                                     "OpBranch %gather\n"
9821 
9822                                     "%even = OpLabel\n"
9823                                     "OpBranch %gather\n"
9824 
9825                                     "%gather = OpLabel\n"
9826                                     "%delta_next = OpPhi %f32 %c_f32_n1 %even %c_f32_1 %odd\n"
9827                                     "%val = OpFAdd %f32 %val1 %delta\n"
9828                                     "%count__ = OpISub %i32 %count %c_i32_1\n"
9829                                     "OpBranch %cont\n"
9830 
9831                                     "%cont = OpLabel\n"
9832                                     "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9833                                     "OpBranchConditional %again %loop %exit\n"
9834 
9835                                     "%exit = OpLabel\n"
9836                                     "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9837                                     "OpReturnValue %result\n"
9838 
9839                                     "OpFunctionEnd\n");
9840 
9841     map<string, string> continue_target;
9842 
9843     // The Continue Target is the loop block itself.
9844     continue_target["continue_target"] = "%if";
9845     fragments["testfun"]               = multiBlock.specialize(continue_target);
9846     createTestsForAllStages("multi_block_continue_construct", defaultColors, defaultColors, fragments, testGroup.get());
9847 
9848     // The Continue Target is at the end of the loop.
9849     continue_target["continue_target"] = "%cont";
9850     fragments["testfun"]               = multiBlock.specialize(continue_target);
9851     createTestsForAllStages("multi_block_loop_construct", defaultColors, defaultColors, fragments, testGroup.get());
9852 
9853     // A loop with continue statement.
9854     fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9855                            "%param1 = OpFunctionParameter %v4f32\n"
9856 
9857                            "%entry = OpLabel\n"
9858                            "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9859                            "OpBranch %loop\n"
9860 
9861                            ";adds 4, 3, and 1 to %val0 (skips 2)\n"
9862                            "%loop = OpLabel\n"
9863                            "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9864                            "%val1 = OpPhi %f32 %val0 %entry %val %continue\n"
9865                            "OpLoopMerge %exit %continue None\n"
9866                            "OpBranch %if\n"
9867 
9868                            "%if = OpLabel\n"
9869                            ";skip if %count==2\n"
9870                            "%eq2 = OpIEqual %bool %count %c_i32_2\n"
9871                            "OpBranchConditional %eq2 %continue %body\n"
9872 
9873                            "%body = OpLabel\n"
9874                            "%fcount = OpConvertSToF %f32 %count\n"
9875                            "%val2 = OpFAdd %f32 %val1 %fcount\n"
9876                            "OpBranch %continue\n"
9877 
9878                            "%continue = OpLabel\n"
9879                            "%val = OpPhi %f32 %val2 %body %val1 %if\n"
9880                            "%count__ = OpISub %i32 %count %c_i32_1\n"
9881                            "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9882                            "OpBranchConditional %again %loop %exit\n"
9883 
9884                            "%exit = OpLabel\n"
9885                            "%same = OpFSub %f32 %val %c_f32_8\n"
9886                            "%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9887                            "OpReturnValue %result\n"
9888                            "OpFunctionEnd\n";
9889     createTestsForAllStages("continue", defaultColors, defaultColors, fragments, testGroup.get());
9890 
9891     // A loop with break.
9892     fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9893                            "%param1 = OpFunctionParameter %v4f32\n"
9894 
9895                            "%entry = OpLabel\n"
9896                            ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9897                            "%dot = OpDot %f32 %param1 %param1\n"
9898                            "%div = OpFDiv %f32 %dot %c_f32_5\n"
9899                            "%zero = OpConvertFToU %u32 %div\n"
9900                            "%two = OpIAdd %i32 %zero %c_i32_2\n"
9901                            "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9902                            "OpBranch %loop\n"
9903 
9904                            ";adds 4 and 3 to %val0 (exits early)\n"
9905                            "%loop = OpLabel\n"
9906                            "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9907                            "%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9908                            "OpLoopMerge %exit %continue None\n"
9909                            "OpBranch %if\n"
9910 
9911                            "%if = OpLabel\n"
9912                            ";end loop if %count==%two\n"
9913                            "%above2 = OpSGreaterThan %bool %count %two\n"
9914                            "OpBranchConditional %above2 %body %exit\n"
9915 
9916                            "%body = OpLabel\n"
9917                            "%fcount = OpConvertSToF %f32 %count\n"
9918                            "%val2 = OpFAdd %f32 %val1 %fcount\n"
9919                            "OpBranch %continue\n"
9920 
9921                            "%continue = OpLabel\n"
9922                            "%count__ = OpISub %i32 %count %c_i32_1\n"
9923                            "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9924                            "OpBranchConditional %again %loop %exit\n"
9925 
9926                            "%exit = OpLabel\n"
9927                            "%val_post = OpPhi %f32 %val2 %continue %val1 %if\n"
9928                            "%same = OpFSub %f32 %val_post %c_f32_7\n"
9929                            "%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9930                            "OpReturnValue %result\n"
9931                            "OpFunctionEnd\n";
9932     createTestsForAllStages("break", defaultColors, defaultColors, fragments, testGroup.get());
9933 
9934     // A loop with return.
9935     fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9936                            "%param1 = OpFunctionParameter %v4f32\n"
9937 
9938                            "%entry = OpLabel\n"
9939                            ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9940                            "%dot = OpDot %f32 %param1 %param1\n"
9941                            "%div = OpFDiv %f32 %dot %c_f32_5\n"
9942                            "%zero = OpConvertFToU %u32 %div\n"
9943                            "%two = OpIAdd %i32 %zero %c_i32_2\n"
9944                            "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9945                            "OpBranch %loop\n"
9946 
9947                            ";returns early without modifying %param1\n"
9948                            "%loop = OpLabel\n"
9949                            "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9950                            "%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9951                            "OpLoopMerge %exit %continue None\n"
9952                            "OpBranch %if\n"
9953 
9954                            "%if = OpLabel\n"
9955                            ";return if %count==%two\n"
9956                            "%above2 = OpSGreaterThan %bool %count %two\n"
9957                            "OpSelectionMerge %body DontFlatten\n"
9958                            "OpBranchConditional %above2 %body %early_exit\n"
9959 
9960                            "%early_exit = OpLabel\n"
9961                            "OpReturnValue %param1\n"
9962 
9963                            "%body = OpLabel\n"
9964                            "%fcount = OpConvertSToF %f32 %count\n"
9965                            "%val2 = OpFAdd %f32 %val1 %fcount\n"
9966                            "OpBranch %continue\n"
9967 
9968                            "%continue = OpLabel\n"
9969                            "%count__ = OpISub %i32 %count %c_i32_1\n"
9970                            "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9971                            "OpBranchConditional %again %loop %exit\n"
9972 
9973                            "%exit = OpLabel\n"
9974                            ";should never get here, so return an incorrect result\n"
9975                            "%result = OpVectorInsertDynamic %v4f32 %param1 %val2 %c_i32_0\n"
9976                            "OpReturnValue %result\n"
9977                            "OpFunctionEnd\n";
9978     createTestsForAllStages("return", defaultColors, defaultColors, fragments, testGroup.get());
9979 
9980     // Continue inside a switch block to break to enclosing loop's merge block.
9981     // Matches roughly the following GLSL code:
9982     // for (; keep_going; keep_going = false)
9983     // {
9984     //     switch (int(param1.x))
9985     //     {
9986     //         case 0: continue;
9987     //         case 1: continue;
9988     //         default: continue;
9989     //     }
9990     //     dead code: modify return value to invalid result.
9991     // }
9992     fragments["pre_main"] = "%fp_bool = OpTypePointer Function %bool\n"
9993                             "%true = OpConstantTrue %bool\n"
9994                             "%false = OpConstantFalse %bool\n";
9995 
9996     fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9997                            "%param1 = OpFunctionParameter %v4f32\n"
9998 
9999                            "%entry = OpLabel\n"
10000                            "%keep_going = OpVariable %fp_bool Function\n"
10001                            "%val_ptr = OpVariable %fp_f32 Function\n"
10002                            "%param1_x = OpCompositeExtract %f32 %param1 0\n"
10003                            "OpStore %keep_going %true\n"
10004                            "OpBranch %forloop_begin\n"
10005 
10006                            "%forloop_begin = OpLabel\n"
10007                            "OpLoopMerge %forloop_merge %forloop_continue None\n"
10008                            "OpBranch %forloop\n"
10009 
10010                            "%forloop = OpLabel\n"
10011                            "%for_condition = OpLoad %bool %keep_going\n"
10012                            "OpBranchConditional %for_condition %forloop_body %forloop_merge\n"
10013 
10014                            "%forloop_body = OpLabel\n"
10015                            "OpStore %val_ptr %param1_x\n"
10016                            "%param1_x_int = OpConvertFToS %i32 %param1_x\n"
10017 
10018                            "OpSelectionMerge %switch_merge None\n"
10019                            "OpSwitch %param1_x_int %default 0 %case_0 1 %case_1\n"
10020                            "%case_0 = OpLabel\n"
10021                            "OpBranch %forloop_continue\n"
10022                            "%case_1 = OpLabel\n"
10023                            "OpBranch %forloop_continue\n"
10024                            "%default = OpLabel\n"
10025                            "OpBranch %forloop_continue\n"
10026                            "%switch_merge = OpLabel\n"
10027                            ";should never get here, so change the return value to invalid result\n"
10028                            "OpStore %val_ptr %c_f32_1\n"
10029                            "OpBranch %forloop_continue\n"
10030 
10031                            "%forloop_continue = OpLabel\n"
10032                            "OpStore %keep_going %false\n"
10033                            "OpBranch %forloop_begin\n"
10034                            "%forloop_merge = OpLabel\n"
10035 
10036                            "%val = OpLoad %f32 %val_ptr\n"
10037                            "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
10038                            "OpReturnValue %result\n"
10039                            "OpFunctionEnd\n";
10040     createTestsForAllStages("switch_continue", defaultColors, defaultColors, fragments, testGroup.get());
10041 
10042     return testGroup.release();
10043 }
10044 
10045 // A collection of tests putting OpControlBarrier in places GLSL forbids but SPIR-V allows.
createBarrierTests(tcu::TestContext & testCtx)10046 tcu::TestCaseGroup *createBarrierTests(tcu::TestContext &testCtx)
10047 {
10048     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "barrier"));
10049     map<string, string> fragments;
10050 
10051     // A barrier inside a function body.
10052     fragments["pre_main"] = "%Workgroup = OpConstant %i32 2\n"
10053                             "%Invocation = OpConstant %i32 4\n"
10054                             "%MemorySemanticsNone = OpConstant %i32 0\n";
10055     fragments["testfun"]  = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10056                             "%param1 = OpFunctionParameter %v4f32\n"
10057                             "%label_testfun = OpLabel\n"
10058                             "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10059                             "OpReturnValue %param1\n"
10060                             "OpFunctionEnd\n";
10061     addTessCtrlTest(testGroup.get(), "in_function", fragments);
10062 
10063     // Common setup code for the following tests.
10064     fragments["pre_main"] = "%Workgroup = OpConstant %i32 2\n"
10065                             "%Invocation = OpConstant %i32 4\n"
10066                             "%MemorySemanticsNone = OpConstant %i32 0\n"
10067                             "%c_f32_5 = OpConstant %f32 5.\n";
10068     const string
10069         setupPercentZero = // Begins %test_code function with code that sets %zero to 0u but cannot be optimized away.
10070         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10071         "%param1 = OpFunctionParameter %v4f32\n"
10072         "%entry = OpLabel\n"
10073         ";param1 components are between 0 and 1, so dot product is 4 or less\n"
10074         "%dot = OpDot %f32 %param1 %param1\n"
10075         "%div = OpFDiv %f32 %dot %c_f32_5\n"
10076         "%zero = OpConvertFToU %u32 %div\n";
10077 
10078     // Barriers inside OpSwitch branches.
10079     fragments["testfun"] =
10080         setupPercentZero +
10081         "OpSelectionMerge %switch_exit None\n"
10082         "OpSwitch %zero %switch_default 0 %case0 1 %case1 ;should always go to %case0\n"
10083 
10084         "%case1 = OpLabel\n"
10085         ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
10086         "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10087         "%wrong_branch_alert1 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
10088         "OpBranch %switch_exit\n"
10089 
10090         "%switch_default = OpLabel\n"
10091         "%wrong_branch_alert2 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
10092         ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
10093         "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10094         "OpBranch %switch_exit\n"
10095 
10096         "%case0 = OpLabel\n"
10097         "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10098         "OpBranch %switch_exit\n"
10099 
10100         "%switch_exit = OpLabel\n"
10101         "%ret = OpPhi %v4f32 %param1 %case0 %wrong_branch_alert1 %case1 %wrong_branch_alert2 %switch_default\n"
10102         "OpReturnValue %ret\n"
10103         "OpFunctionEnd\n";
10104     addTessCtrlTest(testGroup.get(), "in_switch", fragments);
10105 
10106     // Barriers inside if-then-else.
10107     fragments["testfun"] =
10108         setupPercentZero +
10109         "%eq0 = OpIEqual %bool %zero %c_u32_0\n"
10110         "OpSelectionMerge %exit DontFlatten\n"
10111         "OpBranchConditional %eq0 %then %else\n"
10112 
10113         "%else = OpLabel\n"
10114         ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
10115         "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10116         "%wrong_branch_alert = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
10117         "OpBranch %exit\n"
10118 
10119         "%then = OpLabel\n"
10120         "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10121         "OpBranch %exit\n"
10122         "%exit = OpLabel\n"
10123         "%ret = OpPhi %v4f32 %param1 %then %wrong_branch_alert %else\n"
10124         "OpReturnValue %ret\n"
10125         "OpFunctionEnd\n";
10126     addTessCtrlTest(testGroup.get(), "in_if", fragments);
10127 
10128     // A barrier after control-flow reconvergence, tempting the compiler to attempt something like this:
10129     // http://lists.llvm.org/pipermail/llvm-dev/2009-October/026317.html.
10130     fragments["testfun"] = setupPercentZero + "%thread_id = OpLoad %i32 %BP_gl_InvocationID\n"
10131                                               "%thread0 = OpIEqual %bool %thread_id %c_i32_0\n"
10132                                               "OpSelectionMerge %exit DontFlatten\n"
10133                                               "OpBranchConditional %thread0 %then %else\n"
10134 
10135                                               "%else = OpLabel\n"
10136                                               "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
10137                                               "OpBranch %exit\n"
10138 
10139                                               "%then = OpLabel\n"
10140                                               "%val1 = OpVectorExtractDynamic %f32 %param1 %zero\n"
10141                                               "OpBranch %exit\n"
10142 
10143                                               "%exit = OpLabel\n"
10144                                               "%val = OpPhi %f32 %val0 %else %val1 %then\n"
10145                                               "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10146                                               "%ret = OpVectorInsertDynamic %v4f32 %param1 %val %zero\n"
10147                                               "OpReturnValue %ret\n"
10148                                               "OpFunctionEnd\n";
10149     addTessCtrlTest(testGroup.get(), "after_divergent_if", fragments);
10150 
10151     // A barrier inside a loop.
10152     fragments["pre_main"] = "%Workgroup = OpConstant %i32 2\n"
10153                             "%Invocation = OpConstant %i32 4\n"
10154                             "%MemorySemanticsNone = OpConstant %i32 0\n"
10155                             "%c_f32_10 = OpConstant %f32 10.\n";
10156     fragments["testfun"]  = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10157                             "%param1 = OpFunctionParameter %v4f32\n"
10158                             "%entry = OpLabel\n"
10159                             "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
10160                             "OpBranch %loop\n"
10161 
10162                            ";adds 4, 3, 2, and 1 to %val0\n"
10163                            "%loop = OpLabel\n"
10164                            "%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
10165                            "%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
10166                            "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10167                            "%fcount = OpConvertSToF %f32 %count\n"
10168                            "%val = OpFAdd %f32 %val1 %fcount\n"
10169                            "%count__ = OpISub %i32 %count %c_i32_1\n"
10170                            "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
10171                            "OpLoopMerge %exit %loop None\n"
10172                            "OpBranchConditional %again %loop %exit\n"
10173 
10174                            "%exit = OpLabel\n"
10175                            "%same = OpFSub %f32 %val %c_f32_10\n"
10176                            "%ret = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
10177                            "OpReturnValue %ret\n"
10178                            "OpFunctionEnd\n";
10179     addTessCtrlTest(testGroup.get(), "in_loop", fragments);
10180 
10181     return testGroup.release();
10182 }
10183 
10184 // Test for the OpFRem instruction.
createFRemTests(tcu::TestContext & testCtx)10185 tcu::TestCaseGroup *createFRemTests(tcu::TestContext &testCtx)
10186 {
10187     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "frem"));
10188     map<string, string> fragments;
10189     RGBA inputColors[4];
10190     RGBA outputColors[4];
10191 
10192     fragments["pre_main"] =
10193         "%c_f32_3 = OpConstant %f32 3.0\n"
10194         "%c_f32_n3 = OpConstant %f32 -3.0\n"
10195         "%c_f32_4 = OpConstant %f32 4.0\n"
10196         "%c_f32_p75 = OpConstant %f32 0.75\n"
10197         "%c_v4f32_p75_p75_p75_p75 = OpConstantComposite %v4f32 %c_f32_p75 %c_f32_p75 %c_f32_p75 %c_f32_p75 \n"
10198         "%c_v4f32_4_4_4_4 = OpConstantComposite %v4f32 %c_f32_4 %c_f32_4 %c_f32_4 %c_f32_4\n"
10199         "%c_v4f32_3_n3_3_n3 = OpConstantComposite %v4f32 %c_f32_3 %c_f32_n3 %c_f32_3 %c_f32_n3\n";
10200 
10201     // The test does the following.
10202     // vec4 result = (param1 * 8.0) - 4.0;
10203     // return (frem(result.x,3) + 0.75, frem(result.y, -3) + 0.75, 0, 1)
10204     fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10205                            "%param1 = OpFunctionParameter %v4f32\n"
10206                            "%label_testfun = OpLabel\n"
10207                            "%v_times_8 = OpVectorTimesScalar %v4f32 %param1 %c_f32_8\n"
10208                            "%minus_4 = OpFSub %v4f32 %v_times_8 %c_v4f32_4_4_4_4\n"
10209                            "%frem = OpFRem %v4f32 %minus_4 %c_v4f32_3_n3_3_n3\n"
10210                            "%added = OpFAdd %v4f32 %frem %c_v4f32_p75_p75_p75_p75\n"
10211                            "%xyz_1 = OpVectorInsertDynamic %v4f32 %added %c_f32_1 %c_i32_3\n"
10212                            "%xy_0_1 = OpVectorInsertDynamic %v4f32 %xyz_1 %c_f32_0 %c_i32_2\n"
10213                            "OpReturnValue %xy_0_1\n"
10214                            "OpFunctionEnd\n";
10215 
10216     inputColors[0] = RGBA(16, 16, 0, 255);
10217     inputColors[1] = RGBA(232, 232, 0, 255);
10218     inputColors[2] = RGBA(232, 16, 0, 255);
10219     inputColors[3] = RGBA(16, 232, 0, 255);
10220 
10221     outputColors[0] = RGBA(64, 64, 0, 255);
10222     outputColors[1] = RGBA(255, 255, 0, 255);
10223     outputColors[2] = RGBA(255, 64, 0, 255);
10224     outputColors[3] = RGBA(64, 255, 0, 255);
10225 
10226     createTestsForAllStages("frem", inputColors, outputColors, fragments, testGroup.get());
10227     return testGroup.release();
10228 }
10229 
10230 // Test for the OpSRem instruction.
createOpSRemGraphicsTests(tcu::TestContext & testCtx,qpTestResult negFailResult)10231 tcu::TestCaseGroup *createOpSRemGraphicsTests(tcu::TestContext &testCtx, qpTestResult negFailResult)
10232 {
10233     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "srem"));
10234     map<string, string> fragments;
10235 
10236     fragments["pre_main"] = "%c_f32_255 = OpConstant %f32 255.0\n"
10237                             "%c_i32_128 = OpConstant %i32 128\n"
10238                             "%c_i32_255 = OpConstant %i32 255\n"
10239                             "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
10240                             "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10241                             "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10242 
10243     // The test does the following.
10244     // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10245     // ivec4 result = ivec4(srem(ints.x, ints.y), srem(ints.y, ints.z), srem(ints.z, ints.x), 255);
10246     // return float(result + 128) / 255.0;
10247     fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10248                            "%param1 = OpFunctionParameter %v4f32\n"
10249                            "%label_testfun = OpLabel\n"
10250                            "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10251                            "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10252                            "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10253                            "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10254                            "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10255                            "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10256                            "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10257                            "%x_out = OpSRem %i32 %x_in %y_in\n"
10258                            "%y_out = OpSRem %i32 %y_in %z_in\n"
10259                            "%z_out = OpSRem %i32 %z_in %x_in\n"
10260                            "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10261                            "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10262                            "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10263                            "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10264                            "OpReturnValue %float_out\n"
10265                            "OpFunctionEnd\n";
10266 
10267     const struct CaseParams
10268     {
10269         const char *name;
10270         const char *failMessageTemplate; // customized status message
10271         qpTestResult failResult;         // override status on failure
10272         int operands[4][3];              // four (x, y, z) vectors of operands
10273         int results[4][3];               // four (x, y, z) vectors of results
10274     } cases[] = {
10275         {
10276             "positive",
10277             "${reason}",
10278             QP_TEST_RESULT_FAIL,
10279             {{5, 12, 17}, {5, 5, 7}, {75, 8, 81}, {25, 60, 100}}, // operands
10280             {{5, 12, 2}, {0, 5, 2}, {3, 8, 6}, {25, 60, 0}},      // results
10281         },
10282         {
10283             "all",
10284             "Inconsistent results, but within specification: ${reason}",
10285             negFailResult,                                             // negative operands, not required by the spec
10286             {{5, 12, -17}, {-5, -5, 7}, {75, 8, -81}, {25, -60, 100}}, // operands
10287             {{5, 12, -2}, {0, -5, 2}, {3, 8, -6}, {25, -60, 0}},       // results
10288         },
10289     };
10290     // If either operand is negative the result is undefined. Some implementations may still return correct values.
10291 
10292     for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10293     {
10294         const CaseParams &params = cases[caseNdx];
10295         RGBA inputColors[4];
10296         RGBA outputColors[4];
10297 
10298         for (int i = 0; i < 4; ++i)
10299         {
10300             inputColors[i] =
10301                 RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10302             outputColors[i] =
10303                 RGBA(params.results[i][0] + 128, params.results[i][1] + 128, params.results[i][2] + 128, 255);
10304         }
10305 
10306         createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult,
10307                                 params.failMessageTemplate);
10308     }
10309 
10310     return testGroup.release();
10311 }
10312 
10313 // Test for the OpSMod instruction.
createOpSModGraphicsTests(tcu::TestContext & testCtx,qpTestResult negFailResult)10314 tcu::TestCaseGroup *createOpSModGraphicsTests(tcu::TestContext &testCtx, qpTestResult negFailResult)
10315 {
10316     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "smod"));
10317     map<string, string> fragments;
10318 
10319     fragments["pre_main"] = "%c_f32_255 = OpConstant %f32 255.0\n"
10320                             "%c_i32_128 = OpConstant %i32 128\n"
10321                             "%c_i32_255 = OpConstant %i32 255\n"
10322                             "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
10323                             "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10324                             "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10325 
10326     // The test does the following.
10327     // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10328     // ivec4 result = ivec4(smod(ints.x, ints.y), smod(ints.y, ints.z), smod(ints.z, ints.x), 255);
10329     // return float(result + 128) / 255.0;
10330     fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10331                            "%param1 = OpFunctionParameter %v4f32\n"
10332                            "%label_testfun = OpLabel\n"
10333                            "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10334                            "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10335                            "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10336                            "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10337                            "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10338                            "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10339                            "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10340                            "%x_out = OpSMod %i32 %x_in %y_in\n"
10341                            "%y_out = OpSMod %i32 %y_in %z_in\n"
10342                            "%z_out = OpSMod %i32 %z_in %x_in\n"
10343                            "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10344                            "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10345                            "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10346                            "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10347                            "OpReturnValue %float_out\n"
10348                            "OpFunctionEnd\n";
10349 
10350     const struct CaseParams
10351     {
10352         const char *name;
10353         const char *failMessageTemplate; // customized status message
10354         qpTestResult failResult;         // override status on failure
10355         int operands[4][3];              // four (x, y, z) vectors of operands
10356         int results[4][3];               // four (x, y, z) vectors of results
10357     } cases[] = {
10358         {
10359             "positive",
10360             "${reason}",
10361             QP_TEST_RESULT_FAIL,
10362             {{5, 12, 17}, {5, 5, 7}, {75, 8, 81}, {25, 60, 100}}, // operands
10363             {{5, 12, 2}, {0, 5, 2}, {3, 8, 6}, {25, 60, 0}},      // results
10364         },
10365         {
10366             "all",
10367             "Inconsistent results, but within specification: ${reason}",
10368             negFailResult,                                             // negative operands, not required by the spec
10369             {{5, 12, -17}, {-5, -5, 7}, {75, 8, -81}, {25, -60, 100}}, // operands
10370             {{5, -5, 3}, {0, 2, -3}, {3, -73, 69}, {-35, 40, 0}},      // results
10371         },
10372     };
10373     // If either operand is negative the result is undefined. Some implementations may still return correct values.
10374 
10375     for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10376     {
10377         const CaseParams &params = cases[caseNdx];
10378         RGBA inputColors[4];
10379         RGBA outputColors[4];
10380 
10381         for (int i = 0; i < 4; ++i)
10382         {
10383             inputColors[i] =
10384                 RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10385             outputColors[i] =
10386                 RGBA(params.results[i][0] + 128, params.results[i][1] + 128, params.results[i][2] + 128, 255);
10387         }
10388 
10389         createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult,
10390                                 params.failMessageTemplate);
10391     }
10392     return testGroup.release();
10393 }
10394 
// Data types that can appear as the source ("from") or destination ("to") of a conversion.
// The numeric values match the implicit numbering of the declaration order.
enum ConversionDataType
{
    // Signed integers.
    DATA_TYPE_SIGNED_8  = 0,
    DATA_TYPE_SIGNED_16 = 1,
    DATA_TYPE_SIGNED_32 = 2,
    DATA_TYPE_SIGNED_64 = 3,

    // Unsigned integers.
    DATA_TYPE_UNSIGNED_8  = 4,
    DATA_TYPE_UNSIGNED_16 = 5,
    DATA_TYPE_UNSIGNED_32 = 6,
    DATA_TYPE_UNSIGNED_64 = 7,

    // Floating point.
    DATA_TYPE_FLOAT_16 = 8,
    DATA_TYPE_FLOAT_32 = 9,
    DATA_TYPE_FLOAT_64 = 10,

    // Two-component signed integer vectors.
    DATA_TYPE_VEC2_SIGNED_16 = 11,
    DATA_TYPE_VEC2_SIGNED_32 = 12
};
10411 
getBitWidthStr(ConversionDataType type)10412 const string getBitWidthStr(ConversionDataType type)
10413 {
10414     switch (type)
10415     {
10416     case DATA_TYPE_SIGNED_8:
10417     case DATA_TYPE_UNSIGNED_8:
10418         return "8";
10419 
10420     case DATA_TYPE_SIGNED_16:
10421     case DATA_TYPE_UNSIGNED_16:
10422     case DATA_TYPE_FLOAT_16:
10423         return "16";
10424 
10425     case DATA_TYPE_SIGNED_32:
10426     case DATA_TYPE_UNSIGNED_32:
10427     case DATA_TYPE_FLOAT_32:
10428     case DATA_TYPE_VEC2_SIGNED_16:
10429         return "32";
10430 
10431     case DATA_TYPE_SIGNED_64:
10432     case DATA_TYPE_UNSIGNED_64:
10433     case DATA_TYPE_FLOAT_64:
10434     case DATA_TYPE_VEC2_SIGNED_32:
10435         return "64";
10436 
10437     default:
10438         DE_ASSERT(false);
10439     }
10440     return "";
10441 }
10442 
getByteWidthStr(ConversionDataType type)10443 const string getByteWidthStr(ConversionDataType type)
10444 {
10445     switch (type)
10446     {
10447     case DATA_TYPE_SIGNED_8:
10448     case DATA_TYPE_UNSIGNED_8:
10449         return "1";
10450 
10451     case DATA_TYPE_SIGNED_16:
10452     case DATA_TYPE_UNSIGNED_16:
10453     case DATA_TYPE_FLOAT_16:
10454         return "2";
10455 
10456     case DATA_TYPE_SIGNED_32:
10457     case DATA_TYPE_UNSIGNED_32:
10458     case DATA_TYPE_FLOAT_32:
10459     case DATA_TYPE_VEC2_SIGNED_16:
10460         return "4";
10461 
10462     case DATA_TYPE_SIGNED_64:
10463     case DATA_TYPE_UNSIGNED_64:
10464     case DATA_TYPE_FLOAT_64:
10465     case DATA_TYPE_VEC2_SIGNED_32:
10466         return "8";
10467 
10468     default:
10469         DE_ASSERT(false);
10470     }
10471     return "";
10472 }
10473 
isSigned(ConversionDataType type)10474 bool isSigned(ConversionDataType type)
10475 {
10476     switch (type)
10477     {
10478     case DATA_TYPE_SIGNED_8:
10479     case DATA_TYPE_SIGNED_16:
10480     case DATA_TYPE_SIGNED_32:
10481     case DATA_TYPE_SIGNED_64:
10482     case DATA_TYPE_FLOAT_16:
10483     case DATA_TYPE_FLOAT_32:
10484     case DATA_TYPE_FLOAT_64:
10485     case DATA_TYPE_VEC2_SIGNED_16:
10486     case DATA_TYPE_VEC2_SIGNED_32:
10487         return true;
10488 
10489     case DATA_TYPE_UNSIGNED_8:
10490     case DATA_TYPE_UNSIGNED_16:
10491     case DATA_TYPE_UNSIGNED_32:
10492     case DATA_TYPE_UNSIGNED_64:
10493         return false;
10494 
10495     default:
10496         DE_ASSERT(false);
10497     }
10498     return false;
10499 }
10500 
isInt(ConversionDataType type)10501 bool isInt(ConversionDataType type)
10502 {
10503     switch (type)
10504     {
10505     case DATA_TYPE_SIGNED_8:
10506     case DATA_TYPE_SIGNED_16:
10507     case DATA_TYPE_SIGNED_32:
10508     case DATA_TYPE_SIGNED_64:
10509     case DATA_TYPE_UNSIGNED_8:
10510     case DATA_TYPE_UNSIGNED_16:
10511     case DATA_TYPE_UNSIGNED_32:
10512     case DATA_TYPE_UNSIGNED_64:
10513         return true;
10514 
10515     case DATA_TYPE_FLOAT_16:
10516     case DATA_TYPE_FLOAT_32:
10517     case DATA_TYPE_FLOAT_64:
10518     case DATA_TYPE_VEC2_SIGNED_16:
10519     case DATA_TYPE_VEC2_SIGNED_32:
10520         return false;
10521 
10522     default:
10523         DE_ASSERT(false);
10524     }
10525     return false;
10526 }
10527 
isFloat(ConversionDataType type)10528 bool isFloat(ConversionDataType type)
10529 {
10530     switch (type)
10531     {
10532     case DATA_TYPE_SIGNED_8:
10533     case DATA_TYPE_SIGNED_16:
10534     case DATA_TYPE_SIGNED_32:
10535     case DATA_TYPE_SIGNED_64:
10536     case DATA_TYPE_UNSIGNED_8:
10537     case DATA_TYPE_UNSIGNED_16:
10538     case DATA_TYPE_UNSIGNED_32:
10539     case DATA_TYPE_UNSIGNED_64:
10540     case DATA_TYPE_VEC2_SIGNED_16:
10541     case DATA_TYPE_VEC2_SIGNED_32:
10542         return false;
10543 
10544     case DATA_TYPE_FLOAT_16:
10545     case DATA_TYPE_FLOAT_32:
10546     case DATA_TYPE_FLOAT_64:
10547         return true;
10548 
10549     default:
10550         DE_ASSERT(false);
10551     }
10552     return false;
10553 }
10554 
getTypeName(ConversionDataType type)10555 const string getTypeName(ConversionDataType type)
10556 {
10557     string prefix = isSigned(type) ? "" : "u";
10558 
10559     if (isInt(type))
10560         return prefix + "int" + getBitWidthStr(type);
10561     else if (isFloat(type))
10562         return prefix + "float" + getBitWidthStr(type);
10563     else if (type == DATA_TYPE_VEC2_SIGNED_16)
10564         return "i16vec2";
10565     else if (type == DATA_TYPE_VEC2_SIGNED_32)
10566         return "i32vec2";
10567     else
10568         DE_ASSERT(false);
10569 
10570     return "";
10571 }
10572 
getTestName(ConversionDataType from,ConversionDataType to,const char * suffix)10573 const string getTestName(ConversionDataType from, ConversionDataType to, const char *suffix)
10574 {
10575     const string fullSuffix(suffix == DE_NULL ? "" : string("_") + string(suffix));
10576 
10577     return getTypeName(from) + "_to_" + getTypeName(to) + fullSuffix;
10578 }
10579 
getAsmTypeName(ConversionDataType type,uint32_t elements=1)10580 const string getAsmTypeName(ConversionDataType type, uint32_t elements = 1)
10581 {
10582     string prefix;
10583 
10584     if (isInt(type))
10585         prefix = isSigned(type) ? "i" : "u";
10586     else if (isFloat(type))
10587         prefix = "f";
10588     else if (type == DATA_TYPE_VEC2_SIGNED_16)
10589         return "i16vec2";
10590     else if (type == DATA_TYPE_VEC2_SIGNED_32)
10591         return "v2i32";
10592     else
10593         DE_ASSERT(false);
10594     if ((isInt(type) || isFloat(type)) && elements == 2)
10595     {
10596         prefix = "v2" + prefix;
10597     }
10598 
10599     return prefix + getBitWidthStr(type);
10600 }
10601 
10602 template <typename T>
getSpecializedBuffer(int64_t number,uint32_t elements=1)10603 BufferSp getSpecializedBuffer(int64_t number, uint32_t elements = 1)
10604 {
10605     return BufferSp(new Buffer<T>(vector<T>(elements, (T)number)));
10606 }
10607 
getBuffer(ConversionDataType type,int64_t number,uint32_t elements=1)10608 BufferSp getBuffer(ConversionDataType type, int64_t number, uint32_t elements = 1)
10609 {
10610     switch (type)
10611     {
10612     case DATA_TYPE_SIGNED_8:
10613         return getSpecializedBuffer<int8_t>(number, elements);
10614     case DATA_TYPE_SIGNED_16:
10615         return getSpecializedBuffer<int16_t>(number, elements);
10616     case DATA_TYPE_SIGNED_32:
10617         return getSpecializedBuffer<int32_t>(number, elements);
10618     case DATA_TYPE_SIGNED_64:
10619         return getSpecializedBuffer<int64_t>(number, elements);
10620     case DATA_TYPE_UNSIGNED_8:
10621         return getSpecializedBuffer<uint8_t>(number, elements);
10622     case DATA_TYPE_UNSIGNED_16:
10623         return getSpecializedBuffer<uint16_t>(number, elements);
10624     case DATA_TYPE_UNSIGNED_32:
10625         return getSpecializedBuffer<uint32_t>(number, elements);
10626     case DATA_TYPE_UNSIGNED_64:
10627         return getSpecializedBuffer<uint64_t>(number, elements);
10628     case DATA_TYPE_FLOAT_16:
10629         return getSpecializedBuffer<uint16_t>(number, elements);
10630     case DATA_TYPE_FLOAT_32:
10631         return getSpecializedBuffer<uint32_t>(number, elements);
10632     case DATA_TYPE_FLOAT_64:
10633         return getSpecializedBuffer<uint64_t>(number, elements);
10634     case DATA_TYPE_VEC2_SIGNED_16:
10635         return getSpecializedBuffer<uint32_t>(number, elements);
10636     case DATA_TYPE_VEC2_SIGNED_32:
10637         return getSpecializedBuffer<uint64_t>(number, elements);
10638 
10639     default:
10640         TCU_THROW(InternalError, "Unimplemented type passed");
10641     }
10642 }
10643 
usesInt8(ConversionDataType from,ConversionDataType to)10644 bool usesInt8(ConversionDataType from, ConversionDataType to)
10645 {
10646     return (from == DATA_TYPE_SIGNED_8 || to == DATA_TYPE_SIGNED_8 || from == DATA_TYPE_UNSIGNED_8 ||
10647             to == DATA_TYPE_UNSIGNED_8);
10648 }
10649 
usesInt16(ConversionDataType from,ConversionDataType to)10650 bool usesInt16(ConversionDataType from, ConversionDataType to)
10651 {
10652     return (from == DATA_TYPE_SIGNED_16 || to == DATA_TYPE_SIGNED_16 || from == DATA_TYPE_UNSIGNED_16 ||
10653             to == DATA_TYPE_UNSIGNED_16 || from == DATA_TYPE_VEC2_SIGNED_16 || to == DATA_TYPE_VEC2_SIGNED_16);
10654 }
10655 
usesInt32(ConversionDataType from,ConversionDataType to)10656 bool usesInt32(ConversionDataType from, ConversionDataType to)
10657 {
10658     return (from == DATA_TYPE_SIGNED_32 || to == DATA_TYPE_SIGNED_32 || from == DATA_TYPE_UNSIGNED_32 ||
10659             to == DATA_TYPE_UNSIGNED_32 || from == DATA_TYPE_VEC2_SIGNED_32 || to == DATA_TYPE_VEC2_SIGNED_32);
10660 }
10661 
usesInt64(ConversionDataType from,ConversionDataType to)10662 bool usesInt64(ConversionDataType from, ConversionDataType to)
10663 {
10664     return (from == DATA_TYPE_SIGNED_64 || to == DATA_TYPE_SIGNED_64 || from == DATA_TYPE_UNSIGNED_64 ||
10665             to == DATA_TYPE_UNSIGNED_64);
10666 }
10667 
usesFloat16(ConversionDataType from,ConversionDataType to)10668 bool usesFloat16(ConversionDataType from, ConversionDataType to)
10669 {
10670     return (from == DATA_TYPE_FLOAT_16 || to == DATA_TYPE_FLOAT_16);
10671 }
10672 
usesFloat32(ConversionDataType from,ConversionDataType to)10673 bool usesFloat32(ConversionDataType from, ConversionDataType to)
10674 {
10675     return (from == DATA_TYPE_FLOAT_32 || to == DATA_TYPE_FLOAT_32);
10676 }
10677 
usesFloat64(ConversionDataType from,ConversionDataType to)10678 bool usesFloat64(ConversionDataType from, ConversionDataType to)
10679 {
10680     return (from == DATA_TYPE_FLOAT_64 || to == DATA_TYPE_FLOAT_64);
10681 }
10682 
getVulkanFeaturesAndExtensions(ConversionDataType from,ConversionDataType to,bool useStorageExt,VulkanFeatures & vulkanFeatures,vector<string> & extensions)10683 void getVulkanFeaturesAndExtensions(ConversionDataType from, ConversionDataType to, bool useStorageExt,
10684                                     VulkanFeatures &vulkanFeatures, vector<string> &extensions)
10685 {
10686     if (usesInt16(from, to) && !usesInt32(from, to))
10687         vulkanFeatures.coreFeatures.shaderInt16 = true;
10688 
10689     if (usesInt64(from, to))
10690         vulkanFeatures.coreFeatures.shaderInt64 = true;
10691 
10692     if (usesFloat64(from, to))
10693         vulkanFeatures.coreFeatures.shaderFloat64 = true;
10694 
10695     if ((usesInt16(from, to) || usesFloat16(from, to)) && useStorageExt)
10696     {
10697         extensions.push_back("VK_KHR_16bit_storage");
10698         vulkanFeatures.ext16BitStorage.storageBuffer16BitAccess = true;
10699     }
10700 
10701     if (usesFloat16(from, to) || usesInt8(from, to))
10702     {
10703         extensions.push_back("VK_KHR_shader_float16_int8");
10704 
10705         if (usesFloat16(from, to))
10706         {
10707             vulkanFeatures.extFloat16Int8.shaderFloat16 = true;
10708         }
10709 
10710         if (usesInt8(from, to))
10711         {
10712             vulkanFeatures.extFloat16Int8.shaderInt8 = true;
10713 
10714             extensions.push_back("VK_KHR_8bit_storage");
10715             vulkanFeatures.ext8BitStorage.storageBuffer8BitAccess = true;
10716         }
10717     }
10718 }
10719 
10720 struct ConvertCase
10721 {
ConvertCasevkt::SpirVAssembly::ConvertCase10722     ConvertCase(const string &instruction, ConversionDataType from, ConversionDataType to, int64_t number,
10723                 bool separateOutput = false, int64_t outputNumber = 0, const char *suffix = DE_NULL,
10724                 bool useStorageExt = true)
10725         : m_fromType(from)
10726         , m_toType(to)
10727         , m_elements(1)
10728         , m_useStorageExt(useStorageExt)
10729         , m_name(getTestName(from, to, suffix))
10730     {
10731         string caps;
10732         string decl;
10733         string exts;
10734 
10735         m_asmTypes["inStorageType"]  = getAsmTypeName(from);
10736         m_asmTypes["outStorageType"] = getAsmTypeName(to);
10737         m_asmTypes["inCast"]         = "OpCopyObject";
10738         m_asmTypes["outCast"]        = "OpCopyObject";
10739         // If the storage extensions are being avoided, tests instead uses
10740         // vectors so that they are easily convertible to 32-bit integers.
10741         // |m_elements| indicates the size of the vector. It modifies how many
10742         // items added to the buffers and converted in the tests.
10743         //
10744         // Currently only supports 1 (default) or 2 elements.
10745         if (!m_useStorageExt)
10746         {
10747             bool in_change  = false;
10748             bool out_change = false;
10749             if (usesFloat16(from, from) || usesInt16(from, from))
10750             {
10751                 m_asmTypes["inStorageType"] = "u32";
10752                 m_asmTypes["inCast"]        = "OpBitcast";
10753                 m_elements                  = 2;
10754                 in_change                   = true;
10755             }
10756             if (usesFloat16(to, to) || usesInt16(to, to))
10757             {
10758                 m_asmTypes["outStorageType"] = "u32";
10759                 m_asmTypes["outCast"]        = "OpBitcast";
10760                 m_elements                   = 2;
10761                 out_change                   = true;
10762             }
10763             if (in_change && !out_change)
10764             {
10765                 m_asmTypes["outStorageType"] = getAsmTypeName(to, m_elements);
10766             }
10767             if (!in_change && out_change)
10768             {
10769                 m_asmTypes["inStorageType"] = getAsmTypeName(from, m_elements);
10770             }
10771         }
10772 
10773         // Safety check for implementation.
10774         if (m_elements < 1 || m_elements > 2)
10775             TCU_THROW(InternalError, "Unsupported number of elements");
10776 
10777         m_asmTypes["inputType"]  = getAsmTypeName(from, m_elements);
10778         m_asmTypes["outputType"] = getAsmTypeName(to, m_elements);
10779 
10780         m_inputBuffer = getBuffer(from, number, m_elements);
10781         if (separateOutput)
10782             m_outputBuffer = getBuffer(to, outputNumber, m_elements);
10783         else
10784             m_outputBuffer = getBuffer(to, number, m_elements);
10785 
10786         if (usesInt8(from, to))
10787         {
10788             bool requiresInt8Capability = true;
10789             if (instruction == "OpUConvert" || instruction == "OpSConvert")
10790             {
10791                 // Conversions between 8 and 32 bit are provided by SPV_KHR_8bit_storage. The rest requires explicit Int8
10792                 if (usesInt32(from, to))
10793                     requiresInt8Capability = false;
10794             }
10795 
10796             caps += "OpCapability StorageBuffer8BitAccess\n";
10797             if (requiresInt8Capability)
10798                 caps += "OpCapability Int8\n";
10799 
10800             decl += "%i8         = OpTypeInt 8 1\n"
10801                     "%u8         = OpTypeInt 8 0\n";
10802 
10803             if (m_elements == 2)
10804             {
10805                 decl += "%v2i8       = OpTypeVector %i8 2\n"
10806                         "%v2u8       = OpTypeVector %u8 2\n";
10807             }
10808             exts += "OpExtension \"SPV_KHR_8bit_storage\"\n";
10809         }
10810 
10811         if (usesInt16(from, to))
10812         {
10813             bool requiresInt16Capability = true;
10814 
10815             if (instruction == "OpUConvert" || instruction == "OpSConvert" || instruction == "OpFConvert")
10816             {
10817                 // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10818                 if (usesInt32(from, to) || usesFloat32(from, to))
10819                     requiresInt16Capability = false;
10820             }
10821 
10822             decl += "%i16        = OpTypeInt 16 1\n"
10823                     "%u16        = OpTypeInt 16 0\n";
10824             if (m_elements == 2)
10825             {
10826                 decl += "%v2i16      = OpTypeVector %i16 2\n"
10827                         "%v2u16      = OpTypeVector %u16 2\n";
10828             }
10829             else
10830             {
10831                 decl += "%i16vec2    = OpTypeVector %i16 2\n";
10832             }
10833 
10834             // Conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10835             if (requiresInt16Capability || !m_useStorageExt)
10836                 caps += "OpCapability Int16\n";
10837         }
10838 
10839         if (usesFloat16(from, to))
10840         {
10841             decl += "%f16        = OpTypeFloat 16\n";
10842             if (m_elements == 2)
10843             {
10844                 decl += "%v2f16      = OpTypeVector %f16 2\n";
10845             }
10846 
10847             // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Float16
10848             if (!usesFloat32(from, to) || !m_useStorageExt)
10849                 caps += "OpCapability Float16\n";
10850         }
10851 
10852         if ((usesInt16(from, to) || usesFloat16(from, to)) && m_useStorageExt)
10853         {
10854             caps += "OpCapability StorageUniformBufferBlock16\n";
10855             exts += "OpExtension \"SPV_KHR_16bit_storage\"\n";
10856         }
10857 
10858         if (usesInt64(from, to))
10859         {
10860             caps += "OpCapability Int64\n";
10861             decl += "%i64        = OpTypeInt 64 1\n"
10862                     "%u64        = OpTypeInt 64 0\n";
10863             if (m_elements == 2)
10864             {
10865                 decl += "%v2i64      = OpTypeVector %i64 2\n"
10866                         "%v2u64      = OpTypeVector %u64 2\n";
10867             }
10868         }
10869 
10870         if (usesFloat64(from, to))
10871         {
10872             caps += "OpCapability Float64\n";
10873             decl += "%f64        = OpTypeFloat 64\n";
10874             if (m_elements == 2)
10875             {
10876                 decl += "%v2f64        = OpTypeVector %f64 2\n";
10877             }
10878         }
10879 
10880         m_asmTypes["datatype_capabilities"]    = caps;
10881         m_asmTypes["datatype_additional_decl"] = decl;
10882         m_asmTypes["datatype_extensions"]      = exts;
10883     }
10884 
10885     ConversionDataType m_fromType;
10886     ConversionDataType m_toType;
10887     uint32_t m_elements;
10888     bool m_useStorageExt;
10889     string m_name;
10890     map<string, string> m_asmTypes;
10891     BufferSp m_inputBuffer;
10892     BufferSp m_outputBuffer;
10893 };
10894 
getConvertCaseShaderStr(const string & instruction,const ConvertCase & convertCase,bool addVectors=false)10895 const string getConvertCaseShaderStr(const string &instruction, const ConvertCase &convertCase, bool addVectors = false)
10896 {
10897     map<string, string> params = convertCase.m_asmTypes;
10898 
10899     params["instruction"]  = instruction;
10900     params["inDecorator"]  = getByteWidthStr(convertCase.m_fromType);
10901     params["outDecorator"] = getByteWidthStr(convertCase.m_toType);
10902 
10903     std::string shader("OpCapability Shader\n"
10904                        "${datatype_capabilities}"
10905                        "${datatype_extensions:opt}"
10906                        "OpMemoryModel Logical GLSL450\n"
10907                        "OpEntryPoint GLCompute %main \"main\"\n"
10908                        "OpExecutionMode %main LocalSize 1 1 1\n"
10909                        "OpSource GLSL 430\n"
10910                        "OpName %main           \"main\"\n"
10911                        // Decorators
10912                        "OpDecorate %indata DescriptorSet 0\n"
10913                        "OpDecorate %indata Binding 0\n"
10914                        "OpDecorate %outdata DescriptorSet 0\n"
10915                        "OpDecorate %outdata Binding 1\n"
10916                        "OpDecorate %in_buf BufferBlock\n"
10917                        "OpDecorate %out_buf BufferBlock\n"
10918                        "OpMemberDecorate %in_buf 0 Offset 0\n"
10919                        "OpMemberDecorate %out_buf 0 Offset 0\n"
10920                        // Base types
10921                        "%void       = OpTypeVoid\n"
10922                        "%voidf      = OpTypeFunction %void\n"
10923                        "%u32        = OpTypeInt 32 0\n"
10924                        "%i32        = OpTypeInt 32 1\n"
10925                        "%f32        = OpTypeFloat 32\n"
10926                        "%v2i32      = OpTypeVector %i32 2\n"
10927                        "${datatype_additional_decl}");
10928     if (addVectors)
10929     {
10930         shader += "%v2u32 = OpTypeVector %u32 2\n"
10931                   "%v2f32 = OpTypeVector %f32 2\n";
10932     }
10933     shader += "%uvec3      = OpTypeVector %u32 3\n"
10934               // Derived types
10935               "%in_ptr     = OpTypePointer Uniform %${inStorageType}\n"
10936               "%out_ptr    = OpTypePointer Uniform %${outStorageType}\n"
10937               "%in_buf     = OpTypeStruct %${inStorageType}\n"
10938               "%out_buf    = OpTypeStruct %${outStorageType}\n"
10939               "%in_bufptr  = OpTypePointer Uniform %in_buf\n"
10940               "%out_bufptr = OpTypePointer Uniform %out_buf\n"
10941               "%indata     = OpVariable %in_bufptr Uniform\n"
10942               "%outdata    = OpVariable %out_bufptr Uniform\n"
10943               // Constants
10944               "%zero       = OpConstant %i32 0\n"
10945               // Main function
10946               "%main       = OpFunction %void None %voidf\n"
10947               "%label      = OpLabel\n"
10948               "%inloc      = OpAccessChain %in_ptr %indata %zero\n"
10949               "%outloc     = OpAccessChain %out_ptr %outdata %zero\n"
10950               "%inval      = OpLoad %${inStorageType} %inloc\n"
10951               "%in_cast    = ${inCast} %${inputType} %inval\n"
10952               "%conv       = ${instruction} %${outputType} %in_cast\n"
10953               "%out_cast   = ${outCast} %${outStorageType} %conv\n"
10954               "              OpStore %outloc %out_cast\n"
10955               "              OpReturn\n"
10956               "              OpFunctionEnd\n";
10957 
10958     return StringTemplate(shader).specialize(params);
10959 }
10960 
createConvertCases(vector<ConvertCase> & testCases,const string & instruction)10961 void createConvertCases(vector<ConvertCase> &testCases, const string &instruction)
10962 {
10963     if (instruction == "OpUConvert")
10964     {
10965         // Convert unsigned int to unsigned int
10966         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_UNSIGNED_16, 42));
10967         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_UNSIGNED_32, 73));
10968         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_UNSIGNED_64, 121));
10969 
10970         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_UNSIGNED_8, 33));
10971         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_UNSIGNED_32, 60653));
10972         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_UNSIGNED_64, 17991));
10973 
10974         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_UNSIGNED_64, 904256275));
10975         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_UNSIGNED_16, 6275));
10976         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_UNSIGNED_8, 17));
10977 
10978         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_UNSIGNED_32, 701256243));
10979         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_UNSIGNED_16, 4741));
10980         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_UNSIGNED_8, 65));
10981 
10982         // Zero extension for int->uint
10983         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_16, 56));
10984         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_32, -47, true, 209));
10985         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_64, -5, true, 251));
10986         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_32, 14669));
10987         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_64, -3341, true, 62195));
10988         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_64, 973610259));
10989 
10990         // Truncate for int->uint
10991         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_8, -25711, true, 145));
10992         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_8, 103));
10993         testCases.push_back(
10994             ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_8, -1067742499291926803ll, true, 237));
10995         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_16, 12382));
10996         testCases.push_back(
10997             ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_32, -972812359, true, 3322154937u));
10998         testCases.push_back(
10999             ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_16, -1067742499291926803ll, true, 61165));
11000     }
11001     else if (instruction == "OpSConvert")
11002     {
11003         // Sign extension int->int
11004         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_SIGNED_16, -30));
11005         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_SIGNED_32, 55));
11006         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_SIGNED_64, -3));
11007         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_SIGNED_32, 14669));
11008         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_SIGNED_64, -3341));
11009         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_SIGNED_64, 973610259));
11010 
11011         // Truncate for int->int
11012         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_SIGNED_8, 81));
11013         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_SIGNED_8, -93));
11014         testCases.push_back(
11015             ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_SIGNED_8, 3182748172687672ll, true, 56));
11016         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_SIGNED_16, 12382));
11017         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_SIGNED_32, -972812359));
11018         testCases.push_back(
11019             ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_SIGNED_16, -1067742499291926803ll, true, -4371));
11020 
11021         // Sign extension for int->uint
11022         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_16, 56));
11023         testCases.push_back(
11024             ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_32, -47, true, 4294967249u));
11025         testCases.push_back(
11026             ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_64, -5, true, 18446744073709551611ull));
11027         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_32, 14669));
11028         testCases.push_back(
11029             ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_64, -3341, true, 18446744073709548275ull));
11030         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_64, 973610259));
11031 
11032         // Truncate for int->uint
11033         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_8, -25711, true, 145));
11034         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_8, 103));
11035         testCases.push_back(
11036             ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_8, -1067742499291926803ll, true, 237));
11037         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_16, 12382));
11038         testCases.push_back(
11039             ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_32, -972812359, true, 3322154937u));
11040         testCases.push_back(
11041             ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_16, -1067742499291926803ll, true, 61165));
11042 
11043         // Sign extension for uint->int
11044         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_SIGNED_16, 71));
11045         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_SIGNED_32, 201, true, -55));
11046         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_SIGNED_64, 188, true, -68));
11047         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_SIGNED_32, 14669));
11048         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_SIGNED_64, 62195, true, -3341));
11049         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_SIGNED_64, 973610259));
11050 
11051         // Truncate for uint->int
11052         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_SIGNED_8, 67));
11053         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_SIGNED_8, 133, true, -123));
11054         testCases.push_back(
11055             ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_SIGNED_8, 836927654193256494ull, true, 46));
11056         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_SIGNED_16, 12382));
11057         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_SIGNED_32,
11058                                         18446744072736739257ull, true, -972812359));
11059         testCases.push_back(
11060             ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_SIGNED_16, 17379001574417624813ull, true, -4371));
11061 
11062         // Convert i16vec2 to i32vec2 and vice versa
11063         // Unsigned values are used here to represent negative signed values and to allow defined shifting behaviour.
11064         // The actual signed value -32123 is used here as uint16 value 33413 and uint32 value 4294935173
11065         testCases.push_back(ConvertCase(instruction, DATA_TYPE_VEC2_SIGNED_16, DATA_TYPE_VEC2_SIGNED_32,
11066                                         (33413u << 16) | 27593, true, (4294935173ull << 32) | 27593));
11067         testCases.push_back(ConvertCase(instruction, DATA_TYPE_VEC2_SIGNED_32, DATA_TYPE_VEC2_SIGNED_16,
11068                                         (4294935173ull << 32) | 27593, true, (33413u << 16) | 27593));
11069     }
11070     else if (instruction == "OpFConvert")
11071     {
11072         // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
11073         testCases.push_back(
11074             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_FLOAT_64, 0x449a4000, true, 0x4093480000000000));
11075         testCases.push_back(
11076             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_FLOAT_32, 0x4093480000000000, true, 0x449a4000));
11077 
11078         // Conversion to/from 32-bit floats are supported by both 16-bit
11079         // storage and Float16. The tests are duplicated to exercise both
11080         // cases.
11081         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_FLOAT_16, 0x449a4000, true, 0x64D2));
11082         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_32, 0x64D2, true, 0x449a4000));
11083         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_FLOAT_16, 0x449a4000, true, 0x64D2,
11084                                         "no_storage", false));
11085         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_32, 0x64D2, true, 0x449a4000,
11086                                         "no_storage", false));
11087 
11088         testCases.push_back(
11089             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_64, 0x64D2, true, 0x4093480000000000));
11090         testCases.push_back(
11091             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_FLOAT_16, 0x4093480000000000, true, 0x64D2));
11092         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_64, 0x64D2, true,
11093                                         0x4093480000000000, "no_storage", false));
11094         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_FLOAT_16, 0x4093480000000000, true,
11095                                         0x64D2, "no_storage", false));
11096     }
11097     else if (instruction == "OpConvertFToU")
11098     {
11099         // Normal numbers from uint8 range
11100         testCases.push_back(
11101             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x5020, true, 33, "33", false));
11102         testCases.push_back(
11103             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x503F, true, 33, "33rtz", false));
11104         testCases.push_back(
11105             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x42280000, true, 42, "42"));
11106         testCases.push_back(
11107             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x422BFFFF, true, 42, "42rtz"));
11108         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x4067800000000000ull,
11109                                         true, 188, "188"));
11110         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x40679FFFFFFFFFFFull,
11111                                         true, 188, "188rtz"));
11112 
11113         // Maximum uint8 value
11114         testCases.push_back(
11115             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x5BF8, true, 255, "max", false));
11116         testCases.push_back(
11117             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x5BFF, true, 255, "maxrtz", false));
11118         testCases.push_back(
11119             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x437F0000, true, 255, "max"));
11120         testCases.push_back(
11121             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x437FFFFF, true, 255, "maxrtz"));
11122         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x406FE00000000000ull,
11123                                         true, 255, "max"));
11124         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x406FFFFFFFFFFFFFull,
11125                                         true, 255, "maxrtz"));
11126 
11127         // +0
11128         testCases.push_back(
11129             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x0000, true, 0, "p0", false));
11130         testCases.push_back(
11131             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x00000000, true, 0, "p0"));
11132         testCases.push_back(
11133             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x0000000000000000ull, true, 0, "p0"));
11134 
11135         // -0
11136         testCases.push_back(
11137             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x8000, true, 0, "m0", false));
11138         testCases.push_back(
11139             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x80000000, true, 0, "m0"));
11140         testCases.push_back(
11141             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x8000000000000000ull, true, 0, "m0"));
11142 
11143         // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
11144         testCases.push_back(
11145             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x64D2, true, 1234, "1234", false));
11146         testCases.push_back(
11147             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x64D2, true, 1234, "1234", false));
11148         testCases.push_back(
11149             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x64D2, true, 1234, "1234", false));
11150 
11151         // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
11152         testCases.push_back(
11153             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x7BFF, true, 65504, "max", false));
11154         testCases.push_back(
11155             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x7BFF, true, 65504, "max", false));
11156         testCases.push_back(
11157             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x7BFF, true, 65504, "max", false));
11158 
11159         // Show round to zero behaviour
11160         // Example: see https://float.exposed/0x58ff
11161         testCases.push_back(
11162             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x44FF, true, 4, "p4rtz", false));
11163         testCases.push_back(
11164             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x58FF, true, 159, "p159rtz", false));
11165         testCases.push_back(
11166             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x58FF, true, 159, "p159rtz", false));
11167 
11168         // +0
11169         testCases.push_back(
11170             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x0000, true, 0, "p0", false));
11171         testCases.push_back(
11172             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x0000, true, 0, "p0", false));
11173         testCases.push_back(
11174             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x0000, true, 0, "p0", false));
11175 
11176         // -0
11177         testCases.push_back(
11178             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x8000, true, 0, "m0", false));
11179         testCases.push_back(
11180             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x8000, true, 0, "m0", false));
11181         testCases.push_back(
11182             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x8000, true, 0, "m0", false));
11183 
11184         testCases.push_back(
11185             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_16, 0x449a4000, true, 1234));
11186         testCases.push_back(
11187             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_16, 0x449a5fff, true, 1234, "rtz"));
11188         testCases.push_back(
11189             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_32, 0x449a4000, true, 1234));
11190         testCases.push_back(
11191             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_32, 0x449a5fff, true, 1234, "rtz"));
11192         testCases.push_back(
11193             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_64, 0x449a4000, true, 1234));
11194         testCases.push_back(
11195             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_64, 0x449a5fff, true, 1234, "rtz"));
11196         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_64, 0x51b9ad78, true,
11197                                         99684909056ll, "large"));
11198         testCases.push_back(
11199             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_16, 0x4093480000000000, true, 1234));
11200         testCases.push_back(
11201             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_16, 0x40934bffffffffff, true, 1234, "rtz"));
11202         testCases.push_back(
11203             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_32, 0x4093480000000000, true, 1234));
11204         testCases.push_back(
11205             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_32, 0x40934bffffffffff, true, 1234, "rtz"));
11206         testCases.push_back(
11207             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_64, 0x4093480000000000, true, 1234));
11208         testCases.push_back(
11209             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_64, 0x40934bffffffffff, true, 1234, "rtz"));
11210     }
11211     else if (instruction == "OpConvertUToF")
11212     {
11213         // Normal numbers from uint8 range
11214         testCases.push_back(
11215             ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_16, 116, true, 0x5740, "116", false));
11216         testCases.push_back(
11217             ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_32, 232, true, 0x43680000, "232"));
11218         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_64, 164, true,
11219                                         0x4064800000000000ull, "164"));
11220 
11221         // Maximum uint8 value
11222         testCases.push_back(
11223             ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_16, 255, true, 0x5BF8, "max", false));
11224         testCases.push_back(
11225             ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_32, 255, true, 0x437F0000, "max"));
11226         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_64, 255, true,
11227                                         0x406FE00000000000ull, "max"));
11228 
        // All hexadecimal values below represent 1234.0 as a 16-bit IEEE 754 float
11230         testCases.push_back(
11231             ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_16, 1234, true, 0x64D2, "1234", false));
11232         testCases.push_back(
11233             ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_16, 1234, true, 0x64D2, "1234", false));
11234         testCases.push_back(
11235             ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_16, 1234, true, 0x64D2, "1234", false));
11236 
11237         // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
11238         testCases.push_back(
11239             ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
11240         testCases.push_back(
11241             ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
11242         testCases.push_back(
11243             ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
11244 
11245         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 4294967296ll, true,
11246                                         0x4f800000, "4294967296", false));
11247         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_64, 4294967296ll, true,
11248                                         0x41f0000000000000, "4294967296", false));
11249 
11250         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 0xffffff0000000000,
11251                                         true, 0x5f7fffff, "max", false));
11252 
11253         testCases.push_back(
11254             ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_32, 1234, true, 0x449a4000));
11255         testCases.push_back(
11256             ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_64, 1234, true, 0x4093480000000000));
11257         testCases.push_back(
11258             ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_32, 1234, true, 0x449a4000));
11259         testCases.push_back(
11260             ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_64, 1234, true, 0x4093480000000000));
11261         testCases.push_back(
11262             ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 1234, true, 0x449a4000));
11263         testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 99684909056ll, true,
11264                                         0x51b9ad78, "large"));
11265         testCases.push_back(
11266             ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_64, 1234, true, 0x4093480000000000));
11267     }
11268     else if (instruction == "OpConvertFToS")
11269     {
11270         // Normal numbers from int8 range
11271         testCases.push_back(
11272             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xC980, true, -11, "m11", false));
11273         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xC9e5,
11274                                         /*-11.7890625*/ true, -11, "m11rtz", false));
11275         testCases.push_back(
11276             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC2140000, true, -37, "m37"));
11277         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC2178000,
11278                                         /*-37.875*/ true, -37, "m37rtz"));
11279         testCases.push_back(
11280             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC050800000000000ull, true, -66, "m66"));
11281         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC050B80000000000ull,
11282                                         /*-66.875*/ true, -66, "m66rtz"));
11283 
11284         // Minimum int8 value
11285         testCases.push_back(
11286             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xD800, true, -128, "min", false));
11287         testCases.push_back(
11288             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xD807, true, -128, "minrtz", false));
11289         testCases.push_back(
11290             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC3000000, true, -128, "min"));
11291         testCases.push_back(
11292             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC300e003, true, -128, "minrtz"));
11293         testCases.push_back(
11294             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC060000000000000ull, true, -128, "min"));
11295         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC0601E4FE0000001ull,
11296                                         true, -128, "minrtz"));
11297 
11298         // Maximum int8 value
11299         testCases.push_back(
11300             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x57F0, true, 127, "max", false));
11301         testCases.push_back(
11302             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x57FF, true, 127, "maxrtz", false));
11303         testCases.push_back(
11304             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x42FE0000, true, 127, "max"));
11305         testCases.push_back(
11306             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x42FFFFFF, true, 127, "maxrtz"));
11307         testCases.push_back(
11308             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x405FC00000000000ull, true, 127, "max"));
11309         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x405FFFFFFFFFFFFFull,
11310                                         true, 127, "maxrtz"));
11311 
11312         // +0
11313         testCases.push_back(
11314             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x0000, true, 0, "p0", false));
11315         testCases.push_back(
11316             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x00000000, true, 0, "p0"));
11317         testCases.push_back(
11318             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x0000000000000000ull, true, 0, "p0"));
11319 
11320         // -0
11321         testCases.push_back(
11322             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x8000, true, 0, "m0", false));
11323         testCases.push_back(
11324             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x80000000, true, 0, "m0"));
11325         testCases.push_back(
11326             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x8000000000000000ull, true, 0, "m0"));
11327 
        // All hexadecimal values below represent -1234.0 as a 16-bit IEEE 754 float
11329         testCases.push_back(
11330             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0xE4D2, true, -1234, "m1234", false));
11331         testCases.push_back(
11332             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0xE4D2, true, -1234, "m1234", false));
11333         testCases.push_back(
11334             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0xE4D2, true, -1234, "m1234", false));
11335 
11336         // 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
11337         // 0xFBFF = 1111 1011 1111 1111 = 1 11110 1111111111 = -65504
11338         testCases.push_back(
11339             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0xF800, true, -32768, "min", false));
11340         testCases.push_back(
11341             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0xFBFF, true, -65504, "min", false));
11342         testCases.push_back(
11343             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0xFBFF, true, -65504, "min", false));
11344 
11345         // 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
11346         // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
11347         testCases.push_back(
11348             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x77FF, true, 32752, "max", false));
11349         testCases.push_back(
11350             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x7BFF, true, 65504, "max", false));
11351         testCases.push_back(
11352             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x7BFF, true, 65504, "max", false));
11353 
11354         // Show round to zero behaviour, from negative side.
11355         // Example: see https://float.exposed/0xd8ff
11356         testCases.push_back(
11357             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0xC4FF, true, -4, "m4rtz", false));
11358         testCases.push_back(
11359             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0xD8FF, true, -159, "m159rtz", false));
11360         testCases.push_back(
11361             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0xD8FF, true, -159, "m159rtz", false));
11362 
11363         // Show round to zero behaviour, from positive side.
11364         // Example: see https://float.exposed/0x58ff
11365         testCases.push_back(
11366             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x44FF, true, 4, "p4rtz", false));
11367         testCases.push_back(
11368             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x58FF, true, 159, "p159rtz", false));
11369         testCases.push_back(
11370             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x58FF, true, 159, "p159rtz", false));
11371 
11372         // +0
11373         testCases.push_back(
11374             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x0000, true, 0, "p0", false));
11375         testCases.push_back(
11376             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x0000, true, 0, "p0", false));
11377         testCases.push_back(
11378             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x0000, true, 0, "p0", false));
11379 
11380         // -0
11381         testCases.push_back(
11382             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x8000, true, 0, "m0", false));
11383         testCases.push_back(
11384             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x8000, true, 0, "m0", false));
11385         testCases.push_back(
11386             ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x8000, true, 0, "m0", false));
11387 
11388         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc49a4000, true, -1234));
11389         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_32, 0xc49a4000, true, -1234));
11390         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_64, 0xc49a4000, true, -1234));
11391         testCases.push_back(
11392             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc49a5f00, true, -1234, "rtz"));
11393         testCases.push_back(
11394             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_32, 0xc49a5f00, true, -1234, "rtz"));
11395         testCases.push_back(
11396             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_64, 0xc49a5f00, true, -1234, "rtz"));
11397         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_64, 0xd1b9ad78, true,
11398                                         -99684909056ll, "largepos"));
11399         testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_64, 0x51b9ad78, true,
11400                                         99684909056ll, "largeneg"));
11401         testCases.push_back(
11402             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_16, 0xc093480000000000, true, -1234));
11403         testCases.push_back(
11404             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_32, 0xc093480000000000, true, -1234));
11405         testCases.push_back(
11406             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_64, 0xc093480000000000, true, -1234));
11407         testCases.push_back(
11408             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_16, 0xc0934bff000000ff, true, -1234, "rtz"));
11409         testCases.push_back(
11410             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_32, 0xc0934bff000000ff, true, -1234, "rtz"));
11411         testCases.push_back(
11412             ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_64, 0xc0934bff000000ff, true, -1234, "rtz"));
11413         testCases.push_back(
11414             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0x453b9000, true, 3001, "p3001"));
11415         testCases.push_back(
11416             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0x453b9fff, true, 3001, "p3001rtz"));
11417         testCases.push_back(
11418             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc53b9000, true, -3001, "m3001"));
11419         testCases.push_back(
11420             ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc53b9fff, true, -3001, "m3001rtz"));
11421     }
11422     else if (instruction == "OpConvertSToF")
11423     {
11424         // Normal numbers from int8 range
11425         testCases.push_back(
11426             ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_16, -12, true, 0xCA00, "m21", false));
11427         testCases.push_back(
11428             ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_32, -21, true, 0xC1A80000, "m21"));
11429         testCases.push_back(
11430             ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_64, -99, true, 0xC058C00000000000ull, "m99"));
11431 
11432         // Minimum int8 value
11433         testCases.push_back(
11434             ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_16, -128, true, 0xD800, "min", false));
11435         testCases.push_back(
11436             ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_32, -128, true, 0xC3000000, "min"));
11437         testCases.push_back(
11438             ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_64, -128, true, 0xC060000000000000ull, "min"));
11439 
11440         // Maximum int8 value
11441         testCases.push_back(
11442             ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_16, 127, true, 0x57F0, "max", false));
11443         testCases.push_back(
11444             ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_32, 127, true, 0x42FE0000, "max"));
11445         testCases.push_back(
11446             ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_64, 127, true, 0x405FC00000000000ull, "max"));
11447 
        // All hexadecimal values below represent -1234.0 as a 16-bit IEEE 754 float
11449         testCases.push_back(
11450             ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_16, -1234, true, 0xE4D2, "m1234", false));
11451         testCases.push_back(
11452             ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, -1234, true, 0xE4D2, "m1234", false));
11453         testCases.push_back(
11454             ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, -1234, true, 0xE4D2, "m1234", false));
11455 
11456         // 0x7800 = 0111 1000 0000 0000 = 0 11110 0000000000 = 32768
11457         testCases.push_back(
11458             ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, 32768, true, 0x7800, "p32768", false));
11459         testCases.push_back(
11460             ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, 32768, true, 0x7800, "p32768", false));
11461 
11462         // 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
11463         testCases.push_back(
11464             ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, -32768, true, 0xF800, "m32768", false));
11465         testCases.push_back(
11466             ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, -32768, true, 0xF800, "m32768", false));
11467 
        // 0xFBFF = 1111 1011 1111 1111 = 1 11110 1111111111 = -65504
11469         testCases.push_back(
11470             ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_16, -32768, true, 0xF800, "min", false));
11471         testCases.push_back(
11472             ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, -65504, true, 0xFBFF, "min", false));
11473         testCases.push_back(
11474             ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, -65504, true, 0xFBFF, "min", false));
11475 
11476         // 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
11477         // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
11478         testCases.push_back(
11479             ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_16, 32752, true, 0x77FF, "max", false));
11480         testCases.push_back(
11481             ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
11482         testCases.push_back(
11483             ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
11484 
11485         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, 4294967296ll, true,
11486                                         0x4f800000, "p4294967296", false));
11487         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_64, 4294967296ll, true,
11488                                         0x41f0000000000000, "p4294967296", false));
11489         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -4294967296ll, true,
11490                                         0xcf800000, "m4294967296", false));
11491         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_64, -4294967296ll, true,
11492                                         0xc1f0000000000000, "m4294967296", false));
11493 
11494         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, 0x7fffff8000000000, true,
11495                                         0x5effffff, "max", false));
11496         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -0x7fffff8000000000, true,
11497                                         0xdeffffff, "min", false));
11498 
11499         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_32, -1234, true, 0xc49a4000));
11500         testCases.push_back(
11501             ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_64, -1234, true, 0xc093480000000000));
11502         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_32, -1234, true, 0xc49a4000));
11503         testCases.push_back(
11504             ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_64, -1234, true, 0xc093480000000000));
11505         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -1234, true, 0xc49a4000));
11506         testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -99684909056ll, true,
11507                                         0xd1b9ad78, "large"));
11508         testCases.push_back(
11509             ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_64, -1234, true, 0xc093480000000000));
11510     }
11511     else
11512         DE_FATAL("Unknown instruction");
11513 }
11514 
getConvertCaseFragments(string instruction,const ConvertCase & convertCase)11515 const map<string, string> getConvertCaseFragments(string instruction, const ConvertCase &convertCase)
11516 {
11517     map<string, string> params = convertCase.m_asmTypes;
11518     map<string, string> fragments;
11519 
11520     params["instruction"] = instruction;
11521     params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
11522 
11523     const StringTemplate decoration("      OpDecorate %SSBOi DescriptorSet 0\n"
11524                                     "      OpDecorate %SSBOo DescriptorSet 0\n"
11525                                     "      OpDecorate %SSBOi Binding 0\n"
11526                                     "      OpDecorate %SSBOo Binding 1\n"
11527                                     "      OpDecorate %s_SSBOi Block\n"
11528                                     "      OpDecorate %s_SSBOo Block\n"
11529                                     "OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11530                                     "OpMemberDecorate %s_SSBOo 0 Offset 0\n");
11531 
11532     const StringTemplate pre_main("${datatype_additional_decl:opt}"
11533                                   "    %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11534                                   "   %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11535                                   "   %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11536                                   "   %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11537                                   " %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11538                                   " %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11539                                   "     %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11540                                   "     %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
11541 
11542     const StringTemplate testfun("%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11543                                  "%param      = OpFunctionParameter %v4f32\n"
11544                                  "%label      = OpLabel\n"
11545                                  "%iLoc       = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11546                                  "%oLoc       = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11547                                  "%valIn      = OpLoad %${inStorageType} %iLoc\n"
11548                                  "%valInCast  = ${inCast} %${inputType} %valIn\n"
11549                                  "%conv       = ${instruction} %${outputType} %valInCast\n"
11550                                  "%valOutCast = ${outCast} %${outStorageType} %conv\n"
11551                                  "              OpStore %oLoc %valOutCast\n"
11552                                  "              OpReturnValue %param\n"
11553                                  "              OpFunctionEnd\n");
11554 
11555     params["datatype_extensions"] =
11556         params["datatype_extensions"] + "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
11557 
11558     fragments["capability"] = params["datatype_capabilities"];
11559     fragments["extension"]  = params["datatype_extensions"];
11560     fragments["decoration"] = decoration.specialize(params);
11561     fragments["pre_main"]   = pre_main.specialize(params);
11562     fragments["testfun"]    = testfun.specialize(params);
11563 
11564     return fragments;
11565 }
11566 
getConvertCaseFragmentsNoStorage(string instruction,const ConvertCase & convertCase)11567 const map<string, string> getConvertCaseFragmentsNoStorage(string instruction, const ConvertCase &convertCase)
11568 {
11569     map<string, string> params = convertCase.m_asmTypes;
11570     map<string, string> fragments;
11571 
11572     params["instruction"] = instruction;
11573     params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
11574 
11575     const StringTemplate decoration("      OpDecorate %SSBOi DescriptorSet 0\n"
11576                                     "      OpDecorate %SSBOo DescriptorSet 0\n"
11577                                     "      OpDecorate %SSBOi Binding 0\n"
11578                                     "      OpDecorate %SSBOo Binding 1\n"
11579                                     "      OpDecorate %s_SSBOi Block\n"
11580                                     "      OpDecorate %s_SSBOo Block\n"
11581                                     "OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11582                                     "OpMemberDecorate %s_SSBOo 0 Offset 0\n");
11583 
11584     const StringTemplate pre_main("${datatype_additional_decl:opt}"
11585                                   "    %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11586                                   "   %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11587                                   "   %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11588                                   "   %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11589                                   " %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11590                                   " %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11591                                   "     %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11592                                   "     %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
11593 
11594     const StringTemplate testfun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11595                                  "%param     = OpFunctionParameter %v4f32\n"
11596                                  "%label     = OpLabel\n"
11597                                  "%iLoc      = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11598                                  "%oLoc      = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11599                                  "%inval      = OpLoad %${inStorageType} %iLoc\n"
11600                                  "%in_cast    = ${inCast} %${inputType} %inval\n"
11601                                  "%conv       = ${instruction} %${outputType} %in_cast\n"
11602                                  "%out_cast   = ${outCast} %${outStorageType} %conv\n"
11603                                  "              OpStore %oLoc %out_cast\n"
11604                                  "              OpReturnValue %param\n"
11605                                  "              OpFunctionEnd\n");
11606 
11607     params["datatype_extensions"] =
11608         params["datatype_extensions"] + "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
11609 
11610     fragments["capability"] = params["datatype_capabilities"];
11611     fragments["extension"]  = params["datatype_extensions"];
11612     fragments["decoration"] = decoration.specialize(params);
11613     fragments["pre_main"]   = pre_main.specialize(params);
11614     fragments["testfun"]    = testfun.specialize(params);
11615     return fragments;
11616 }
11617 
11618 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in compute shaders
createConvertComputeTests(tcu::TestContext & testCtx,const string & instruction,const string & name)11619 tcu::TestCaseGroup *createConvertComputeTests(tcu::TestContext &testCtx, const string &instruction, const string &name)
11620 {
11621     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, name.c_str()));
11622     vector<ConvertCase> testCases;
11623     createConvertCases(testCases, instruction);
11624 
11625     for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
11626     {
11627         ComputeShaderSpec spec;
11628         spec.assembly      = getConvertCaseShaderStr(instruction, *test, true);
11629         spec.numWorkGroups = IVec3(1, 1, 1);
11630         spec.inputs.push_back(test->m_inputBuffer);
11631         spec.outputs.push_back(test->m_outputBuffer);
11632 
11633         getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt,
11634                                        spec.requestedVulkanFeatures, spec.extensions);
11635 
11636         group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), spec));
11637     }
11638     return group.release();
11639 }
11640 
11641 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in graphics shaders
createConvertGraphicsTests(tcu::TestContext & testCtx,const string & instruction,const string & name)11642 tcu::TestCaseGroup *createConvertGraphicsTests(tcu::TestContext &testCtx, const string &instruction, const string &name)
11643 {
11644     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, name.c_str()));
11645     vector<ConvertCase> testCases;
11646     createConvertCases(testCases, instruction);
11647 
11648     for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
11649     {
11650         map<string, string> fragments = (test->m_useStorageExt) ? getConvertCaseFragments(instruction, *test) :
11651                                                                   getConvertCaseFragmentsNoStorage(instruction, *test);
11652         VulkanFeatures vulkanFeatures;
11653         GraphicsResources resources;
11654         vector<string> extensions;
11655         SpecConstants noSpecConstants;
11656         PushConstants noPushConstants;
11657         GraphicsInterfaces noInterfaces;
11658         tcu::RGBA defaultColors[4];
11659 
11660         getDefaultColors(defaultColors);
11661         resources.inputs.push_back(Resource(test->m_inputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11662         resources.outputs.push_back(Resource(test->m_outputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11663         extensions.push_back("VK_KHR_storage_buffer_storage_class");
11664 
11665         getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, vulkanFeatures,
11666                                        extensions);
11667 
11668         vulkanFeatures.coreFeatures.vertexPipelineStoresAndAtomics = true;
11669         vulkanFeatures.coreFeatures.fragmentStoresAndAtomics       = true;
11670 
11671         createTestsForAllStages(test->m_name, defaultColors, defaultColors, fragments, noSpecConstants, noPushConstants,
11672                                 resources, noInterfaces, extensions, vulkanFeatures, group.get());
11673     }
11674     return group.release();
11675 }
11676 
11677 // Constant-Creation Instructions: OpConstant, OpConstantComposite
createOpConstantFloat16Tests(tcu::TestContext & testCtx)11678 tcu::TestCaseGroup *createOpConstantFloat16Tests(tcu::TestContext &testCtx)
11679 {
11680     de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests(new tcu::TestCaseGroup(testCtx, "opconstant"));
11681     RGBA inputColors[4];
11682     RGBA outputColors[4];
11683     vector<string> extensions;
11684     GraphicsResources resources;
11685     VulkanFeatures features;
11686 
11687     const char functionStart[] = "%test_code             = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11688                                  "%param1                = OpFunctionParameter %v4f32\n"
11689                                  "%lbl                   = OpLabel\n";
11690 
11691     const char functionEnd[] = "%transformed_param_32  = OpFConvert %v4f32 %transformed_param\n"
11692                                "                         OpReturnValue %transformed_param_32\n"
11693                                "                         OpFunctionEnd\n";
11694 
11695     struct NameConstantsCode
11696     {
11697         string name;
11698         string constants;
11699         string code;
11700     };
11701 
11702 #define FLOAT_16_COMMON_TYPES_AND_CONSTS                                                       \
11703     "%f16                  = OpTypeFloat 16\n"                                                 \
11704     "%c_f16_0              = OpConstant %f16 0.0\n"                                            \
11705     "%c_f16_0_5            = OpConstant %f16 0.5\n"                                            \
11706     "%c_f16_1              = OpConstant %f16 1.0\n"                                            \
11707     "%v4f16                = OpTypeVector %f16 4\n"                                            \
11708     "%fp_f16               = OpTypePointer Function %f16\n"                                    \
11709     "%fp_v4f16             = OpTypePointer Function %v4f16\n"                                  \
11710     "%c_v4f16_1_1_1_1      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n" \
11711     "%a4f16                = OpTypeArray %f16 %c_u32_4\n"
11712 
11713     NameConstantsCode tests[] = {
11714         {"vec4",
11715 
11716          FLOAT_16_COMMON_TYPES_AND_CONSTS
11717          "%cval                 = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_0\n",
11718          "%param1_16            = OpFConvert %v4f16 %param1\n"
11719          "%transformed_param    = OpFAdd %v4f16 %param1_16 %cval\n"},
11720         {
11721             "struct",
11722 
11723             FLOAT_16_COMMON_TYPES_AND_CONSTS
11724             "%stype                = OpTypeStruct %v4f16 %f16\n"
11725             "%fp_stype             = OpTypePointer Function %stype\n"
11726             "%f16_n_1              = OpConstant %f16 -1.0\n"
11727             "%f16_1_5              = OpConstant %f16 !0x3e00\n" // +1.5
11728             "%cvec                 = OpConstantComposite %v4f16 %f16_1_5 %f16_1_5 %f16_1_5 %c_f16_1\n"
11729             "%cval                 = OpConstantComposite %stype %cvec %f16_n_1\n",
11730 
11731             "%v                    = OpVariable %fp_stype Function %cval\n"
11732             "%vec_ptr              = OpAccessChain %fp_v4f16 %v %c_u32_0\n"
11733             "%f16_ptr              = OpAccessChain %fp_f16 %v %c_u32_1\n"
11734             "%vec_val              = OpLoad %v4f16 %vec_ptr\n"
11735             "%f16_val              = OpLoad %f16 %f16_ptr\n"
11736             "%tmp1                 = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_1 %f16_val\n" // vec4(-1)
11737             "%param1_16            = OpFConvert %v4f16 %param1\n"
11738             "%tmp2                 = OpFAdd %v4f16 %tmp1 %param1_16\n" // param1 + vec4(-1)
11739             "%transformed_param    = OpFAdd %v4f16 %tmp2 %vec_val\n"   // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
11740         },
11741         {// [1|0|0|0.5] [x] = x + 0.5
11742          // [0|1|0|0.5] [y] = y + 0.5
11743          // [0|0|1|0.5] [z] = z + 0.5
11744          // [0|0|0|1  ] [1] = 1
11745          "matrix",
11746 
11747          FLOAT_16_COMMON_TYPES_AND_CONSTS
11748          "%mat4x4_f16           = OpTypeMatrix %v4f16 4\n"
11749          "%v4f16_1_0_0_0        = OpConstantComposite %v4f16 %c_f16_1 %c_f16_0 %c_f16_0 %c_f16_0\n"
11750          "%v4f16_0_1_0_0        = OpConstantComposite %v4f16 %c_f16_0 %c_f16_1 %c_f16_0 %c_f16_0\n"
11751          "%v4f16_0_0_1_0        = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_1 %c_f16_0\n"
11752          "%v4f16_0_5_0_5_0_5_1  = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_1\n"
11753          "%cval                 = OpConstantComposite %mat4x4_f16 %v4f16_1_0_0_0 %v4f16_0_1_0_0 %v4f16_0_0_1_0 "
11754          "%v4f16_0_5_0_5_0_5_1\n",
11755 
11756          "%param1_16            = OpFConvert %v4f16 %param1\n"
11757          "%transformed_param    = OpMatrixTimesVector %v4f16 %cval %param1_16\n"},
11758         {"array",
11759 
11760          FLOAT_16_COMMON_TYPES_AND_CONSTS
11761          "%c_v4f16_1_1_1_0      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
11762          "%fp_a4f16             = OpTypePointer Function %a4f16\n"
11763          "%f16_n_1              = OpConstant %f16 -1.0\n"
11764          "%f16_1_5              = OpConstant %f16 !0x3e00\n" // +1.5
11765          "%carr                 = OpConstantComposite %a4f16 %c_f16_0 %f16_n_1 %f16_1_5 %c_f16_0\n",
11766 
11767          "%v                    = OpVariable %fp_a4f16 Function %carr\n"
11768          "%f                    = OpAccessChain %fp_f16 %v %c_u32_0\n"
11769          "%f1                   = OpAccessChain %fp_f16 %v %c_u32_1\n"
11770          "%f2                   = OpAccessChain %fp_f16 %v %c_u32_2\n"
11771          "%f3                   = OpAccessChain %fp_f16 %v %c_u32_3\n"
11772          "%f_val                = OpLoad %f16 %f\n"
11773          "%f1_val               = OpLoad %f16 %f1\n"
11774          "%f2_val               = OpLoad %f16 %f2\n"
11775          "%f3_val               = OpLoad %f16 %f3\n"
11776          "%ftot1                = OpFAdd %f16 %f_val %f1_val\n"
11777          "%ftot2                = OpFAdd %f16 %ftot1 %f2_val\n"
11778          "%ftot3                = OpFAdd %f16 %ftot2 %f3_val\n" // 0 - 1 + 1.5 + 0
11779          "%add_vec              = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %ftot3\n"
11780          "%param1_16            = OpFConvert %v4f16 %param1\n"
11781          "%transformed_param    = OpFAdd %v4f16 %param1_16 %add_vec\n"},
11782         {//
11783          // [
11784          //   {
11785          //      0.0,
11786          //      [ 1.0, 1.0, 1.0, 1.0]
11787          //   },
11788          //   {
11789          //      1.0,
11790          //      [ 0.0, 0.5, 0.0, 0.0]
11791          //   }, //     ^^^
11792          //   {
11793          //      0.0,
11794          //      [ 1.0, 1.0, 1.0, 1.0]
11795          //   }
11796          // ]
11797          "array_of_struct_of_array",
11798 
11799          FLOAT_16_COMMON_TYPES_AND_CONSTS
11800          "%c_v4f16_1_1_1_0      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
11801          "%fp_a4f16             = OpTypePointer Function %a4f16\n"
11802          "%stype                = OpTypeStruct %f16 %a4f16\n"
11803          "%a3stype              = OpTypeArray %stype %c_u32_3\n"
11804          "%fp_a3stype           = OpTypePointer Function %a3stype\n"
11805          "%ca4f16_0             = OpConstantComposite %a4f16 %c_f16_0 %c_f16_0_5 %c_f16_0 %c_f16_0\n"
11806          "%ca4f16_1             = OpConstantComposite %a4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
11807          "%cstype1              = OpConstantComposite %stype %c_f16_0 %ca4f16_1\n"
11808          "%cstype2              = OpConstantComposite %stype %c_f16_1 %ca4f16_0\n"
11809          "%carr                 = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
11810 
11811          "%v                    = OpVariable %fp_a3stype Function %carr\n"
11812          "%f                    = OpAccessChain %fp_f16 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
11813          "%f_l                  = OpLoad %f16 %f\n"
11814          "%add_vec              = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %f_l\n"
11815          "%param1_16            = OpFConvert %v4f16 %param1\n"
11816          "%transformed_param    = OpFAdd %v4f16 %param1_16 %add_vec\n"}};
11817 
11818     getHalfColorsFullAlpha(inputColors);
11819     outputColors[0] = RGBA(255, 255, 255, 255);
11820     outputColors[1] = RGBA(255, 127, 127, 255);
11821     outputColors[2] = RGBA(127, 255, 127, 255);
11822     outputColors[3] = RGBA(127, 127, 255, 255);
11823 
11824     extensions.push_back("VK_KHR_shader_float16_int8");
11825     features.extFloat16Int8.shaderFloat16 = true;
11826 
11827     for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
11828     {
11829         map<string, string> fragments;
11830 
11831         fragments["capability"] = "OpCapability Float16\n";
11832         fragments["pre_main"]   = tests[testNdx].constants;
11833         fragments["testfun"]    = string(functionStart) + tests[testNdx].code + functionEnd;
11834 
11835         createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, resources, extensions,
11836                                 opConstantCompositeTests.get(), features);
11837     }
11838     return opConstantCompositeTests.release();
11839 }
11840 
// Registers one finished test with testGroup; how that happens depends on the resource type T.
//
// Only the primary template is declared here. This file provides explicit specializations for
// GraphicsResources (forwards to createTestsForAllStages(); ignores testCtx and numWorkGroups) and
// for ComputeShaderSpec (fills in the spec and adds a SpvAsmComputeShaderCase; ignores splitRenderArea).
//
//   specResource    - resource/spec object describing the test's buffers
//   fragments       - named shader assembly fragments
//   testCtx         - test context
//   testGroup       - group that receives the new test(s)
//   testName        - name of the test
//   vulkanFeatures  - features the test requests
//   extensions      - extensions the test requests
//   numWorkGroups   - workgroup count
//   splitRenderArea - forwarded to createTestsForAllStages() by the graphics specialization;
//                     defaults to false
template <typename T>
void finalizeTestsCreation(T &specResource, const map<string, string> &fragments, tcu::TestContext &testCtx,
                           tcu::TestCaseGroup &testGroup, const std::string &testName,
                           const VulkanFeatures &vulkanFeatures, const vector<string> &extensions,
                           const IVec3 &numWorkGroups, const bool splitRenderArea = false);
11846 
11847 template <>
finalizeTestsCreation(GraphicsResources & specResource,const map<string,string> & fragments,tcu::TestContext &,tcu::TestCaseGroup & testGroup,const std::string & testName,const VulkanFeatures & vulkanFeatures,const vector<string> & extensions,const IVec3 &,const bool splitRenderArea)11848 void finalizeTestsCreation(GraphicsResources &specResource, const map<string, string> &fragments, tcu::TestContext &,
11849                            tcu::TestCaseGroup &testGroup, const std::string &testName,
11850                            const VulkanFeatures &vulkanFeatures, const vector<string> &extensions, const IVec3 &,
11851                            const bool splitRenderArea)
11852 {
11853     RGBA defaultColors[4];
11854     getDefaultColors(defaultColors);
11855 
11856     createTestsForAllStages(testName, defaultColors, defaultColors, fragments, specResource, extensions, &testGroup,
11857                             vulkanFeatures, QP_TEST_RESULT_FAIL, std::string(), splitRenderArea);
11858 }
11859 
11860 template <>
finalizeTestsCreation(ComputeShaderSpec & specResource,const map<string,string> & fragments,tcu::TestContext & testCtx,tcu::TestCaseGroup & testGroup,const std::string & testName,const VulkanFeatures & vulkanFeatures,const vector<string> & extensions,const IVec3 & numWorkGroups,bool)11861 void finalizeTestsCreation(ComputeShaderSpec &specResource, const map<string, string> &fragments,
11862                            tcu::TestContext &testCtx, tcu::TestCaseGroup &testGroup, const std::string &testName,
11863                            const VulkanFeatures &vulkanFeatures, const vector<string> &extensions,
11864                            const IVec3 &numWorkGroups, bool)
11865 {
11866     specResource.numWorkGroups           = numWorkGroups;
11867     specResource.requestedVulkanFeatures = vulkanFeatures;
11868     specResource.extensions              = extensions;
11869 
11870     specResource.assembly = makeComputeShaderAssembly(fragments);
11871 
11872     testGroup.addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), specResource));
11873 }
11874 
11875 template <class SpecResource>
createFloat16LogicalSet(tcu::TestContext & testCtx,const bool nanSupported)11876 tcu::TestCaseGroup *createFloat16LogicalSet(tcu::TestContext &testCtx, const bool nanSupported)
11877 {
11878     const string nan       = nanSupported ? "_nan" : "";
11879     const string groupName = "logical" + nan;
11880     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str()));
11881 
11882     de::Random rnd(deStringHash(testGroup->getName()));
11883     const string spvCapabilities =
11884         string("OpCapability Float16\n") + (nanSupported ? "OpCapability SignedZeroInfNanPreserve\n" : "");
11885     const string spvExtensions         = (nanSupported ? "OpExtension \"SPV_KHR_float_controls\"\n" : "");
11886     const string spvExecutionMode      = nanSupported ? "OpExecutionMode %BP_main SignedZeroInfNanPreserve 16\n" : "";
11887     const uint32_t numDataPointsScalar = 16;
11888     const uint32_t numDataPointsVector = 14;
11889     const vector<deFloat16> float16DataScalar = getFloat16s(rnd, numDataPointsScalar);
11890     const vector<deFloat16> float16DataVector = getFloat16s(rnd, numDataPointsVector);
11891     const vector<deFloat16> float16Data1 =
11892         squarize(float16DataScalar, 0); // Total Size: square(sizeof(float16DataScalar))
11893     const vector<deFloat16> float16Data2 = squarize(float16DataScalar, 1);
11894     const vector<deFloat16> float16DataVec1 =
11895         squarizeVector(float16DataVector, 0); // Total Size: 2 * (square(square(sizeof(float16DataVector))))
11896     const vector<deFloat16> float16DataVec2 = squarizeVector(float16DataVector, 1);
11897     const vector<deFloat16> float16OutUnused(float16Data1.size(), 0);
11898     const vector<deFloat16> float16OutVecUnused(float16DataVec1.size(), 0);
11899 
11900     struct TestOp
11901     {
11902         const char *opCode;
11903         VerifyIOFunc verifyFuncNan;
11904         VerifyIOFunc verifyFuncNonNan;
11905         const uint32_t argCount;
11906     };
11907 
11908     const TestOp testOps[] = {
11909         {"OpIsNan", compareFP16Logical<fp16isNan, true, false, true>, compareFP16Logical<fp16isNan, true, false, false>,
11910          1},
11911         {"OpIsInf", compareFP16Logical<fp16isInf, true, false, true>, compareFP16Logical<fp16isInf, true, false, false>,
11912          1},
11913         {"OpFOrdEqual", compareFP16Logical<fp16isEqual, false, true, true>,
11914          compareFP16Logical<fp16isEqual, false, true, false>, 2},
11915         {"OpFUnordEqual", compareFP16Logical<fp16isEqual, false, false, true>,
11916          compareFP16Logical<fp16isEqual, false, false, false>, 2},
11917         {"OpFOrdNotEqual", compareFP16Logical<fp16isUnequal, false, true, true>,
11918          compareFP16Logical<fp16isUnequal, false, true, false>, 2},
11919         {"OpFUnordNotEqual", compareFP16Logical<fp16isUnequal, false, false, true>,
11920          compareFP16Logical<fp16isUnequal, false, false, false>, 2},
11921         {"OpFOrdLessThan", compareFP16Logical<fp16isLess, false, true, true>,
11922          compareFP16Logical<fp16isLess, false, true, false>, 2},
11923         {"OpFUnordLessThan", compareFP16Logical<fp16isLess, false, false, true>,
11924          compareFP16Logical<fp16isLess, false, false, false>, 2},
11925         {"OpFOrdGreaterThan", compareFP16Logical<fp16isGreater, false, true, true>,
11926          compareFP16Logical<fp16isGreater, false, true, false>, 2},
11927         {"OpFUnordGreaterThan", compareFP16Logical<fp16isGreater, false, false, true>,
11928          compareFP16Logical<fp16isGreater, false, false, false>, 2},
11929         {"OpFOrdLessThanEqual", compareFP16Logical<fp16isLessOrEqual, false, true, true>,
11930          compareFP16Logical<fp16isLessOrEqual, false, true, false>, 2},
11931         {"OpFUnordLessThanEqual", compareFP16Logical<fp16isLessOrEqual, false, false, true>,
11932          compareFP16Logical<fp16isLessOrEqual, false, false, false>, 2},
11933         {"OpFOrdGreaterThanEqual", compareFP16Logical<fp16isGreaterOrEqual, false, true, true>,
11934          compareFP16Logical<fp16isGreaterOrEqual, false, true, false>, 2},
11935         {"OpFUnordGreaterThanEqual", compareFP16Logical<fp16isGreaterOrEqual, false, false, true>,
11936          compareFP16Logical<fp16isGreaterOrEqual, false, false, false>, 2},
11937     };
11938 
11939     { // scalar cases
11940         const StringTemplate preMain("      %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11941                                      "     %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11942                                      "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11943                                      " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
11944                                      "            %f16 = OpTypeFloat 16\n"
11945                                      "          %v2f16 = OpTypeVector %f16 2\n"
11946                                      "        %c_f16_0 = OpConstant %f16 0.0\n"
11947                                      "        %c_f16_1 = OpConstant %f16 1.0\n"
11948                                      "         %up_u32 = OpTypePointer Uniform %u32\n"
11949                                      "         %ra_u32 = OpTypeArray %u32 %c_i32_hndp\n"
11950                                      "         %SSBO16 = OpTypeStruct %ra_u32\n"
11951                                      "      %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11952                                      "     %f16_i32_fn = OpTypeFunction %f16 %i32\n"
11953                                      "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11954                                      "      %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
11955                                      "      %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
11956                                      "       %ssbo_dst = OpVariable %up_SSBO16 Uniform\n");
11957 
11958         const StringTemplate decoration("OpDecorate %ra_u32 ArrayStride 4\n"
11959                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
11960                                         "OpDecorate %SSBO16 BufferBlock\n"
11961                                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
11962                                         "OpDecorate %ssbo_src0 Binding 0\n"
11963                                         "OpDecorate %ssbo_src1 DescriptorSet 0\n"
11964                                         "OpDecorate %ssbo_src1 Binding 1\n"
11965                                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
11966                                         "OpDecorate %ssbo_dst Binding 2\n");
11967 
11968         const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11969                                      "    %param = OpFunctionParameter %v4f32\n"
11970 
11971                                      "    %entry = OpLabel\n"
11972                                      "        %i = OpVariable %fp_i32 Function\n"
11973                                      "             OpStore %i %c_i32_0\n"
11974                                      "             OpBranch %loop\n"
11975 
11976                                      "     %loop = OpLabel\n"
11977                                      "    %i_cmp = OpLoad %i32 %i\n"
11978                                      "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11979                                      "             OpLoopMerge %merge %next None\n"
11980                                      "             OpBranchConditional %lt %write %merge\n"
11981 
11982                                      "    %write = OpLabel\n"
11983                                      "      %ndx = OpLoad %i32 %i\n"
11984 
11985                                      " %val_src0 = OpFunctionCall %f16 %ld_arg_ssbo_src0 %ndx\n"
11986 
11987                                      "${op_arg1_calc}"
11988 
11989                                      " %val_bdst = ${op_code} %bool %val_src0 ${op_arg1}\n"
11990                                      "  %val_dst = OpSelect %f16 %val_bdst %c_f16_1 %c_f16_0\n"
11991                                      "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11992                                      "             OpBranch %next\n"
11993 
11994                                      "     %next = OpLabel\n"
11995                                      "    %i_cur = OpLoad %i32 %i\n"
11996                                      "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11997                                      "             OpStore %i %i_new\n"
11998                                      "             OpBranch %loop\n"
11999 
12000                                      "    %merge = OpLabel\n"
12001                                      "             OpReturnValue %param\n"
12002 
12003                                      "             OpFunctionEnd\n");
12004 
12005         const StringTemplate arg1Calc(" %val_src1 = OpFunctionCall %f16 %ld_arg_ssbo_src1 %ndx\n");
12006 
12007         for (uint32_t testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
12008         {
12009             const size_t iterations = float16Data1.size();
12010             const TestOp &testOp    = testOps[testOpsIdx];
12011             const string testName   = de::toLower(string(testOp.opCode)) + "_scalar";
12012             SpecResource specResource;
12013             map<string, string> specs;
12014             VulkanFeatures features;
12015             map<string, string> fragments;
12016             vector<string> extensions;
12017 
12018             specs["num_data_points"] = de::toString(iterations);
12019             specs["op_code"]         = testOp.opCode;
12020             specs["op_arg1"]         = (testOp.argCount == 1) ? "" : "%val_src1";
12021             specs["op_arg1_calc"]    = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
12022 
12023             fragments["extension"]      = spvExtensions;
12024             fragments["capability"]     = spvCapabilities;
12025             fragments["execution_mode"] = spvExecutionMode;
12026             fragments["decoration"]     = decoration.specialize(specs);
12027             fragments["pre_main"]       = preMain.specialize(specs);
12028             fragments["testfun"]        = testFun.specialize(specs);
12029             fragments["testfun"] += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src0"}});
12030             if (testOp.argCount > 1)
12031             {
12032                 fragments["testfun"] += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src1"}});
12033             }
12034             fragments["testfun"] += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
12035 
12036             specResource.inputs.push_back(
12037                 Resource(BufferSp(new Float16Buffer(float16Data1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12038             specResource.inputs.push_back(
12039                 Resource(BufferSp(new Float16Buffer(float16Data2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12040             specResource.outputs.push_back(
12041                 Resource(BufferSp(new Float16Buffer(float16OutUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12042             specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
12043 
12044             extensions.push_back("VK_KHR_shader_float16_int8");
12045 
12046             if (nanSupported)
12047             {
12048                 extensions.push_back("VK_KHR_shader_float_controls");
12049 
12050                 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = true;
12051             }
12052 
12053             features.extFloat16Int8.shaderFloat16 = true;
12054             if (specResource.graphicsFeaturesRequired)
12055                 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
12056 
12057             finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
12058                                   IVec3(1, 1, 1));
12059         }
12060     }
12061     { // vector cases
12062         const StringTemplate preMain("        %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12063                                      "           %v2bool = OpTypeVector %bool 2\n"
12064                                      "              %f16 = OpTypeFloat 16\n"
12065                                      "          %c_f16_0 = OpConstant %f16 0.0\n"
12066                                      "          %c_f16_1 = OpConstant %f16 1.0\n"
12067                                      "            %v2f16 = OpTypeVector %f16 2\n"
12068                                      "      %c_v2f16_0_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
12069                                      "      %c_v2f16_1_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
12070                                      "           %up_u32 = OpTypePointer Uniform %u32\n"
12071                                      "           %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12072                                      "           %SSBO16 = OpTypeStruct %ra_u32\n"
12073                                      "        %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
12074                                      "     %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12075                                      "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
12076                                      "        %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
12077                                      "        %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
12078                                      "         %ssbo_dst = OpVariable %up_SSBO16 Uniform\n");
12079 
12080         const StringTemplate decoration("OpDecorate %ra_u32 ArrayStride 4\n"
12081                                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
12082                                         "OpDecorate %SSBO16 BufferBlock\n"
12083                                         "OpDecorate %ssbo_src0 DescriptorSet 0\n"
12084                                         "OpDecorate %ssbo_src0 Binding 0\n"
12085                                         "OpDecorate %ssbo_src1 DescriptorSet 0\n"
12086                                         "OpDecorate %ssbo_src1 Binding 1\n"
12087                                         "OpDecorate %ssbo_dst DescriptorSet 0\n"
12088                                         "OpDecorate %ssbo_dst Binding 2\n");
12089 
12090         const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12091                                      "    %param = OpFunctionParameter %v4f32\n"
12092 
12093                                      "    %entry = OpLabel\n"
12094                                      "        %i = OpVariable %fp_i32 Function\n"
12095                                      "             OpStore %i %c_i32_0\n"
12096                                      "             OpBranch %loop\n"
12097 
12098                                      "     %loop = OpLabel\n"
12099                                      "    %i_cmp = OpLoad %i32 %i\n"
12100                                      "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12101                                      "             OpLoopMerge %merge %next None\n"
12102                                      "             OpBranchConditional %lt %write %merge\n"
12103 
12104                                      "    %write = OpLabel\n"
12105                                      "      %ndx = OpLoad %i32 %i\n"
12106 
12107                                      " %val_src0 = OpFunctionCall %v2f16 %ld_arg_ssbo_src0 %ndx\n"
12108 
12109                                      "${op_arg1_calc}"
12110 
12111                                      " %val_bdst = ${op_code} %v2bool %val_src0 ${op_arg1}\n"
12112                                      "  %val_dst = OpSelect %v2f16 %val_bdst %c_v2f16_1_1 %c_v2f16_0_0\n"
12113                                      "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12114                                      "             OpBranch %next\n"
12115 
12116                                      "     %next = OpLabel\n"
12117                                      "    %i_cur = OpLoad %i32 %i\n"
12118                                      "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12119                                      "             OpStore %i %i_new\n"
12120                                      "             OpBranch %loop\n"
12121 
12122                                      "    %merge = OpLabel\n"
12123                                      "             OpReturnValue %param\n"
12124 
12125                                      "             OpFunctionEnd\n");
12126 
12127         const StringTemplate arg1Calc(" %val_src1 = OpFunctionCall %v2f16 %ld_arg_ssbo_src1 %ndx\n");
12128 
12129         for (uint32_t testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
12130         {
12131             const uint32_t itemsPerVec = 2;
12132             const size_t iterations    = float16DataVec1.size() / itemsPerVec;
12133             const TestOp &testOp       = testOps[testOpsIdx];
12134             const string testName      = de::toLower(string(testOp.opCode)) + "_vector";
12135             SpecResource specResource;
12136             map<string, string> specs;
12137             vector<string> extensions;
12138             VulkanFeatures features;
12139             map<string, string> fragments;
12140 
12141             specs["num_data_points"] = de::toString(iterations);
12142             specs["op_code"]         = testOp.opCode;
12143             specs["op_arg1"]         = (testOp.argCount == 1) ? "" : "%val_src1";
12144             specs["op_arg1_calc"]    = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
12145 
12146             fragments["extension"]      = spvExtensions;
12147             fragments["capability"]     = spvCapabilities;
12148             fragments["execution_mode"] = spvExecutionMode;
12149             fragments["decoration"]     = decoration.specialize(specs);
12150             fragments["pre_main"]       = preMain.specialize(specs);
12151             fragments["testfun"]        = testFun.specialize(specs);
12152             fragments["testfun"] += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src0"}});
12153             if (testOp.argCount > 1)
12154             {
12155                 fragments["testfun"] += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src1"}});
12156             }
12157             fragments["testfun"] += StringTemplate(storeV2F16AsUint).specialize({{"var", "ssbo_dst"}});
12158 
12159             specResource.inputs.push_back(
12160                 Resource(BufferSp(new Float16Buffer(float16DataVec1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12161             specResource.inputs.push_back(
12162                 Resource(BufferSp(new Float16Buffer(float16DataVec2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12163             specResource.outputs.push_back(
12164                 Resource(BufferSp(new Float16Buffer(float16OutVecUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12165             specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
12166 
12167             extensions.push_back("VK_KHR_shader_float16_int8");
12168 
12169             if (nanSupported)
12170             {
12171                 extensions.push_back("VK_KHR_shader_float_controls");
12172 
12173                 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = true;
12174             }
12175 
12176             features.extFloat16Int8.shaderFloat16 = true;
12177             if (specResource.graphicsFeaturesRequired)
12178                 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
12179 
12180             finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
12181                                   IVec3(1, 1, 1), true);
12182         }
12183     }
12184 
12185     return testGroup.release();
12186 }
12187 
compareFP16FunctionSetFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12188 bool compareFP16FunctionSetFunc(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
12189                                 const std::vector<Resource> &, TestLog &log)
12190 {
12191     if (inputs.size() != 1 || outputAllocs.size() != 1)
12192         return false;
12193 
12194     vector<uint8_t> input1Bytes;
12195 
12196     inputs[0].getBytes(input1Bytes);
12197 
12198     const uint16_t *const input1AsFP16 = (const uint16_t *)&input1Bytes[0];
12199     const uint16_t *const outputAsFP16 = (const uint16_t *)outputAllocs[0]->getHostPtr();
12200     std::string error;
12201 
12202     for (size_t idx = 0; idx < input1Bytes.size() / sizeof(uint16_t); ++idx)
12203     {
12204         if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
12205         {
12206             log << TestLog::Message << error << TestLog::EndMessage;
12207 
12208             return false;
12209         }
12210     }
12211 
12212     return true;
12213 }
12214 
12215 template <class SpecResource>
createFloat16FuncSet(tcu::TestContext & testCtx)12216 tcu::TestCaseGroup *createFloat16FuncSet(tcu::TestContext &testCtx)
12217 {
12218     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "function"));
12219 
12220     de::Random rnd(deStringHash(testGroup->getName()));
12221     const StringTemplate capabilities("OpCapability Float16\n");
12222     const uint32_t numDataPoints             = 256;
12223     const vector<deFloat16> float16InputData = getFloat16s(rnd, numDataPoints);
12224     const vector<deFloat16> float16OutputUnused(float16InputData.size(), 0);
12225     map<string, string> fragments;
12226 
12227     struct TestType
12228     {
12229         const uint32_t typeComponents;
12230         const char *typeName;
12231         const char *typeDecls;
12232         const char *typeStorage;
12233         const string loadFunc;
12234         const string storeFunc;
12235     };
12236 
12237     const TestType testTypes[] = {
12238         {1, "f16",
12239          "      %v2f16 = OpTypeVector %f16 2\n"
12240          "%f16_i32_fn = OpTypeFunction %f16 %i32\n"
12241          "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12242          "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12243          " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12244          "u32_hndp", loadScalarF16FromUint, storeScalarF16AsUint},
12245         {2, "v2f16",
12246          "      %v2f16 = OpTypeVector %f16 2\n"
12247          "  %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
12248          "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12249          "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
12250          "u32_ndp", loadV2F16FromUint, storeV2F16AsUint},
12251         {4, "v4f16",
12252          "      %v2f16 = OpTypeVector %f16 2\n"
12253          "      %v4f16 = OpTypeVector %f16 4\n"
12254          "  %c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
12255          "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12256          "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
12257          "ra_u32_2", loadV4F16FromUints, storeV4F16AsUints},
12258     };
12259 
12260     const StringTemplate preMain("  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12261                                  " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
12262                                  "     %v2bool = OpTypeVector %bool 2\n"
12263                                  "        %f16 = OpTypeFloat 16\n"
12264                                  "    %c_f16_0 = OpConstant %f16 0.0\n"
12265 
12266                                  "${type_decls}"
12267 
12268                                  "  %${tt}_fun = OpTypeFunction %${tt} %${tt}\n"
12269                                  "   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12270                                  "%ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
12271                                  " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12272                                  "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12273                                  "      %up_u32 = OpTypePointer Uniform %u32\n"
12274                                  "     %SSBO16 = OpTypeStruct %ra_${ts}\n"
12275                                  "  %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
12276                                  "   %ssbo_src = OpVariable %up_SSBO16 Uniform\n"
12277                                  "   %ssbo_dst = OpVariable %up_SSBO16 Uniform\n");
12278 
12279     const StringTemplate decoration("OpDecorate %ra_u32_2 ArrayStride 4\n"
12280                                     "OpDecorate %ra_u32_hndp ArrayStride 4\n"
12281                                     "OpDecorate %ra_u32_ndp ArrayStride 4\n"
12282                                     "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12283                                     "OpMemberDecorate %SSBO16 0 Offset 0\n"
12284                                     "OpDecorate %SSBO16 BufferBlock\n"
12285                                     "OpDecorate %ssbo_src DescriptorSet 0\n"
12286                                     "OpDecorate %ssbo_src Binding 0\n"
12287                                     "OpDecorate %ssbo_dst DescriptorSet 0\n"
12288                                     "OpDecorate %ssbo_dst Binding 1\n");
12289 
12290     const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12291                                  "    %param = OpFunctionParameter %v4f32\n"
12292                                  "    %entry = OpLabel\n"
12293 
12294                                  "        %i = OpVariable %fp_i32 Function\n"
12295                                  "             OpStore %i %c_i32_0\n"
12296                                  "             OpBranch %loop\n"
12297 
12298                                  "     %loop = OpLabel\n"
12299                                  "    %i_cmp = OpLoad %i32 %i\n"
12300                                  "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12301                                  "             OpLoopMerge %merge %next None\n"
12302                                  "             OpBranchConditional %lt %write %merge\n"
12303 
12304                                  "    %write = OpLabel\n"
12305                                  "      %ndx = OpLoad %i32 %i\n"
12306 
12307                                  "  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12308                                  "  %val_dst = OpFunctionCall %${tt} %pass_fun %val_src\n"
12309                                  "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12310                                  "             OpBranch %next\n"
12311 
12312                                  "     %next = OpLabel\n"
12313                                  "    %i_cur = OpLoad %i32 %i\n"
12314                                  "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12315                                  "             OpStore %i %i_new\n"
12316                                  "             OpBranch %loop\n"
12317 
12318                                  "    %merge = OpLabel\n"
12319                                  "             OpReturnValue %param\n"
12320 
12321                                  "             OpFunctionEnd\n"
12322 
12323                                  " %pass_fun = OpFunction %${tt} None %${tt}_fun\n"
12324                                  "   %param0 = OpFunctionParameter %${tt}\n"
12325                                  " %entry_pf = OpLabel\n"
12326                                  "     %res0 = OpFAdd %${tt} %param0 %c_${tt}_0\n"
12327                                  "             OpReturnValue %res0\n"
12328                                  "             OpFunctionEnd\n");
12329 
12330     for (uint32_t testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12331     {
12332         const TestType &testType    = testTypes[testTypeIdx];
12333         const string testName       = testType.typeName;
12334         const uint32_t itemsPerType = testType.typeComponents;
12335         const size_t iterations     = float16InputData.size() / itemsPerType;
12336         const size_t typeStride     = itemsPerType * sizeof(deFloat16);
12337         SpecResource specResource;
12338         map<string, string> specs;
12339         VulkanFeatures features;
12340         vector<string> extensions;
12341 
12342         specs["num_data_points"] = de::toString(iterations);
12343         specs["tt"]              = testType.typeName;
12344         specs["ts"]              = testType.typeStorage;
12345         specs["tt_stride"]       = de::toString(typeStride);
12346         specs["type_decls"]      = testType.typeDecls;
12347 
12348         fragments["capability"] = capabilities.specialize(specs);
12349         fragments["decoration"] = decoration.specialize(specs);
12350         fragments["pre_main"]   = preMain.specialize(specs);
12351         fragments["testfun"]    = testFun.specialize(specs);
12352         fragments["testfun"] += StringTemplate(testType.loadFunc).specialize({{"var", "ssbo_src"}});
12353         fragments["testfun"] += StringTemplate(testType.storeFunc).specialize({{"var", "ssbo_dst"}});
12354 
12355         specResource.inputs.push_back(
12356             Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12357         specResource.outputs.push_back(
12358             Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12359         specResource.verifyIO = compareFP16FunctionSetFunc;
12360 
12361         extensions.push_back("VK_KHR_shader_float16_int8");
12362 
12363         features.extFloat16Int8.shaderFloat16 = true;
12364         if (specResource.graphicsFeaturesRequired)
12365             features.coreFeatures.vertexPipelineStoresAndAtomics = true;
12366 
12367         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
12368                               IVec3(1, 1, 1));
12369     }
12370 
12371     return testGroup.release();
12372 }
12373 
compareFP16VectorExtractFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12374 bool compareFP16VectorExtractFunc(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
12375                                   const std::vector<Resource> &, TestLog &log)
12376 {
12377     if (inputs.size() != 2 || outputAllocs.size() != 1)
12378         return false;
12379 
12380     vector<uint8_t> input1Bytes;
12381     vector<uint8_t> input2Bytes;
12382 
12383     inputs[0].getBytes(input1Bytes);
12384     inputs[1].getBytes(input2Bytes);
12385 
12386     DE_ASSERT(input1Bytes.size() > 0);
12387     DE_ASSERT(input2Bytes.size() > 0);
12388     DE_ASSERT(input2Bytes.size() % sizeof(uint32_t) == 0);
12389 
12390     const size_t iterations             = input2Bytes.size() / sizeof(uint32_t);
12391     const size_t components             = input1Bytes.size() / (sizeof(deFloat16) * iterations);
12392     const deFloat16 *const input1AsFP16 = (const deFloat16 *)&input1Bytes[0];
12393     const uint32_t *const inputIndices  = (const uint32_t *)&input2Bytes[0];
12394     const deFloat16 *const outputAsFP16 = (const deFloat16 *)outputAllocs[0]->getHostPtr();
12395     std::string error;
12396 
12397     DE_ASSERT(components == 2 || components == 4);
12398     DE_ASSERT(input1Bytes.size() == iterations * components * sizeof(deFloat16));
12399 
12400     for (size_t idx = 0; idx < iterations; ++idx)
12401     {
12402         const uint32_t componentNdx = inputIndices[idx];
12403 
12404         DE_ASSERT(componentNdx < components);
12405 
12406         const deFloat16 expected = input1AsFP16[components * idx + componentNdx];
12407 
12408         if (!compare16BitFloat(expected, outputAsFP16[idx], error))
12409         {
12410             log << TestLog::Message << "At " << idx << error << TestLog::EndMessage;
12411 
12412             return false;
12413         }
12414     }
12415 
12416     return true;
12417 }
12418 
12419 template <class SpecResource>
createFloat16VectorExtractSet(tcu::TestContext & testCtx)12420 tcu::TestCaseGroup *createFloat16VectorExtractSet(tcu::TestContext &testCtx)
12421 {
12422     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "opvectorextractdynamic"));
12423 
12424     de::Random rnd(deStringHash(testGroup->getName()));
12425     const uint32_t numDataPoints             = 256;
12426     const vector<deFloat16> float16InputData = getFloat16s(rnd, numDataPoints);
12427     const vector<deFloat16> float16OutputUnused(float16InputData.size(), 0);
12428 
12429     struct TestType
12430     {
12431         const uint32_t typeComponents;
12432         const size_t typeStride;
12433         const char *typeName;
12434         const char *typeDecls;
12435         const char *typeStorage;
12436         const string loadFunction;
12437         const string storeFunction;
12438     };
12439 
12440     const TestType testTypes[] = {
12441         {2, 2 * sizeof(deFloat16), "v2f16",
12442          "      %v2f16 = OpTypeVector %f16 2\n"
12443          "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12444          "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12445          "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12446          " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12447          "u32", loadV2F16FromUint, storeScalarF16AsUint},
12448         {3, 4 * sizeof(deFloat16), "v3f16",
12449          "      %v2f16 = OpTypeVector %f16 2\n"
12450          "      %v3f16 = OpTypeVector %f16 3\n"
12451          "%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12452          "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12453          "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12454          " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12455          "ra_u32_2", loadV3F16FromUints, storeScalarF16AsUint},
12456         {4, 4 * sizeof(deFloat16), "v4f16",
12457          "      %v2f16 = OpTypeVector %f16 2\n"
12458          "      %v4f16 = OpTypeVector %f16 4\n"
12459          "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12460          "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12461          "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12462          " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12463          "ra_u32_2", loadV4F16FromUints, storeScalarF16AsUint},
12464     };
12465 
12466     const StringTemplate preMain("  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12467                                  " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
12468                                  "        %f16 = OpTypeFloat 16\n"
12469 
12470                                  "${type_decl}"
12471 
12472                                  "     %up_u32 = OpTypePointer Uniform %u32\n"
12473                                  "     %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12474                                  "   %SSBO_IDX = OpTypeStruct %ra_u32\n"
12475                                  "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
12476 
12477                                  "   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12478                                  " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12479                                  "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12480                                  "   %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12481                                  "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
12482 
12483                                  " %ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
12484                                  "   %SSBO_DST = OpTypeStruct %ra_u32_hndp\n"
12485                                  "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12486 
12487                                  "   %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12488                                  "   %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12489                                  "   %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n");
12490 
12491     const StringTemplate decoration("OpDecorate %ra_u32_2 ArrayStride 4\n"
12492                                     "OpDecorate %ra_u32_hndp ArrayStride 4\n"
12493                                     "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12494                                     "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12495                                     "OpDecorate %SSBO_SRC BufferBlock\n"
12496                                     "OpDecorate %ssbo_src DescriptorSet 0\n"
12497                                     "OpDecorate %ssbo_src Binding 0\n"
12498 
12499                                     "OpDecorate %ra_u32 ArrayStride 4\n"
12500                                     "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12501                                     "OpDecorate %SSBO_IDX BufferBlock\n"
12502                                     "OpDecorate %ssbo_idx DescriptorSet 0\n"
12503                                     "OpDecorate %ssbo_idx Binding 1\n"
12504 
12505                                     "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12506                                     "OpDecorate %SSBO_DST BufferBlock\n"
12507                                     "OpDecorate %ssbo_dst DescriptorSet 0\n"
12508                                     "OpDecorate %ssbo_dst Binding 2\n");
12509 
12510     const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12511                                  "    %param = OpFunctionParameter %v4f32\n"
12512                                  "    %entry = OpLabel\n"
12513 
12514                                  "        %i = OpVariable %fp_i32 Function\n"
12515                                  "             OpStore %i %c_i32_0\n"
12516 
12517                                  " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12518                                  "             OpSelectionMerge %end_if None\n"
12519                                  "             OpBranchConditional %will_run %run_test %end_if\n"
12520 
12521                                  " %run_test = OpLabel\n"
12522                                  "             OpBranch %loop\n"
12523 
12524                                  "     %loop = OpLabel\n"
12525                                  "    %i_cmp = OpLoad %i32 %i\n"
12526                                  "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12527                                  "             OpLoopMerge %merge %next None\n"
12528                                  "             OpBranchConditional %lt %write %merge\n"
12529 
12530                                  "    %write = OpLabel\n"
12531                                  "      %ndx = OpLoad %i32 %i\n"
12532 
12533                                  "  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12534 
12535                                  "  %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12536                                  "  %val_idx = OpLoad %u32 %src_idx\n"
12537 
12538                                  "  %val_dst = OpVectorExtractDynamic %f16 %val_src %val_idx\n"
12539                                  "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12540 
12541                                  "             OpBranch %next\n"
12542 
12543                                  "     %next = OpLabel\n"
12544                                  "    %i_cur = OpLoad %i32 %i\n"
12545                                  "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12546                                  "             OpStore %i %i_new\n"
12547                                  "             OpBranch %loop\n"
12548 
12549                                  "    %merge = OpLabel\n"
12550                                  "             OpBranch %end_if\n"
12551                                  "   %end_if = OpLabel\n"
12552                                  "             OpReturnValue %param\n"
12553 
12554                                  "             OpFunctionEnd\n");
12555 
12556     for (uint32_t testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12557     {
12558         const TestType &testType  = testTypes[testTypeIdx];
12559         const string testName     = testType.typeName;
12560         const size_t itemsPerType = testType.typeStride / sizeof(deFloat16);
12561         const size_t iterations   = float16InputData.size() / itemsPerType;
12562         SpecResource specResource;
12563         map<string, string> specs;
12564         VulkanFeatures features;
12565         vector<uint32_t> inputDataNdx;
12566         map<string, string> fragments;
12567         vector<string> extensions;
12568 
12569         for (uint32_t ndx = 0; ndx < iterations; ++ndx)
12570             inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
12571 
12572         specs["num_data_points"] = de::toString(iterations);
12573         specs["tt"]              = testType.typeName;
12574         specs["ts"]              = testType.typeStorage;
12575         specs["tt_stride"]       = de::toString(testType.typeStride);
12576         specs["type_decl"]       = testType.typeDecls;
12577 
12578         fragments["capability"] = "OpCapability Float16\n";
12579         fragments["decoration"] = decoration.specialize(specs);
12580         fragments["pre_main"]   = preMain.specialize(specs);
12581         fragments["testfun"]    = testFun.specialize(specs);
12582         fragments["testfun"] += StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12583         fragments["testfun"] += StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
12584 
12585         specResource.inputs.push_back(
12586             Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12587         specResource.inputs.push_back(
12588             Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12589         specResource.outputs.push_back(
12590             Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12591         specResource.verifyIO = compareFP16VectorExtractFunc;
12592 
12593         extensions.push_back("VK_KHR_shader_float16_int8");
12594 
12595         features.extFloat16Int8.shaderFloat16 = true;
12596         if (specResource.graphicsFeaturesRequired)
12597             features.coreFeatures.vertexPipelineStoresAndAtomics = true;
12598 
12599         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
12600                               IVec3(1, 1, 1));
12601     }
12602 
12603     return testGroup.release();
12604 }
12605 
12606 template <uint32_t COMPONENTS_COUNT, uint32_t REPLACEMENT>
compareFP16VectorInsertFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12607 bool compareFP16VectorInsertFunc(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
12608                                  const std::vector<Resource> &, TestLog &log)
12609 {
12610     if (inputs.size() != 2 || outputAllocs.size() != 1)
12611         return false;
12612 
12613     vector<uint8_t> input1Bytes;
12614     vector<uint8_t> input2Bytes;
12615 
12616     inputs[0].getBytes(input1Bytes);
12617     inputs[1].getBytes(input2Bytes);
12618 
12619     DE_ASSERT(input1Bytes.size() > 0);
12620     DE_ASSERT(input2Bytes.size() > 0);
12621     DE_ASSERT(input2Bytes.size() % sizeof(uint32_t) == 0);
12622 
12623     const size_t iterations             = input2Bytes.size() / sizeof(uint32_t);
12624     const size_t componentsStride       = input1Bytes.size() / (sizeof(deFloat16) * iterations);
12625     const deFloat16 *const input1AsFP16 = (const deFloat16 *)&input1Bytes[0];
12626     const uint32_t *const inputIndices  = (const uint32_t *)&input2Bytes[0];
12627     const deFloat16 *const outputAsFP16 = (const deFloat16 *)outputAllocs[0]->getHostPtr();
12628     const deFloat16 magic               = tcu::Float16(float(REPLACEMENT)).bits();
12629     std::string error;
12630 
12631     DE_ASSERT(componentsStride == 2 || componentsStride == 4);
12632     DE_ASSERT(input1Bytes.size() == iterations * componentsStride * sizeof(deFloat16));
12633 
12634     for (size_t idx = 0; idx < iterations; ++idx)
12635     {
12636         const deFloat16 *inputVec      = &input1AsFP16[componentsStride * idx];
12637         const deFloat16 *outputVec     = &outputAsFP16[componentsStride * idx];
12638         const uint32_t replacedCompNdx = inputIndices[idx];
12639 
12640         DE_ASSERT(replacedCompNdx < COMPONENTS_COUNT);
12641 
12642         for (size_t compNdx = 0; compNdx < COMPONENTS_COUNT; ++compNdx)
12643         {
12644             const deFloat16 expected = (compNdx == replacedCompNdx) ? magic : inputVec[compNdx];
12645 
12646             if (!compare16BitFloat(expected, outputVec[compNdx], error))
12647             {
12648                 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
12649 
12650                 return false;
12651             }
12652         }
12653     }
12654 
12655     return true;
12656 }
12657 
12658 template <class SpecResource>
createFloat16VectorInsertSet(tcu::TestContext & testCtx)12659 tcu::TestCaseGroup *createFloat16VectorInsertSet(tcu::TestContext &testCtx)
12660 {
12661     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "opvectorinsertdynamic"));
12662 
12663     de::Random rnd(deStringHash(testGroup->getName()));
12664     const uint32_t replacement               = 42;
12665     const uint32_t numDataPoints             = 256;
12666     const vector<deFloat16> float16InputData = getFloat16s(rnd, numDataPoints);
12667     const vector<deFloat16> float16OutputUnused(float16InputData.size(), 0);
12668 
12669     struct TestType
12670     {
12671         const uint32_t typeComponents;
12672         const size_t typeStride;
12673         const char *typeName;
12674         const char *typeDecls;
12675         VerifyIOFunc verifyIOFunc;
12676         const char *typeStorage;
12677         const string loadFunction;
12678         const string storeFunction;
12679     };
12680 
12681     const TestType testTypes[] = {
12682         {2, 2 * sizeof(deFloat16), "v2f16",
12683          "      %v2f16 = OpTypeVector %f16 2\n"
12684          "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12685          "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
12686          compareFP16VectorInsertFunc<2, replacement>, "u32", loadV2F16FromUint, storeV2F16AsUint},
12687         {3, 4 * sizeof(deFloat16), "v3f16",
12688          "      %v2f16 = OpTypeVector %f16 2\n"
12689          "      %v3f16 = OpTypeVector %f16 3\n"
12690          "%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12691          "%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n",
12692          compareFP16VectorInsertFunc<3, replacement>, "ra_u32_2", loadV3F16FromUints, storeV3F16AsUints},
12693         {4, 4 * sizeof(deFloat16), "v4f16",
12694          "      %v2f16 = OpTypeVector %f16 2\n"
12695          "      %v4f16 = OpTypeVector %f16 4\n"
12696          "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12697          "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
12698          compareFP16VectorInsertFunc<4, replacement>, "ra_u32_2", loadV4F16FromUints, storeV4F16AsUints},
12699     };
12700 
12701     const StringTemplate preMain("  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12702                                  "        %f16 = OpTypeFloat 16\n"
12703                                  "  %c_f16_ins = OpConstant %f16 ${replacement}\n"
12704 
12705                                  "${type_decl}"
12706 
12707                                  "     %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12708                                  "      %up_u32 = OpTypePointer Uniform %u32\n"
12709                                  "   %SSBO_IDX = OpTypeStruct %ra_u32\n"
12710                                  "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
12711 
12712                                  "   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12713                                  "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12714                                  "   %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12715                                  "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
12716 
12717                                  "   %SSBO_DST = OpTypeStruct %ra_${ts}\n"
12718                                  "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12719 
12720                                  "   %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12721                                  "   %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12722                                  "   %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n");
12723 
12724     const StringTemplate decoration("OpDecorate %ra_u32_2 ArrayStride 4\n"
12725                                     "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12726                                     "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12727                                     "OpDecorate %SSBO_SRC BufferBlock\n"
12728                                     "OpDecorate %ssbo_src DescriptorSet 0\n"
12729                                     "OpDecorate %ssbo_src Binding 0\n"
12730 
12731                                     "OpDecorate %ra_u32 ArrayStride 4\n"
12732                                     "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12733                                     "OpDecorate %SSBO_IDX BufferBlock\n"
12734                                     "OpDecorate %ssbo_idx DescriptorSet 0\n"
12735                                     "OpDecorate %ssbo_idx Binding 1\n"
12736 
12737                                     "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12738                                     "OpDecorate %SSBO_DST BufferBlock\n"
12739                                     "OpDecorate %ssbo_dst DescriptorSet 0\n"
12740                                     "OpDecorate %ssbo_dst Binding 2\n");
12741 
12742     const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12743                                  "    %param = OpFunctionParameter %v4f32\n"
12744                                  "    %entry = OpLabel\n"
12745 
12746                                  "        %i = OpVariable %fp_i32 Function\n"
12747                                  "             OpStore %i %c_i32_0\n"
12748 
12749                                  " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12750                                  "             OpSelectionMerge %end_if None\n"
12751                                  "             OpBranchConditional %will_run %run_test %end_if\n"
12752 
12753                                  " %run_test = OpLabel\n"
12754                                  "             OpBranch %loop\n"
12755 
12756                                  "     %loop = OpLabel\n"
12757                                  "    %i_cmp = OpLoad %i32 %i\n"
12758                                  "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12759                                  "             OpLoopMerge %merge %next None\n"
12760                                  "             OpBranchConditional %lt %write %merge\n"
12761 
12762                                  "    %write = OpLabel\n"
12763                                  "      %ndx = OpLoad %i32 %i\n"
12764 
12765                                  "  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12766 
12767                                  "  %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12768                                  "  %val_idx = OpLoad %u32 %src_idx\n"
12769 
12770                                  "  %val_dst = OpVectorInsertDynamic %${tt} %val_src %c_f16_ins %val_idx\n"
12771                                  "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12772 
12773                                  "             OpBranch %next\n"
12774 
12775                                  "     %next = OpLabel\n"
12776                                  "    %i_cur = OpLoad %i32 %i\n"
12777                                  "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12778                                  "             OpStore %i %i_new\n"
12779                                  "             OpBranch %loop\n"
12780 
12781                                  "    %merge = OpLabel\n"
12782                                  "             OpBranch %end_if\n"
12783                                  "   %end_if = OpLabel\n"
12784                                  "             OpReturnValue %param\n"
12785 
12786                                  "             OpFunctionEnd\n");
12787 
12788     for (uint32_t testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12789     {
12790         const TestType &testType  = testTypes[testTypeIdx];
12791         const string testName     = testType.typeName;
12792         const size_t itemsPerType = testType.typeStride / sizeof(deFloat16);
12793         const size_t iterations   = float16InputData.size() / itemsPerType;
12794         SpecResource specResource;
12795         map<string, string> specs;
12796         VulkanFeatures features;
12797         vector<uint32_t> inputDataNdx;
12798         map<string, string> fragments;
12799         vector<string> extensions;
12800 
12801         for (uint32_t ndx = 0; ndx < iterations; ++ndx)
12802             inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
12803 
12804         specs["num_data_points"] = de::toString(iterations);
12805         specs["tt"]              = testType.typeName;
12806         specs["ts"]              = testType.typeStorage;
12807         specs["tt_stride"]       = de::toString(testType.typeStride);
12808         specs["type_decl"]       = testType.typeDecls;
12809         specs["replacement"]     = de::toString(replacement);
12810 
12811         fragments["capability"] = "OpCapability Float16\n";
12812         fragments["decoration"] = decoration.specialize(specs);
12813         fragments["pre_main"]   = preMain.specialize(specs);
12814         fragments["testfun"]    = testFun.specialize(specs);
12815         fragments["testfun"] += StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12816         fragments["testfun"] += StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
12817 
12818         specResource.inputs.push_back(
12819             Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12820         specResource.inputs.push_back(
12821             Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12822         specResource.outputs.push_back(
12823             Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12824         specResource.verifyIO = testType.verifyIOFunc;
12825 
12826         extensions.push_back("VK_KHR_shader_float16_int8");
12827 
12828         features.extFloat16Int8.shaderFloat16 = true;
12829         if (specResource.graphicsFeaturesRequired)
12830             features.coreFeatures.vertexPipelineStoresAndAtomics = true;
12831 
12832         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
12833                               IVec3(1, 1, 1));
12834     }
12835 
12836     return testGroup.release();
12837 }
12838 
// Computes the value OpVectorShuffle is expected to write into component
// componentNdx of the result vector for the given iteration.
//
// The selector space has (vec1Len + vec2Len + 1) entries: one for every component
// of the two concatenated source vectors plus one trailing entry that has no
// source value. Result components 0 and 1 walk through every pair of selectors
// (iteration modulo the number of pairs); result components 2 and 3 always pick
// source components 0 and 1 respectively.
//
// validate is set to false (and 0 is returned) when the selector is the trailing
// entry; otherwise it is set to true and the selected source value is returned.
// Throws InternalError when componentNdx is greater than 3.
inline deFloat16 getShuffledComponent(const size_t iteration, const size_t componentNdx, const deFloat16 *input1Vec,
                                      const deFloat16 *input2Vec, size_t vec1Len, size_t vec2Len, bool &validate)
{
    const size_t sourceLen     = vec1Len + vec2Len;
    const size_t selectorCount = sourceLen + 1;
    const size_t pairNdx       = iteration % (selectorCount * selectorCount);
    size_t selector            = 0;

    if (componentNdx == 0)
    {
        // High "digit" of the pair index.
        selector = pairNdx / selectorCount;
    }
    else if (componentNdx == 1)
    {
        // Low "digit" of the pair index.
        selector = pairNdx % selectorCount;
    }
    else if (componentNdx == 2 || componentNdx == 3)
    {
        // Fixed selectors: component 2 reads source 0, component 3 reads source 1.
        selector = componentNdx - 2;
    }
    else
    {
        TCU_THROW(InternalError, "Impossible");
    }

    // Only selectors that address a real source component can be verified.
    validate = (selector < sourceLen);

    if (!validate)
        return 0;

    return (selector < vec1Len) ? input1Vec[selector] : input2Vec[selector - vec1Len];
}
12875 
12876 template <uint32_t DST_COMPONENTS_COUNT, uint32_t SRC0_COMPONENTS_COUNT, uint32_t SRC1_COMPONENTS_COUNT>
compareFP16VectorShuffleFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12877 bool compareFP16VectorShuffleFunc(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
12878                                   const std::vector<Resource> &, TestLog &log)
12879 {
12880     DE_STATIC_ASSERT(DST_COMPONENTS_COUNT == 2 || DST_COMPONENTS_COUNT == 3 || DST_COMPONENTS_COUNT == 4);
12881     DE_STATIC_ASSERT(SRC0_COMPONENTS_COUNT == 2 || SRC0_COMPONENTS_COUNT == 3 || SRC0_COMPONENTS_COUNT == 4);
12882     DE_STATIC_ASSERT(SRC1_COMPONENTS_COUNT == 2 || SRC1_COMPONENTS_COUNT == 3 || SRC1_COMPONENTS_COUNT == 4);
12883 
12884     if (inputs.size() != 2 || outputAllocs.size() != 1)
12885         return false;
12886 
12887     vector<uint8_t> input1Bytes;
12888     vector<uint8_t> input2Bytes;
12889 
12890     inputs[0].getBytes(input1Bytes);
12891     inputs[1].getBytes(input2Bytes);
12892 
12893     DE_ASSERT(input1Bytes.size() > 0);
12894     DE_ASSERT(input2Bytes.size() > 0);
12895     DE_ASSERT(input2Bytes.size() % sizeof(deFloat16) == 0);
12896 
12897     const size_t componentsStrideDst    = (DST_COMPONENTS_COUNT == 3) ? 4 : DST_COMPONENTS_COUNT;
12898     const size_t componentsStrideSrc0   = (SRC0_COMPONENTS_COUNT == 3) ? 4 : SRC0_COMPONENTS_COUNT;
12899     const size_t componentsStrideSrc1   = (SRC1_COMPONENTS_COUNT == 3) ? 4 : SRC1_COMPONENTS_COUNT;
12900     const size_t iterations             = input1Bytes.size() / (componentsStrideSrc0 * sizeof(deFloat16));
12901     const deFloat16 *const input1AsFP16 = (const deFloat16 *)&input1Bytes[0];
12902     const deFloat16 *const input2AsFP16 = (const deFloat16 *)&input2Bytes[0];
12903     const deFloat16 *const outputAsFP16 = (const deFloat16 *)outputAllocs[0]->getHostPtr();
12904     std::string error;
12905 
12906     DE_ASSERT(input1Bytes.size() == iterations * componentsStrideSrc0 * sizeof(deFloat16));
12907     DE_ASSERT(input2Bytes.size() == iterations * componentsStrideSrc1 * sizeof(deFloat16));
12908 
12909     for (size_t idx = 0; idx < iterations; ++idx)
12910     {
12911         const deFloat16 *input1Vec = &input1AsFP16[componentsStrideSrc0 * idx];
12912         const deFloat16 *input2Vec = &input2AsFP16[componentsStrideSrc1 * idx];
12913         const deFloat16 *outputVec = &outputAsFP16[componentsStrideDst * idx];
12914 
12915         for (size_t compNdx = 0; compNdx < DST_COMPONENTS_COUNT; ++compNdx)
12916         {
12917             bool validate      = true;
12918             deFloat16 expected = getShuffledComponent(idx, compNdx, input1Vec, input2Vec, SRC0_COMPONENTS_COUNT,
12919                                                       SRC1_COMPONENTS_COUNT, validate);
12920 
12921             if (validate && !compare16BitFloat(expected, outputVec[compNdx], error))
12922             {
12923                 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
12924 
12925                 return false;
12926             }
12927         }
12928     }
12929 
12930     return true;
12931 }
12932 
getFloat16VectorShuffleVerifyIOFunc(uint32_t dstComponentsCount,uint32_t src0ComponentsCount,uint32_t src1ComponentsCount)12933 VerifyIOFunc getFloat16VectorShuffleVerifyIOFunc(uint32_t dstComponentsCount, uint32_t src0ComponentsCount,
12934                                                  uint32_t src1ComponentsCount)
12935 {
12936     DE_ASSERT(dstComponentsCount <= 4);
12937     DE_ASSERT(src0ComponentsCount <= 4);
12938     DE_ASSERT(src1ComponentsCount <= 4);
12939     uint32_t funcCode = 100 * dstComponentsCount + 10 * src0ComponentsCount + src1ComponentsCount;
12940 
12941     switch (funcCode)
12942     {
12943     case 222:
12944         return compareFP16VectorShuffleFunc<2, 2, 2>;
12945     case 223:
12946         return compareFP16VectorShuffleFunc<2, 2, 3>;
12947     case 224:
12948         return compareFP16VectorShuffleFunc<2, 2, 4>;
12949     case 232:
12950         return compareFP16VectorShuffleFunc<2, 3, 2>;
12951     case 233:
12952         return compareFP16VectorShuffleFunc<2, 3, 3>;
12953     case 234:
12954         return compareFP16VectorShuffleFunc<2, 3, 4>;
12955     case 242:
12956         return compareFP16VectorShuffleFunc<2, 4, 2>;
12957     case 243:
12958         return compareFP16VectorShuffleFunc<2, 4, 3>;
12959     case 244:
12960         return compareFP16VectorShuffleFunc<2, 4, 4>;
12961     case 322:
12962         return compareFP16VectorShuffleFunc<3, 2, 2>;
12963     case 323:
12964         return compareFP16VectorShuffleFunc<3, 2, 3>;
12965     case 324:
12966         return compareFP16VectorShuffleFunc<3, 2, 4>;
12967     case 332:
12968         return compareFP16VectorShuffleFunc<3, 3, 2>;
12969     case 333:
12970         return compareFP16VectorShuffleFunc<3, 3, 3>;
12971     case 334:
12972         return compareFP16VectorShuffleFunc<3, 3, 4>;
12973     case 342:
12974         return compareFP16VectorShuffleFunc<3, 4, 2>;
12975     case 343:
12976         return compareFP16VectorShuffleFunc<3, 4, 3>;
12977     case 344:
12978         return compareFP16VectorShuffleFunc<3, 4, 4>;
12979     case 422:
12980         return compareFP16VectorShuffleFunc<4, 2, 2>;
12981     case 423:
12982         return compareFP16VectorShuffleFunc<4, 2, 3>;
12983     case 424:
12984         return compareFP16VectorShuffleFunc<4, 2, 4>;
12985     case 432:
12986         return compareFP16VectorShuffleFunc<4, 3, 2>;
12987     case 433:
12988         return compareFP16VectorShuffleFunc<4, 3, 3>;
12989     case 434:
12990         return compareFP16VectorShuffleFunc<4, 3, 4>;
12991     case 442:
12992         return compareFP16VectorShuffleFunc<4, 4, 2>;
12993     case 443:
12994         return compareFP16VectorShuffleFunc<4, 4, 3>;
12995     case 444:
12996         return compareFP16VectorShuffleFunc<4, 4, 4>;
12997     default:
12998         TCU_THROW(InternalError, "Invalid number of components specified.");
12999     }
13000 }
13001 
// Builds the "opvectorshuffle" test group.
//
// One test is created for every (result, source 0, source 1) combination of the
// 2-, 3- and 4-component f16 vector types; the test name is the three component
// counts concatenated (e.g. "234"). Every generated shader loops over
// ${num_data_points} iterations (only when %isUniqueIdZero returns true), loads
// one vector from each source buffer, calls %sw_fun and stores the result.
// %sw_fun switches on (iteration mod ${case_count}) and executes one
// OpVectorShuffle variant per case. Results are checked by the verifier returned
// from getFloat16VectorShuffleVerifyIOFunc().
//
// SpecResource is the resource specification type handed to
// finalizeTestsCreation(). The returned group is released from its MovePtr.
template <class SpecResource>
tcu::TestCaseGroup *createFloat16VectorShuffleSet(tcu::TestContext &testCtx)
{
    de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "opvectorshuffle"));
    // Mix the command-line base seed with a hash of the group name.
    const int testSpecificSeed = deStringHash(testGroup->getName());
    const int seed             = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
    de::Random rnd(seed);
    const uint32_t numDataPoints = 128;
    // Shared across all combinations; every key is overwritten on each iteration below.
    map<string, string> fragments;

    // Description of one f16 vector type taking part in the shuffle.
    struct TestType
    {
        const uint32_t typeComponents; // Number of components in the vector
        const char *typeName;          // SPIR-V id of the type, without the leading '%'
        const string loadFunction;     // Load helper template, specialized with ${var}
        const string storeFunction;    // Store helper template, specialized with ${var}
    };

    const TestType testTypes[] = {
        {2, "v2f16", loadV2F16FromUint, storeV2F16AsUint},
        {3, "v3f16", loadV3F16FromUints, storeV3F16AsUints},
        {4, "v4f16", loadV4F16FromUints, storeV4F16AsUints},
    };

    // Types, constants and buffer variables; the buffer block types are chosen per test
    // through ${tt_src0}, ${tt_src1} and ${tt_dst}.
    const StringTemplate preMain("    %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
                                 "     %c_i32_cc = OpConstant %i32 ${case_count}\n"
                                 "          %f16 = OpTypeFloat 16\n"
                                 "        %v2f16 = OpTypeVector %f16 2\n"
                                 "        %v3f16 = OpTypeVector %f16 3\n"
                                 "        %v4f16 = OpTypeVector %f16 4\n"

                                 "     %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
                                 "     %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
                                 "     %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
                                 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
                                 "%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
                                 "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"

                                 "     %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
                                 "   %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
                                 "  %ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
                                 "       %up_u32 = OpTypePointer Uniform %u32\n"
                                 "   %SSBO_v2f16 = OpTypeStruct %ra_u32_ndp\n"
                                 "   %SSBO_v3f16 = OpTypeStruct %ra_ra_u32_2\n"
                                 "   %SSBO_v4f16 = OpTypeStruct %ra_ra_u32_2\n"

                                 "%up_SSBO_v2f16 = OpTypePointer Uniform %SSBO_v2f16\n"
                                 "%up_SSBO_v3f16 = OpTypePointer Uniform %SSBO_v3f16\n"
                                 "%up_SSBO_v4f16 = OpTypePointer Uniform %SSBO_v4f16\n"

                                 "        %fun_t = OpTypeFunction %${tt_dst} %${tt_src0} %${tt_src1} %i32\n"

                                 "    %ssbo_src0 = OpVariable %up_SSBO_${tt_src0} Uniform\n"
                                 "    %ssbo_src1 = OpVariable %up_SSBO_${tt_src1} Uniform\n"
                                 "     %ssbo_dst = OpVariable %up_SSBO_${tt_dst} Uniform\n");

    // Layout decorations and descriptor bindings: src0 -> 0, src1 -> 1, dst -> 2 (set 0).
    const StringTemplate decoration("OpDecorate %ra_u32_2 ArrayStride 4\n"
                                    "OpDecorate %ra_u32_ndp ArrayStride 4\n"
                                    "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"

                                    "OpMemberDecorate %SSBO_v2f16 0 Offset 0\n"
                                    "OpDecorate %SSBO_v2f16 BufferBlock\n"

                                    "OpMemberDecorate %SSBO_v3f16 0 Offset 0\n"
                                    "OpDecorate %SSBO_v3f16 BufferBlock\n"

                                    "OpMemberDecorate %SSBO_v4f16 0 Offset 0\n"
                                    "OpDecorate %SSBO_v4f16 BufferBlock\n"

                                    "OpDecorate %ssbo_src0 DescriptorSet 0\n"
                                    "OpDecorate %ssbo_src0 Binding 0\n"
                                    "OpDecorate %ssbo_src1 DescriptorSet 0\n"
                                    "OpDecorate %ssbo_src1 Binding 1\n"
                                    "OpDecorate %ssbo_dst DescriptorSet 0\n"
                                    "OpDecorate %ssbo_dst Binding 2\n");

    // %test_code runs the per-data-point loop; %sw_fun selects the shuffle variant for
    // the current index and returns directly from inside each case.
    const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
                                 "    %param = OpFunctionParameter %v4f32\n"
                                 "    %entry = OpLabel\n"

                                 "        %i = OpVariable %fp_i32 Function\n"
                                 "             OpStore %i %c_i32_0\n"

                                 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
                                 "             OpSelectionMerge %end_if None\n"
                                 "             OpBranchConditional %will_run %run_test %end_if\n"

                                 " %run_test = OpLabel\n"
                                 "             OpBranch %loop\n"

                                 "     %loop = OpLabel\n"
                                 "    %i_cmp = OpLoad %i32 %i\n"
                                 "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
                                 "             OpLoopMerge %merge %next None\n"
                                 "             OpBranchConditional %lt %write %merge\n"

                                 "    %write = OpLabel\n"
                                 "      %ndx = OpLoad %i32 %i\n"
                                 " %val_src0 = OpFunctionCall %${tt_src0} %ld_arg_ssbo_src0 %ndx\n"
                                 " %val_src1 = OpFunctionCall %${tt_src1} %ld_arg_ssbo_src1 %ndx\n"
                                 "  %val_dst = OpFunctionCall %${tt_dst} %sw_fun %val_src0 %val_src1 %ndx\n"
                                 "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
                                 "             OpBranch %next\n"

                                 "     %next = OpLabel\n"
                                 "    %i_cur = OpLoad %i32 %i\n"
                                 "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
                                 "             OpStore %i %i_new\n"
                                 "             OpBranch %loop\n"

                                 "    %merge = OpLabel\n"
                                 "             OpBranch %end_if\n"
                                 "   %end_if = OpLabel\n"
                                 "             OpReturnValue %param\n"
                                 "             OpFunctionEnd\n"
                                 "\n"

                                 "   %sw_fun = OpFunction %${tt_dst} None %fun_t\n"
                                 "%sw_param0 = OpFunctionParameter %${tt_src0}\n"
                                 "%sw_param1 = OpFunctionParameter %${tt_src1}\n"
                                 "%sw_paramn = OpFunctionParameter %i32\n"
                                 " %sw_entry = OpLabel\n"
                                 "   %modulo = OpSMod %i32 %sw_paramn %c_i32_cc\n"
                                 "             OpSelectionMerge %switch_e None\n"
                                 "             OpSwitch %modulo %default ${case_list}\n"
                                 "${case_bodies}"
                                 "%default   = OpLabel\n"
                                 "             OpUnreachable\n" // Unreachable default case for switch statement
                                 "%switch_e  = OpLabel\n"
                                 "             OpUnreachable\n" // Unreachable merge block for switch statement
                                 "             OpFunctionEnd\n");

    // Body of a single switch case: perform one shuffle variant and return its result.
    const StringTemplate testCaseBody(
        "%case_${case_ndx}    = OpLabel\n"
        "%val_dst_${case_ndx} = OpVectorShuffle %${tt_dst} %sw_param0 %sw_param1 ${shuffle}\n"
        "             OpReturnValue %val_dst_${case_ndx}\n");

    // Enumerate every (result type, source 0 type, source 1 type) combination.
    for (uint32_t dstTypeIdx = 0; dstTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++dstTypeIdx)
    {
        const TestType &dstType = testTypes[dstTypeIdx];

        for (uint32_t comp0Idx = 0; comp0Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp0Idx)
        {
            const TestType &src0Type = testTypes[comp0Idx];

            for (uint32_t comp1Idx = 0; comp1Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp1Idx)
            {
                const TestType &src1Type    = testTypes[comp1Idx];
                // Strides in deFloat16 elements; 3-component vectors take four elements per data point.
                const uint32_t input0Stride = (src0Type.typeComponents == 3) ? 4 : src0Type.typeComponents;
                const uint32_t input1Stride = (src1Type.typeComponents == 3) ? 4 : src1Type.typeComponents;
                const uint32_t outputStride = (dstType.typeComponents == 3) ? 4 : dstType.typeComponents;
                // Fresh random inputs are drawn from rnd for every combination (source 0 first).
                const vector<deFloat16> float16Input0Data = getFloat16s(rnd, input0Stride * numDataPoints);
                const vector<deFloat16> float16Input1Data = getFloat16s(rnd, input1Stride * numDataPoints);
                const vector<deFloat16> float16OutputUnused(outputStride * numDataPoints, 0);
                // Test name is the three component counts concatenated: dst, src0, src1.
                const string testName = de::toString(dstType.typeComponents) + de::toString(src0Type.typeComponents) +
                                        de::toString(src1Type.typeComponents);
                uint32_t caseCount = 0;
                SpecResource specResource;
                map<string, string> specs;
                vector<string> extensions;
                VulkanFeatures features;
                string caseBodies;
                string caseList;

                // Generate case
                {
                    vector<string> componentList;

                    // Generate component possible indices for OpVectorShuffle for components 0 and 1 in output vector
                    {
                        uint32_t caseNo = 0;

                        for (uint32_t comp0IdxLocal = 0; comp0IdxLocal < src0Type.typeComponents; ++comp0IdxLocal)
                            componentList.push_back(de::toString(caseNo++));
                        for (uint32_t comp1IdxLocal = 0; comp1IdxLocal < src1Type.typeComponents; ++comp1IdxLocal)
                            componentList.push_back(de::toString(caseNo++));
                        // Per the SPIR-V specification, 0xFFFFFFFF marks a result component with no
                        // source; getShuffledComponent() reports such components as not verifiable.
                        componentList.push_back("0xFFFFFFFF");
                    }

                    // One switch case per ordered pair of selectors, so caseCount ends up as
                    // componentList.size() squared.
                    for (uint32_t comp0IdxLocal = 0; comp0IdxLocal < componentList.size(); ++comp0IdxLocal)
                    {
                        for (uint32_t comp1IdxLocal = 0; comp1IdxLocal < componentList.size(); ++comp1IdxLocal)
                        {
                            map<string, string> specCase;
                            string shuffle = componentList[comp0IdxLocal] + " " + componentList[comp1IdxLocal];

                            // Result components 2 and 3 (if present) always select source components 0 and 1.
                            for (uint32_t compIdx = 2; compIdx < dstType.typeComponents; ++compIdx)
                                shuffle += " " + de::toString(compIdx - 2);

                            specCase["case_ndx"] = de::toString(caseCount);
                            specCase["shuffle"]  = shuffle;
                            specCase["tt_dst"]   = dstType.typeName;

                            caseBodies += testCaseBody.specialize(specCase);
                            caseList += de::toString(caseCount) + " %case_" + de::toString(caseCount) + " ";

                            caseCount++;
                        }
                    }
                }

                specs["num_data_points"] = de::toString(numDataPoints);
                specs["tt_dst"]          = dstType.typeName;
                specs["tt_src0"]         = src0Type.typeName;
                specs["tt_src1"]         = src1Type.typeName;
                specs["case_bodies"]     = caseBodies;
                specs["case_list"]       = caseList;
                specs["case_count"]      = de::toString(caseCount);

                fragments["capability"] = "OpCapability Float16\n";
                fragments["decoration"] = decoration.specialize(specs);
                fragments["pre_main"]   = preMain.specialize(specs);
                fragments["testfun"]    = testFun.specialize(specs);
                // Append the load/store helpers, bound to the buffer variable each one accesses.
                fragments["testfun"] += StringTemplate(src0Type.loadFunction).specialize({{"var", "ssbo_src0"}});
                fragments["testfun"] += StringTemplate(src1Type.loadFunction).specialize({{"var", "ssbo_src1"}});
                fragments["testfun"] += StringTemplate(dstType.storeFunction).specialize({{"var", "ssbo_dst"}});

                // Resource order matches the bindings declared in the decoration template.
                specResource.inputs.push_back(
                    Resource(BufferSp(new Float16Buffer(float16Input0Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
                specResource.inputs.push_back(
                    Resource(BufferSp(new Float16Buffer(float16Input1Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
                specResource.outputs.push_back(
                    Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
                specResource.verifyIO = getFloat16VectorShuffleVerifyIOFunc(
                    dstType.typeComponents, src0Type.typeComponents, src1Type.typeComponents);

                extensions.push_back("VK_KHR_shader_float16_int8");

                features.extFloat16Int8.shaderFloat16 = true;
                // Requested only when the resource type reports that graphics features are required.
                if (specResource.graphicsFeaturesRequired)
                    features.coreFeatures.vertexPipelineStoresAndAtomics = true;

                finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features,
                                      extensions, IVec3(1, 1, 1));
            }
        }
    }

    return testGroup.release();
}
13242 
compareFP16CompositeFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)13243 bool compareFP16CompositeFunc(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
13244                               const std::vector<Resource> &, TestLog &log)
13245 {
13246     if (inputs.size() != 1 || outputAllocs.size() != 1)
13247         return false;
13248 
13249     vector<uint8_t> input1Bytes;
13250 
13251     inputs[0].getBytes(input1Bytes);
13252 
13253     DE_ASSERT(input1Bytes.size() > 0);
13254     DE_ASSERT(input1Bytes.size() % sizeof(deFloat16) == 0);
13255 
13256     const size_t iterations             = input1Bytes.size() / sizeof(deFloat16);
13257     const deFloat16 *const input1AsFP16 = (const deFloat16 *)&input1Bytes[0];
13258     const deFloat16 *const outputAsFP16 = (const deFloat16 *)outputAllocs[0]->getHostPtr();
13259     const deFloat16 exceptionValue      = tcu::Float16(-1.0).bits();
13260     std::string error;
13261 
13262     for (size_t idx = 0; idx < iterations; ++idx)
13263     {
13264         if (input1AsFP16[idx] == exceptionValue)
13265             continue;
13266 
13267         if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
13268         {
13269             log << TestLog::Message << "At " << idx << ":" << error << TestLog::EndMessage;
13270 
13271             return false;
13272         }
13273     }
13274 
13275     return true;
13276 }
13277 
13278 template <class SpecResource>
createFloat16CompositeConstructSet(tcu::TestContext & testCtx)13279 tcu::TestCaseGroup *createFloat16CompositeConstructSet(tcu::TestContext &testCtx)
13280 {
13281     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "opcompositeconstruct"));
13282     const uint32_t numElements           = 8;
13283     const string testName                = "struct";
13284     const uint32_t structItemsCount      = 88;
13285     const uint32_t exceptionIndices[]    = {1, 7, 15, 17, 25, 33, 51, 55, 59, 63, 67, 71, 84, 85, 86, 87};
13286     const deFloat16 exceptionValue       = tcu::Float16(-1.0).bits();
13287     const uint32_t fieldModifier         = 2;
13288     const uint32_t fieldModifiedMulIndex = 60;
13289     const uint32_t fieldModifiedAddIndex = 66;
13290 
13291     const StringTemplate preMain("    %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
13292                                  "          %f16 = OpTypeFloat 16\n"
13293                                  "        %v2f16 = OpTypeVector %f16 2\n"
13294                                  "        %v3f16 = OpTypeVector %f16 3\n"
13295                                  "        %v4f16 = OpTypeVector %f16 4\n"
13296                                  "    %c_f16_mod = OpConstant %f16 ${field_modifier}\n"
13297 
13298                                  "${consts}"
13299 
13300                                  "     %c_f16_n1 = OpConstant %f16 -1.0\n"
13301                                  "   %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_n1 %c_f16_n1\n"
13302                                  "      %c_u32_5 = OpConstant %u32 5\n"
13303                                  "      %c_u32_6 = OpConstant %u32 6\n"
13304                                  "      %c_u32_7 = OpConstant %u32 7\n"
13305                                  "      %c_u32_8 = OpConstant %u32 8\n"
13306                                  "      %c_u32_9 = OpConstant %u32 9\n"
13307                                  "     %c_u32_10 = OpConstant %u32 10\n"
13308                                  "     %c_u32_11 = OpConstant %u32 11\n"
13309                                  "     %c_u32_12 = OpConstant %u32 12\n"
13310                                  "     %c_u32_13 = OpConstant %u32 13\n"
13311                                  "     %c_u32_14 = OpConstant %u32 14\n"
13312                                  "     %c_u32_15 = OpConstant %u32 15\n"
13313                                  "     %c_u32_16 = OpConstant %u32 16\n"
13314                                  "     %c_u32_17 = OpConstant %u32 17\n"
13315                                  "     %c_u32_18 = OpConstant %u32 18\n"
13316                                  "     %c_u32_19 = OpConstant %u32 19\n"
13317                                  "     %c_u32_20 = OpConstant %u32 20\n"
13318                                  "     %c_u32_21 = OpConstant %u32 21\n"
13319                                  "     %c_u32_22 = OpConstant %u32 22\n"
13320                                  "     %c_u32_23 = OpConstant %u32 23\n"
13321                                  "     %c_u32_24 = OpConstant %u32 24\n"
13322                                  "     %c_u32_25 = OpConstant %u32 25\n"
13323                                  "     %c_u32_26 = OpConstant %u32 26\n"
13324                                  "     %c_u32_27 = OpConstant %u32 27\n"
13325                                  "     %c_u32_28 = OpConstant %u32 28\n"
13326                                  "     %c_u32_29 = OpConstant %u32 29\n"
13327                                  "     %c_u32_30 = OpConstant %u32 30\n"
13328                                  "     %c_u32_31 = OpConstant %u32 31\n"
13329                                  "     %c_u32_33 = OpConstant %u32 33\n"
13330                                  "     %c_u32_34 = OpConstant %u32 34\n"
13331                                  "     %c_u32_35 = OpConstant %u32 35\n"
13332                                  "     %c_u32_36 = OpConstant %u32 36\n"
13333                                  "     %c_u32_37 = OpConstant %u32 37\n"
13334                                  "     %c_u32_38 = OpConstant %u32 38\n"
13335                                  "     %c_u32_39 = OpConstant %u32 39\n"
13336                                  "     %c_u32_40 = OpConstant %u32 40\n"
13337                                  "     %c_u32_41 = OpConstant %u32 41\n"
13338                                  "     %c_u32_44 = OpConstant %u32 44\n"
13339 
13340                                  " %f16arr3      = OpTypeArray %f16 %c_u32_3\n"
13341                                  " %v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
13342                                  " %v2f16arr5    = OpTypeArray %v2f16 %c_u32_5\n"
13343                                  " %v3f16arr5    = OpTypeArray %v3f16 %c_u32_5\n"
13344                                  " %v4f16arr3    = OpTypeArray %v4f16 %c_u32_3\n"
13345                                  " %struct16     = OpTypeStruct %f16 %v2f16arr3\n"
13346                                  " %struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
13347                                  " %st_test      = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr3 "
13348                                  "%v2f16arr5 %f16 %v3f16arr5 %v4f16arr3\n"
13349 
13350                                  "       %up_u32 = OpTypePointer Uniform %u32\n"
13351                                  "    %ra_u32_44 = OpTypeArray %u32 %c_u32_44\n"
13352                                  "    %ra_ra_u32 = OpTypeArray %ra_u32_44 %c_i32_ndp\n"
13353                                  "      %SSBO_st = OpTypeStruct %ra_ra_u32\n"
13354                                  "   %up_SSBO_st = OpTypePointer Uniform %SSBO_st\n"
13355 
13356                                  "     %ssbo_dst = OpVariable %up_SSBO_st Uniform\n");
13357 
13358     const StringTemplate decoration("OpDecorate %SSBO_st BufferBlock\n"
13359                                     "OpDecorate %ra_u32_44 ArrayStride 4\n"
13360                                     "OpDecorate %ra_ra_u32 ArrayStride ${struct_item_size}\n"
13361                                     "OpDecorate %ssbo_dst DescriptorSet 0\n"
13362                                     "OpDecorate %ssbo_dst Binding 1\n"
13363 
13364                                     "OpMemberDecorate %SSBO_st 0 Offset 0\n"
13365 
13366                                     "OpDecorate %v2f16arr3 ArrayStride 4\n"
13367                                     "OpMemberDecorate %struct16 0 Offset 0\n"
13368                                     "OpMemberDecorate %struct16 1 Offset 4\n"
13369                                     "OpDecorate %struct16arr3 ArrayStride 16\n"
13370                                     "OpDecorate %f16arr3 ArrayStride 2\n"
13371                                     "OpDecorate %v2f16arr5 ArrayStride 4\n"
13372                                     "OpDecorate %v3f16arr5 ArrayStride 8\n"
13373                                     "OpDecorate %v4f16arr3 ArrayStride 8\n"
13374 
13375                                     "OpMemberDecorate %st_test 0 Offset 0\n"
13376                                     "OpMemberDecorate %st_test 1 Offset 4\n"
13377                                     "OpMemberDecorate %st_test 2 Offset 8\n"
13378                                     "OpMemberDecorate %st_test 3 Offset 16\n"
13379                                     "OpMemberDecorate %st_test 4 Offset 24\n"
13380                                     "OpMemberDecorate %st_test 5 Offset 32\n"
13381                                     "OpMemberDecorate %st_test 6 Offset 80\n"
13382                                     "OpMemberDecorate %st_test 7 Offset 100\n"
13383                                     "OpMemberDecorate %st_test 8 Offset 104\n"
13384                                     "OpMemberDecorate %st_test 9 Offset 144\n");
13385 
13386     const StringTemplate testFun(
13387         " %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
13388         "     %param = OpFunctionParameter %v4f32\n"
13389         "     %entry = OpLabel\n"
13390 
13391         "         %i = OpVariable %fp_i32 Function\n"
13392         "              OpStore %i %c_i32_0\n"
13393 
13394         "  %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
13395         "              OpSelectionMerge %end_if None\n"
13396         "              OpBranchConditional %will_run %run_test %end_if\n"
13397 
13398         "  %run_test = OpLabel\n"
13399         "              OpBranch %loop\n"
13400 
13401         "      %loop = OpLabel\n"
13402         "     %i_cmp = OpLoad %i32 %i\n"
13403         "        %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
13404         "              OpLoopMerge %merge %next None\n"
13405         "              OpBranchConditional %lt %write %merge\n"
13406 
13407         "     %write = OpLabel\n"
13408         "       %ndx = OpLoad %i32 %i\n"
13409 
13410         "      %fld1 = OpCompositeConstruct %v2f16 %c_f16_2 %c_f16_3\n"
13411         "      %fld2 = OpCompositeConstruct %v3f16 %c_f16_4 %c_f16_5 %c_f16_6\n"
13412         "      %fld3 = OpCompositeConstruct %v4f16 %c_f16_8 %c_f16_9 %c_f16_10 %c_f16_11\n"
13413 
13414         "      %fld4 = OpCompositeConstruct %f16arr3 %c_f16_12 %c_f16_13 %c_f16_14\n"
13415 
13416         "%fld5_0_1_0 = OpCompositeConstruct %v2f16 %c_f16_18 %c_f16_19\n"
13417         "%fld5_0_1_1 = OpCompositeConstruct %v2f16 %c_f16_20 %c_f16_21\n"
13418         "%fld5_0_1_2 = OpCompositeConstruct %v2f16 %c_f16_22 %c_f16_23\n"
13419         "  %fld5_0_1 = OpCompositeConstruct %v2f16arr3 %fld5_0_1_0 %fld5_0_1_1 %fld5_0_1_2\n"
13420         "    %fld5_0 = OpCompositeConstruct %struct16 %c_f16_16 %fld5_0_1\n"
13421 
13422         "%fld5_1_1_0 = OpCompositeConstruct %v2f16 %c_f16_26 %c_f16_27\n"
13423         "%fld5_1_1_1 = OpCompositeConstruct %v2f16 %c_f16_28 %c_f16_29\n"
13424         "%fld5_1_1_2 = OpCompositeConstruct %v2f16 %c_f16_30 %c_f16_31\n"
13425         "  %fld5_1_1 = OpCompositeConstruct %v2f16arr3 %fld5_1_1_0 %fld5_1_1_1 %fld5_1_1_2\n"
13426         "    %fld5_1 = OpCompositeConstruct %struct16 %c_f16_24 %fld5_1_1\n"
13427 
13428         "%fld5_2_1_0 = OpCompositeConstruct %v2f16 %c_f16_34 %c_f16_35\n"
13429         "%fld5_2_1_1 = OpCompositeConstruct %v2f16 %c_f16_36 %c_f16_37\n"
13430         "%fld5_2_1_2 = OpCompositeConstruct %v2f16 %c_f16_38 %c_f16_39\n"
13431         "  %fld5_2_1 = OpCompositeConstruct %v2f16arr3 %fld5_2_1_0 %fld5_2_1_1 %fld5_2_1_2\n"
13432         "    %fld5_2 = OpCompositeConstruct %struct16 %c_f16_32 %fld5_2_1\n"
13433 
13434         "      %fld5 = OpCompositeConstruct %struct16arr3 %fld5_0 %fld5_1 %fld5_2\n"
13435 
13436         "    %fld6_0 = OpCompositeConstruct %v2f16 %c_f16_40 %c_f16_41\n"
13437         "    %fld6_1 = OpCompositeConstruct %v2f16 %c_f16_42 %c_f16_43\n"
13438         "    %fld6_2 = OpCompositeConstruct %v2f16 %c_f16_44 %c_f16_45\n"
13439         "    %fld6_3 = OpCompositeConstruct %v2f16 %c_f16_46 %c_f16_47\n"
13440         "    %fld6_4 = OpCompositeConstruct %v2f16 %c_f16_48 %c_f16_49\n"
13441         "      %fld6 = OpCompositeConstruct %v2f16arr5 %fld6_0 %fld6_1 %fld6_2 %fld6_3 %fld6_4\n"
13442 
13443         "      %fndx = OpConvertSToF %f16 %ndx\n"
13444         "  %fld8_2a0 = OpFMul %f16 %fndx %c_f16_mod\n"
13445         "  %fld8_3b1 = OpFAdd %f16 %fndx %c_f16_mod\n"
13446 
13447         "   %fld8_2a = OpCompositeConstruct %v2f16 %fld8_2a0 %c_f16_61\n"
13448         "   %fld8_3b = OpCompositeConstruct %v2f16 %c_f16_65 %fld8_3b1\n"
13449         "    %fld8_0 = OpCompositeConstruct %v3f16 %c_f16_52 %c_f16_53 %c_f16_54\n"
13450         "    %fld8_1 = OpCompositeConstruct %v3f16 %c_f16_56 %c_f16_57 %c_f16_58\n"
13451         "    %fld8_2 = OpCompositeConstruct %v3f16 %fld8_2a %c_f16_62\n"
13452         "    %fld8_3 = OpCompositeConstruct %v3f16 %c_f16_64 %fld8_3b\n"
13453         "    %fld8_4 = OpCompositeConstruct %v3f16 %c_f16_68 %c_f16_69 %c_f16_70\n"
13454         "      %fld8 = OpCompositeConstruct %v3f16arr5 %fld8_0 %fld8_1 %fld8_2 %fld8_3 %fld8_4\n"
13455 
13456         "    %fld9_0 = OpCompositeConstruct %v4f16 %c_f16_72 %c_f16_73 %c_f16_74 %c_f16_75\n"
13457         "    %fld9_1 = OpCompositeConstruct %v4f16 %c_f16_76 %c_f16_77 %c_f16_78 %c_f16_79\n"
13458         "    %fld9_2 = OpCompositeConstruct %v4f16 %c_f16_80 %c_f16_81 %c_f16_82 %c_f16_83\n"
13459         "      %fld9 = OpCompositeConstruct %v4f16arr3 %fld9_0 %fld9_1 %fld9_2\n"
13460 
13461         "    %st_val = OpCompositeConstruct %st_test %c_f16_0 %fld1 %fld2 %fld3 %fld4 %fld5 %fld6 %c_f16_50 %fld8 "
13462         "%fld9\n"
13463 
13464         // Storage section: all elements that are not directly accessed should
13465         // have the value of -1.0. This means for f16 and v3f16 stores the v2f16
13466         // is constructed with one element from a constant -1.0.
13467         // half offset 0
13468         "      %ex_0 = OpCompositeExtract %f16 %st_val 0\n"
13469         "     %vec_0 = OpCompositeConstruct %v2f16 %ex_0 %c_f16_n1\n"
13470         "      %bc_0 = OpBitcast %u32 %vec_0\n"
13471         "     %gep_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_0\n"
13472         "              OpStore %gep_0 %bc_0\n"
13473 
13474         // <2 x half> offset 4
13475         "      %ex_1 = OpCompositeExtract %v2f16 %st_val 1\n"
13476         "      %bc_1 = OpBitcast %u32 %ex_1\n"
13477         "     %gep_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_1\n"
13478         "              OpStore %gep_1 %bc_1\n"
13479 
13480         // <3 x half> offset 8
13481         "      %ex_2 = OpCompositeExtract %v3f16 %st_val 2\n"
13482         "    %ex_2_0 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 0 1\n"
13483         "    %ex_2_1 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 2 3\n"
13484         "    %bc_2_0 = OpBitcast %u32 %ex_2_0\n"
13485         "    %bc_2_1 = OpBitcast %u32 %ex_2_1\n"
13486         "   %gep_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_2\n"
13487         "   %gep_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_3\n"
13488         "              OpStore %gep_2_0 %bc_2_0\n"
13489         "              OpStore %gep_2_1 %bc_2_1\n"
13490 
13491         // <4 x half> offset 16
13492         "      %ex_3 = OpCompositeExtract %v4f16 %st_val 3\n"
13493         "    %ex_3_0 = OpVectorShuffle %v2f16 %ex_3 %ex_3 0 1\n"
13494         "    %ex_3_1 = OpVectorShuffle %v2f16 %ex_3 %ex_3 2 3\n"
13495         "    %bc_3_0 = OpBitcast %u32 %ex_3_0\n"
13496         "    %bc_3_1 = OpBitcast %u32 %ex_3_1\n"
13497         "   %gep_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_4\n"
13498         "   %gep_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_5\n"
13499         "              OpStore %gep_3_0 %bc_3_0\n"
13500         "              OpStore %gep_3_1 %bc_3_1\n"
13501 
13502         // [3 x half] offset 24
13503         "    %ex_4_0 = OpCompositeExtract %f16 %st_val 4 0\n"
13504         "    %ex_4_1 = OpCompositeExtract %f16 %st_val 4 1\n"
13505         "    %ex_4_2 = OpCompositeExtract %f16 %st_val 4 2\n"
13506         "   %vec_4_0 = OpCompositeConstruct %v2f16 %ex_4_0 %ex_4_1\n"
13507         "   %vec_4_1 = OpCompositeConstruct %v2f16 %ex_4_2 %c_f16_n1\n"
13508         "    %bc_4_0 = OpBitcast %u32 %vec_4_0\n"
13509         "    %bc_4_1 = OpBitcast %u32 %vec_4_1\n"
13510         "   %gep_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_6\n"
13511         "   %gep_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_7\n"
13512         "              OpStore %gep_4_0 %bc_4_0\n"
13513         "              OpStore %gep_4_1 %bc_4_1\n"
13514 
13515         // [3 x {half, [3 x <2 x half>]}] offset 32
13516         "    %ex_5_0 = OpCompositeExtract %struct16 %st_val 5 0\n"
13517         "    %ex_5_1 = OpCompositeExtract %struct16 %st_val 5 1\n"
13518         "    %ex_5_2 = OpCompositeExtract %struct16 %st_val 5 2\n"
13519         "  %ex_5_0_0 = OpCompositeExtract %f16 %ex_5_0 0\n"
13520         "  %ex_5_1_0 = OpCompositeExtract %f16 %ex_5_1 0\n"
13521         "  %ex_5_2_0 = OpCompositeExtract %f16 %ex_5_2 0\n"
13522         "%ex_5_0_1_0 = OpCompositeExtract %v2f16 %ex_5_0 1 0\n"
13523         "%ex_5_0_1_1 = OpCompositeExtract %v2f16 %ex_5_0 1 1\n"
13524         "%ex_5_0_1_2 = OpCompositeExtract %v2f16 %ex_5_0 1 2\n"
13525         "%ex_5_1_1_0 = OpCompositeExtract %v2f16 %ex_5_1 1 0\n"
13526         "%ex_5_1_1_1 = OpCompositeExtract %v2f16 %ex_5_1 1 1\n"
13527         "%ex_5_1_1_2 = OpCompositeExtract %v2f16 %ex_5_1 1 2\n"
13528         "%ex_5_2_1_0 = OpCompositeExtract %v2f16 %ex_5_2 1 0\n"
13529         "%ex_5_2_1_1 = OpCompositeExtract %v2f16 %ex_5_2 1 1\n"
13530         "%ex_5_2_1_2 = OpCompositeExtract %v2f16 %ex_5_2 1 2\n"
13531         " %vec_5_0_0 = OpCompositeConstruct %v2f16 %ex_5_0_0 %c_f16_n1\n"
13532         " %vec_5_1_0 = OpCompositeConstruct %v2f16 %ex_5_1_0 %c_f16_n1\n"
13533         " %vec_5_2_0 = OpCompositeConstruct %v2f16 %ex_5_2_0 %c_f16_n1\n"
13534         "  %bc_5_0_0 = OpBitcast %u32 %vec_5_0_0\n"
13535         "  %bc_5_1_0 = OpBitcast %u32 %vec_5_1_0\n"
13536         "  %bc_5_2_0 = OpBitcast %u32 %vec_5_2_0\n"
13537         "%bc_5_0_1_0 = OpBitcast %u32 %ex_5_0_1_0\n"
13538         "%bc_5_0_1_1 = OpBitcast %u32 %ex_5_0_1_1\n"
13539         "%bc_5_0_1_2 = OpBitcast %u32 %ex_5_0_1_2\n"
13540         "%bc_5_1_1_0 = OpBitcast %u32 %ex_5_1_1_0\n"
13541         "%bc_5_1_1_1 = OpBitcast %u32 %ex_5_1_1_1\n"
13542         "%bc_5_1_1_2 = OpBitcast %u32 %ex_5_1_1_2\n"
13543         "%bc_5_2_1_0 = OpBitcast %u32 %ex_5_2_1_0\n"
13544         "%bc_5_2_1_1 = OpBitcast %u32 %ex_5_2_1_1\n"
13545         "%bc_5_2_1_2 = OpBitcast %u32 %ex_5_2_1_2\n"
13546         "  %gep_5_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_8\n"
13547         "%gep_5_0_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_9\n"
13548         "%gep_5_0_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_10\n"
13549         "%gep_5_0_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_11\n"
13550         "  %gep_5_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_12\n"
13551         "%gep_5_1_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_13\n"
13552         "%gep_5_1_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_14\n"
13553         "%gep_5_1_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_15\n"
13554         "  %gep_5_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_16\n"
13555         "%gep_5_2_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_17\n"
13556         "%gep_5_2_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_18\n"
13557         "%gep_5_2_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_19\n"
13558         "              OpStore %gep_5_0_0 %bc_5_0_0\n"
13559         "              OpStore %gep_5_0_1_0 %bc_5_0_1_0\n"
13560         "              OpStore %gep_5_0_1_1 %bc_5_0_1_1\n"
13561         "              OpStore %gep_5_0_1_2 %bc_5_0_1_2\n"
13562         "              OpStore %gep_5_1_0 %bc_5_1_0\n"
13563         "              OpStore %gep_5_1_1_0 %bc_5_1_1_0\n"
13564         "              OpStore %gep_5_1_1_1 %bc_5_1_1_1\n"
13565         "              OpStore %gep_5_1_1_2 %bc_5_1_1_2\n"
13566         "              OpStore %gep_5_2_0 %bc_5_2_0\n"
13567         "              OpStore %gep_5_2_1_0 %bc_5_2_1_0\n"
13568         "              OpStore %gep_5_2_1_1 %bc_5_2_1_1\n"
13569         "              OpStore %gep_5_2_1_2 %bc_5_2_1_2\n"
13570 
13571         // [5 x <2 x half>] offset 80
13572         "    %ex_6_0 = OpCompositeExtract %v2f16 %st_val 6 0\n"
13573         "    %ex_6_1 = OpCompositeExtract %v2f16 %st_val 6 1\n"
13574         "    %ex_6_2 = OpCompositeExtract %v2f16 %st_val 6 2\n"
13575         "    %ex_6_3 = OpCompositeExtract %v2f16 %st_val 6 3\n"
13576         "    %ex_6_4 = OpCompositeExtract %v2f16 %st_val 6 4\n"
13577         "    %bc_6_0 = OpBitcast %u32 %ex_6_0\n"
13578         "    %bc_6_1 = OpBitcast %u32 %ex_6_1\n"
13579         "    %bc_6_2 = OpBitcast %u32 %ex_6_2\n"
13580         "    %bc_6_3 = OpBitcast %u32 %ex_6_3\n"
13581         "    %bc_6_4 = OpBitcast %u32 %ex_6_4\n"
13582         "   %gep_6_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_20\n"
13583         "   %gep_6_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_21\n"
13584         "   %gep_6_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_22\n"
13585         "   %gep_6_3 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_23\n"
13586         "   %gep_6_4 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_24\n"
13587         "              OpStore %gep_6_0 %bc_6_0\n"
13588         "              OpStore %gep_6_1 %bc_6_1\n"
13589         "              OpStore %gep_6_2 %bc_6_2\n"
13590         "              OpStore %gep_6_3 %bc_6_3\n"
13591         "              OpStore %gep_6_4 %bc_6_4\n"
13592 
13593         // half offset 100
13594         "      %ex_7 = OpCompositeExtract %f16 %st_val 7\n"
13595         "     %vec_7 = OpCompositeConstruct %v2f16 %ex_7 %c_f16_n1\n"
13596         "      %bc_7 = OpBitcast %u32 %vec_7\n"
13597         "     %gep_7 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_25\n"
13598         "              OpStore %gep_7 %bc_7\n"
13599 
13600         // [5 x <3 x half>] offset 104
13601         "    %ex_8_0 = OpCompositeExtract %v3f16 %st_val 8 0\n"
13602         "    %ex_8_1 = OpCompositeExtract %v3f16 %st_val 8 1\n"
13603         "    %ex_8_2 = OpCompositeExtract %v3f16 %st_val 8 2\n"
13604         "    %ex_8_3 = OpCompositeExtract %v3f16 %st_val 8 3\n"
13605         "    %ex_8_4 = OpCompositeExtract %v3f16 %st_val 8 4\n"
13606         " %vec_8_0_0 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 0 1\n"
13607         " %vec_8_0_1 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 2 3\n"
13608         " %vec_8_1_0 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 0 1\n"
13609         " %vec_8_1_1 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 2 3\n"
13610         " %vec_8_2_0 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 0 1\n"
13611         " %vec_8_2_1 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 2 3\n"
13612         " %vec_8_3_0 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 0 1\n"
13613         " %vec_8_3_1 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 2 3\n"
13614         " %vec_8_4_0 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 0 1\n"
13615         " %vec_8_4_1 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 2 3\n"
13616         "  %bc_8_0_0 = OpBitcast %u32 %vec_8_0_0\n"
13617         "  %bc_8_0_1 = OpBitcast %u32 %vec_8_0_1\n"
13618         "  %bc_8_1_0 = OpBitcast %u32 %vec_8_1_0\n"
13619         "  %bc_8_1_1 = OpBitcast %u32 %vec_8_1_1\n"
13620         "  %bc_8_2_0 = OpBitcast %u32 %vec_8_2_0\n"
13621         "  %bc_8_2_1 = OpBitcast %u32 %vec_8_2_1\n"
13622         "  %bc_8_3_0 = OpBitcast %u32 %vec_8_3_0\n"
13623         "  %bc_8_3_1 = OpBitcast %u32 %vec_8_3_1\n"
13624         "  %bc_8_4_0 = OpBitcast %u32 %vec_8_4_0\n"
13625         "  %bc_8_4_1 = OpBitcast %u32 %vec_8_4_1\n"
13626         " %gep_8_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_26\n"
13627         " %gep_8_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_27\n"
13628         " %gep_8_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_28\n"
13629         " %gep_8_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_29\n"
13630         " %gep_8_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_30\n"
13631         " %gep_8_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_31\n"
13632         " %gep_8_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_32\n"
13633         " %gep_8_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_33\n"
13634         " %gep_8_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_34\n"
13635         " %gep_8_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_35\n"
13636         "              OpStore %gep_8_0_0 %bc_8_0_0\n"
13637         "              OpStore %gep_8_0_1 %bc_8_0_1\n"
13638         "              OpStore %gep_8_1_0 %bc_8_1_0\n"
13639         "              OpStore %gep_8_1_1 %bc_8_1_1\n"
13640         "              OpStore %gep_8_2_0 %bc_8_2_0\n"
13641         "              OpStore %gep_8_2_1 %bc_8_2_1\n"
13642         "              OpStore %gep_8_3_0 %bc_8_3_0\n"
13643         "              OpStore %gep_8_3_1 %bc_8_3_1\n"
13644         "              OpStore %gep_8_4_0 %bc_8_4_0\n"
13645         "              OpStore %gep_8_4_1 %bc_8_4_1\n"
13646 
13647         // [3 x <4 x half>] offset 144
13648         "    %ex_9_0 = OpCompositeExtract %v4f16 %st_val 9 0\n"
13649         "    %ex_9_1 = OpCompositeExtract %v4f16 %st_val 9 1\n"
13650         "    %ex_9_2 = OpCompositeExtract %v4f16 %st_val 9 2\n"
13651         " %vec_9_0_0 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 0 1\n"
13652         " %vec_9_0_1 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 2 3\n"
13653         " %vec_9_1_0 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 0 1\n"
13654         " %vec_9_1_1 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 2 3\n"
13655         " %vec_9_2_0 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 0 1\n"
13656         " %vec_9_2_1 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 2 3\n"
13657         "  %bc_9_0_0 = OpBitcast %u32 %vec_9_0_0\n"
13658         "  %bc_9_0_1 = OpBitcast %u32 %vec_9_0_1\n"
13659         "  %bc_9_1_0 = OpBitcast %u32 %vec_9_1_0\n"
13660         "  %bc_9_1_1 = OpBitcast %u32 %vec_9_1_1\n"
13661         "  %bc_9_2_0 = OpBitcast %u32 %vec_9_2_0\n"
13662         "  %bc_9_2_1 = OpBitcast %u32 %vec_9_2_1\n"
13663         " %gep_9_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_36\n"
13664         " %gep_9_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_37\n"
13665         " %gep_9_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_38\n"
13666         " %gep_9_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_39\n"
13667         " %gep_9_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_40\n"
13668         " %gep_9_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_41\n"
13669         "              OpStore %gep_9_0_0 %bc_9_0_0\n"
13670         "              OpStore %gep_9_0_1 %bc_9_0_1\n"
13671         "              OpStore %gep_9_1_0 %bc_9_1_0\n"
13672         "              OpStore %gep_9_1_1 %bc_9_1_1\n"
13673         "              OpStore %gep_9_2_0 %bc_9_2_0\n"
13674         "              OpStore %gep_9_2_1 %bc_9_2_1\n"
13675 
13676         "              OpBranch %next\n"
13677 
13678         "      %next = OpLabel\n"
13679         "     %i_cur = OpLoad %i32 %i\n"
13680         "     %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13681         "              OpStore %i %i_new\n"
13682         "              OpBranch %loop\n"
13683 
13684         "     %merge = OpLabel\n"
13685         "              OpBranch %end_if\n"
13686         "    %end_if = OpLabel\n"
13687         "              OpReturnValue %param\n"
13688         "              OpFunctionEnd\n");
13689 
13690     {
13691         SpecResource specResource;
13692         map<string, string> specs;
13693         VulkanFeatures features;
13694         map<string, string> fragments;
13695         vector<string> extensions;
13696         vector<deFloat16> expectedOutput;
13697         string consts;
13698 
13699         for (uint32_t elementNdx = 0; elementNdx < numElements; ++elementNdx)
13700         {
13701             vector<deFloat16> expectedIterationOutput;
13702 
13703             for (uint32_t structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
13704                 expectedIterationOutput.push_back(tcu::Float16(float(structItemNdx)).bits());
13705 
13706             for (uint32_t structItemNdx = 0; structItemNdx < DE_LENGTH_OF_ARRAY(exceptionIndices); ++structItemNdx)
13707                 expectedIterationOutput[exceptionIndices[structItemNdx]] = exceptionValue;
13708 
13709             expectedIterationOutput[fieldModifiedMulIndex] = tcu::Float16(float(elementNdx * fieldModifier)).bits();
13710             expectedIterationOutput[fieldModifiedAddIndex] = tcu::Float16(float(elementNdx + fieldModifier)).bits();
13711 
13712             expectedOutput.insert(expectedOutput.end(), expectedIterationOutput.begin(), expectedIterationOutput.end());
13713         }
13714 
13715         for (uint32_t i = 0; i < structItemsCount; ++i)
13716             consts += "     %c_f16_" + de::toString(i) + " = OpConstant %f16 " + de::toString(i) + "\n";
13717 
13718         specs["num_elements"]     = de::toString(numElements);
13719         specs["struct_item_size"] = de::toString(structItemsCount * sizeof(deFloat16));
13720         specs["field_modifier"]   = de::toString(fieldModifier);
13721         specs["consts"]           = consts;
13722 
13723         fragments["capability"] = "OpCapability Float16\n";
13724         fragments["decoration"] = decoration.specialize(specs);
13725         fragments["pre_main"]   = preMain.specialize(specs);
13726         fragments["testfun"]    = testFun.specialize(specs);
13727 
13728         specResource.inputs.push_back(
13729             Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13730         specResource.outputs.push_back(
13731             Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13732         specResource.verifyIO = compareFP16CompositeFunc;
13733 
13734         extensions.push_back("VK_KHR_shader_float16_int8");
13735 
13736         features.extFloat16Int8.shaderFloat16 = true;
13737         if (specResource.graphicsFeaturesRequired)
13738             features.coreFeatures.vertexPipelineStoresAndAtomics = true;
13739 
13740         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
13741                               IVec3(1, 1, 1));
13742     }
13743 
13744     return testGroup.release();
13745 }
13746 
13747 template <class SpecResource>
createFloat16CompositeInsertExtractSet(tcu::TestContext & testCtx,const char * op)13748 tcu::TestCaseGroup *createFloat16CompositeInsertExtractSet(tcu::TestContext &testCtx, const char *op)
13749 {
13750     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, de::toLower(op).c_str()));
13751     const deFloat16 exceptionValue = tcu::Float16(-1.0).bits();
13752     const string opName(op);
13753     const uint32_t opIndex = (opName == "OpCompositeInsert")  ? 0 :
13754                              (opName == "OpCompositeExtract") ? 1 :
13755                                                                 std::numeric_limits<uint32_t>::max();
13756 
13757     const StringTemplate preMain("   %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
13758                                  "  %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
13759                                  "  %c_i32_size = OpConstant %i32 ${struct_u32s}\n"
13760                                  "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
13761                                  " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
13762                                  "         %f16 = OpTypeFloat 16\n"
13763                                  "       %v2f16 = OpTypeVector %f16 2\n"
13764                                  "       %v3f16 = OpTypeVector %f16 3\n"
13765                                  "       %v4f16 = OpTypeVector %f16 4\n"
13766                                  "    %c_f16_na = OpConstant %f16 -1.0\n"
13767                                  "  %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_na %c_f16_na\n"
13768                                  "     %c_u32_5 = OpConstant %u32 5\n"
13769                                  "     %c_i32_5 = OpConstant %i32 5\n"
13770                                  "     %c_i32_6 = OpConstant %i32 6\n"
13771                                  "     %c_i32_7 = OpConstant %i32 7\n"
13772                                  "     %c_i32_8 = OpConstant %i32 8\n"
13773                                  "     %c_i32_9 = OpConstant %i32 9\n"
13774                                  "    %c_i32_10 = OpConstant %i32 10\n"
13775                                  "    %c_i32_11 = OpConstant %i32 11\n"
13776 
13777                                  "%f16arr3      = OpTypeArray %f16 %c_u32_3\n"
13778                                  "%v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
13779                                  "%v2f16arr5    = OpTypeArray %v2f16 %c_u32_5\n"
13780                                  "%v3f16arr5    = OpTypeArray %v3f16 %c_u32_5\n"
13781                                  "%v4f16arr3    = OpTypeArray %v4f16 %c_u32_3\n"
13782                                  "%struct16     = OpTypeStruct %f16 %v2f16arr3\n"
13783                                  "%struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
13784                                  "%st_test      = OpTypeStruct %${field_type}\n"
13785 
13786                                  "      %ra_f16 = OpTypeArray %u32 %c_i32_hndp\n"
13787                                  "       %ra_st = OpTypeArray %u32 %c_i32_size\n"
13788                                  "      %up_u32 = OpTypePointer Uniform %u32\n"
13789                                  "     %st_test_i32_fn = OpTypeFunction %st_test %i32\n"
13790                                  "%void_st_test_i32_fn = OpTypeFunction %void %st_test %i32\n"
13791                                  "         %f16_i32_fn = OpTypeFunction %f16 %i32\n"
13792                                  "    %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
13793                                  "       %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
13794                                  "  %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
13795 
13796                                  "${op_premain_decls}"
13797 
13798                                  " %up_SSBO_src = OpTypePointer Uniform %SSBO_src\n"
13799                                  " %up_SSBO_dst = OpTypePointer Uniform %SSBO_dst\n"
13800 
13801                                  "    %ssbo_src = OpVariable %up_SSBO_src Uniform\n"
13802                                  "    %ssbo_dst = OpVariable %up_SSBO_dst Uniform\n");
13803 
13804     const StringTemplate decoration("OpDecorate %SSBO_src BufferBlock\n"
13805                                     "OpDecorate %SSBO_dst BufferBlock\n"
13806                                     "OpDecorate %ra_f16 ArrayStride 4\n"
13807                                     "OpDecorate %ra_st ArrayStride 4\n"
13808                                     "OpDecorate %ssbo_src DescriptorSet 0\n"
13809                                     "OpDecorate %ssbo_src Binding 0\n"
13810                                     "OpDecorate %ssbo_dst DescriptorSet 0\n"
13811                                     "OpDecorate %ssbo_dst Binding 1\n"
13812 
13813                                     "OpMemberDecorate %SSBO_src 0 Offset 0\n"
13814                                     "OpMemberDecorate %SSBO_dst 0 Offset 0\n"
13815 
13816                                     "OpDecorate %v2f16arr3 ArrayStride 4\n"
13817                                     "OpMemberDecorate %struct16 0 Offset 0\n"
13818                                     "OpMemberDecorate %struct16 1 Offset 4\n"
13819                                     "OpDecorate %struct16arr3 ArrayStride 16\n"
13820                                     "OpDecorate %f16arr3 ArrayStride 2\n"
13821                                     "OpDecorate %v2f16arr5 ArrayStride 4\n"
13822                                     "OpDecorate %v3f16arr5 ArrayStride 8\n"
13823                                     "OpDecorate %v4f16arr3 ArrayStride 8\n"
13824 
13825                                     "OpMemberDecorate %st_test 0 Offset 0\n");
13826 
13827     const StringTemplate testFun(" %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
13828                                  "     %param = OpFunctionParameter %v4f32\n"
13829                                  "     %entry = OpLabel\n"
13830 
13831                                  "         %i = OpVariable %fp_i32 Function\n"
13832                                  "              OpStore %i %c_i32_0\n"
13833 
13834                                  "  %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
13835                                  "              OpSelectionMerge %end_if None\n"
13836                                  "              OpBranchConditional %will_run %run_test %end_if\n"
13837 
13838                                  "  %run_test = OpLabel\n"
13839                                  "              OpBranch %loop\n"
13840 
13841                                  "      %loop = OpLabel\n"
13842                                  "     %i_cmp = OpLoad %i32 %i\n"
13843                                  "        %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
13844                                  "              OpLoopMerge %merge %next None\n"
13845                                  "              OpBranchConditional %lt %write %merge\n"
13846 
13847                                  "     %write = OpLabel\n"
13848                                  "       %ndx = OpLoad %i32 %i\n"
13849 
13850                                  "${op_sw_fun_call}"
13851 
13852                                  "    %dst_st = OpFunctionCall %void %${st_call} %val_dst %${st_ndx}\n"
13853                                  "              OpBranch %next\n"
13854 
13855                                  "      %next = OpLabel\n"
13856                                  "     %i_cur = OpLoad %i32 %i\n"
13857                                  "     %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13858                                  "              OpStore %i %i_new\n"
13859                                  "              OpBranch %loop\n"
13860 
13861                                  "     %merge = OpLabel\n"
13862                                  "              OpBranch %end_if\n"
13863                                  "    %end_if = OpLabel\n"
13864                                  "              OpReturnValue %param\n"
13865                                  "              OpFunctionEnd\n"
13866 
13867                                  "${op_sw_fun_header}"
13868                                  " %sw_param = OpFunctionParameter %st_test\n"
13869                                  "%sw_paramn = OpFunctionParameter %i32\n"
13870                                  " %sw_entry = OpLabel\n"
13871                                  "             OpSelectionMerge %switch_e None\n"
13872                                  "             OpSwitch %sw_paramn %default ${case_list}\n"
13873 
13874                                  "${case_bodies}"
13875 
13876                                  "%default   = OpLabel\n"
13877                                  "             OpReturnValue ${op_case_default_value}\n"
13878                                  "%switch_e  = OpLabel\n"
13879                                  "             OpUnreachable\n" // Unreachable merge block for switch statement
13880                                  "             OpFunctionEnd\n");
13881 
13882     const StringTemplate testCaseBody("%case_${case_ndx}    = OpLabel\n"
13883                                       "%val_ret_${case_ndx} = ${op_name} ${op_args_part} ${access_path}\n"
13884                                       "             OpReturnValue %val_ret_${case_ndx}\n");
13885 
13886     const string loadF16("        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13887                          "  %ld_${var}_param = OpFunctionParameter %i32\n"
13888                          "  %ld_${var}_entry = OpLabel\n"
13889                          "   %ld_${var}_call = OpFunctionCall %f16 %ld_arg_${var} %ld_${var}_param\n"
13890                          "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13891                          "                     OpReturnValue %ld_${var}_st_test\n"
13892                          "                     OpFunctionEnd\n" +
13893                          loadScalarF16FromUint);
13894 
13895     const string loadV2F16("        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13896                            "  %ld_${var}_param = OpFunctionParameter %i32\n"
13897                            "  %ld_${var}_entry = OpLabel\n"
13898                            "   %ld_${var}_call = OpFunctionCall %v2f16 %ld_arg_${var} %ld_${var}_param\n"
13899                            "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13900                            "                     OpReturnValue %ld_${var}_st_test\n"
13901                            "                     OpFunctionEnd\n" +
13902                            loadV2F16FromUint);
13903 
13904     const string loadV3F16("        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13905                            "  %ld_${var}_param = OpFunctionParameter %i32\n"
13906                            "  %ld_${var}_entry = OpLabel\n"
13907                            "  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13908                            "  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13909                            "   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13910                            "   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13911                            "   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13912                            "   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13913                            "    %ld_${var}_vec = OpVectorShuffle %v3f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2\n"
13914                            "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13915                            "                     OpReturnValue %ld_${var}_st_test\n"
13916                            "                     OpFunctionEnd\n");
13917 
13918     const string loadV4F16("        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13919                            "  %ld_${var}_param = OpFunctionParameter %i32\n"
13920                            "  %ld_${var}_entry = OpLabel\n"
13921                            "  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13922                            "  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13923                            "   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13924                            "   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13925                            "   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13926                            "   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13927                            "    %ld_${var}_vec = OpVectorShuffle %v4f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2 3\n"
13928                            "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13929                            "                     OpReturnValue %ld_${var}_st_test\n"
13930                            "                     OpFunctionEnd\n");
13931 
13932     const string loadF16Arr3(
13933         "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13934         "  %ld_${var}_param = OpFunctionParameter %i32\n"
13935         "  %ld_${var}_entry = OpLabel\n"
13936         "  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_0\n"
13937         "  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_1\n"
13938         "   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13939         "   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13940         "   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13941         "   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13942         "   %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0 0\n"
13943         "   %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_0 1\n"
13944         "   %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_1 0\n"
13945         "   %ld_${var}_cons = OpCompositeConstruct %f16arr3 %ld_${var}_ex_0 %ld_${var}_ex_1 %ld_${var}_ex_2\n"
13946         "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13947         "                     OpReturnValue %ld_${var}_st_test\n"
13948         "                     OpFunctionEnd\n");
13949 
13950     const string loadV2F16Arr5("        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13951                                "  %ld_${var}_param = OpFunctionParameter %i32\n"
13952                                "  %ld_${var}_label = OpLabel\n"
13953                                "  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13954                                "  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13955                                "  %ld_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13956                                "  %ld_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13957                                "  %ld_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13958                                "   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13959                                "   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13960                                "   %ld_${var}_ld_2 = OpLoad %u32 %ld_${var}_gep_2\n"
13961                                "   %ld_${var}_ld_3 = OpLoad %u32 %ld_${var}_gep_3\n"
13962                                "   %ld_${var}_ld_4 = OpLoad %u32 %ld_${var}_gep_4\n"
13963                                "   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13964                                "   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13965                                "   %ld_${var}_bc_2 = OpBitcast %v2f16 %ld_${var}_ld_2\n"
13966                                "   %ld_${var}_bc_3 = OpBitcast %v2f16 %ld_${var}_ld_3\n"
13967                                "   %ld_${var}_bc_4 = OpBitcast %v2f16 %ld_${var}_ld_4\n"
13968                                "   %ld_${var}_cons = OpCompositeConstruct %v2f16arr5 %ld_${var}_bc_0 %ld_${var}_bc_1 "
13969                                "%ld_${var}_bc_2 %ld_${var}_bc_3 %ld_${var}_bc_4\n"
13970                                "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13971                                "                     OpReturnValue %ld_${var}_st_test\n"
13972                                "                     OpFunctionEnd\n");
13973 
13974     const string loadV3F16Arr5("        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13975                                "  %ld_${var}_param = OpFunctionParameter %i32\n"
13976                                "  %ld_${var}_entry = OpLabel\n"
13977                                "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13978                                "%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13979                                "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13980                                "%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13981                                "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13982                                "%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13983                                "%ld_${var}_gep_3_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13984                                "%ld_${var}_gep_3_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13985                                "%ld_${var}_gep_4_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13986                                "%ld_${var}_gep_4_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13987                                " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13988                                " %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13989                                " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13990                                " %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13991                                " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13992                                " %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13993                                " %ld_${var}_ld_3_0 = OpLoad %u32 %ld_${var}_gep_3_0\n"
13994                                " %ld_${var}_ld_3_1 = OpLoad %u32 %ld_${var}_gep_3_1\n"
13995                                " %ld_${var}_ld_4_0 = OpLoad %u32 %ld_${var}_gep_4_0\n"
13996                                " %ld_${var}_ld_4_1 = OpLoad %u32 %ld_${var}_gep_4_1\n"
13997                                " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13998                                " %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13999                                " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
14000                                " %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
14001                                " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
14002                                " %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
14003                                " %ld_${var}_bc_3_0 = OpBitcast %v2f16 %ld_${var}_ld_3_0\n"
14004                                " %ld_${var}_bc_3_1 = OpBitcast %v2f16 %ld_${var}_ld_3_1\n"
14005                                " %ld_${var}_bc_4_0 = OpBitcast %v2f16 %ld_${var}_ld_4_0\n"
14006                                " %ld_${var}_bc_4_1 = OpBitcast %v2f16 %ld_${var}_ld_4_1\n"
14007                                "  %ld_${var}_vec_0 = OpVectorShuffle %v3f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2\n"
14008                                "  %ld_${var}_vec_1 = OpVectorShuffle %v3f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2\n"
14009                                "  %ld_${var}_vec_2 = OpVectorShuffle %v3f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2\n"
14010                                "  %ld_${var}_vec_3 = OpVectorShuffle %v3f16 %ld_${var}_bc_3_0 %ld_${var}_bc_3_1 0 1 2\n"
14011                                "  %ld_${var}_vec_4 = OpVectorShuffle %v3f16 %ld_${var}_bc_4_0 %ld_${var}_bc_4_1 0 1 2\n"
14012                                "   %ld_${var}_cons = OpCompositeConstruct %v3f16arr5 %ld_${var}_vec_0 %ld_${var}_vec_1 "
14013                                "%ld_${var}_vec_2 %ld_${var}_vec_3 %ld_${var}_vec_4\n"
14014                                "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
14015                                "                     OpReturnValue %ld_${var}_st_test\n"
14016                                "                     OpFunctionEnd\n");
14017 
14018     const string loadV4F16Arr3(
14019         "        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
14020         "  %ld_${var}_param = OpFunctionParameter %i32\n"
14021         "  %ld_${var}_entry = OpLabel\n"
14022         "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14023         "%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14024         "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
14025         "%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
14026         "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
14027         "%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
14028         " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
14029         " %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
14030         " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
14031         " %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
14032         " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
14033         " %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
14034         " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
14035         " %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
14036         " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
14037         " %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
14038         " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
14039         " %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
14040         "  %ld_${var}_vec_0 = OpVectorShuffle %v4f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2 3\n"
14041         "  %ld_${var}_vec_1 = OpVectorShuffle %v4f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2 3\n"
14042         "  %ld_${var}_vec_2 = OpVectorShuffle %v4f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2 3\n"
14043         "   %ld_${var}_cons = OpCompositeConstruct %v4f16arr3 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2\n"
14044         "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
14045         "                     OpReturnValue %ld_${var}_st_test\n"
14046         "                     OpFunctionEnd\n");
14047 
14048     const string loadStruct16Arr3(
14049         "          %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
14050         "    %ld_${var}_param = OpFunctionParameter %i32\n"
14051         "    %ld_${var}_entry = OpLabel\n"
14052         "%ld_${var}_gep_0_0   = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14053         "%ld_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14054         "%ld_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
14055         "%ld_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
14056         "%ld_${var}_gep_1_0   = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
14057         "%ld_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
14058         "%ld_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
14059         "%ld_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
14060         "%ld_${var}_gep_2_0   = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
14061         "%ld_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
14062         "%ld_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
14063         "%ld_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
14064         " %ld_${var}_ld_0_0   = OpLoad %u32 %ld_${var}_gep_0_0\n"
14065         " %ld_${var}_ld_0_1_0 = OpLoad %u32 %ld_${var}_gep_0_1_0\n"
14066         " %ld_${var}_ld_0_1_1 = OpLoad %u32 %ld_${var}_gep_0_1_1\n"
14067         " %ld_${var}_ld_0_1_2 = OpLoad %u32 %ld_${var}_gep_0_1_2\n"
14068         " %ld_${var}_ld_1_0   = OpLoad %u32 %ld_${var}_gep_1_0\n"
14069         " %ld_${var}_ld_1_1_0 = OpLoad %u32 %ld_${var}_gep_1_1_0\n"
14070         " %ld_${var}_ld_1_1_1 = OpLoad %u32 %ld_${var}_gep_1_1_1\n"
14071         " %ld_${var}_ld_1_1_2 = OpLoad %u32 %ld_${var}_gep_1_1_2\n"
14072         " %ld_${var}_ld_2_0   = OpLoad %u32 %ld_${var}_gep_2_0\n"
14073         " %ld_${var}_ld_2_1_0 = OpLoad %u32 %ld_${var}_gep_2_1_0\n"
14074         " %ld_${var}_ld_2_1_1 = OpLoad %u32 %ld_${var}_gep_2_1_1\n"
14075         " %ld_${var}_ld_2_1_2 = OpLoad %u32 %ld_${var}_gep_2_1_2\n"
14076         " %ld_${var}_bc_0_0   = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
14077         " %ld_${var}_bc_0_1_0 = OpBitcast %v2f16 %ld_${var}_ld_0_1_0\n"
14078         " %ld_${var}_bc_0_1_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1_1\n"
14079         " %ld_${var}_bc_0_1_2 = OpBitcast %v2f16 %ld_${var}_ld_0_1_2\n"
14080         " %ld_${var}_bc_1_0   = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
14081         " %ld_${var}_bc_1_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_1_0\n"
14082         " %ld_${var}_bc_1_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1_1\n"
14083         " %ld_${var}_bc_1_1_2 = OpBitcast %v2f16 %ld_${var}_ld_1_1_2\n"
14084         " %ld_${var}_bc_2_0   = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
14085         " %ld_${var}_bc_2_1_0 = OpBitcast %v2f16 %ld_${var}_ld_2_1_0\n"
14086         " %ld_${var}_bc_2_1_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1_1\n"
14087         " %ld_${var}_bc_2_1_2 = OpBitcast %v2f16 %ld_${var}_ld_2_1_2\n"
14088         "    %ld_${var}_arr_0 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_0_1_0 %ld_${var}_bc_0_1_1 "
14089         "%ld_${var}_bc_0_1_2\n"
14090         "    %ld_${var}_arr_1 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_1_1_0 %ld_${var}_bc_1_1_1 "
14091         "%ld_${var}_bc_1_1_2\n"
14092         "    %ld_${var}_arr_2 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_2_1_0 %ld_${var}_bc_2_1_1 "
14093         "%ld_${var}_bc_2_1_2\n"
14094         "     %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0_0 0\n"
14095         "     %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_1_0 0\n"
14096         "     %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_2_0 0\n"
14097         "     %ld_${var}_st_0 = OpCompositeConstruct %struct16 %ld_${var}_ex_0 %ld_${var}_arr_0\n"
14098         "     %ld_${var}_st_1 = OpCompositeConstruct %struct16 %ld_${var}_ex_1 %ld_${var}_arr_1\n"
14099         "     %ld_${var}_st_2 = OpCompositeConstruct %struct16 %ld_${var}_ex_2 %ld_${var}_arr_2\n"
14100         "     %ld_${var}_cons = OpCompositeConstruct %struct16arr3 %ld_${var}_st_0 %ld_${var}_st_1 %ld_${var}_st_2\n"
14101         "  %ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
14102         "                       OpReturnValue %ld_${var}_st_test\n"
14103         "                      OpFunctionEnd\n");
14104 
14105     const string storeF16("       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14106                           "%st_${var}_param1 = OpFunctionParameter %st_test\n"
14107                           "%st_${var}_param2 = OpFunctionParameter %i32\n"
14108                           " %st_${var}_entry = OpLabel\n"
14109                           "    %st_${var}_ex = OpCompositeExtract %f16 %st_${var}_param1 0\n"
14110                           "  %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
14111                           "                    OpReturn\n"
14112                           "                    OpFunctionEnd\n" +
14113                           storeScalarF16AsUint);
14114 
14115     const string storeV2F16("       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14116                             "%st_${var}_param1 = OpFunctionParameter %st_test\n"
14117                             "%st_${var}_param2 = OpFunctionParameter %i32\n"
14118                             " %st_${var}_entry = OpLabel\n"
14119                             "    %st_${var}_ex = OpCompositeExtract %v2f16 %st_${var}_param1 0\n"
14120                             "  %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
14121                             "                    OpReturn\n"
14122                             "                    OpFunctionEnd\n" +
14123                             storeV2F16AsUint);
14124 
14125     const string storeV3F16("       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14126                             "%st_${var}_param1 = OpFunctionParameter %st_test\n"
14127                             "%st_${var}_param2 = OpFunctionParameter %i32\n"
14128                             " %st_${var}_entry = OpLabel\n"
14129                             "    %st_${var}_ex = OpCompositeExtract %v3f16 %st_${var}_param1 0\n"
14130                             " %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
14131                             " %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
14132                             "  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
14133                             "  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
14134                             " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14135                             " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14136                             "                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
14137                             "                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
14138                             "                    OpReturn\n"
14139                             "                    OpFunctionEnd\n");
14140 
14141     const string storeV4F16("       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14142                             "%st_${var}_param1 = OpFunctionParameter %st_test\n"
14143                             "%st_${var}_param2 = OpFunctionParameter %i32\n"
14144                             " %st_${var}_entry = OpLabel\n"
14145                             "    %st_${var}_ex = OpCompositeExtract %v4f16 %st_${var}_param1 0\n"
14146                             " %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
14147                             " %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
14148                             "  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
14149                             "  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
14150                             " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14151                             " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14152                             "                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
14153                             "                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
14154                             "                    OpReturn\n"
14155                             "                    OpFunctionEnd\n");
14156 
14157     const string storeF16Arr3("       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14158                               "%st_${var}_param1 = OpFunctionParameter %st_test\n"
14159                               "%st_${var}_param2 = OpFunctionParameter %i32\n"
14160                               " %st_${var}_entry = OpLabel\n"
14161                               "  %st_${var}_ex_0 = OpCompositeExtract %f16 %st_${var}_param1 0 0\n"
14162                               "  %st_${var}_ex_1 = OpCompositeExtract %f16 %st_${var}_param1 0 1\n"
14163                               "  %st_${var}_ex_2 = OpCompositeExtract %f16 %st_${var}_param1 0 2\n"
14164                               " %st_${var}_vec_0 = OpCompositeConstruct %v2f16 %st_${var}_ex_0 %st_${var}_ex_1\n"
14165                               " %st_${var}_vec_1 = OpCompositeConstruct %v2f16 %st_${var}_ex_2 %c_f16_na\n"
14166                               "  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
14167                               "  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
14168                               " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14169                               " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14170                               "                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
14171                               "                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
14172                               "                    OpReturn\n"
14173                               "                    OpFunctionEnd\n");
14174 
14175     const string storeV2F16Arr5("       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14176                                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
14177                                 "%st_${var}_param2 = OpFunctionParameter %i32\n"
14178                                 " %st_${var}_entry = OpLabel\n"
14179                                 "  %st_${var}_ex_0 = OpCompositeExtract %v2f16 %st_${var}_param1 0 0\n"
14180                                 "  %st_${var}_ex_1 = OpCompositeExtract %v2f16 %st_${var}_param1 0 1\n"
14181                                 "  %st_${var}_ex_2 = OpCompositeExtract %v2f16 %st_${var}_param1 0 2\n"
14182                                 "  %st_${var}_ex_3 = OpCompositeExtract %v2f16 %st_${var}_param1 0 3\n"
14183                                 "  %st_${var}_ex_4 = OpCompositeExtract %v2f16 %st_${var}_param1 0 4\n"
14184                                 "  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_ex_0\n"
14185                                 "  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_ex_1\n"
14186                                 "  %st_${var}_bc_2 = OpBitcast %u32 %st_${var}_ex_2\n"
14187                                 "  %st_${var}_bc_3 = OpBitcast %u32 %st_${var}_ex_3\n"
14188                                 "  %st_${var}_bc_4 = OpBitcast %u32 %st_${var}_ex_4\n"
14189                                 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14190                                 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14191                                 " %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
14192                                 " %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
14193                                 " %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
14194                                 "                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
14195                                 "                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
14196                                 "                    OpStore %st_${var}_gep_2 %st_${var}_bc_2\n"
14197                                 "                    OpStore %st_${var}_gep_3 %st_${var}_bc_3\n"
14198                                 "                    OpStore %st_${var}_gep_4 %st_${var}_bc_4\n"
14199                                 "                    OpReturn\n"
14200                                 "                    OpFunctionEnd\n");
14201 
14202     const string storeV3F16Arr5("       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14203                                 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
14204                                 "%st_${var}_param2 = OpFunctionParameter %i32\n"
14205                                 " %st_${var}_entry = OpLabel\n"
14206                                 "  %st_${var}_ex_0 = OpCompositeExtract %v3f16 %st_${var}_param1 0 0\n"
14207                                 "  %st_${var}_ex_1 = OpCompositeExtract %v3f16 %st_${var}_param1 0 1\n"
14208                                 "  %st_${var}_ex_2 = OpCompositeExtract %v3f16 %st_${var}_param1 0 2\n"
14209                                 "  %st_${var}_ex_3 = OpCompositeExtract %v3f16 %st_${var}_param1 0 3\n"
14210                                 "  %st_${var}_ex_4 = OpCompositeExtract %v3f16 %st_${var}_param1 0 4\n"
14211                                 "%st_${var}_v2_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 0 1\n"
14212                                 "%st_${var}_v2_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 2 3\n"
14213                                 "%st_${var}_v2_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 0 1\n"
14214                                 "%st_${var}_v2_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 2 3\n"
14215                                 "%st_${var}_v2_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 0 1\n"
14216                                 "%st_${var}_v2_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 2 3\n"
14217                                 "%st_${var}_v2_3_0 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 0 1\n"
14218                                 "%st_${var}_v2_3_1 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 2 3\n"
14219                                 "%st_${var}_v2_4_0 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 0 1\n"
14220                                 "%st_${var}_v2_4_1 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 2 3\n"
14221                                 "%st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
14222                                 "%st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
14223                                 "%st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
14224                                 "%st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
14225                                 "%st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
14226                                 "%st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
14227                                 "%st_${var}_bc_3_0 = OpBitcast %u32 %st_${var}_v2_3_0\n"
14228                                 "%st_${var}_bc_3_1 = OpBitcast %u32 %st_${var}_v2_3_1\n"
14229                                 "%st_${var}_bc_4_0 = OpBitcast %u32 %st_${var}_v2_4_0\n"
14230                                 "%st_${var}_bc_4_1 = OpBitcast %u32 %st_${var}_v2_4_1\n"
14231                                 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14232                                 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14233                                 " %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
14234                                 " %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
14235                                 " %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
14236                                 " %st_${var}_gep_5 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
14237                                 " %st_${var}_gep_6 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
14238                                 " %st_${var}_gep_7 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
14239                                 " %st_${var}_gep_8 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
14240                                 " %st_${var}_gep_9 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
14241                                 "                    OpStore %st_${var}_gep_0 %st_${var}_bc_0_0\n"
14242                                 "                    OpStore %st_${var}_gep_1 %st_${var}_bc_0_1\n"
14243                                 "                    OpStore %st_${var}_gep_2 %st_${var}_bc_1_0\n"
14244                                 "                    OpStore %st_${var}_gep_3 %st_${var}_bc_1_1\n"
14245                                 "                    OpStore %st_${var}_gep_4 %st_${var}_bc_2_0\n"
14246                                 "                    OpStore %st_${var}_gep_5 %st_${var}_bc_2_1\n"
14247                                 "                    OpStore %st_${var}_gep_6 %st_${var}_bc_3_0\n"
14248                                 "                    OpStore %st_${var}_gep_7 %st_${var}_bc_3_1\n"
14249                                 "                    OpStore %st_${var}_gep_8 %st_${var}_bc_4_0\n"
14250                                 "                    OpStore %st_${var}_gep_9 %st_${var}_bc_4_1\n"
14251                                 "                    OpReturn\n"
14252                                 "                    OpFunctionEnd\n");
14253 
14254     const string storeV4F16Arr3("        %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14255                                 " %st_${var}_param1 = OpFunctionParameter %st_test\n"
14256                                 " %st_${var}_param2 = OpFunctionParameter %i32\n"
14257                                 "  %st_${var}_entry = OpLabel\n"
14258                                 "   %st_${var}_ex_0 = OpCompositeExtract %v4f16 %st_${var}_param1 0 0\n"
14259                                 "   %st_${var}_ex_1 = OpCompositeExtract %v4f16 %st_${var}_param1 0 1\n"
14260                                 "   %st_${var}_ex_2 = OpCompositeExtract %v4f16 %st_${var}_param1 0 2\n"
14261                                 "%st_${var}_vec_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 0 1\n"
14262                                 "%st_${var}_vec_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 2 3\n"
14263                                 "%st_${var}_vec_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 0 1\n"
14264                                 "%st_${var}_vec_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 2 3\n"
14265                                 "%st_${var}_vec_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 0 1\n"
14266                                 "%st_${var}_vec_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 2 3\n"
14267                                 " %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_vec_0_0\n"
14268                                 " %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_vec_0_1\n"
14269                                 " %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_vec_1_0\n"
14270                                 " %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_vec_1_1\n"
14271                                 " %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_vec_2_0\n"
14272                                 " %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_vec_2_1\n"
14273                                 "%st_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14274                                 "%st_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14275                                 "%st_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
14276                                 "%st_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
14277                                 "%st_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
14278                                 "%st_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
14279                                 "                     OpStore %st_${var}_gep_0_0 %st_${var}_bc_0_0\n"
14280                                 "                     OpStore %st_${var}_gep_0_1 %st_${var}_bc_0_1\n"
14281                                 "                     OpStore %st_${var}_gep_1_0 %st_${var}_bc_1_0\n"
14282                                 "                     OpStore %st_${var}_gep_1_1 %st_${var}_bc_1_1\n"
14283                                 "                     OpStore %st_${var}_gep_2_0 %st_${var}_bc_2_0\n"
14284                                 "                     OpStore %st_${var}_gep_2_1 %st_${var}_bc_2_1\n"
14285                                 "                     OpReturn\n"
14286                                 "                     OpFunctionEnd\n");
14287 
14288     const string storeStruct16Arr3("          %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14289                                    "   %st_${var}_param1 = OpFunctionParameter %st_test\n"
14290                                    "   %st_${var}_param2 = OpFunctionParameter %i32\n"
14291                                    "    %st_${var}_entry = OpLabel\n"
14292                                    "     %st_${var}_st_0 = OpCompositeExtract %struct16 %st_${var}_param1 0 0\n"
14293                                    "     %st_${var}_st_1 = OpCompositeExtract %struct16 %st_${var}_param1 0 1\n"
14294                                    "     %st_${var}_st_2 = OpCompositeExtract %struct16 %st_${var}_param1 0 2\n"
14295                                    "   %st_${var}_el_0   = OpCompositeExtract   %f16 %st_${var}_st_0 0\n"
14296                                    "   %st_${var}_v2_0_0 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 0\n"
14297                                    "   %st_${var}_v2_0_1 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 1\n"
14298                                    "   %st_${var}_v2_0_2 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 2\n"
14299                                    "   %st_${var}_el_1   = OpCompositeExtract   %f16 %st_${var}_st_1 0\n"
14300                                    "   %st_${var}_v2_1_0 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 0\n"
14301                                    "   %st_${var}_v2_1_1 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 1\n"
14302                                    "   %st_${var}_v2_1_2 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 2\n"
14303                                    "   %st_${var}_el_2   = OpCompositeExtract   %f16 %st_${var}_st_2 0\n"
14304                                    "   %st_${var}_v2_2_0 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 0\n"
14305                                    "   %st_${var}_v2_2_1 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 1\n"
14306                                    "   %st_${var}_v2_2_2 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 2\n"
14307                                    "     %st_${var}_v2_0 = OpCompositeConstruct %v2f16 %st_${var}_el_0 %c_f16_na\n"
14308                                    "     %st_${var}_v2_1 = OpCompositeConstruct %v2f16 %st_${var}_el_1 %c_f16_na\n"
14309                                    "     %st_${var}_v2_2 = OpCompositeConstruct %v2f16 %st_${var}_el_2 %c_f16_na\n"
14310                                    "   %st_${var}_bc_0   = OpBitcast %u32 %st_${var}_v2_0\n"
14311                                    "   %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
14312                                    "   %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
14313                                    "   %st_${var}_bc_0_2 = OpBitcast %u32 %st_${var}_v2_0_2\n"
14314                                    "   %st_${var}_bc_1   = OpBitcast %u32 %st_${var}_v2_1\n"
14315                                    "   %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
14316                                    "   %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
14317                                    "   %st_${var}_bc_1_2 = OpBitcast %u32 %st_${var}_v2_1_2\n"
14318                                    "   %st_${var}_bc_2   = OpBitcast %u32 %st_${var}_v2_2\n"
14319                                    "   %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
14320                                    "   %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
14321                                    "   %st_${var}_bc_2_2 = OpBitcast %u32 %st_${var}_v2_2_2\n"
14322                                    "%st_${var}_gep_0_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14323                                    "%st_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14324                                    "%st_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
14325                                    "%st_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
14326                                    "%st_${var}_gep_1_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
14327                                    "%st_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
14328                                    "%st_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
14329                                    "%st_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
14330                                    "%st_${var}_gep_2_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
14331                                    "%st_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
14332                                    "%st_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
14333                                    "%st_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
14334                                    "                       OpStore %st_${var}_gep_0_0_0 %st_${var}_bc_0\n"
14335                                    "                       OpStore %st_${var}_gep_0_1_0 %st_${var}_bc_0_0\n"
14336                                    "                       OpStore %st_${var}_gep_0_1_1 %st_${var}_bc_0_1\n"
14337                                    "                       OpStore %st_${var}_gep_0_1_2 %st_${var}_bc_0_2\n"
14338                                    "                       OpStore %st_${var}_gep_1_0_0 %st_${var}_bc_1\n"
14339                                    "                       OpStore %st_${var}_gep_1_1_0 %st_${var}_bc_1_0\n"
14340                                    "                       OpStore %st_${var}_gep_1_1_1 %st_${var}_bc_1_1\n"
14341                                    "                       OpStore %st_${var}_gep_1_1_2 %st_${var}_bc_1_2\n"
14342                                    "                       OpStore %st_${var}_gep_2_0_0 %st_${var}_bc_2\n"
14343                                    "                       OpStore %st_${var}_gep_2_1_0 %st_${var}_bc_2_0\n"
14344                                    "                       OpStore %st_${var}_gep_2_1_1 %st_${var}_bc_2_1\n"
14345                                    "                       OpStore %st_${var}_gep_2_1_2 %st_${var}_bc_2_2\n"
14346                                    "                       OpReturn\n"
14347                                    "                       OpFunctionEnd\n");
14348 
14349     struct OpParts
14350     {
14351         const char *premainDecls;
14352         const char *swFunCall;
14353         const char *swFunHeader;
14354         const char *caseDefaultValue;
14355         const char *argsPartial;
14356     };
14357 
14358     OpParts opPartsArray[] = {
14359         // OpCompositeInsert
14360         {
14361             "       %fun_t = OpTypeFunction %st_test %f16 %st_test %i32\n"
14362             "    %SSBO_src = OpTypeStruct %ra_f16\n"
14363             "    %SSBO_dst = OpTypeStruct %ra_st\n",
14364 
14365             "   %val_new = OpFunctionCall %f16 %ld_arg_ssbo_src %ndx\n"
14366             "   %val_old = OpFunctionCall %st_test %ld_ssbo_dst %c_i32_0\n"
14367             "   %val_dst = OpFunctionCall %st_test %sw_fun %val_new %val_old %ndx\n",
14368 
14369             "   %sw_fun = OpFunction %st_test None %fun_t\n"
14370             "%sw_paramv = OpFunctionParameter %f16\n",
14371 
14372             "%sw_param",
14373 
14374             "%st_test %sw_paramv %sw_param",
14375         },
14376         // OpCompositeExtract
14377         {
14378             "       %fun_t = OpTypeFunction %f16 %st_test %i32\n"
14379             "    %SSBO_src = OpTypeStruct %ra_st\n"
14380             "    %SSBO_dst = OpTypeStruct %ra_f16\n",
14381 
14382             "   %val_src = OpFunctionCall %st_test %ld_ssbo_src %c_i32_0\n"
14383             "   %val_dst = OpFunctionCall %f16 %sw_fun %val_src %ndx\n",
14384 
14385             "   %sw_fun = OpFunction %f16 None %fun_t\n",
14386 
14387             "%c_f16_na",
14388 
14389             "%f16 %sw_param",
14390         },
14391     };
14392 
14393     DE_ASSERT(opIndex < DE_LENGTH_OF_ARRAY(opPartsArray));
14394 
14395     const char *accessPathF16[] = {
14396         "0", // %f16
14397         DE_NULL,
14398     };
14399     const char *accessPathV2F16[] = {
14400         "0 0", // %v2f16
14401         "0 1",
14402     };
14403     const char *accessPathV3F16[] = {
14404         "0 0", // %v3f16
14405         "0 1",
14406         "0 2",
14407         DE_NULL,
14408     };
14409     const char *accessPathV4F16[] = {
14410         "0 0", // %v4f16"
14411         "0 1",
14412         "0 2",
14413         "0 3",
14414     };
14415     const char *accessPathF16Arr3[] = {
14416         "0 0", // %f16arr3
14417         "0 1",
14418         "0 2",
14419         DE_NULL,
14420     };
14421     const char *accessPathStruct16Arr3[] = {
14422         "0 0 0", // %struct16arr3
14423         DE_NULL, "0 0 1 0 0", "0 0 1 0 1", "0 0 1 1 0", "0 0 1 1 1", "0 0 1 2 0", "0 0 1 2 1", "0 1 0",
14424         DE_NULL, "0 1 1 0 0", "0 1 1 0 1", "0 1 1 1 0", "0 1 1 1 1", "0 1 1 2 0", "0 1 1 2 1", "0 2 0",
14425         DE_NULL, "0 2 1 0 0", "0 2 1 0 1", "0 2 1 1 0", "0 2 1 1 1", "0 2 1 2 0", "0 2 1 2 1",
14426     };
14427     const char *accessPathV2F16Arr5[] = {
14428         "0 0 0", // %v2f16arr5
14429         "0 0 1", "0 1 0", "0 1 1", "0 2 0", "0 2 1", "0 3 0", "0 3 1", "0 4 0", "0 4 1",
14430     };
14431     const char *accessPathV3F16Arr5[] = {
14432         "0 0 0", // %v3f16arr5
14433         "0 0 1", "0 0 2", DE_NULL, "0 1 0", "0 1 1", "0 1 2", DE_NULL, "0 2 0", "0 2 1", "0 2 2",
14434         DE_NULL, "0 3 0", "0 3 1", "0 3 2", DE_NULL, "0 4 0", "0 4 1", "0 4 2", DE_NULL,
14435     };
14436     const char *accessPathV4F16Arr3[] = {
14437         "0 0 0", // %v4f16arr3
14438         "0 0 1", "0 0 2", "0 0 3", "0 1 0", "0 1 1", "0 1 2", "0 1 3", "0 2 0",
14439         "0 2 1", "0 2 2", "0 2 3", DE_NULL, DE_NULL, DE_NULL, DE_NULL,
14440     };
14441 
14442     struct TypeTestParameters
14443     {
14444         const char *name;
14445         size_t accessPathLength;
14446         const char **accessPath;
14447         const string loadFunction;
14448         const string storeFunction;
14449     };
14450 
14451     const TypeTestParameters typeTestParameters[] = {
14452         {"f16", DE_LENGTH_OF_ARRAY(accessPathF16), accessPathF16, loadF16, storeF16},
14453         {"v2f16", DE_LENGTH_OF_ARRAY(accessPathV2F16), accessPathV2F16, loadV2F16, storeV2F16},
14454         {"v3f16", DE_LENGTH_OF_ARRAY(accessPathV3F16), accessPathV3F16, loadV3F16, storeV3F16},
14455         {"v4f16", DE_LENGTH_OF_ARRAY(accessPathV4F16), accessPathV4F16, loadV4F16, storeV4F16},
14456         {"f16arr3", DE_LENGTH_OF_ARRAY(accessPathF16Arr3), accessPathF16Arr3, loadF16Arr3, storeF16Arr3},
14457         {"v2f16arr5", DE_LENGTH_OF_ARRAY(accessPathV2F16Arr5), accessPathV2F16Arr5, loadV2F16Arr5, storeV2F16Arr5},
14458         {"v3f16arr5", DE_LENGTH_OF_ARRAY(accessPathV3F16Arr5), accessPathV3F16Arr5, loadV3F16Arr5, storeV3F16Arr5},
14459         {"v4f16arr3", DE_LENGTH_OF_ARRAY(accessPathV4F16Arr3), accessPathV4F16Arr3, loadV4F16Arr3, storeV4F16Arr3},
14460         {"struct16arr3", DE_LENGTH_OF_ARRAY(accessPathStruct16Arr3), accessPathStruct16Arr3, loadStruct16Arr3,
14461          storeStruct16Arr3},
14462     };
14463 
14464     for (size_t typeTestNdx = 0; typeTestNdx < DE_LENGTH_OF_ARRAY(typeTestParameters); ++typeTestNdx)
14465     {
14466         const OpParts opParts         = opPartsArray[opIndex];
14467         const string testName         = typeTestParameters[typeTestNdx].name;
14468         const size_t structItemsCount = typeTestParameters[typeTestNdx].accessPathLength;
14469         const char **accessPath       = typeTestParameters[typeTestNdx].accessPath;
14470         SpecResource specResource;
14471         map<string, string> specs;
14472         VulkanFeatures features;
14473         map<string, string> fragments;
14474         vector<string> extensions;
14475         vector<deFloat16> inputFP16;
14476         vector<deFloat16> unusedFP16Output;
14477 
14478         // Generate values for input
14479         inputFP16.reserve(structItemsCount);
14480         for (uint32_t structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
14481             inputFP16.push_back((accessPath[structItemNdx] == DE_NULL) ? exceptionValue :
14482                                                                          tcu::Float16(float(structItemNdx)).bits());
14483 
14484         unusedFP16Output.resize(structItemsCount);
14485 
14486         // Generate cases for OpSwitch
14487         {
14488             string caseBodies;
14489             string caseList;
14490 
14491             for (uint32_t caseNdx = 0; caseNdx < structItemsCount; ++caseNdx)
14492                 if (accessPath[caseNdx] != DE_NULL)
14493                 {
14494                     map<string, string> specCase;
14495 
14496                     specCase["case_ndx"]     = de::toString(caseNdx);
14497                     specCase["access_path"]  = accessPath[caseNdx];
14498                     specCase["op_args_part"] = opParts.argsPartial;
14499                     specCase["op_name"]      = opName;
14500 
14501                     caseBodies += testCaseBody.specialize(specCase);
14502                     caseList += de::toString(caseNdx) + " %case_" + de::toString(caseNdx) + " ";
14503                 }
14504 
14505             specs["case_bodies"] = caseBodies;
14506             specs["case_list"]   = caseList;
14507         }
14508 
14509         specs["num_elements"]          = de::toString(structItemsCount);
14510         specs["field_type"]            = typeTestParameters[typeTestNdx].name;
14511         specs["struct_item_size"]      = de::toString(structItemsCount * sizeof(deFloat16));
14512         specs["struct_u32s"]           = de::toString(structItemsCount / 2);
14513         specs["op_premain_decls"]      = opParts.premainDecls;
14514         specs["op_sw_fun_call"]        = opParts.swFunCall;
14515         specs["op_sw_fun_header"]      = opParts.swFunHeader;
14516         specs["op_case_default_value"] = opParts.caseDefaultValue;
14517         if (opIndex == 0)
14518         {
14519             specs["st_call"] = "st_ssbo_dst";
14520             specs["st_ndx"]  = "c_i32_0";
14521         }
14522         else
14523         {
14524             specs["st_call"] = "st_fn_ssbo_dst";
14525             specs["st_ndx"]  = "ndx";
14526         }
14527 
14528         fragments["capability"] = "OpCapability Float16\n";
14529         fragments["decoration"] = decoration.specialize(specs);
14530         fragments["pre_main"]   = preMain.specialize(specs);
14531         fragments["testfun"]    = testFun.specialize(specs);
14532         if (opIndex == 0)
14533         {
14534             fragments["testfun"] += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src"}});
14535             fragments["testfun"] +=
14536                 StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_dst"}});
14537             fragments["testfun"] +=
14538                 StringTemplate(typeTestParameters[typeTestNdx].storeFunction).specialize({{"var", "ssbo_dst"}});
14539         }
14540         else
14541         {
14542             fragments["testfun"] +=
14543                 StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_src"}});
14544             fragments["testfun"] += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
14545         }
14546 
14547         specResource.inputs.push_back(
14548             Resource(BufferSp(new Float16Buffer(inputFP16)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14549         specResource.outputs.push_back(
14550             Resource(BufferSp(new Float16Buffer(unusedFP16Output)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14551         specResource.verifyIO = compareFP16CompositeFunc;
14552 
14553         extensions.push_back("VK_KHR_shader_float16_int8");
14554 
14555         features.extFloat16Int8.shaderFloat16 = true;
14556         if (specResource.graphicsFeaturesRequired)
14557             features.coreFeatures.vertexPipelineStoresAndAtomics = true;
14558 
14559         finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
14560                               IVec3(1, 1, 1));
14561     }
14562 
14563     return testGroup.release();
14564 }
14565 
14566 struct fp16PerComponent
14567 {
fp16PerComponentvkt::SpirVAssembly::fp16PerComponent14568     fp16PerComponent() : flavor(0), floatFormat16(-14, 15, 10, true), outCompCount(0), argCompCount(3, 0)
14569     {
14570     }
14571 
~fp16PerComponentvkt::SpirVAssembly::fp16PerComponent14572     virtual ~fp16PerComponent()
14573     {
14574     }
14575 
callOncePerComponentvkt::SpirVAssembly::fp16PerComponent14576     bool callOncePerComponent()
14577     {
14578         return true;
14579     }
getComponentValidityvkt::SpirVAssembly::fp16PerComponent14580     uint32_t getComponentValidity()
14581     {
14582         return static_cast<uint32_t>(-1);
14583     }
14584 
getULPsvkt::SpirVAssembly::fp16PerComponent14585     virtual double getULPs(vector<const deFloat16 *> &)
14586     {
14587         return 1.0;
14588     }
getMinvkt::SpirVAssembly::fp16PerComponent14589     virtual double getMin(double value, double ulps)
14590     {
14591         return value - floatFormat16.ulp(deAbs(value), ulps);
14592     }
getMaxvkt::SpirVAssembly::fp16PerComponent14593     virtual double getMax(double value, double ulps)
14594     {
14595         return value + floatFormat16.ulp(deAbs(value), ulps);
14596     }
14597 
getFlavorCountvkt::SpirVAssembly::fp16PerComponent14598     virtual size_t getFlavorCount()
14599     {
14600         return flavorNames.empty() ? 1 : flavorNames.size();
14601     }
setFlavorvkt::SpirVAssembly::fp16PerComponent14602     virtual void setFlavor(size_t flavorNo)
14603     {
14604         DE_ASSERT(flavorNo < getFlavorCount());
14605         flavor = flavorNo;
14606     }
getFlavorvkt::SpirVAssembly::fp16PerComponent14607     virtual size_t getFlavor()
14608     {
14609         return flavor;
14610     }
getCurrentFlavorNamevkt::SpirVAssembly::fp16PerComponent14611     virtual string getCurrentFlavorName()
14612     {
14613         return flavorNames.empty() ? string("") : flavorNames[getFlavor()];
14614     }
14615 
setOutCompCountvkt::SpirVAssembly::fp16PerComponent14616     virtual void setOutCompCount(size_t compCount)
14617     {
14618         outCompCount = compCount;
14619     }
getOutCompCountvkt::SpirVAssembly::fp16PerComponent14620     virtual size_t getOutCompCount()
14621     {
14622         return outCompCount;
14623     }
14624 
setArgCompCountvkt::SpirVAssembly::fp16PerComponent14625     virtual void setArgCompCount(size_t argNo, size_t compCount)
14626     {
14627         argCompCount[argNo] = compCount;
14628     }
getArgCompCountvkt::SpirVAssembly::fp16PerComponent14629     virtual size_t getArgCompCount(size_t argNo)
14630     {
14631         return argCompCount[argNo];
14632     }
14633 
14634 protected:
14635     size_t flavor;
14636     tcu::FloatFormat floatFormat16;
14637     size_t outCompCount;
14638     vector<size_t> argCompCount;
14639     vector<string> flavorNames;
14640 };
14641 
14642 struct fp16OpFNegate : public fp16PerComponent
14643 {
14644     template <class fp16type>
calcvkt::SpirVAssembly::fp16OpFNegate14645     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14646     {
14647         const fp16type x(*in[0]);
14648         const double d(x.asDouble());
14649         const double result(0.0 - d);
14650 
14651         out[0] = fp16type(result).bits();
14652         min[0] = getMin(result, getULPs(in));
14653         max[0] = getMax(result, getULPs(in));
14654 
14655         return true;
14656     }
14657 };
14658 
14659 struct fp16Round : public fp16PerComponent
14660 {
fp16Roundvkt::SpirVAssembly::fp16Round14661     fp16Round() : fp16PerComponent()
14662     {
14663         flavorNames.push_back("Floor(x+0.5)");
14664         flavorNames.push_back("Floor(x-0.5)");
14665         flavorNames.push_back("RoundEven");
14666     }
14667 
14668     template <class fp16type>
calcvkt::SpirVAssembly::fp16Round14669     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14670     {
14671         const fp16type x(*in[0]);
14672         const double d(x.asDouble());
14673         double result(0.0);
14674 
14675         switch (flavor)
14676         {
14677         case 0:
14678             result = deRound(d);
14679             break;
14680         case 1:
14681             result = deFloor(d - 0.5);
14682             break;
14683         case 2:
14684             result = deRoundEven(d);
14685             break;
14686         default:
14687             TCU_THROW(InternalError, "Invalid flavor specified");
14688         }
14689 
14690         out[0] = fp16type(result).bits();
14691         min[0] = getMin(result, getULPs(in));
14692         max[0] = getMax(result, getULPs(in));
14693 
14694         return true;
14695     }
14696 };
14697 
14698 struct fp16RoundEven : public fp16PerComponent
14699 {
14700     template <class fp16type>
calcvkt::SpirVAssembly::fp16RoundEven14701     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14702     {
14703         const fp16type x(*in[0]);
14704         const double d(x.asDouble());
14705         const double result(deRoundEven(d));
14706 
14707         out[0] = fp16type(result).bits();
14708         min[0] = getMin(result, getULPs(in));
14709         max[0] = getMax(result, getULPs(in));
14710 
14711         return true;
14712     }
14713 };
14714 
14715 struct fp16Trunc : public fp16PerComponent
14716 {
14717     template <class fp16type>
calcvkt::SpirVAssembly::fp16Trunc14718     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14719     {
14720         const fp16type x(*in[0]);
14721         const double d(x.asDouble());
14722         const double result(deTrunc(d));
14723 
14724         out[0] = fp16type(result).bits();
14725         min[0] = getMin(result, getULPs(in));
14726         max[0] = getMax(result, getULPs(in));
14727 
14728         return true;
14729     }
14730 };
14731 
14732 struct fp16FAbs : public fp16PerComponent
14733 {
14734     template <class fp16type>
calcvkt::SpirVAssembly::fp16FAbs14735     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14736     {
14737         const fp16type x(*in[0]);
14738         const double d(x.asDouble());
14739         const double result(deAbs(d));
14740 
14741         out[0] = fp16type(result).bits();
14742         min[0] = getMin(result, getULPs(in));
14743         max[0] = getMax(result, getULPs(in));
14744 
14745         return true;
14746     }
14747 };
14748 
14749 struct fp16FSign : public fp16PerComponent
14750 {
14751     template <class fp16type>
calcvkt::SpirVAssembly::fp16FSign14752     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14753     {
14754         const fp16type x(*in[0]);
14755         const double d(x.asDouble());
14756         const double result(deSign(d));
14757 
14758         if (x.isNaN())
14759             return false;
14760 
14761         out[0] = fp16type(result).bits();
14762         min[0] = getMin(result, getULPs(in));
14763         max[0] = getMax(result, getULPs(in));
14764 
14765         return true;
14766     }
14767 };
14768 
14769 struct fp16Floor : public fp16PerComponent
14770 {
14771     template <class fp16type>
calcvkt::SpirVAssembly::fp16Floor14772     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14773     {
14774         const fp16type x(*in[0]);
14775         const double d(x.asDouble());
14776         const double result(deFloor(d));
14777 
14778         out[0] = fp16type(result).bits();
14779         min[0] = getMin(result, getULPs(in));
14780         max[0] = getMax(result, getULPs(in));
14781 
14782         return true;
14783     }
14784 };
14785 
14786 struct fp16Ceil : public fp16PerComponent
14787 {
14788     template <class fp16type>
calcvkt::SpirVAssembly::fp16Ceil14789     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14790     {
14791         const fp16type x(*in[0]);
14792         const double d(x.asDouble());
14793         const double result(deCeil(d));
14794 
14795         out[0] = fp16type(result).bits();
14796         min[0] = getMin(result, getULPs(in));
14797         max[0] = getMax(result, getULPs(in));
14798 
14799         return true;
14800     }
14801 };
14802 
14803 struct fp16Fract : public fp16PerComponent
14804 {
14805     template <class fp16type>
calcvkt::SpirVAssembly::fp16Fract14806     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14807     {
14808         const fp16type x(*in[0]);
14809         const double d(x.asDouble());
14810         const double result(deFrac(d));
14811 
14812         out[0] = fp16type(result).bits();
14813         min[0] = getMin(result, getULPs(in));
14814         max[0] = getMax(result, getULPs(in));
14815 
14816         return true;
14817     }
14818 };
14819 
14820 struct fp16Radians : public fp16PerComponent
14821 {
getULPsvkt::SpirVAssembly::fp16Radians14822     virtual double getULPs(vector<const deFloat16 *> &in)
14823     {
14824         DE_UNREF(in);
14825 
14826         return 2.5;
14827     }
14828 
14829     template <class fp16type>
calcvkt::SpirVAssembly::fp16Radians14830     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14831     {
14832         const fp16type x(*in[0]);
14833         const float d(x.asFloat());
14834         const float result(deFloatRadians(d));
14835 
14836         out[0] = fp16type(result).bits();
14837         min[0] = getMin(result, getULPs(in));
14838         max[0] = getMax(result, getULPs(in));
14839 
14840         return true;
14841     }
14842 };
14843 
14844 struct fp16Degrees : public fp16PerComponent
14845 {
getULPsvkt::SpirVAssembly::fp16Degrees14846     virtual double getULPs(vector<const deFloat16 *> &in)
14847     {
14848         DE_UNREF(in);
14849 
14850         return 2.5;
14851     }
14852 
14853     template <class fp16type>
calcvkt::SpirVAssembly::fp16Degrees14854     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14855     {
14856         const fp16type x(*in[0]);
14857         const float d(x.asFloat());
14858         const float result(deFloatDegrees(d));
14859 
14860         out[0] = fp16type(result).bits();
14861         min[0] = getMin(result, getULPs(in));
14862         max[0] = getMax(result, getULPs(in));
14863 
14864         return true;
14865     }
14866 };
14867 
14868 struct fp16Sin : public fp16PerComponent
14869 {
14870     template <class fp16type>
calcvkt::SpirVAssembly::fp16Sin14871     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14872     {
14873         const fp16type x(*in[0]);
14874         const double d(x.asDouble());
14875         const double result(deSin(d));
14876         const double unspecUlp(16.0);
14877         const double err(de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) :
14878                                                                        floatFormat16.ulp(deAbs(result), unspecUlp));
14879 
14880         if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14881             return false;
14882 
14883         out[0] = fp16type(result).bits();
14884         min[0] = result - err;
14885         max[0] = result + err;
14886 
14887         return true;
14888     }
14889 };
14890 
14891 struct fp16Cos : public fp16PerComponent
14892 {
14893     template <class fp16type>
calcvkt::SpirVAssembly::fp16Cos14894     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14895     {
14896         const fp16type x(*in[0]);
14897         const double d(x.asDouble());
14898         const double result(deCos(d));
14899         const double unspecUlp(16.0);
14900         const double err(de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) :
14901                                                                        floatFormat16.ulp(deAbs(result), unspecUlp));
14902 
14903         if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14904             return false;
14905 
14906         out[0] = fp16type(result).bits();
14907         min[0] = result - err;
14908         max[0] = result + err;
14909 
14910         return true;
14911     }
14912 };
14913 
14914 struct fp16Tan : public fp16PerComponent
14915 {
14916     template <class fp16type>
calcvkt::SpirVAssembly::fp16Tan14917     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14918     {
14919         const fp16type x(*in[0]);
14920         const double d(x.asDouble());
14921         const double result(deTan(d));
14922 
14923         if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14924             return false;
14925 
14926         out[0] = fp16type(result).bits();
14927         {
14928             const double err        = deLdExp(1.0, -7);
14929             const double s1         = deSin(d) + err;
14930             const double s2         = deSin(d) - err;
14931             const double c1         = deCos(d) + err;
14932             const double c2         = deCos(d) - err;
14933             const double edgeVals[] = {s1 / c1, s1 / c2, s2 / c1, s2 / c2};
14934             double edgeLeft         = out[0];
14935             double edgeRight        = out[0];
14936 
14937             if (deSign(c1 * c2) < 0.0)
14938             {
14939                 edgeLeft  = -std::numeric_limits<double>::infinity();
14940                 edgeRight = +std::numeric_limits<double>::infinity();
14941             }
14942             else
14943             {
14944                 edgeLeft  = *std::min_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14945                 edgeRight = *std::max_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14946             }
14947 
14948             min[0] = edgeLeft;
14949             max[0] = edgeRight;
14950         }
14951 
14952         return true;
14953     }
14954 };
14955 
14956 struct fp16Asin : public fp16PerComponent
14957 {
14958     template <class fp16type>
calcvkt::SpirVAssembly::fp16Asin14959     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14960     {
14961         const fp16type x(*in[0]);
14962         const double d(x.asDouble());
14963         const double result(deAsin(d));
14964         const double error(deAtan2(d, sqrt(1.0 - d * d)));
14965 
14966         if (!x.isNaN() && deAbs(d) > 1.0)
14967             return false;
14968 
14969         out[0] = fp16type(result).bits();
14970         min[0] =
14971             result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14972         max[0] =
14973             result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14974 
14975         return true;
14976     }
14977 };
14978 
14979 struct fp16Acos : public fp16PerComponent
14980 {
14981     template <class fp16type>
calcvkt::SpirVAssembly::fp16Acos14982     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14983     {
14984         const fp16type x(*in[0]);
14985         const double d(x.asDouble());
14986         const double result(deAcos(d));
14987         const double error(deAtan2(sqrt(1.0 - d * d), d));
14988 
14989         if (!x.isNaN() && deAbs(d) > 1.0)
14990             return false;
14991 
14992         out[0] = fp16type(result).bits();
14993         min[0] =
14994             result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14995         max[0] =
14996             result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14997 
14998         return true;
14999     }
15000 };
15001 
15002 struct fp16Atan : public fp16PerComponent
15003 {
getULPsvkt::SpirVAssembly::fp16Atan15004     virtual double getULPs(vector<const deFloat16 *> &in)
15005     {
15006         DE_UNREF(in);
15007 
15008         return 2 * 5.0; // This is not a precision test. Value is not from spec
15009     }
15010 
15011     template <class fp16type>
calcvkt::SpirVAssembly::fp16Atan15012     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15013     {
15014         const fp16type x(*in[0]);
15015         const double d(x.asDouble());
15016         const double result(deAtanOver(d));
15017 
15018         out[0] = fp16type(result).bits();
15019         min[0] = getMin(result, getULPs(in));
15020         max[0] = getMax(result, getULPs(in));
15021 
15022         return true;
15023     }
15024 };
15025 
15026 struct fp16Sinh : public fp16PerComponent
15027 {
fp16Sinhvkt::SpirVAssembly::fp16Sinh15028     fp16Sinh() : fp16PerComponent()
15029     {
15030         flavorNames.push_back("Double");
15031         flavorNames.push_back("ExpFP16");
15032     }
15033 
15034     template <class fp16type>
calcvkt::SpirVAssembly::fp16Sinh15035     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15036     {
15037         const fp16type x(*in[0]);
15038         const double d(x.asDouble());
15039         const double ulps(64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
15040         double result(0.0);
15041         double error(0.0);
15042 
15043         if (getFlavor() == 0)
15044         {
15045             result = deSinh(d);
15046             error  = floatFormat16.ulp(deAbs(result), ulps);
15047         }
15048         else if (getFlavor() == 1)
15049         {
15050             const fp16type epx(deExp(d));
15051             const fp16type enx(deExp(-d));
15052             const fp16type esx(epx.asDouble() - enx.asDouble());
15053             const fp16type sx2(esx.asDouble() / 2.0);
15054 
15055             result = sx2.asDouble();
15056             error  = deAbs(floatFormat16.ulp(epx.asDouble(), ulps)) + deAbs(floatFormat16.ulp(enx.asDouble(), ulps));
15057         }
15058         else
15059         {
15060             TCU_THROW(InternalError, "Unknown flavor");
15061         }
15062 
15063         out[0] = fp16type(result).bits();
15064         min[0] = result - error;
15065         max[0] = result + error;
15066 
15067         return true;
15068     }
15069 };
15070 
15071 struct fp16Cosh : public fp16PerComponent
15072 {
fp16Coshvkt::SpirVAssembly::fp16Cosh15073     fp16Cosh() : fp16PerComponent()
15074     {
15075         flavorNames.push_back("Double");
15076         flavorNames.push_back("ExpFP16");
15077     }
15078 
15079     template <class fp16type>
calcvkt::SpirVAssembly::fp16Cosh15080     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15081     {
15082         const fp16type x(*in[0]);
15083         const double d(x.asDouble());
15084         const double ulps(64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
15085         double result(0.0);
15086 
15087         if (getFlavor() == 0)
15088         {
15089             result = deCosh(d);
15090         }
15091         else if (getFlavor() == 1)
15092         {
15093             const fp16type epx(deExp(d));
15094             const fp16type enx(deExp(-d));
15095             const fp16type esx(epx.asDouble() + enx.asDouble());
15096             const fp16type sx2(esx.asDouble() / 2.0);
15097 
15098             result = sx2.asDouble();
15099         }
15100         else
15101         {
15102             TCU_THROW(InternalError, "Unknown flavor");
15103         }
15104 
15105         out[0] = fp16type(result).bits();
15106         min[0] = result - floatFormat16.ulp(deAbs(result), ulps);
15107         max[0] = result + floatFormat16.ulp(deAbs(result), ulps);
15108 
15109         return true;
15110     }
15111 };
15112 
15113 struct fp16Tanh : public fp16PerComponent
15114 {
fp16Tanhvkt::SpirVAssembly::fp16Tanh15115     fp16Tanh() : fp16PerComponent()
15116     {
15117         flavorNames.push_back("Tanh");
15118         flavorNames.push_back("SinhCosh");
15119         flavorNames.push_back("SinhCoshFP16");
15120         flavorNames.push_back("PolyFP16");
15121     }
15122 
getULPsvkt::SpirVAssembly::fp16Tanh15123     virtual double getULPs(vector<const deFloat16 *> &in)
15124     {
15125         const tcu::Float16 x(*in[0]);
15126         const double d(x.asDouble());
15127 
15128         return 2 * (1.0 + 2 * deAbs(d)); // This is not a precision test. Value is not from spec
15129     }
15130 
15131     template <class fp16type>
calcPolyvkt::SpirVAssembly::fp16Tanh15132     inline double calcPoly(const fp16type &espx, const fp16type &esnx, const fp16type &ecpx, const fp16type &ecnx)
15133     {
15134         const fp16type esx(espx.asDouble() - esnx.asDouble());
15135         const fp16type sx2(esx.asDouble() / 2.0);
15136         const fp16type ecx(ecpx.asDouble() + ecnx.asDouble());
15137         const fp16type cx2(ecx.asDouble() / 2.0);
15138         const fp16type tg(sx2.asDouble() / cx2.asDouble());
15139         const double rez(tg.asDouble());
15140 
15141         return rez;
15142     }
15143 
15144     template <class fp16type>
calcvkt::SpirVAssembly::fp16Tanh15145     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15146     {
15147         const fp16type x(*in[0]);
15148         const double d(x.asDouble());
15149         double result(0.0);
15150 
15151         if (getFlavor() == 0)
15152         {
15153             result = deTanh(d);
15154             min[0] = getMin(result, getULPs(in));
15155             max[0] = getMax(result, getULPs(in));
15156         }
15157         else if (getFlavor() == 1)
15158         {
15159             result = deSinh(d) / deCosh(d);
15160             min[0] = getMin(result, getULPs(in));
15161             max[0] = getMax(result, getULPs(in));
15162         }
15163         else if (getFlavor() == 2)
15164         {
15165             const fp16type s(deSinh(d));
15166             const fp16type c(deCosh(d));
15167 
15168             result = s.asDouble() / c.asDouble();
15169             min[0] = getMin(result, getULPs(in));
15170             max[0] = getMax(result, getULPs(in));
15171         }
15172         else if (getFlavor() == 3)
15173         {
15174             const double ulps(getULPs(in));
15175             const double epxm(deExp(d));
15176             const double enxm(deExp(-d));
15177             const double epxmerr = floatFormat16.ulp(epxm, ulps);
15178             const double enxmerr = floatFormat16.ulp(enxm, ulps);
15179             const fp16type epx[] = {fp16type(epxm - epxmerr), fp16type(epxm + epxmerr)};
15180             const fp16type enx[] = {fp16type(enxm - enxmerr), fp16type(enxm + enxmerr)};
15181             const fp16type epxm16(epxm);
15182             const fp16type enxm16(enxm);
15183             vector<double> tgs;
15184 
15185             for (size_t spNdx = 0; spNdx < DE_LENGTH_OF_ARRAY(epx); ++spNdx)
15186                 for (size_t snNdx = 0; snNdx < DE_LENGTH_OF_ARRAY(enx); ++snNdx)
15187                     for (size_t cpNdx = 0; cpNdx < DE_LENGTH_OF_ARRAY(epx); ++cpNdx)
15188                         for (size_t cnNdx = 0; cnNdx < DE_LENGTH_OF_ARRAY(enx); ++cnNdx)
15189                         {
15190                             const double tgh = calcPoly(epx[spNdx], enx[snNdx], epx[cpNdx], enx[cnNdx]);
15191 
15192                             tgs.push_back(tgh);
15193                         }
15194 
15195             result = calcPoly(epxm16, enxm16, epxm16, enxm16);
15196             min[0] = *std::min_element(tgs.begin(), tgs.end());
15197             max[0] = *std::max_element(tgs.begin(), tgs.end());
15198         }
15199         else
15200         {
15201             TCU_THROW(InternalError, "Unknown flavor");
15202         }
15203 
15204         out[0] = fp16type(result).bits();
15205 
15206         return true;
15207     }
15208 };
15209 
15210 struct fp16Asinh : public fp16PerComponent
15211 {
fp16Asinhvkt::SpirVAssembly::fp16Asinh15212     fp16Asinh() : fp16PerComponent()
15213     {
15214         flavorNames.push_back("Double");
15215         flavorNames.push_back("PolyFP16Wiki");
15216         flavorNames.push_back("PolyFP16Abs");
15217     }
15218 
getULPsvkt::SpirVAssembly::fp16Asinh15219     virtual double getULPs(vector<const deFloat16 *> &in)
15220     {
15221         DE_UNREF(in);
15222 
15223         return 256.0; // This is not a precision test. Value is not from spec
15224     }
15225 
15226     template <class fp16type>
calcvkt::SpirVAssembly::fp16Asinh15227     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15228     {
15229         const fp16type x(*in[0]);
15230         const double d(x.asDouble());
15231         double result(0.0);
15232 
15233         if (getFlavor() == 0)
15234         {
15235             result = deAsinh(d);
15236         }
15237         else if (getFlavor() == 1)
15238         {
15239             const fp16type x2(d * d);
15240             const fp16type x2p1(x2.asDouble() + 1.0);
15241             const fp16type sq(deSqrt(x2p1.asDouble()));
15242             const fp16type sxsq(d + sq.asDouble());
15243             const fp16type lsxsq(deLog(sxsq.asDouble()));
15244 
15245             if (lsxsq.isInf())
15246                 return false;
15247 
15248             result = lsxsq.asDouble();
15249         }
15250         else if (getFlavor() == 2)
15251         {
15252             const fp16type x2(d * d);
15253             const fp16type x2p1(x2.asDouble() + 1.0);
15254             const fp16type sq(deSqrt(x2p1.asDouble()));
15255             const fp16type sxsq(deAbs(d) + sq.asDouble());
15256             const fp16type lsxsq(deLog(sxsq.asDouble()));
15257 
15258             result = deSign(d) * lsxsq.asDouble();
15259         }
15260         else
15261         {
15262             TCU_THROW(InternalError, "Unknown flavor");
15263         }
15264 
15265         out[0] = fp16type(result).bits();
15266         min[0] = getMin(result, getULPs(in));
15267         max[0] = getMax(result, getULPs(in));
15268 
15269         return true;
15270     }
15271 };
15272 
15273 struct fp16Acosh : public fp16PerComponent
15274 {
fp16Acoshvkt::SpirVAssembly::fp16Acosh15275     fp16Acosh() : fp16PerComponent()
15276     {
15277         flavorNames.push_back("Double");
15278         flavorNames.push_back("PolyFP16");
15279     }
15280 
getULPsvkt::SpirVAssembly::fp16Acosh15281     virtual double getULPs(vector<const deFloat16 *> &in)
15282     {
15283         DE_UNREF(in);
15284 
15285         return 16.0; // This is not a precision test. Value is not from spec
15286     }
15287 
15288     template <class fp16type>
calcvkt::SpirVAssembly::fp16Acosh15289     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15290     {
15291         const fp16type x(*in[0]);
15292         const double d(x.asDouble());
15293         double result(0.0);
15294 
15295         if (!x.isNaN() && d < 1.0)
15296             return false;
15297 
15298         if (getFlavor() == 0)
15299         {
15300             result = deAcosh(d);
15301         }
15302         else if (getFlavor() == 1)
15303         {
15304             const fp16type x2(d * d);
15305             const fp16type x2m1(x2.asDouble() - 1.0);
15306             const fp16type sq(deSqrt(x2m1.asDouble()));
15307             const fp16type sxsq(d + sq.asDouble());
15308             const fp16type lsxsq(deLog(sxsq.asDouble()));
15309 
15310             result = lsxsq.asDouble();
15311         }
15312         else
15313         {
15314             TCU_THROW(InternalError, "Unknown flavor");
15315         }
15316 
15317         out[0] = fp16type(result).bits();
15318         min[0] = getMin(result, getULPs(in));
15319         max[0] = getMax(result, getULPs(in));
15320 
15321         return true;
15322     }
15323 };
15324 
15325 struct fp16Atanh : public fp16PerComponent
15326 {
fp16Atanhvkt::SpirVAssembly::fp16Atanh15327     fp16Atanh() : fp16PerComponent()
15328     {
15329         flavorNames.push_back("Double");
15330         flavorNames.push_back("PolyFP16");
15331     }
15332 
15333     template <class fp16type>
calcvkt::SpirVAssembly::fp16Atanh15334     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15335     {
15336         const fp16type x(*in[0]);
15337         const double d(x.asDouble());
15338         double result(0.0);
15339 
15340         if (deAbs(d) >= 1.0)
15341             return false;
15342 
15343         if (getFlavor() == 0)
15344         {
15345             const double ulps(16.0); // This is not a precision test. Value is not from spec
15346 
15347             result = deAtanh(d);
15348             min[0] = getMin(result, ulps);
15349             max[0] = getMax(result, ulps);
15350         }
15351         else if (getFlavor() == 1)
15352         {
15353             const fp16type x1a(1.0 + d);
15354             const fp16type x1b(1.0 - d);
15355             const fp16type x1d(x1a.asDouble() / x1b.asDouble());
15356             const fp16type lx1d(deLog(x1d.asDouble()));
15357             const fp16type lx1d2(0.5 * lx1d.asDouble());
15358             const double error(2 * (de::inRange(deAbs(x1d.asDouble()), 0.5, 2.0) ?
15359                                         deLdExp(2.0, -7) :
15360                                         floatFormat16.ulp(deAbs(x1d.asDouble()), 3.0)));
15361 
15362             result = lx1d2.asDouble();
15363             min[0] = result - error;
15364             max[0] = result + error;
15365         }
15366         else
15367         {
15368             TCU_THROW(InternalError, "Unknown flavor");
15369         }
15370 
15371         out[0] = fp16type(result).bits();
15372 
15373         return true;
15374     }
15375 };
15376 
15377 struct fp16Exp : public fp16PerComponent
15378 {
15379     template <class fp16type>
calcvkt::SpirVAssembly::fp16Exp15380     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15381     {
15382         const fp16type x(*in[0]);
15383         const double d(x.asDouble());
15384         const double ulps(10.0 * (1.0 + 2.0 * deAbs(d)));
15385         const double result(deExp(d));
15386 
15387         out[0] = fp16type(result).bits();
15388         min[0] = getMin(result, ulps);
15389         max[0] = getMax(result, ulps);
15390 
15391         return true;
15392     }
15393 };
15394 
15395 struct fp16Log : public fp16PerComponent
15396 {
15397     template <class fp16type>
calcvkt::SpirVAssembly::fp16Log15398     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15399     {
15400         const fp16type x(*in[0]);
15401         const double d(x.asDouble());
15402         const double result(deLog(d));
15403         const double error(de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15404 
15405         if (d <= 0.0)
15406             return false;
15407 
15408         out[0] = fp16type(result).bits();
15409         min[0] = result - error;
15410         max[0] = result + error;
15411 
15412         return true;
15413     }
15414 };
15415 
15416 struct fp16Exp2 : public fp16PerComponent
15417 {
15418     template <class fp16type>
calcvkt::SpirVAssembly::fp16Exp215419     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15420     {
15421         const fp16type x(*in[0]);
15422         const double d(x.asDouble());
15423         const double result(deExp2(d));
15424         const double ulps(1.0 + 2.0 * deAbs(fp16type(in[0][0]).asDouble()));
15425 
15426         out[0] = fp16type(result).bits();
15427         min[0] = getMin(result, ulps);
15428         max[0] = getMax(result, ulps);
15429 
15430         return true;
15431     }
15432 };
15433 
15434 struct fp16Log2 : public fp16PerComponent
15435 {
15436     template <class fp16type>
calcvkt::SpirVAssembly::fp16Log215437     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15438     {
15439         const fp16type x(*in[0]);
15440         const double d(x.asDouble());
15441         const double result(deLog2(d));
15442         const double error(de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15443 
15444         if (d <= 0.0)
15445             return false;
15446 
15447         out[0] = fp16type(result).bits();
15448         min[0] = result - error;
15449         max[0] = result + error;
15450 
15451         return true;
15452     }
15453 };
15454 
15455 struct fp16Sqrt : public fp16PerComponent
15456 {
getULPsvkt::SpirVAssembly::fp16Sqrt15457     virtual double getULPs(vector<const deFloat16 *> &in)
15458     {
15459         DE_UNREF(in);
15460 
15461         return 6.0;
15462     }
15463 
15464     template <class fp16type>
calcvkt::SpirVAssembly::fp16Sqrt15465     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15466     {
15467         const fp16type x(*in[0]);
15468         const double d(x.asDouble());
15469         const double result(deSqrt(d));
15470 
15471         if (!x.isNaN() && d < 0.0)
15472             return false;
15473 
15474         out[0] = fp16type(result).bits();
15475         min[0] = getMin(result, getULPs(in));
15476         max[0] = getMax(result, getULPs(in));
15477 
15478         return true;
15479     }
15480 };
15481 
15482 struct fp16InverseSqrt : public fp16PerComponent
15483 {
getULPsvkt::SpirVAssembly::fp16InverseSqrt15484     virtual double getULPs(vector<const deFloat16 *> &in)
15485     {
15486         DE_UNREF(in);
15487 
15488         return 2.0;
15489     }
15490 
15491     template <class fp16type>
calcvkt::SpirVAssembly::fp16InverseSqrt15492     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15493     {
15494         const fp16type x(*in[0]);
15495         const double d(x.asDouble());
15496         const double result(1.0 / deSqrt(d));
15497 
15498         if (!x.isNaN() && d <= 0.0)
15499             return false;
15500 
15501         out[0] = fp16type(result).bits();
15502         min[0] = getMin(result, getULPs(in));
15503         max[0] = getMax(result, getULPs(in));
15504 
15505         return true;
15506     }
15507 };
15508 
15509 struct fp16ModfFrac : public fp16PerComponent
15510 {
15511     template <class fp16type>
calcvkt::SpirVAssembly::fp16ModfFrac15512     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15513     {
15514         const fp16type x(*in[0]);
15515         const double d(x.asDouble());
15516         double i(0.0);
15517         const double result(deModf(d, &i));
15518 
15519         if (x.isInf() || x.isNaN())
15520             return false;
15521 
15522         out[0] = fp16type(result).bits();
15523         min[0] = getMin(result, getULPs(in));
15524         max[0] = getMax(result, getULPs(in));
15525 
15526         return true;
15527     }
15528 };
15529 
15530 struct fp16ModfInt : public fp16PerComponent
15531 {
15532     template <class fp16type>
calcvkt::SpirVAssembly::fp16ModfInt15533     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15534     {
15535         const fp16type x(*in[0]);
15536         const double d(x.asDouble());
15537         double i(0.0);
15538         const double unused(deModf(d, &i));
15539         const double result(i);
15540 
15541         DE_UNREF(unused);
15542 
15543         if (x.isInf() || x.isNaN())
15544             return false;
15545 
15546         out[0] = fp16type(result).bits();
15547         min[0] = getMin(result, getULPs(in));
15548         max[0] = getMax(result, getULPs(in));
15549 
15550         return true;
15551     }
15552 };
15553 
15554 struct fp16FrexpS : public fp16PerComponent
15555 {
15556     template <class fp16type>
calcvkt::SpirVAssembly::fp16FrexpS15557     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15558     {
15559         const fp16type x(*in[0]);
15560         const double d(x.asDouble());
15561         int e(0);
15562         const double result(deFrExp(d, &e));
15563 
15564         if (x.isNaN() || x.isInf())
15565             return false;
15566 
15567         out[0] = fp16type(result).bits();
15568         min[0] = getMin(result, getULPs(in));
15569         max[0] = getMax(result, getULPs(in));
15570 
15571         return true;
15572     }
15573 };
15574 
15575 struct fp16FrexpE : public fp16PerComponent
15576 {
15577     template <class fp16type>
calcvkt::SpirVAssembly::fp16FrexpE15578     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15579     {
15580         const fp16type x(*in[0]);
15581         const double d(x.asDouble());
15582         int e(0);
15583         const double unused(deFrExp(d, &e));
15584         const double result(static_cast<double>(e));
15585 
15586         DE_UNREF(unused);
15587 
15588         if (x.isNaN() || x.isInf())
15589             return false;
15590 
15591         out[0] = fp16type(result).bits();
15592         min[0] = getMin(result, getULPs(in));
15593         max[0] = getMax(result, getULPs(in));
15594 
15595         return true;
15596     }
15597 };
15598 
15599 struct fp16OpFAdd : public fp16PerComponent
15600 {
15601     template <class fp16type>
calcvkt::SpirVAssembly::fp16OpFAdd15602     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15603     {
15604         const fp16type x(*in[0]);
15605         const fp16type y(*in[1]);
15606         const double xd(x.asDouble());
15607         const double yd(y.asDouble());
15608         const double result(xd + yd);
15609 
15610         out[0] = fp16type(result).bits();
15611         min[0] = getMin(result, getULPs(in));
15612         max[0] = getMax(result, getULPs(in));
15613 
15614         return true;
15615     }
15616 };
15617 
15618 struct fp16OpFSub : public fp16PerComponent
15619 {
15620     template <class fp16type>
calcvkt::SpirVAssembly::fp16OpFSub15621     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15622     {
15623         const fp16type x(*in[0]);
15624         const fp16type y(*in[1]);
15625         const double xd(x.asDouble());
15626         const double yd(y.asDouble());
15627         const double result(xd - yd);
15628 
15629         out[0] = fp16type(result).bits();
15630         min[0] = getMin(result, getULPs(in));
15631         max[0] = getMax(result, getULPs(in));
15632 
15633         return true;
15634     }
15635 };
15636 
15637 struct fp16OpFMul : public fp16PerComponent
15638 {
15639     template <class fp16type>
calcvkt::SpirVAssembly::fp16OpFMul15640     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15641     {
15642         const fp16type x(*in[0]);
15643         const fp16type y(*in[1]);
15644         const double xd(x.asDouble());
15645         const double yd(y.asDouble());
15646         const double result(xd * yd);
15647 
15648         out[0] = fp16type(result).bits();
15649         min[0] = getMin(result, getULPs(in));
15650         max[0] = getMax(result, getULPs(in));
15651 
15652         return true;
15653     }
15654 };
15655 
15656 struct fp16OpFDiv : public fp16PerComponent
15657 {
fp16OpFDivvkt::SpirVAssembly::fp16OpFDiv15658     fp16OpFDiv() : fp16PerComponent()
15659     {
15660         flavorNames.push_back("DirectDiv");
15661         flavorNames.push_back("InverseDiv");
15662     }
15663 
15664     template <class fp16type>
calcvkt::SpirVAssembly::fp16OpFDiv15665     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15666     {
15667         const fp16type x(*in[0]);
15668         const fp16type y(*in[1]);
15669         const double xd(x.asDouble());
15670         const double yd(y.asDouble());
15671         const double unspecUlp(16.0);
15672         const double ulpCnt(de::inRange(deAbs(yd), deLdExp(1, -14), deLdExp(1, 14)) ? 2.5 : unspecUlp);
15673         double result(0.0);
15674 
15675         if (y.isZero())
15676             return false;
15677 
15678         if (getFlavor() == 0)
15679         {
15680             result = (xd / yd);
15681         }
15682         else if (getFlavor() == 1)
15683         {
15684             const double invyd(1.0 / yd);
15685             const fp16type invy(invyd);
15686 
15687             result = (xd * invy.asDouble());
15688         }
15689         else
15690         {
15691             TCU_THROW(InternalError, "Unknown flavor");
15692         }
15693 
15694         out[0] = fp16type(result).bits();
15695         min[0] = getMin(result, ulpCnt);
15696         max[0] = getMax(result, ulpCnt);
15697 
15698         return true;
15699     }
15700 };
15701 
15702 struct fp16Atan2 : public fp16PerComponent
15703 {
fp16Atan2vkt::SpirVAssembly::fp16Atan215704     fp16Atan2() : fp16PerComponent()
15705     {
15706         flavorNames.push_back("DoubleCalc");
15707         flavorNames.push_back("DoubleCalc_PI");
15708     }
15709 
getULPsvkt::SpirVAssembly::fp16Atan215710     virtual double getULPs(vector<const deFloat16 *> &in)
15711     {
15712         DE_UNREF(in);
15713 
15714         return 2 * 5.0; // This is not a precision test. Value is not from spec
15715     }
15716 
15717     template <class fp16type>
calcvkt::SpirVAssembly::fp16Atan215718     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15719     {
15720         const fp16type x(*in[0]);
15721         const fp16type y(*in[1]);
15722         const double xd(x.asDouble());
15723         const double yd(y.asDouble());
15724         double result(0.0);
15725 
15726         if ((x.isZero() && y.isZero()) || (x.isInf() && y.isInf()))
15727             return false;
15728 
15729         if (getFlavor() == 0)
15730         {
15731             result = deAtan2(xd, yd);
15732         }
15733         else if (getFlavor() == 1)
15734         {
15735             const double ulps(2.0 * 5.0); // This is not a precision test. Value is not from spec
15736             const double eps(floatFormat16.ulp(DE_PI_DOUBLE, ulps));
15737 
15738             result = deAtan2(xd, yd);
15739 
15740             if (de::inRange(deAbs(result), DE_PI_DOUBLE - eps, DE_PI_DOUBLE + eps))
15741                 result = -result;
15742         }
15743         else
15744         {
15745             TCU_THROW(InternalError, "Unknown flavor");
15746         }
15747 
15748         out[0] = fp16type(result).bits();
15749         min[0] = getMin(result, getULPs(in));
15750         max[0] = getMax(result, getULPs(in));
15751 
15752         return true;
15753     }
15754 };
15755 
15756 struct fp16Pow : public fp16PerComponent
15757 {
fp16Powvkt::SpirVAssembly::fp16Pow15758     fp16Pow() : fp16PerComponent()
15759     {
15760         flavorNames.push_back("Pow");
15761         flavorNames.push_back("PowLog2");
15762         flavorNames.push_back("PowLog2FP16");
15763     }
15764 
15765     template <class fp16type>
calcvkt::SpirVAssembly::fp16Pow15766     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15767     {
15768         const fp16type x(*in[0]);
15769         const fp16type y(*in[1]);
15770         const double xd(x.asDouble());
15771         const double yd(y.asDouble());
15772         const double logxeps(de::inRange(deAbs(xd), 0.5, 2.0) ? deLdExp(1.0, -7) : floatFormat16.ulp(deLog2(xd), 3.0));
15773         const double ulps1(1.0 + 4.0 * deAbs(yd * (deLog2(xd) - logxeps)));
15774         const double ulps2(1.0 + 4.0 * deAbs(yd * (deLog2(xd) + logxeps)));
15775         const double ulps(deMax(deAbs(ulps1), deAbs(ulps2)));
15776         double result(0.0);
15777 
15778         if (xd < 0.0)
15779             return false;
15780 
15781         if (x.isZero() && yd <= 0.0)
15782             return false;
15783 
15784         if (getFlavor() == 0)
15785         {
15786             result = dePow(xd, yd);
15787         }
15788         else if (getFlavor() == 1)
15789         {
15790             const double l2d(deLog2(xd));
15791             const double e2d(deExp2(yd * l2d));
15792 
15793             result = e2d;
15794         }
15795         else if (getFlavor() == 2)
15796         {
15797             const double l2d(deLog2(xd));
15798             const fp16type l2(l2d);
15799             const double e2d(deExp2(yd * l2.asDouble()));
15800             const fp16type e2(e2d);
15801 
15802             result = e2.asDouble();
15803         }
15804         else
15805         {
15806             TCU_THROW(InternalError, "Unknown flavor");
15807         }
15808 
15809         out[0] = fp16type(result).bits();
15810         min[0] = getMin(result, ulps);
15811         max[0] = getMax(result, ulps);
15812 
15813         return true;
15814     }
15815 };
15816 
15817 struct fp16FMin : public fp16PerComponent
15818 {
15819     template <class fp16type>
calcvkt::SpirVAssembly::fp16FMin15820     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15821     {
15822         const fp16type x(*in[0]);
15823         const fp16type y(*in[1]);
15824         const double xd(x.asDouble());
15825         const double yd(y.asDouble());
15826         const double result(deMin(xd, yd));
15827 
15828         if (x.isNaN() || y.isNaN())
15829             return false;
15830 
15831         out[0] = fp16type(result).bits();
15832         min[0] = getMin(result, getULPs(in));
15833         max[0] = getMax(result, getULPs(in));
15834 
15835         return true;
15836     }
15837 };
15838 
15839 struct fp16FMax : public fp16PerComponent
15840 {
15841     template <class fp16type>
calcvkt::SpirVAssembly::fp16FMax15842     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15843     {
15844         const fp16type x(*in[0]);
15845         const fp16type y(*in[1]);
15846         const double xd(x.asDouble());
15847         const double yd(y.asDouble());
15848         const double result(deMax(xd, yd));
15849 
15850         if (x.isNaN() || y.isNaN())
15851             return false;
15852 
15853         out[0] = fp16type(result).bits();
15854         min[0] = getMin(result, getULPs(in));
15855         max[0] = getMax(result, getULPs(in));
15856 
15857         return true;
15858     }
15859 };
15860 
15861 struct fp16Step : public fp16PerComponent
15862 {
15863     template <class fp16type>
calcvkt::SpirVAssembly::fp16Step15864     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15865     {
15866         const fp16type edge(*in[0]);
15867         const fp16type x(*in[1]);
15868         const double edged(edge.asDouble());
15869         const double xd(x.asDouble());
15870         const double result(deStep(edged, xd));
15871 
15872         out[0] = fp16type(result).bits();
15873         min[0] = getMin(result, getULPs(in));
15874         max[0] = getMax(result, getULPs(in));
15875 
15876         return true;
15877     }
15878 };
15879 
15880 struct fp16Ldexp : public fp16PerComponent
15881 {
15882     template <class fp16type>
calcvkt::SpirVAssembly::fp16Ldexp15883     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15884     {
15885         const fp16type x(*in[0]);
15886         const fp16type y(*in[1]);
15887         const double xd(x.asDouble());
15888         const int yd(static_cast<int>(deTrunc(y.asDouble())));
15889         const double result(deLdExp(xd, yd));
15890 
15891         if (y.isNaN() || y.isInf() || y.isDenorm() || yd < -14 || yd > 15)
15892             return false;
15893 
15894         // Spec: "If this product is too large to be represented in the floating-point type, the result is undefined."
15895         if (fp16type(result).isInf())
15896             return false;
15897 
15898         out[0] = fp16type(result).bits();
15899         min[0] = getMin(result, getULPs(in));
15900         max[0] = getMax(result, getULPs(in));
15901 
15902         return true;
15903     }
15904 };
15905 
15906 struct fp16FClamp : public fp16PerComponent
15907 {
15908     template <class fp16type>
calcvkt::SpirVAssembly::fp16FClamp15909     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15910     {
15911         const fp16type x(*in[0]);
15912         const fp16type minVal(*in[1]);
15913         const fp16type maxVal(*in[2]);
15914         const double xd(x.asDouble());
15915         const double minVald(minVal.asDouble());
15916         const double maxVald(maxVal.asDouble());
15917         const double result(deClamp(xd, minVald, maxVald));
15918 
15919         if (minVal.isNaN() || maxVal.isNaN() || minVald > maxVald)
15920             return false;
15921 
15922         out[0] = fp16type(result).bits();
15923         min[0] = getMin(result, getULPs(in));
15924         max[0] = getMax(result, getULPs(in));
15925 
15926         return true;
15927     }
15928 };
15929 
15930 struct fp16FMix : public fp16PerComponent
15931 {
fp16FMixvkt::SpirVAssembly::fp16FMix15932     fp16FMix() : fp16PerComponent()
15933     {
15934         flavorNames.push_back("DoubleCalc");
15935         flavorNames.push_back("EmulatingFP16");
15936         flavorNames.push_back("EmulatingFP16YminusX");
15937     }
15938 
15939     template <class fp16type>
calcvkt::SpirVAssembly::fp16FMix15940     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15941     {
15942         const fp16type x(*in[0]);
15943         const fp16type y(*in[1]);
15944         const fp16type a(*in[2]);
15945         const double ulps(8.0); // This is not a precision test. Value is not from spec
15946         double result(0.0);
15947 
15948         if (getFlavor() == 0)
15949         {
15950             const double xd(x.asDouble());
15951             const double yd(y.asDouble());
15952             const double ad(a.asDouble());
15953             const double xeps(floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15954             const double yeps(floatFormat16.ulp(deAbs(yd * ad), ulps));
15955             const double eps(xeps + yeps);
15956 
15957             result = deMix(xd, yd, ad);
15958             min[0] = result - eps;
15959             max[0] = result + eps;
15960         }
15961         else if (getFlavor() == 1)
15962         {
15963             const double xd(x.asDouble());
15964             const double yd(y.asDouble());
15965             const double ad(a.asDouble());
15966             const fp16type am(1.0 - ad);
15967             const double amd(am.asDouble());
15968             const fp16type xam(xd * amd);
15969             const double xamd(xam.asDouble());
15970             const fp16type ya(yd * ad);
15971             const double yad(ya.asDouble());
15972             const double xeps(floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15973             const double yeps(floatFormat16.ulp(deAbs(yd * ad), ulps));
15974             const double eps(xeps + yeps);
15975 
15976             result = xamd + yad;
15977             min[0] = result - eps;
15978             max[0] = result + eps;
15979         }
15980         else if (getFlavor() == 2)
15981         {
15982             const double xd(x.asDouble());
15983             const double yd(y.asDouble());
15984             const double ad(a.asDouble());
15985             const fp16type ymx(yd - xd);
15986             const double ymxd(ymx.asDouble());
15987             const fp16type ymxa(ymxd * ad);
15988             const double ymxad(ymxa.asDouble());
15989             const double xeps(floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15990             const double yeps(floatFormat16.ulp(deAbs(yd * ad), ulps));
15991             const double eps(xeps + yeps);
15992 
15993             result = xd + ymxad;
15994             min[0] = result - eps;
15995             max[0] = result + eps;
15996         }
15997         else
15998         {
15999             TCU_THROW(InternalError, "Unknown flavor");
16000         }
16001 
16002         out[0] = fp16type(result).bits();
16003 
16004         return true;
16005     }
16006 };
16007 
16008 struct fp16SmoothStep : public fp16PerComponent
16009 {
fp16SmoothStepvkt::SpirVAssembly::fp16SmoothStep16010     fp16SmoothStep() : fp16PerComponent()
16011     {
16012         flavorNames.push_back("FloatCalc");
16013         flavorNames.push_back("EmulatingFP16");
16014         flavorNames.push_back("EmulatingFP16WClamp");
16015     }
16016 
getULPsvkt::SpirVAssembly::fp16SmoothStep16017     virtual double getULPs(vector<const deFloat16 *> &in)
16018     {
16019         DE_UNREF(in);
16020 
16021         return 4.0; // This is not a precision test. Value is not from spec
16022     }
16023 
16024     template <class fp16type>
calcvkt::SpirVAssembly::fp16SmoothStep16025     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16026     {
16027         const fp16type edge0(*in[0]);
16028         const fp16type edge1(*in[1]);
16029         const fp16type x(*in[2]);
16030         double result(0.0);
16031 
16032         if (edge0.isNaN() || edge1.isNaN() || x.isNaN() || edge0.asDouble() >= edge1.asDouble())
16033             return false;
16034 
16035         if (edge0.isInf() || edge1.isInf() || x.isInf())
16036             return false;
16037 
16038         if (getFlavor() == 0)
16039         {
16040             const float edge0d(edge0.asFloat());
16041             const float edge1d(edge1.asFloat());
16042             const float xd(x.asFloat());
16043             const float sstep(deFloatSmoothStep(edge0d, edge1d, xd));
16044 
16045             result = sstep;
16046         }
16047         else if (getFlavor() == 1)
16048         {
16049             const double edge0d(edge0.asDouble());
16050             const double edge1d(edge1.asDouble());
16051             const double xd(x.asDouble());
16052 
16053             if (xd <= edge0d)
16054                 result = 0.0;
16055             else if (xd >= edge1d)
16056                 result = 1.0;
16057             else
16058             {
16059                 const fp16type a(xd - edge0d);
16060                 const fp16type b(edge1d - edge0d);
16061                 const fp16type t(a.asDouble() / b.asDouble());
16062                 const fp16type t2(2.0 * t.asDouble());
16063                 const fp16type t3(3.0 - t2.asDouble());
16064                 const fp16type t4(t.asDouble() * t3.asDouble());
16065                 const fp16type t5(t.asDouble() * t4.asDouble());
16066 
16067                 result = t5.asDouble();
16068             }
16069         }
16070         else if (getFlavor() == 2)
16071         {
16072             const double edge0d(edge0.asDouble());
16073             const double edge1d(edge1.asDouble());
16074             const double xd(x.asDouble());
16075             const fp16type a(xd - edge0d);
16076             const fp16type b(edge1d - edge0d);
16077             const fp16type bi(1.0 / b.asDouble());
16078             const fp16type t0(a.asDouble() * bi.asDouble());
16079             const double tc(deClamp(t0.asDouble(), 0.0, 1.0));
16080             const fp16type t(tc);
16081             const fp16type t2(2.0 * t.asDouble());
16082             const fp16type t3(3.0 - t2.asDouble());
16083             const fp16type t4(t.asDouble() * t3.asDouble());
16084             const fp16type t5(t.asDouble() * t4.asDouble());
16085 
16086             result = t5.asDouble();
16087         }
16088         else
16089         {
16090             TCU_THROW(InternalError, "Unknown flavor");
16091         }
16092 
16093         out[0] = fp16type(result).bits();
16094         min[0] = getMin(result, getULPs(in));
16095         max[0] = getMax(result, getULPs(in));
16096 
16097         return true;
16098     }
16099 };
16100 
16101 struct fp16Fma : public fp16PerComponent
16102 {
fp16Fmavkt::SpirVAssembly::fp16Fma16103     fp16Fma()
16104     {
16105         flavorNames.push_back("DoubleCalc");
16106         flavorNames.push_back("EmulatingFP16");
16107     }
16108 
getULPsvkt::SpirVAssembly::fp16Fma16109     virtual double getULPs(vector<const deFloat16 *> &in)
16110     {
16111         DE_UNREF(in);
16112 
16113         return 16.0;
16114     }
16115 
16116     template <class fp16type>
calcvkt::SpirVAssembly::fp16Fma16117     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16118     {
16119         DE_ASSERT(in.size() == 3);
16120         DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16121         DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16122         DE_ASSERT(getArgCompCount(2) == getOutCompCount());
16123         DE_ASSERT(getOutCompCount() > 0);
16124 
16125         const fp16type a(*in[0]);
16126         const fp16type b(*in[1]);
16127         const fp16type c(*in[2]);
16128         double result(0.0);
16129 
16130         if (getFlavor() == 0)
16131         {
16132             const double ad(a.asDouble());
16133             const double bd(b.asDouble());
16134             const double cd(c.asDouble());
16135 
16136             result = deMadd(ad, bd, cd);
16137         }
16138         else if (getFlavor() == 1)
16139         {
16140             const double ad(a.asDouble());
16141             const double bd(b.asDouble());
16142             const double cd(c.asDouble());
16143             const fp16type ab(ad * bd);
16144             const fp16type r(ab.asDouble() + cd);
16145 
16146             result = r.asDouble();
16147         }
16148         else
16149         {
16150             TCU_THROW(InternalError, "Unknown flavor");
16151         }
16152 
16153         out[0] = fp16type(result).bits();
16154         min[0] = getMin(result, getULPs(in));
16155         max[0] = getMax(result, getULPs(in));
16156 
16157         return true;
16158     }
16159 };
16160 
16161 struct fp16AllComponents : public fp16PerComponent
16162 {
callOncePerComponentvkt::SpirVAssembly::fp16AllComponents16163     bool callOncePerComponent()
16164     {
16165         return false;
16166     }
16167 };
16168 
16169 struct fp16Length : public fp16AllComponents
16170 {
fp16Lengthvkt::SpirVAssembly::fp16Length16171     fp16Length() : fp16AllComponents()
16172     {
16173         flavorNames.push_back("EmulatingFP16");
16174         flavorNames.push_back("DoubleCalc");
16175     }
16176 
getULPsvkt::SpirVAssembly::fp16Length16177     virtual double getULPs(vector<const deFloat16 *> &in)
16178     {
16179         DE_UNREF(in);
16180 
16181         return 4.0;
16182     }
16183 
16184     template <class fp16type>
calcvkt::SpirVAssembly::fp16Length16185     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16186     {
16187         DE_ASSERT(getOutCompCount() == 1);
16188         DE_ASSERT(in.size() == 1);
16189 
16190         double result(0.0);
16191 
16192         if (getFlavor() == 0)
16193         {
16194             fp16type r(0.0);
16195 
16196             for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16197             {
16198                 const fp16type x(in[0][componentNdx]);
16199                 const fp16type q(x.asDouble() * x.asDouble());
16200 
16201                 r = fp16type(r.asDouble() + q.asDouble());
16202             }
16203 
16204             result = deSqrt(r.asDouble());
16205 
16206             out[0] = fp16type(result).bits();
16207         }
16208         else if (getFlavor() == 1)
16209         {
16210             double r(0.0);
16211 
16212             for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16213             {
16214                 const fp16type x(in[0][componentNdx]);
16215                 const double q(x.asDouble() * x.asDouble());
16216 
16217                 r += q;
16218             }
16219 
16220             result = deSqrt(r);
16221 
16222             out[0] = fp16type(result).bits();
16223         }
16224         else
16225         {
16226             TCU_THROW(InternalError, "Unknown flavor");
16227         }
16228 
16229         min[0] = getMin(result, getULPs(in));
16230         max[0] = getMax(result, getULPs(in));
16231 
16232         return true;
16233     }
16234 };
16235 
16236 struct fp16Distance : public fp16AllComponents
16237 {
fp16Distancevkt::SpirVAssembly::fp16Distance16238     fp16Distance() : fp16AllComponents()
16239     {
16240         flavorNames.push_back("EmulatingFP16");
16241         flavorNames.push_back("DoubleCalc");
16242     }
16243 
getULPsvkt::SpirVAssembly::fp16Distance16244     virtual double getULPs(vector<const deFloat16 *> &in)
16245     {
16246         DE_UNREF(in);
16247 
16248         return 4.0;
16249     }
16250 
16251     template <class fp16type>
calcvkt::SpirVAssembly::fp16Distance16252     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16253     {
16254         DE_ASSERT(getOutCompCount() == 1);
16255         DE_ASSERT(in.size() == 2);
16256         DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
16257 
16258         double result(0.0);
16259 
16260         if (getFlavor() == 0)
16261         {
16262             fp16type r(0.0);
16263 
16264             for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16265             {
16266                 const fp16type x(in[0][componentNdx]);
16267                 const fp16type y(in[1][componentNdx]);
16268                 const fp16type d(x.asDouble() - y.asDouble());
16269                 const fp16type q(d.asDouble() * d.asDouble());
16270 
16271                 r = fp16type(r.asDouble() + q.asDouble());
16272             }
16273 
16274             result = deSqrt(r.asDouble());
16275         }
16276         else if (getFlavor() == 1)
16277         {
16278             double r(0.0);
16279 
16280             for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16281             {
16282                 const fp16type x(in[0][componentNdx]);
16283                 const fp16type y(in[1][componentNdx]);
16284                 const double d(x.asDouble() - y.asDouble());
16285                 const double q(d * d);
16286 
16287                 r += q;
16288             }
16289 
16290             result = deSqrt(r);
16291         }
16292         else
16293         {
16294             TCU_THROW(InternalError, "Unknown flavor");
16295         }
16296 
16297         out[0] = fp16type(result).bits();
16298         min[0] = getMin(result, getULPs(in));
16299         max[0] = getMax(result, getULPs(in));
16300 
16301         return true;
16302     }
16303 };
16304 
16305 struct fp16Cross : public fp16AllComponents
16306 {
fp16Crossvkt::SpirVAssembly::fp16Cross16307     fp16Cross() : fp16AllComponents()
16308     {
16309         flavorNames.push_back("EmulatingFP16");
16310         flavorNames.push_back("DoubleCalc");
16311     }
16312 
getULPsvkt::SpirVAssembly::fp16Cross16313     virtual double getULPs(vector<const deFloat16 *> &in)
16314     {
16315         DE_UNREF(in);
16316 
16317         return 4.0;
16318     }
16319 
16320     template <class fp16type>
calcvkt::SpirVAssembly::fp16Cross16321     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16322     {
16323         DE_ASSERT(getOutCompCount() == 3);
16324         DE_ASSERT(in.size() == 2);
16325         DE_ASSERT(getArgCompCount(0) == 3);
16326         DE_ASSERT(getArgCompCount(1) == 3);
16327 
16328         if (getFlavor() == 0)
16329         {
16330             const fp16type x0(in[0][0]);
16331             const fp16type x1(in[0][1]);
16332             const fp16type x2(in[0][2]);
16333             const fp16type y0(in[1][0]);
16334             const fp16type y1(in[1][1]);
16335             const fp16type y2(in[1][2]);
16336             const fp16type x1y2(x1.asDouble() * y2.asDouble());
16337             const fp16type y1x2(y1.asDouble() * x2.asDouble());
16338             const fp16type x2y0(x2.asDouble() * y0.asDouble());
16339             const fp16type y2x0(y2.asDouble() * x0.asDouble());
16340             const fp16type x0y1(x0.asDouble() * y1.asDouble());
16341             const fp16type y0x1(y0.asDouble() * x1.asDouble());
16342 
16343             out[0] = fp16type(x1y2.asDouble() - y1x2.asDouble()).bits();
16344             out[1] = fp16type(x2y0.asDouble() - y2x0.asDouble()).bits();
16345             out[2] = fp16type(x0y1.asDouble() - y0x1.asDouble()).bits();
16346         }
16347         else if (getFlavor() == 1)
16348         {
16349             const fp16type x0(in[0][0]);
16350             const fp16type x1(in[0][1]);
16351             const fp16type x2(in[0][2]);
16352             const fp16type y0(in[1][0]);
16353             const fp16type y1(in[1][1]);
16354             const fp16type y2(in[1][2]);
16355             const double x1y2(x1.asDouble() * y2.asDouble());
16356             const double y1x2(y1.asDouble() * x2.asDouble());
16357             const double x2y0(x2.asDouble() * y0.asDouble());
16358             const double y2x0(y2.asDouble() * x0.asDouble());
16359             const double x0y1(x0.asDouble() * y1.asDouble());
16360             const double y0x1(y0.asDouble() * x1.asDouble());
16361 
16362             out[0] = fp16type(x1y2 - y1x2).bits();
16363             out[1] = fp16type(x2y0 - y2x0).bits();
16364             out[2] = fp16type(x0y1 - y0x1).bits();
16365         }
16366         else
16367         {
16368             TCU_THROW(InternalError, "Unknown flavor");
16369         }
16370 
16371         for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16372             min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16373         for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16374             max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16375 
16376         return true;
16377     }
16378 };
16379 
16380 struct fp16Normalize : public fp16AllComponents
16381 {
fp16Normalizevkt::SpirVAssembly::fp16Normalize16382     fp16Normalize() : fp16AllComponents()
16383     {
16384         flavorNames.push_back("EmulatingFP16");
16385         flavorNames.push_back("DoubleCalc");
16386 
16387         permutationsFlavorStart = 0;
16388         permutationsFlavorEnd   = flavorNames.size();
16389 
16390         // flavorNames will be extended later
16391     }
16392 
setArgCompCountvkt::SpirVAssembly::fp16Normalize16393     virtual void setArgCompCount(size_t argNo, size_t compCount)
16394     {
16395         DE_ASSERT(argCompCount[argNo] == 0); // Once only
16396 
16397         if (argNo == 0 && argCompCount[argNo] == 0)
16398         {
16399             const size_t maxPermutationsCount = 24u; // Equal to 4!
16400             std::vector<int> indices;
16401 
16402             for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16403                 indices.push_back(static_cast<int>(componentNdx));
16404 
16405             m_permutations.reserve(maxPermutationsCount);
16406 
16407             permutationsFlavorStart = flavorNames.size();
16408 
16409             do
16410             {
16411                 tcu::UVec4 permutation;
16412                 std::string name = "Permutted_";
16413 
16414                 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16415                 {
16416                     permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16417                     name += de::toString(indices[componentNdx]);
16418                 }
16419 
16420                 m_permutations.push_back(permutation);
16421                 flavorNames.push_back(name);
16422 
16423             } while (std::next_permutation(indices.begin(), indices.end()));
16424 
16425             permutationsFlavorEnd = flavorNames.size();
16426         }
16427 
16428         fp16AllComponents::setArgCompCount(argNo, compCount);
16429     }
getULPsvkt::SpirVAssembly::fp16Normalize16430     virtual double getULPs(vector<const deFloat16 *> &in)
16431     {
16432         DE_UNREF(in);
16433 
16434         return 8.0;
16435     }
16436 
16437     template <class fp16type>
calcvkt::SpirVAssembly::fp16Normalize16438     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16439     {
16440         DE_ASSERT(in.size() == 1);
16441         DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16442 
16443         if (getFlavor() == 0)
16444         {
16445             fp16type r(0.0);
16446 
16447             for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16448             {
16449                 const fp16type x(in[0][componentNdx]);
16450                 const fp16type q(x.asDouble() * x.asDouble());
16451 
16452                 r = fp16type(r.asDouble() + q.asDouble());
16453             }
16454 
16455             r = fp16type(deSqrt(r.asDouble()));
16456 
16457             if (r.isZero())
16458                 return false;
16459 
16460             for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16461             {
16462                 const fp16type x(in[0][componentNdx]);
16463 
16464                 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
16465             }
16466         }
16467         else if (getFlavor() == 1)
16468         {
16469             double r(0.0);
16470 
16471             for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16472             {
16473                 const fp16type x(in[0][componentNdx]);
16474                 const double q(x.asDouble() * x.asDouble());
16475 
16476                 r += q;
16477             }
16478 
16479             r = deSqrt(r);
16480 
16481             if (r == 0)
16482                 return false;
16483 
16484             for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16485             {
16486                 const fp16type x(in[0][componentNdx]);
16487 
16488                 out[componentNdx] = fp16type(x.asDouble() / r).bits();
16489             }
16490         }
16491         else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
16492         {
16493             const int compCount(static_cast<int>(getArgCompCount(0)));
16494             const size_t permutationNdx(getFlavor() - permutationsFlavorStart);
16495             const tcu::UVec4 &permutation(m_permutations[permutationNdx]);
16496             fp16type r(0.0);
16497 
16498             for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16499             {
16500                 const size_t componentNdx(permutation[permComponentNdx]);
16501                 const fp16type x(in[0][componentNdx]);
16502                 const fp16type q(x.asDouble() * x.asDouble());
16503 
16504                 r = fp16type(r.asDouble() + q.asDouble());
16505             }
16506 
16507             r = fp16type(deSqrt(r.asDouble()));
16508 
16509             if (r.isZero())
16510                 return false;
16511 
16512             for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16513             {
16514                 const size_t componentNdx(permutation[permComponentNdx]);
16515                 const fp16type x(in[0][componentNdx]);
16516 
16517                 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
16518             }
16519         }
16520         else
16521         {
16522             TCU_THROW(InternalError, "Unknown flavor");
16523         }
16524 
16525         for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16526             min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16527         for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16528             max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16529 
16530         return true;
16531     }
16532 
16533 private:
16534     std::vector<tcu::UVec4> m_permutations;
16535     size_t permutationsFlavorStart;
16536     size_t permutationsFlavorEnd;
16537 };
16538 
16539 struct fp16FaceForward : public fp16AllComponents
16540 {
getULPsvkt::SpirVAssembly::fp16FaceForward16541     virtual double getULPs(vector<const deFloat16 *> &in)
16542     {
16543         DE_UNREF(in);
16544 
16545         return 4.0;
16546     }
16547 
16548     template <class fp16type>
calcvkt::SpirVAssembly::fp16FaceForward16549     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16550     {
16551         DE_ASSERT(in.size() == 3);
16552         DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16553         DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16554         DE_ASSERT(getArgCompCount(2) == getOutCompCount());
16555 
16556         fp16type dp(0.0);
16557 
16558         for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16559         {
16560             const fp16type x(in[1][componentNdx]);
16561             const fp16type y(in[2][componentNdx]);
16562             const double xd(x.asDouble());
16563             const double yd(y.asDouble());
16564             const fp16type q(xd * yd);
16565 
16566             dp = fp16type(dp.asDouble() + q.asDouble());
16567         }
16568 
16569         if (dp.isNaN() || dp.isZero())
16570             return false;
16571 
16572         for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16573         {
16574             const fp16type n(in[0][componentNdx]);
16575 
16576             out[componentNdx] = (dp.signBit() == 1) ? n.bits() : fp16type(-n.asDouble()).bits();
16577         }
16578 
16579         for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16580             min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16581         for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16582             max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16583 
16584         return true;
16585     }
16586 };
16587 
16588 struct fp16Reflect : public fp16AllComponents
16589 {
fp16Reflectvkt::SpirVAssembly::fp16Reflect16590     fp16Reflect() : fp16AllComponents()
16591     {
16592         flavorNames.push_back("EmulatingFP16");
16593         flavorNames.push_back("EmulatingFP16+KeepZeroSign");
16594         flavorNames.push_back("FloatCalc");
16595         flavorNames.push_back("FloatCalc+KeepZeroSign");
16596         flavorNames.push_back("EmulatingFP16+2Nfirst");
16597         flavorNames.push_back("EmulatingFP16+2Ifirst");
16598     }
16599 
getULPsvkt::SpirVAssembly::fp16Reflect16600     virtual double getULPs(vector<const deFloat16 *> &in)
16601     {
16602         DE_UNREF(in);
16603 
16604         return 256.0; // This is not a precision test. Value is not from spec
16605     }
16606 
16607     template <class fp16type>
calcvkt::SpirVAssembly::fp16Reflect16608     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16609     {
16610         DE_ASSERT(in.size() == 2);
16611         DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16612         DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16613 
16614         if (getFlavor() < 4)
16615         {
16616             const bool keepZeroSign((flavor & 1) != 0 ? true : false);
16617             const bool floatCalc((flavor & 2) != 0 ? true : false);
16618 
16619             if (floatCalc)
16620             {
16621                 float dp(0.0f);
16622 
16623                 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16624                 {
16625                     const fp16type i(in[0][componentNdx]);
16626                     const fp16type n(in[1][componentNdx]);
16627                     const float id(i.asFloat());
16628                     const float nd(n.asFloat());
16629                     const float qd(id * nd);
16630 
16631                     if (keepZeroSign)
16632                         dp = (componentNdx == 0) ? qd : dp + qd;
16633                     else
16634                         dp = dp + qd;
16635                 }
16636 
16637                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16638                 {
16639                     const fp16type i(in[0][componentNdx]);
16640                     const fp16type n(in[1][componentNdx]);
16641                     const float dpnd(dp * n.asFloat());
16642                     const float dpn2d(2.0f * dpnd);
16643                     const float idpn2d(i.asFloat() - dpn2d);
16644                     const fp16type result(idpn2d);
16645 
16646                     out[componentNdx] = result.bits();
16647                 }
16648             }
16649             else
16650             {
16651                 fp16type dp(0.0);
16652 
16653                 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16654                 {
16655                     const fp16type i(in[0][componentNdx]);
16656                     const fp16type n(in[1][componentNdx]);
16657                     const double id(i.asDouble());
16658                     const double nd(n.asDouble());
16659                     const fp16type q(id * nd);
16660 
16661                     if (keepZeroSign)
16662                         dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
16663                     else
16664                         dp = fp16type(dp.asDouble() + q.asDouble());
16665                 }
16666 
16667                 if (dp.isNaN())
16668                     return false;
16669 
16670                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16671                 {
16672                     const fp16type i(in[0][componentNdx]);
16673                     const fp16type n(in[1][componentNdx]);
16674                     const fp16type dpn(dp.asDouble() * n.asDouble());
16675                     const fp16type dpn2(2 * dpn.asDouble());
16676                     const fp16type idpn2(i.asDouble() - dpn2.asDouble());
16677 
16678                     out[componentNdx] = idpn2.bits();
16679                 }
16680             }
16681         }
16682         else if (getFlavor() == 4)
16683         {
16684             fp16type dp(0.0);
16685 
16686             for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16687             {
16688                 const fp16type i(in[0][componentNdx]);
16689                 const fp16type n(in[1][componentNdx]);
16690                 const double id(i.asDouble());
16691                 const double nd(n.asDouble());
16692                 const fp16type q(id * nd);
16693 
16694                 dp = fp16type(dp.asDouble() + q.asDouble());
16695             }
16696 
16697             if (dp.isNaN())
16698                 return false;
16699 
16700             for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16701             {
16702                 const fp16type i(in[0][componentNdx]);
16703                 const fp16type n(in[1][componentNdx]);
16704                 const fp16type n2(2 * n.asDouble());
16705                 const fp16type dpn2(dp.asDouble() * n2.asDouble());
16706                 const fp16type idpn2(i.asDouble() - dpn2.asDouble());
16707 
16708                 out[componentNdx] = idpn2.bits();
16709             }
16710         }
16711         else if (getFlavor() == 5)
16712         {
16713             fp16type dp2(0.0);
16714 
16715             for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16716             {
16717                 const fp16type i(in[0][componentNdx]);
16718                 const fp16type n(in[1][componentNdx]);
16719                 const fp16type i2(2.0 * i.asDouble());
16720                 const double i2d(i2.asDouble());
16721                 const double nd(n.asDouble());
16722                 const fp16type q(i2d * nd);
16723 
16724                 dp2 = fp16type(dp2.asDouble() + q.asDouble());
16725             }
16726 
16727             if (dp2.isNaN())
16728                 return false;
16729 
16730             for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16731             {
16732                 const fp16type i(in[0][componentNdx]);
16733                 const fp16type n(in[1][componentNdx]);
16734                 const fp16type dpn2(dp2.asDouble() * n.asDouble());
16735                 const fp16type idpn2(i.asDouble() - dpn2.asDouble());
16736 
16737                 out[componentNdx] = idpn2.bits();
16738             }
16739         }
16740         else
16741         {
16742             TCU_THROW(InternalError, "Unknown flavor");
16743         }
16744 
16745         for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16746             min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16747         for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16748             max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16749 
16750         return true;
16751     }
16752 };
16753 
16754 struct fp16Refract : public fp16AllComponents
16755 {
fp16Refractvkt::SpirVAssembly::fp16Refract16756     fp16Refract() : fp16AllComponents()
16757     {
16758         flavorNames.push_back("EmulatingFP16");
16759         flavorNames.push_back("EmulatingFP16+KeepZeroSign");
16760         flavorNames.push_back("FloatCalc");
16761         flavorNames.push_back("FloatCalc+KeepZeroSign");
16762     }
16763 
getULPsvkt::SpirVAssembly::fp16Refract16764     virtual double getULPs(vector<const deFloat16 *> &in)
16765     {
16766         DE_UNREF(in);
16767 
16768         return 8192.0; // This is not a precision test. Value is not from spec
16769     }
16770 
16771     template <class fp16type>
calcvkt::SpirVAssembly::fp16Refract16772     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16773     {
16774         DE_ASSERT(in.size() == 3);
16775         DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16776         DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16777         DE_ASSERT(getArgCompCount(2) == 1);
16778 
16779         const bool keepZeroSign((flavor & 1) != 0 ? true : false);
16780         const bool doubleCalc((flavor & 2) != 0 ? true : false);
16781         const fp16type eta(*in[2]);
16782 
16783         if (doubleCalc)
16784         {
16785             double dp(0.0);
16786 
16787             for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16788             {
16789                 const fp16type i(in[0][componentNdx]);
16790                 const fp16type n(in[1][componentNdx]);
16791                 const double id(i.asDouble());
16792                 const double nd(n.asDouble());
16793                 const double qd(id * nd);
16794 
16795                 if (keepZeroSign)
16796                     dp = (componentNdx == 0) ? qd : dp + qd;
16797                 else
16798                     dp = dp + qd;
16799             }
16800 
16801             const double eta2(eta.asDouble() * eta.asDouble());
16802             const double dp2(dp * dp);
16803             const double dp1(1.0 - dp2);
16804             const double dpe(eta2 * dp1);
16805             const double k(1.0 - dpe);
16806 
16807             if (k < 0.0)
16808             {
16809                 const fp16type zero(0.0);
16810 
16811                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16812                     out[componentNdx] = zero.bits();
16813             }
16814             else
16815             {
16816                 const double sk(deSqrt(k));
16817 
16818                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16819                 {
16820                     const fp16type i(in[0][componentNdx]);
16821                     const fp16type n(in[1][componentNdx]);
16822                     const double etai(i.asDouble() * eta.asDouble());
16823                     const double etadp(eta.asDouble() * dp);
16824                     const double etadpk(etadp + sk);
16825                     const double etadpkn(etadpk * n.asDouble());
16826                     const double full(etai - etadpkn);
16827                     const fp16type result(full);
16828 
16829                     if (result.isInf())
16830                         return false;
16831 
16832                     out[componentNdx] = result.bits();
16833                 }
16834             }
16835         }
16836         else
16837         {
16838             fp16type dp(0.0);
16839 
16840             for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16841             {
16842                 const fp16type i(in[0][componentNdx]);
16843                 const fp16type n(in[1][componentNdx]);
16844                 const double id(i.asDouble());
16845                 const double nd(n.asDouble());
16846                 const fp16type q(id * nd);
16847 
16848                 if (keepZeroSign)
16849                     dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
16850                 else
16851                     dp = fp16type(dp.asDouble() + q.asDouble());
16852             }
16853 
16854             if (dp.isNaN())
16855                 return false;
16856 
16857             const fp16type eta2(eta.asDouble() * eta.asDouble());
16858             const fp16type dp2(dp.asDouble() * dp.asDouble());
16859             const fp16type dp1(1.0 - dp2.asDouble());
16860             const fp16type dpe(eta2.asDouble() * dp1.asDouble());
16861             const fp16type k(1.0 - dpe.asDouble());
16862 
16863             if (k.asDouble() < 0.0)
16864             {
16865                 const fp16type zero(0.0);
16866 
16867                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16868                     out[componentNdx] = zero.bits();
16869             }
16870             else
16871             {
16872                 const fp16type sk(deSqrt(k.asDouble()));
16873 
16874                 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16875                 {
16876                     const fp16type i(in[0][componentNdx]);
16877                     const fp16type n(in[1][componentNdx]);
16878                     const fp16type etai(i.asDouble() * eta.asDouble());
16879                     const fp16type etadp(eta.asDouble() * dp.asDouble());
16880                     const fp16type etadpk(etadp.asDouble() + sk.asDouble());
16881                     const fp16type etadpkn(etadpk.asDouble() * n.asDouble());
16882                     const fp16type full(etai.asDouble() - etadpkn.asDouble());
16883 
16884                     if (full.isNaN() || full.isInf())
16885                         return false;
16886 
16887                     out[componentNdx] = full.bits();
16888                 }
16889             }
16890         }
16891 
16892         for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16893             min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16894         for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16895             max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16896 
16897         return true;
16898     }
16899 };
16900 
16901 struct fp16Dot : public fp16AllComponents
16902 {
fp16Dotvkt::SpirVAssembly::fp16Dot16903     fp16Dot() : fp16AllComponents()
16904     {
16905         flavorNames.push_back("EmulatingFP16");
16906         flavorNames.push_back("FloatCalc");
16907         flavorNames.push_back("DoubleCalc");
16908 
16909         permutationsFlavorStart = 0;
16910         permutationsFlavorEnd   = flavorNames.size();
16911 
16912         // flavorNames will be extended later
16913     }
16914 
setArgCompCountvkt::SpirVAssembly::fp16Dot16915     virtual void setArgCompCount(size_t argNo, size_t compCount)
16916     {
16917         DE_ASSERT(argCompCount[argNo] == 0); // Once only
16918 
16919         if (argNo == 0 && argCompCount[argNo] == 0)
16920         {
16921             const size_t maxPermutationsCount = 24u; // Equal to 4!
16922             std::vector<int> indices;
16923 
16924             for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16925                 indices.push_back(static_cast<int>(componentNdx));
16926 
16927             m_permutations.reserve(maxPermutationsCount);
16928 
16929             permutationsFlavorStart = flavorNames.size();
16930 
16931             do
16932             {
16933                 tcu::UVec4 permutation;
16934                 std::string name = "Permutted_";
16935 
16936                 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16937                 {
16938                     permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16939                     name += de::toString(indices[componentNdx]);
16940                 }
16941 
16942                 m_permutations.push_back(permutation);
16943                 flavorNames.push_back(name);
16944 
16945             } while (std::next_permutation(indices.begin(), indices.end()));
16946 
16947             permutationsFlavorEnd = flavorNames.size();
16948         }
16949 
16950         fp16AllComponents::setArgCompCount(argNo, compCount);
16951     }
16952 
getULPsvkt::SpirVAssembly::fp16Dot16953     virtual double getULPs(vector<const deFloat16 *> &in)
16954     {
16955         DE_UNREF(in);
16956 
16957         return 16.0; // This is not a precision test. Value is not from spec
16958     }
16959 
16960     template <class fp16type>
calcvkt::SpirVAssembly::fp16Dot16961     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16962     {
16963         DE_ASSERT(in.size() == 2);
16964         DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
16965         DE_ASSERT(getOutCompCount() == 1);
16966 
16967         double result(0.0);
16968         double eps(0.0);
16969 
16970         if (getFlavor() == 0)
16971         {
16972             fp16type dp(0.0);
16973 
16974             for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16975             {
16976                 const fp16type x(in[0][componentNdx]);
16977                 const fp16type y(in[1][componentNdx]);
16978                 const fp16type q(x.asDouble() * y.asDouble());
16979 
16980                 dp = fp16type(dp.asDouble() + q.asDouble());
16981                 eps += floatFormat16.ulp(q.asDouble(), 2.0);
16982             }
16983 
16984             result = dp.asDouble();
16985         }
16986         else if (getFlavor() == 1)
16987         {
16988             float dp(0.0);
16989 
16990             for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16991             {
16992                 const fp16type x(in[0][componentNdx]);
16993                 const fp16type y(in[1][componentNdx]);
16994                 const float q(x.asFloat() * y.asFloat());
16995 
16996                 dp += q;
16997                 eps += floatFormat16.ulp(static_cast<double>(q), 2.0);
16998             }
16999 
17000             result = dp;
17001         }
17002         else if (getFlavor() == 2)
17003         {
17004             double dp(0.0);
17005 
17006             for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
17007             {
17008                 const fp16type x(in[0][componentNdx]);
17009                 const fp16type y(in[1][componentNdx]);
17010                 const double q(x.asDouble() * y.asDouble());
17011 
17012                 dp += q;
17013                 eps += floatFormat16.ulp(q, 2.0);
17014             }
17015 
17016             result = dp;
17017         }
17018         else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
17019         {
17020             const int compCount(static_cast<int>(getArgCompCount(1)));
17021             const size_t permutationNdx(getFlavor() - permutationsFlavorStart);
17022             const tcu::UVec4 &permutation(m_permutations[permutationNdx]);
17023             fp16type dp(0.0);
17024 
17025             for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
17026             {
17027                 const size_t componentNdx(permutation[permComponentNdx]);
17028                 const fp16type x(in[0][componentNdx]);
17029                 const fp16type y(in[1][componentNdx]);
17030                 const fp16type q(x.asDouble() * y.asDouble());
17031 
17032                 dp = fp16type(dp.asDouble() + q.asDouble());
17033                 eps += floatFormat16.ulp(q.asDouble(), 2.0);
17034             }
17035 
17036             result = dp.asDouble();
17037         }
17038         else
17039         {
17040             TCU_THROW(InternalError, "Unknown flavor");
17041         }
17042 
17043         out[0] = fp16type(result).bits();
17044         min[0] = result - eps;
17045         max[0] = result + eps;
17046 
17047         return true;
17048     }
17049 
17050 private:
17051     std::vector<tcu::UVec4> m_permutations;
17052     size_t permutationsFlavorStart;
17053     size_t permutationsFlavorEnd;
17054 };
17055 
17056 struct fp16VectorTimesScalar : public fp16AllComponents
17057 {
getULPsvkt::SpirVAssembly::fp16VectorTimesScalar17058     virtual double getULPs(vector<const deFloat16 *> &in)
17059     {
17060         DE_UNREF(in);
17061 
17062         return 2.0;
17063     }
17064 
17065     template <class fp16type>
calcvkt::SpirVAssembly::fp16VectorTimesScalar17066     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17067     {
17068         DE_ASSERT(in.size() == 2);
17069         DE_ASSERT(getArgCompCount(0) == getOutCompCount());
17070         DE_ASSERT(getArgCompCount(1) == 1);
17071 
17072         fp16type s(*in[1]);
17073 
17074         for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
17075         {
17076             const fp16type x(in[0][componentNdx]);
17077             const double result(s.asDouble() * x.asDouble());
17078             const fp16type m(result);
17079 
17080             out[componentNdx] = m.bits();
17081             min[componentNdx] = getMin(result, getULPs(in));
17082             max[componentNdx] = getMax(result, getULPs(in));
17083         }
17084 
17085         return true;
17086     }
17087 };
17088 
17089 struct fp16MatrixBase : public fp16AllComponents
17090 {
getComponentValidityvkt::SpirVAssembly::fp16MatrixBase17091     uint32_t getComponentValidity()
17092     {
17093         return static_cast<uint32_t>(-1);
17094     }
17095 
getNdxvkt::SpirVAssembly::fp16MatrixBase17096     inline size_t getNdx(const size_t rowCount, const size_t col, const size_t row)
17097     {
17098         const size_t minComponentCount = 0;
17099         const size_t maxComponentCount = 3;
17100         const size_t alignedRowsCount  = (rowCount == 3) ? 4 : rowCount;
17101 
17102         DE_ASSERT(de::inRange(rowCount, minComponentCount + 1, maxComponentCount + 1));
17103         DE_ASSERT(de::inRange(col, minComponentCount, maxComponentCount));
17104         DE_ASSERT(de::inBounds(row, minComponentCount, rowCount));
17105         DE_UNREF(minComponentCount);
17106         DE_UNREF(maxComponentCount);
17107 
17108         return col * alignedRowsCount + row;
17109     }
17110 
getComponentMatrixValidityMaskvkt::SpirVAssembly::fp16MatrixBase17111     uint32_t getComponentMatrixValidityMask(size_t cols, size_t rows)
17112     {
17113         uint32_t result = 0u;
17114 
17115         for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17116             for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17117             {
17118                 const size_t bitNdx = getNdx(rows, colNdx, rowNdx);
17119 
17120                 DE_ASSERT(bitNdx < sizeof(result) * 8);
17121 
17122                 result |= (1 << bitNdx);
17123             }
17124 
17125         return result;
17126     }
17127 };
17128 
17129 template <size_t cols, size_t rows>
17130 struct fp16Transpose : public fp16MatrixBase
17131 {
getULPsvkt::SpirVAssembly::fp16Transpose17132     virtual double getULPs(vector<const deFloat16 *> &in)
17133     {
17134         DE_UNREF(in);
17135 
17136         return 1.0;
17137     }
17138 
getComponentValidityvkt::SpirVAssembly::fp16Transpose17139     uint32_t getComponentValidity()
17140     {
17141         return getComponentMatrixValidityMask(rows, cols);
17142     }
17143 
17144     template <class fp16type>
calcvkt::SpirVAssembly::fp16Transpose17145     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17146     {
17147         DE_ASSERT(in.size() == 1);
17148 
17149         const size_t alignedCols = (cols == 3) ? 4 : cols;
17150         const size_t alignedRows = (rows == 3) ? 4 : rows;
17151         vector<deFloat16> output(alignedCols * alignedRows, 0);
17152 
17153         DE_ASSERT(output.size() == alignedCols * alignedRows);
17154 
17155         for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17156             for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17157                 output[rowNdx * alignedCols + colNdx] = in[0][colNdx * alignedRows + rowNdx];
17158 
17159         deMemcpy(out, &output[0], sizeof(deFloat16) * output.size());
17160         deMemcpy(min, &output[0], sizeof(deFloat16) * output.size());
17161         deMemcpy(max, &output[0], sizeof(deFloat16) * output.size());
17162 
17163         return true;
17164     }
17165 };
17166 
17167 template <size_t cols, size_t rows>
17168 struct fp16MatrixTimesScalar : public fp16MatrixBase
17169 {
getULPsvkt::SpirVAssembly::fp16MatrixTimesScalar17170     virtual double getULPs(vector<const deFloat16 *> &in)
17171     {
17172         DE_UNREF(in);
17173 
17174         return 4.0;
17175     }
17176 
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesScalar17177     uint32_t getComponentValidity()
17178     {
17179         return getComponentMatrixValidityMask(cols, rows);
17180     }
17181 
17182     template <class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesScalar17183     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17184     {
17185         DE_ASSERT(in.size() == 2);
17186         DE_ASSERT(getArgCompCount(1) == 1);
17187 
17188         const fp16type y(in[1][0]);
17189         const float scalar(y.asFloat());
17190         const size_t alignedCols = (cols == 3) ? 4 : cols;
17191         const size_t alignedRows = (rows == 3) ? 4 : rows;
17192 
17193         DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
17194         DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
17195         DE_UNREF(alignedCols);
17196 
17197         for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17198             for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17199             {
17200                 const size_t ndx(colNdx * alignedRows + rowNdx);
17201                 const fp16type x(in[0][ndx]);
17202                 const double result(scalar * x.asFloat());
17203 
17204                 out[ndx] = fp16type(result).bits();
17205                 min[ndx] = getMin(result, getULPs(in));
17206                 max[ndx] = getMax(result, getULPs(in));
17207             }
17208 
17209         return true;
17210     }
17211 };
17212 
17213 template <size_t cols, size_t rows>
17214 struct fp16VectorTimesMatrix : public fp16MatrixBase
17215 {
fp16VectorTimesMatrixvkt::SpirVAssembly::fp16VectorTimesMatrix17216     fp16VectorTimesMatrix() : fp16MatrixBase()
17217     {
17218         flavorNames.push_back("EmulatingFP16");
17219         flavorNames.push_back("FloatCalc");
17220     }
17221 
getULPsvkt::SpirVAssembly::fp16VectorTimesMatrix17222     virtual double getULPs(vector<const deFloat16 *> &in)
17223     {
17224         DE_UNREF(in);
17225 
17226         return (8.0 * cols);
17227     }
17228 
getComponentValidityvkt::SpirVAssembly::fp16VectorTimesMatrix17229     uint32_t getComponentValidity()
17230     {
17231         return getComponentMatrixValidityMask(cols, 1);
17232     }
17233 
17234     template <class fp16type>
calcvkt::SpirVAssembly::fp16VectorTimesMatrix17235     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17236     {
17237         DE_ASSERT(in.size() == 2);
17238 
17239         const size_t alignedCols = (cols == 3) ? 4 : cols;
17240         const size_t alignedRows = (rows == 3) ? 4 : rows;
17241 
17242         DE_ASSERT(getOutCompCount() == cols);
17243         DE_ASSERT(getArgCompCount(0) == rows);
17244         DE_ASSERT(getArgCompCount(1) == alignedCols * alignedRows);
17245         DE_UNREF(alignedCols);
17246 
17247         if (getFlavor() == 0)
17248         {
17249             for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17250             {
17251                 fp16type s(fp16type::zero(1));
17252 
17253                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17254                 {
17255                     const fp16type v(in[0][rowNdx]);
17256                     const float vf(v.asFloat());
17257                     const size_t ndx(colNdx * alignedRows + rowNdx);
17258                     const fp16type x(in[1][ndx]);
17259                     const float xf(x.asFloat());
17260                     const fp16type m(vf * xf);
17261 
17262                     s = fp16type(s.asFloat() + m.asFloat());
17263                 }
17264 
17265                 out[colNdx] = s.bits();
17266                 min[colNdx] = getMin(s.asDouble(), getULPs(in));
17267                 max[colNdx] = getMax(s.asDouble(), getULPs(in));
17268             }
17269         }
17270         else if (getFlavor() == 1)
17271         {
17272             for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17273             {
17274                 float s(0.0f);
17275 
17276                 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17277                 {
17278                     const fp16type v(in[0][rowNdx]);
17279                     const float vf(v.asFloat());
17280                     const size_t ndx(colNdx * alignedRows + rowNdx);
17281                     const fp16type x(in[1][ndx]);
17282                     const float xf(x.asFloat());
17283                     const float m(vf * xf);
17284 
17285                     s += m;
17286                 }
17287 
17288                 out[colNdx] = fp16type(s).bits();
17289                 min[colNdx] = getMin(static_cast<double>(s), getULPs(in));
17290                 max[colNdx] = getMax(static_cast<double>(s), getULPs(in));
17291             }
17292         }
17293         else
17294         {
17295             TCU_THROW(InternalError, "Unknown flavor");
17296         }
17297 
17298         return true;
17299     }
17300 };
17301 
17302 template <size_t cols, size_t rows>
17303 struct fp16MatrixTimesVector : public fp16MatrixBase
17304 {
fp16MatrixTimesVectorvkt::SpirVAssembly::fp16MatrixTimesVector17305     fp16MatrixTimesVector() : fp16MatrixBase()
17306     {
17307         flavorNames.push_back("EmulatingFP16");
17308         flavorNames.push_back("FloatCalc");
17309     }
17310 
getULPsvkt::SpirVAssembly::fp16MatrixTimesVector17311     virtual double getULPs(vector<const deFloat16 *> &in)
17312     {
17313         DE_UNREF(in);
17314 
17315         return (8.0 * rows);
17316     }
17317 
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesVector17318     uint32_t getComponentValidity()
17319     {
17320         return getComponentMatrixValidityMask(rows, 1);
17321     }
17322 
17323     template <class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesVector17324     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17325     {
17326         DE_ASSERT(in.size() == 2);
17327 
17328         const size_t alignedCols = (cols == 3) ? 4 : cols;
17329         const size_t alignedRows = (rows == 3) ? 4 : rows;
17330 
17331         DE_ASSERT(getOutCompCount() == rows);
17332         DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
17333         DE_ASSERT(getArgCompCount(1) == cols);
17334         DE_UNREF(alignedCols);
17335 
17336         if (getFlavor() == 0)
17337         {
17338             for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17339             {
17340                 fp16type s(fp16type::zero(1));
17341 
17342                 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17343                 {
17344                     const size_t ndx(colNdx * alignedRows + rowNdx);
17345                     const fp16type x(in[0][ndx]);
17346                     const float xf(x.asFloat());
17347                     const fp16type v(in[1][colNdx]);
17348                     const float vf(v.asFloat());
17349                     const fp16type m(vf * xf);
17350 
17351                     s = fp16type(s.asFloat() + m.asFloat());
17352                 }
17353 
17354                 out[rowNdx] = s.bits();
17355                 min[rowNdx] = getMin(s.asDouble(), getULPs(in));
17356                 max[rowNdx] = getMax(s.asDouble(), getULPs(in));
17357             }
17358         }
17359         else if (getFlavor() == 1)
17360         {
17361             for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17362             {
17363                 float s(0.0f);
17364 
17365                 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17366                 {
17367                     const size_t ndx(colNdx * alignedRows + rowNdx);
17368                     const fp16type x(in[0][ndx]);
17369                     const float xf(x.asFloat());
17370                     const fp16type v(in[1][colNdx]);
17371                     const float vf(v.asFloat());
17372                     const float m(vf * xf);
17373 
17374                     s += m;
17375                 }
17376 
17377                 out[rowNdx] = fp16type(s).bits();
17378                 min[rowNdx] = getMin(static_cast<double>(s), getULPs(in));
17379                 max[rowNdx] = getMax(static_cast<double>(s), getULPs(in));
17380             }
17381         }
17382         else
17383         {
17384             TCU_THROW(InternalError, "Unknown flavor");
17385         }
17386 
17387         return true;
17388     }
17389 };
17390 
17391 template <size_t colsL, size_t rowsL, size_t colsR, size_t rowsR>
17392 struct fp16MatrixTimesMatrix : public fp16MatrixBase
17393 {
fp16MatrixTimesMatrixvkt::SpirVAssembly::fp16MatrixTimesMatrix17394     fp16MatrixTimesMatrix() : fp16MatrixBase()
17395     {
17396         flavorNames.push_back("EmulatingFP16");
17397         flavorNames.push_back("FloatCalc");
17398     }
17399 
getULPsvkt::SpirVAssembly::fp16MatrixTimesMatrix17400     virtual double getULPs(vector<const deFloat16 *> &in)
17401     {
17402         DE_UNREF(in);
17403 
17404         return 32.0;
17405     }
17406 
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesMatrix17407     uint32_t getComponentValidity()
17408     {
17409         return getComponentMatrixValidityMask(colsR, rowsL);
17410     }
17411 
17412     template <class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesMatrix17413     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17414     {
17415         DE_STATIC_ASSERT(colsL == rowsR);
17416 
17417         DE_ASSERT(in.size() == 2);
17418 
17419         const size_t alignedColsL = (colsL == 3) ? 4 : colsL;
17420         const size_t alignedRowsL = (rowsL == 3) ? 4 : rowsL;
17421         const size_t alignedColsR = (colsR == 3) ? 4 : colsR;
17422         const size_t alignedRowsR = (rowsR == 3) ? 4 : rowsR;
17423 
17424         DE_ASSERT(getOutCompCount() == alignedColsR * alignedRowsL);
17425         DE_ASSERT(getArgCompCount(0) == alignedColsL * alignedRowsL);
17426         DE_ASSERT(getArgCompCount(1) == alignedColsR * alignedRowsR);
17427         DE_UNREF(alignedColsL);
17428         DE_UNREF(alignedColsR);
17429 
17430         if (getFlavor() == 0)
17431         {
17432             for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17433             {
17434                 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17435                 {
17436                     const size_t ndx(colNdx * alignedRowsL + rowNdx);
17437                     fp16type s(fp16type::zero(1));
17438 
17439                     for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17440                     {
17441                         const size_t ndxl(commonNdx * alignedRowsL + rowNdx);
17442                         const fp16type l(in[0][ndxl]);
17443                         const float lf(l.asFloat());
17444                         const size_t ndxr(colNdx * alignedRowsR + commonNdx);
17445                         const fp16type r(in[1][ndxr]);
17446                         const float rf(r.asFloat());
17447                         const fp16type m(lf * rf);
17448 
17449                         s = fp16type(s.asFloat() + m.asFloat());
17450                     }
17451 
17452                     out[ndx] = s.bits();
17453                     min[ndx] = getMin(s.asDouble(), getULPs(in));
17454                     max[ndx] = getMax(s.asDouble(), getULPs(in));
17455                 }
17456             }
17457         }
17458         else if (getFlavor() == 1)
17459         {
17460             for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17461             {
17462                 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17463                 {
17464                     const size_t ndx(colNdx * alignedRowsL + rowNdx);
17465                     float s(0.0f);
17466 
17467                     for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17468                     {
17469                         const size_t ndxl(commonNdx * alignedRowsL + rowNdx);
17470                         const fp16type l(in[0][ndxl]);
17471                         const float lf(l.asFloat());
17472                         const size_t ndxr(colNdx * alignedRowsR + commonNdx);
17473                         const fp16type r(in[1][ndxr]);
17474                         const float rf(r.asFloat());
17475                         const float m(lf * rf);
17476 
17477                         s += m;
17478                     }
17479 
17480                     out[ndx] = fp16type(s).bits();
17481                     min[ndx] = getMin(static_cast<double>(s), getULPs(in));
17482                     max[ndx] = getMax(static_cast<double>(s), getULPs(in));
17483                 }
17484             }
17485         }
17486         else
17487         {
17488             TCU_THROW(InternalError, "Unknown flavor");
17489         }
17490 
17491         return true;
17492     }
17493 };
17494 
17495 template <size_t cols, size_t rows>
17496 struct fp16OuterProduct : public fp16MatrixBase
17497 {
getULPsvkt::SpirVAssembly::fp16OuterProduct17498     virtual double getULPs(vector<const deFloat16 *> &in)
17499     {
17500         DE_UNREF(in);
17501 
17502         return 2.0;
17503     }
17504 
getComponentValidityvkt::SpirVAssembly::fp16OuterProduct17505     uint32_t getComponentValidity()
17506     {
17507         return getComponentMatrixValidityMask(cols, rows);
17508     }
17509 
17510     template <class fp16type>
calcvkt::SpirVAssembly::fp16OuterProduct17511     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17512     {
17513         DE_ASSERT(in.size() == 2);
17514 
17515         const size_t alignedCols = (cols == 3) ? 4 : cols;
17516         const size_t alignedRows = (rows == 3) ? 4 : rows;
17517 
17518         DE_ASSERT(getArgCompCount(0) == rows);
17519         DE_ASSERT(getArgCompCount(1) == cols);
17520         DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
17521         DE_UNREF(alignedCols);
17522 
17523         for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17524         {
17525             for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17526             {
17527                 const size_t ndx(colNdx * alignedRows + rowNdx);
17528                 const fp16type x(in[0][rowNdx]);
17529                 const float xf(x.asFloat());
17530                 const fp16type y(in[1][colNdx]);
17531                 const float yf(y.asFloat());
17532                 const fp16type m(xf * yf);
17533 
17534                 out[ndx] = m.bits();
17535                 min[ndx] = getMin(m.asDouble(), getULPs(in));
17536                 max[ndx] = getMax(m.asDouble(), getULPs(in));
17537             }
17538         }
17539 
17540         return true;
17541     }
17542 };
17543 
17544 template <size_t size>
17545 struct fp16Determinant;
17546 
17547 template <>
17548 struct fp16Determinant<2> : public fp16MatrixBase
17549 {
getULPsvkt::SpirVAssembly::fp16Determinant17550     virtual double getULPs(vector<const deFloat16 *> &in)
17551     {
17552         DE_UNREF(in);
17553 
17554         return 128.0; // This is not a precision test. Value is not from spec
17555     }
17556 
getComponentValidityvkt::SpirVAssembly::fp16Determinant17557     uint32_t getComponentValidity()
17558     {
17559         return 1;
17560     }
17561 
17562     template <class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17563     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17564     {
17565         const size_t cols        = 2;
17566         const size_t rows        = 2;
17567         const size_t alignedCols = (cols == 3) ? 4 : cols;
17568         const size_t alignedRows = (rows == 3) ? 4 : rows;
17569 
17570         DE_ASSERT(in.size() == 1);
17571         DE_ASSERT(getOutCompCount() == 1);
17572         DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17573         DE_UNREF(alignedCols);
17574         DE_UNREF(alignedRows);
17575 
17576         // [ a b ]
17577         // [ c d ]
17578         const float a(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17579         const float b(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17580         const float c(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17581         const float d(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17582         const float ad(a * d);
17583         const fp16type adf16(ad);
17584         const float bc(b * c);
17585         const fp16type bcf16(bc);
17586         const float r(adf16.asFloat() - bcf16.asFloat());
17587         const fp16type rf16(r);
17588 
17589         out[0] = rf16.bits();
17590         min[0] = getMin(r, getULPs(in));
17591         max[0] = getMax(r, getULPs(in));
17592 
17593         return true;
17594     }
17595 };
17596 
17597 template <>
17598 struct fp16Determinant<3> : public fp16MatrixBase
17599 {
getULPsvkt::SpirVAssembly::fp16Determinant17600     virtual double getULPs(vector<const deFloat16 *> &in)
17601     {
17602         DE_UNREF(in);
17603 
17604         return 128.0; // This is not a precision test. Value is not from spec
17605     }
17606 
getComponentValidityvkt::SpirVAssembly::fp16Determinant17607     uint32_t getComponentValidity()
17608     {
17609         return 1;
17610     }
17611 
17612     template <class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17613     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17614     {
17615         const size_t cols        = 3;
17616         const size_t rows        = 3;
17617         const size_t alignedCols = (cols == 3) ? 4 : cols;
17618         const size_t alignedRows = (rows == 3) ? 4 : rows;
17619 
17620         DE_ASSERT(in.size() == 1);
17621         DE_ASSERT(getOutCompCount() == 1);
17622         DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17623         DE_UNREF(alignedCols);
17624         DE_UNREF(alignedRows);
17625 
17626         // [ a b c ]
17627         // [ d e f ]
17628         // [ g h i ]
17629         const float a(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17630         const float b(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17631         const float c(fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17632         const float d(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17633         const float e(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17634         const float f(fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17635         const float g(fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17636         const float h(fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17637         const float i(fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
17638         const fp16type aei(a * e * i);
17639         const fp16type bfg(b * f * g);
17640         const fp16type cdh(c * d * h);
17641         const fp16type ceg(c * e * g);
17642         const fp16type bdi(b * d * i);
17643         const fp16type afh(a * f * h);
17644         const float r(aei.asFloat() + bfg.asFloat() + cdh.asFloat() - ceg.asFloat() - bdi.asFloat() - afh.asFloat());
17645         const fp16type rf16(r);
17646 
17647         out[0] = rf16.bits();
17648         min[0] = getMin(r, getULPs(in));
17649         max[0] = getMax(r, getULPs(in));
17650 
17651         return true;
17652     }
17653 };
17654 
17655 template <>
17656 struct fp16Determinant<4> : public fp16MatrixBase
17657 {
getULPsvkt::SpirVAssembly::fp16Determinant17658     virtual double getULPs(vector<const deFloat16 *> &in)
17659     {
17660         DE_UNREF(in);
17661 
17662         return 128.0; // This is not a precision test. Value is not from spec
17663     }
17664 
getComponentValidityvkt::SpirVAssembly::fp16Determinant17665     uint32_t getComponentValidity()
17666     {
17667         return 1;
17668     }
17669 
17670     template <class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17671     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17672     {
17673         const size_t rows        = 4;
17674         const size_t cols        = 4;
17675         const size_t alignedCols = (cols == 3) ? 4 : cols;
17676         const size_t alignedRows = (rows == 3) ? 4 : rows;
17677 
17678         DE_ASSERT(in.size() == 1);
17679         DE_ASSERT(getOutCompCount() == 1);
17680         DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17681         DE_UNREF(alignedCols);
17682         DE_UNREF(alignedRows);
17683 
17684         // [ a b c d ]
17685         // [ e f g h ]
17686         // [ i j k l ]
17687         // [ m n o p ]
17688         const float a(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17689         const float b(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17690         const float c(fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17691         const float d(fp16type(in[0][getNdx(rows, 3, 0)]).asFloat());
17692         const float e(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17693         const float f(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17694         const float g(fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17695         const float h(fp16type(in[0][getNdx(rows, 3, 1)]).asFloat());
17696         const float i(fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17697         const float j(fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17698         const float k(fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
17699         const float l(fp16type(in[0][getNdx(rows, 3, 2)]).asFloat());
17700         const float m(fp16type(in[0][getNdx(rows, 0, 3)]).asFloat());
17701         const float n(fp16type(in[0][getNdx(rows, 1, 3)]).asFloat());
17702         const float o(fp16type(in[0][getNdx(rows, 2, 3)]).asFloat());
17703         const float p(fp16type(in[0][getNdx(rows, 3, 3)]).asFloat());
17704 
17705         // [ f g h ]
17706         // [ j k l ]
17707         // [ n o p ]
17708         const fp16type fkp(f * k * p);
17709         const fp16type gln(g * l * n);
17710         const fp16type hjo(h * j * o);
17711         const fp16type hkn(h * k * n);
17712         const fp16type gjp(g * j * p);
17713         const fp16type flo(f * l * o);
17714         const fp16type detA(
17715             a * (fkp.asFloat() + gln.asFloat() + hjo.asFloat() - hkn.asFloat() - gjp.asFloat() - flo.asFloat()));
17716 
17717         // [ e g h ]
17718         // [ i k l ]
17719         // [ m o p ]
17720         const fp16type ekp(e * k * p);
17721         const fp16type glm(g * l * m);
17722         const fp16type hio(h * i * o);
17723         const fp16type hkm(h * k * m);
17724         const fp16type gip(g * i * p);
17725         const fp16type elo(e * l * o);
17726         const fp16type detB(
17727             b * (ekp.asFloat() + glm.asFloat() + hio.asFloat() - hkm.asFloat() - gip.asFloat() - elo.asFloat()));
17728 
17729         // [ e f h ]
17730         // [ i j l ]
17731         // [ m n p ]
17732         const fp16type ejp(e * j * p);
17733         const fp16type flm(f * l * m);
17734         const fp16type hin(h * i * n);
17735         const fp16type hjm(h * j * m);
17736         const fp16type fip(f * i * p);
17737         const fp16type eln(e * l * n);
17738         const fp16type detC(
17739             c * (ejp.asFloat() + flm.asFloat() + hin.asFloat() - hjm.asFloat() - fip.asFloat() - eln.asFloat()));
17740 
17741         // [ e f g ]
17742         // [ i j k ]
17743         // [ m n o ]
17744         const fp16type ejo(e * j * o);
17745         const fp16type fkm(f * k * m);
17746         const fp16type gin(g * i * n);
17747         const fp16type gjm(g * j * m);
17748         const fp16type fio(f * i * o);
17749         const fp16type ekn(e * k * n);
17750         const fp16type detD(
17751             d * (ejo.asFloat() + fkm.asFloat() + gin.asFloat() - gjm.asFloat() - fio.asFloat() - ekn.asFloat()));
17752 
17753         const float r(detA.asFloat() - detB.asFloat() + detC.asFloat() - detD.asFloat());
17754         const fp16type rf16(r);
17755 
17756         out[0] = rf16.bits();
17757         min[0] = getMin(r, getULPs(in));
17758         max[0] = getMax(r, getULPs(in));
17759 
17760         return true;
17761     }
17762 };
17763 
17764 template <size_t size>
17765 struct fp16Inverse;
17766 
17767 template <>
17768 struct fp16Inverse<2> : public fp16MatrixBase
17769 {
getULPsvkt::SpirVAssembly::fp16Inverse17770     virtual double getULPs(vector<const deFloat16 *> &in)
17771     {
17772         DE_UNREF(in);
17773 
17774         return 128.0; // This is not a precision test. Value is not from spec
17775     }
17776 
getComponentValidityvkt::SpirVAssembly::fp16Inverse17777     uint32_t getComponentValidity()
17778     {
17779         return getComponentMatrixValidityMask(2, 2);
17780     }
17781 
17782     template <class fp16type>
calcvkt::SpirVAssembly::fp16Inverse17783     bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17784     {
17785         const size_t cols        = 2;
17786         const size_t rows        = 2;
17787         const size_t alignedCols = (cols == 3) ? 4 : cols;
17788         const size_t alignedRows = (rows == 3) ? 4 : rows;
17789 
17790         DE_ASSERT(in.size() == 1);
17791         DE_ASSERT(getOutCompCount() == alignedRows * alignedCols);
17792         DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17793         DE_UNREF(alignedCols);
17794 
17795         // [ a b ]
17796         // [ c d ]
17797         const float a(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17798         const float b(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17799         const float c(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17800         const float d(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17801         const float ad(a * d);
17802         const fp16type adf16(ad);
17803         const float bc(b * c);
17804         const fp16type bcf16(bc);
17805         const float det(adf16.asFloat() - bcf16.asFloat());
17806         const fp16type det16(det);
17807 
17808         out[0] = fp16type(d / det16.asFloat()).bits();
17809         out[1] = fp16type(-c / det16.asFloat()).bits();
17810         out[2] = fp16type(-b / det16.asFloat()).bits();
17811         out[3] = fp16type(a / det16.asFloat()).bits();
17812 
17813         for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17814             for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17815             {
17816                 const size_t ndx(colNdx * alignedRows + rowNdx);
17817                 const fp16type s(out[ndx]);
17818 
17819                 min[ndx] = getMin(s.asDouble(), getULPs(in));
17820                 max[ndx] = getMax(s.asDouble(), getULPs(in));
17821             }
17822 
17823         return true;
17824     }
17825 };
17826 
fp16ToString(deFloat16 val)17827 inline std::string fp16ToString(deFloat16 val)
17828 {
17829     return tcu::toHex<4>(val).toString() + " (" + de::floatToString(tcu::Float16(val).asFloat(), 10) + ")";
17830 }
17831 
17832 template <size_t RES_COMPONENTS, size_t ARG0_COMPONENTS, size_t ARG1_COMPONENTS, size_t ARG2_COMPONENTS,
17833           class TestedArithmeticFunction>
compareFP16ArithmeticFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)17834 bool compareFP16ArithmeticFunc(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
17835                                const std::vector<Resource> &expectedOutputs, TestLog &log)
17836 {
17837     if (inputs.size() < 1 || inputs.size() > 3 || outputAllocs.size() != 1 || expectedOutputs.size() != 1)
17838         return false;
17839 
17840     const size_t resultStep      = (RES_COMPONENTS == 3) ? 4 : RES_COMPONENTS;
17841     const size_t iterationsCount = expectedOutputs[0].getByteSize() / (sizeof(deFloat16) * resultStep);
17842     const size_t inputsSteps[3]  = {
17843         (ARG0_COMPONENTS == 3) ? 4 : ARG0_COMPONENTS,
17844         (ARG1_COMPONENTS == 3) ? 4 : ARG1_COMPONENTS,
17845         (ARG2_COMPONENTS == 3) ? 4 : ARG2_COMPONENTS,
17846     };
17847 
17848     DE_ASSERT(expectedOutputs[0].getByteSize() > 0);
17849     DE_ASSERT(expectedOutputs[0].getByteSize() == sizeof(deFloat16) * iterationsCount * resultStep);
17850 
17851     for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17852     {
17853         DE_ASSERT(inputs[inputNdx].getByteSize() > 0);
17854         DE_ASSERT(inputs[inputNdx].getByteSize() == sizeof(deFloat16) * iterationsCount * inputsSteps[inputNdx]);
17855     }
17856 
17857     const deFloat16 *const outputAsFP16 = (const deFloat16 *)outputAllocs[0]->getHostPtr();
17858     TestedArithmeticFunction func;
17859 
17860     func.setOutCompCount(RES_COMPONENTS);
17861     func.setArgCompCount(0, ARG0_COMPONENTS);
17862     func.setArgCompCount(1, ARG1_COMPONENTS);
17863     func.setArgCompCount(2, ARG2_COMPONENTS);
17864 
17865     const bool callOncePerComponent           = func.callOncePerComponent();
17866     const uint32_t componentValidityMask      = func.getComponentValidity();
17867     const size_t denormModesCount             = 2;
17868     const char *denormModes[denormModesCount] = {"keep denormal numbers", "flush to zero"};
17869     const size_t successfulRunsPerComponent   = denormModesCount * func.getFlavorCount();
17870     bool success                              = true;
17871     size_t validatedCount                     = 0;
17872 
17873     vector<uint8_t> inputBytes[3];
17874 
17875     for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17876         inputs[inputNdx].getBytes(inputBytes[inputNdx]);
17877 
17878     const deFloat16 *const inputsAsFP16[3] = {
17879         inputs.size() >= 1 ? (const deFloat16 *)&inputBytes[0][0] : DE_NULL,
17880         inputs.size() >= 2 ? (const deFloat16 *)&inputBytes[1][0] : DE_NULL,
17881         inputs.size() >= 3 ? (const deFloat16 *)&inputBytes[2][0] : DE_NULL,
17882     };
17883 
17884     for (size_t idx = 0; idx < iterationsCount; ++idx)
17885     {
17886         std::vector<size_t> successfulRuns(RES_COMPONENTS, successfulRunsPerComponent);
17887         std::vector<std::string> errors(RES_COMPONENTS);
17888         bool iterationValidated(true);
17889 
17890         for (size_t denormNdx = 0; denormNdx < 2; ++denormNdx)
17891         {
17892             for (size_t flavorNdx = 0; flavorNdx < func.getFlavorCount(); ++flavorNdx)
17893             {
17894                 func.setFlavor(flavorNdx);
17895 
17896                 const deFloat16 *iterationOutputFP16 = &outputAsFP16[idx * resultStep];
17897                 vector<deFloat16> iterationCalculatedFP16(resultStep, 0);
17898                 vector<double> iterationEdgeMin(resultStep, 0.0);
17899                 vector<double> iterationEdgeMax(resultStep, 0.0);
17900                 vector<const deFloat16 *> arguments;
17901 
17902                 for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
17903                 {
17904                     std::string error;
17905                     bool reportError = false;
17906 
17907                     if (callOncePerComponent || componentNdx == 0)
17908                     {
17909                         bool funcCallResult;
17910 
17911                         arguments.clear();
17912 
17913                         for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17914                             arguments.push_back(&inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + componentNdx]);
17915 
17916                         if (denormNdx == 0)
17917                             funcCallResult = func.template calc<tcu::Float16>(
17918                                 arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx],
17919                                 &iterationEdgeMax[componentNdx]);
17920                         else
17921                             funcCallResult = func.template calc<tcu::Float16Denormless>(
17922                                 arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx],
17923                                 &iterationEdgeMax[componentNdx]);
17924 
17925                         if (!funcCallResult)
17926                         {
17927                             iterationValidated = false;
17928 
17929                             if (callOncePerComponent)
17930                                 continue;
17931                             else
17932                                 break;
17933                         }
17934                     }
17935 
17936                     if ((componentValidityMask != 0) && (componentValidityMask & (1 << componentNdx)) == 0)
17937                         continue;
17938 
17939                     reportError = !compare16BitFloat(iterationCalculatedFP16[componentNdx],
17940                                                      iterationOutputFP16[componentNdx], error);
17941 
17942                     if (reportError)
17943                     {
17944                         tcu::Float16 expected(iterationCalculatedFP16[componentNdx]);
17945                         tcu::Float16 outputted(iterationOutputFP16[componentNdx]);
17946                         tcu::Float64 edgeMin(iterationEdgeMin[componentNdx]);
17947                         tcu::Float64 edgeMax(iterationEdgeMax[componentNdx]);
17948 
17949                         if (reportError && expected.isNaN())
17950                             reportError = false;
17951 
17952                         if (reportError && !expected.isNaN() && !outputted.isNaN())
17953                         {
17954                             if (reportError && !expected.isInf() && !outputted.isInf())
17955                             {
17956                                 // Ignore rounding
17957                                 if (expected.bits() == outputted.bits() + 1 || expected.bits() + 1 == outputted.bits())
17958                                     reportError = false;
17959                             }
17960 
17961                             if (reportError && expected.isInf())
17962                             {
17963                                 // RTZ rounding mode returns +/-65504 instead of Inf on overflow
17964                                 if (expected.sign() == 1 && outputted.bits() == 0x7bff &&
17965                                     edgeMin.asDouble() <= std::numeric_limits<double>::max())
17966                                     reportError = false;
17967                                 else if (expected.sign() == -1 && outputted.bits() == 0xfbff &&
17968                                          edgeMax.asDouble() >= -std::numeric_limits<double>::max())
17969                                     reportError = false;
17970                             }
17971 
17972                             if (reportError)
17973                             {
17974                                 const double outputtedDouble = outputted.asDouble();
17975 
17976                                 DE_ASSERT(edgeMin.isNaN() || edgeMax.isNaN() ||
17977                                           (edgeMin.asDouble() <= edgeMax.asDouble()));
17978 
17979                                 if (de::inRange(outputtedDouble, edgeMin.asDouble(), edgeMax.asDouble()))
17980                                     reportError = false;
17981                             }
17982                         }
17983 
17984                         if (reportError)
17985                         {
17986                             const size_t inputsComps[3] = {
17987                                 ARG0_COMPONENTS,
17988                                 ARG1_COMPONENTS,
17989                                 ARG2_COMPONENTS,
17990                             };
17991                             string inputsValues("Inputs:");
17992                             string flavorName(func.getFlavorCount() == 1 ?
17993                                                   "" :
17994                                                   string(" flavor ") + de::toString(flavorNdx) + " (" +
17995                                                       func.getCurrentFlavorName() + ")");
17996                             std::stringstream errStream;
17997 
17998                             for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17999                             {
18000                                 const size_t inputCompsCount = inputsComps[inputNdx];
18001 
18002                                 inputsValues += " [" + de::toString(inputNdx) + "]=(";
18003 
18004                                 for (size_t compNdx = 0; compNdx < inputCompsCount; ++compNdx)
18005                                 {
18006                                     const deFloat16 inputComponentValue =
18007                                         inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + compNdx];
18008 
18009                                     inputsValues += fp16ToString(inputComponentValue) +
18010                                                     ((compNdx + 1 == inputCompsCount) ? ")" : ", ");
18011                                 }
18012                             }
18013 
18014                             errStream << "At"
18015                                       << " iteration " << de::toString(idx) << " component "
18016                                       << de::toString(componentNdx) << " denormMode " << de::toString(denormNdx) << " ("
18017                                       << denormModes[denormNdx] << ")"
18018                                       << " " << flavorName << " " << inputsValues
18019                                       << " outputted:" + fp16ToString(iterationOutputFP16[componentNdx])
18020                                       << " expected:" + fp16ToString(iterationCalculatedFP16[componentNdx])
18021                                       << " or in range: [" << iterationEdgeMin[componentNdx] << ", "
18022                                       << iterationEdgeMax[componentNdx] << "]."
18023                                       << " " << error << "." << std::endl;
18024 
18025                             errors[componentNdx] += errStream.str();
18026 
18027                             successfulRuns[componentNdx]--;
18028                         }
18029                     }
18030                 }
18031             }
18032         }
18033 
18034         for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
18035         {
18036             // Check if any component has total failure
18037             if (successfulRuns[componentNdx] == 0)
18038             {
18039                 // Test failed in all denorm modes and all flavors for certain component: dump errors
18040                 log << TestLog::Message << errors[componentNdx] << TestLog::EndMessage;
18041 
18042                 success = false;
18043             }
18044         }
18045 
18046         if (iterationValidated)
18047             validatedCount++;
18048     }
18049 
18050     if (validatedCount < 16)
18051         TCU_THROW(InternalError, "Too few samples have been validated.");
18052 
18053     return success;
18054 }
18055 
18056 // IEEE-754 floating point numbers:
18057 // +--------+------+----------+-------------+
18058 // | binary | sign | exponent | significand |
18059 // +--------+------+----------+-------------+
18060 // | 16-bit |  1   |    5     |     10      |
18061 // +--------+------+----------+-------------+
18062 // | 32-bit |  1   |    8     |     23      |
18063 // +--------+------+----------+-------------+
18064 //
18065 // 16-bit floats:
18066 //
// 0   000 00   00 0000 0001 (0x0001: 2^-24:         minimum positive denormalized)
// 0   000 00   11 1111 1111 (0x03ff: 2^-14 - 2^-24: maximum positive denormalized)
// 0   000 01   00 0000 0000 (0x0400: 2^-14:         minimum positive normalized)
18070 // 0   111 10   11 1111 1111 (0x7bff: 65504:         maximum positive normalized)
18071 //
18072 // 0   000 00   00 0000 0000 (0x0000: +0)
18073 // 0   111 11   00 0000 0000 (0x7c00: +Inf)
18074 // 0   000 00   11 1111 0000 (0x03f0: +Denorm)
18075 // 0   000 01   00 0000 0001 (0x0401: +Norm)
18076 // 0   111 11   00 0000 1111 (0x7c0f: +SNaN)
18077 // 0   111 11   11 1111 0000 (0x7ff0: +QNaN)
18078 // Generate and return 16-bit floats and their corresponding 32-bit values.
18079 //
18080 // The first 14 number pairs are manually picked, while the rest are randomly generated.
18081 // Expected count to be at least 14 (numPicks).
getFloat16a(de::Random & rnd,uint32_t count)18082 vector<deFloat16> getFloat16a(de::Random &rnd, uint32_t count)
18083 {
18084     vector<deFloat16> float16;
18085 
18086     float16.reserve(count);
18087 
18088     // Zero
18089     float16.push_back(uint16_t(0x0000));
18090     float16.push_back(uint16_t(0x8000));
18091     // Infinity
18092     float16.push_back(uint16_t(0x7c00));
18093     float16.push_back(uint16_t(0xfc00));
18094     // Normalized
18095     float16.push_back(uint16_t(0x0401));
18096     float16.push_back(uint16_t(0x8401));
18097     // Some normal number
18098     float16.push_back(uint16_t(0x14cb));
18099     float16.push_back(uint16_t(0x94cb));
18100     // Min/max positive normal
18101     float16.push_back(uint16_t(0x0400));
18102     float16.push_back(uint16_t(0x7bff));
18103     // Min/max negative normal
18104     float16.push_back(uint16_t(0x8400));
18105     float16.push_back(uint16_t(0xfbff));
18106     // PI
18107     float16.push_back(uint16_t(0x4248)); // 3.140625
18108     float16.push_back(uint16_t(0xb248)); // -3.140625
18109     // PI/2
18110     float16.push_back(uint16_t(0x3e48)); // 1.5703125
18111     float16.push_back(uint16_t(0xbe48)); // -1.5703125
18112     float16.push_back(uint16_t(0x3c00)); // 1.0
18113     float16.push_back(uint16_t(0x3800)); // 0.5
18114     // Some useful constants
18115     float16.push_back(tcu::Float16(-2.5f).bits());
18116     float16.push_back(tcu::Float16(-1.0f).bits());
18117     float16.push_back(tcu::Float16(0.4f).bits());
18118     float16.push_back(tcu::Float16(2.5f).bits());
18119 
18120     const uint32_t numPicks = static_cast<uint32_t>(float16.size());
18121 
18122     DE_ASSERT(count >= numPicks);
18123     count -= numPicks;
18124 
18125     for (uint32_t numIdx = 0; numIdx < count; ++numIdx)
18126     {
18127         int sign          = (rnd.getUint16() % 2 == 0) ? +1 : -1;
18128         int exponent      = (rnd.getUint16() % 29) - 14 + 1;
18129         uint16_t mantissa = static_cast<uint16_t>(2 * (rnd.getUint16() % 512));
18130 
18131         // Exclude power of -14 to avoid denorms
18132         DE_ASSERT(de::inRange(exponent, -13, 15));
18133 
18134         float16.push_back(tcu::Float16::constructBits(sign, exponent, mantissa).bits());
18135     }
18136 
18137     return float16;
18138 }
18139 
getInputData1(uint32_t seed,size_t count,size_t argNo)18140 static inline vector<deFloat16> getInputData1(uint32_t seed, size_t count, size_t argNo)
18141 {
18142     DE_UNREF(argNo);
18143 
18144     de::Random rnd(seed);
18145 
18146     return getFloat16a(rnd, static_cast<uint32_t>(count));
18147 }
18148 
getInputData2(uint32_t seed,size_t count,size_t argNo)18149 static inline vector<deFloat16> getInputData2(uint32_t seed, size_t count, size_t argNo)
18150 {
18151     de::Random rnd(seed);
18152     size_t newCount = static_cast<size_t>(deSqrt(double(count)));
18153 
18154     DE_ASSERT(newCount * newCount == count);
18155 
18156     vector<deFloat16> float16 = getFloat16a(rnd, static_cast<uint32_t>(newCount));
18157 
18158     return squarize(float16, static_cast<uint32_t>(argNo));
18159 }
18160 
getInputData3(uint32_t seed,size_t count,size_t argNo)18161 static inline vector<deFloat16> getInputData3(uint32_t seed, size_t count, size_t argNo)
18162 {
18163     if (argNo == 0 || argNo == 1)
18164         return getInputData2(seed, count, argNo);
18165     else
18166         return getInputData1(seed << argNo, count, argNo);
18167 }
18168 
getInputData(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18169 vector<deFloat16> getInputData(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18170                                size_t argNo)
18171 {
18172     DE_UNREF(stride);
18173 
18174     vector<deFloat16> result;
18175 
18176     switch (argCount)
18177     {
18178     case 1:
18179         result = getInputData1(seed, count, argNo);
18180         break;
18181     case 2:
18182         result = getInputData2(seed, count, argNo);
18183         break;
18184     case 3:
18185         result = getInputData3(seed, count, argNo);
18186         break;
18187     default:
18188         TCU_THROW(InternalError, "Invalid argument count specified");
18189     }
18190 
18191     if (compCount == 3)
18192     {
18193         const size_t newCount = (3 * count) / 4;
18194         vector<deFloat16> newResult;
18195 
18196         newResult.reserve(result.size());
18197 
18198         for (size_t ndx = 0; ndx < newCount; ++ndx)
18199         {
18200             newResult.push_back(result[ndx]);
18201 
18202             if (ndx % 3 == 2)
18203                 newResult.push_back(0);
18204         }
18205 
18206         result = newResult;
18207     }
18208 
18209     DE_ASSERT(result.size() == count);
18210 
18211     return result;
18212 }
18213 
18214 // Generator for functions requiring data in range [1, inf]
getInputDataAC(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18215 vector<deFloat16> getInputDataAC(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18216                                  size_t argNo)
18217 {
18218     vector<deFloat16> result;
18219 
18220     result = getInputData(seed, count, compCount, stride, argCount, argNo);
18221 
18222     // Filter out values below 1.0 from upper half of numbers
18223     for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
18224     {
18225         const float f = tcu::Float16(result[idx]).asFloat();
18226 
18227         if (f < 1.0f)
18228             result[idx] = tcu::Float16(1.0f - f).bits();
18229     }
18230 
18231     return result;
18232 }
18233 
18234 // Generator for functions requiring data in range [-1, 1]
getInputDataA(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18235 vector<deFloat16> getInputDataA(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18236                                 size_t argNo)
18237 {
18238     vector<deFloat16> result;
18239 
18240     result = getInputData(seed, count, compCount, stride, argCount, argNo);
18241 
18242     for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
18243     {
18244         const float f = tcu::Float16(result[idx]).asFloat();
18245 
18246         if (!de::inRange(f, -1.0f, 1.0f))
18247             result[idx] = tcu::Float16(deFloatFrac(f)).bits();
18248     }
18249 
18250     return result;
18251 }
18252 
18253 // Generator for functions requiring data in range [-pi, pi]
getInputDataPI(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18254 vector<deFloat16> getInputDataPI(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18255                                  size_t argNo)
18256 {
18257     vector<deFloat16> result;
18258 
18259     result = getInputData(seed, count, compCount, stride, argCount, argNo);
18260 
18261     for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
18262     {
18263         const float f = tcu::Float16(result[idx]).asFloat();
18264 
18265         if (!de::inRange(f, -DE_PI, DE_PI))
18266             result[idx] = tcu::Float16(fmodf(f, DE_PI)).bits();
18267     }
18268 
18269     return result;
18270 }
18271 
18272 // Generator for functions requiring data in range [0, inf]
getInputDataP(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18273 vector<deFloat16> getInputDataP(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18274                                 size_t argNo)
18275 {
18276     vector<deFloat16> result;
18277 
18278     result = getInputData(seed, count, compCount, stride, argCount, argNo);
18279 
18280     if (argNo == 0)
18281     {
18282         for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
18283             result[idx] &= static_cast<deFloat16>(~0x8000);
18284     }
18285 
18286     return result;
18287 }
18288 
getInputDataV(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18289 vector<deFloat16> getInputDataV(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18290                                 size_t argNo)
18291 {
18292     DE_UNREF(stride);
18293     DE_UNREF(argCount);
18294 
18295     vector<deFloat16> result;
18296 
18297     if (argNo == 0)
18298         result = getInputData2(seed, count, argNo);
18299     else
18300     {
18301         const size_t alignedCount = (compCount == 3) ? 4 : compCount;
18302         const size_t newCountX    = static_cast<size_t>(deSqrt(double(count * alignedCount)));
18303         const size_t newCountY    = count / newCountX;
18304         de::Random rnd(seed);
18305         vector<deFloat16> float16 = getFloat16a(rnd, static_cast<uint32_t>(newCountX));
18306 
18307         DE_ASSERT(newCountX * newCountX == alignedCount * count);
18308 
18309         for (size_t numIdx = 0; numIdx < newCountX; ++numIdx)
18310         {
18311             const vector<deFloat16> tmp(newCountY, float16[numIdx]);
18312 
18313             result.insert(result.end(), tmp.begin(), tmp.end());
18314         }
18315     }
18316 
18317     DE_ASSERT(result.size() == count);
18318 
18319     return result;
18320 }
18321 
getInputDataM(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18322 vector<deFloat16> getInputDataM(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18323                                 size_t argNo)
18324 {
18325     DE_UNREF(compCount);
18326     DE_UNREF(stride);
18327     DE_UNREF(argCount);
18328 
18329     de::Random rnd(seed << argNo);
18330     vector<deFloat16> result;
18331 
18332     result = getFloat16a(rnd, static_cast<uint32_t>(count));
18333 
18334     DE_ASSERT(result.size() == count);
18335 
18336     return result;
18337 }
18338 
getInputDataD(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18339 vector<deFloat16> getInputDataD(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18340                                 size_t argNo)
18341 {
18342     DE_UNREF(compCount);
18343     DE_UNREF(argCount);
18344 
18345     de::Random rnd(seed << argNo);
18346     vector<deFloat16> result;
18347 
18348     for (uint32_t numIdx = 0; numIdx < count; ++numIdx)
18349     {
18350         int num = (rnd.getUint16() % 16) - 8;
18351 
18352         result.push_back(tcu::Float16(float(num)).bits());
18353     }
18354 
18355     result[0 * stride] = uint16_t(0x7c00); // +Inf
18356     result[1 * stride] = uint16_t(0xfc00); // -Inf
18357 
18358     DE_ASSERT(result.size() == count);
18359 
18360     return result;
18361 }
18362 
18363 // Generator for smoothstep function
getInputDataSS(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18364 vector<deFloat16> getInputDataSS(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18365                                  size_t argNo)
18366 {
18367     vector<deFloat16> result;
18368 
18369     result = getInputDataD(seed, count, compCount, stride, argCount, argNo);
18370 
18371     if (argNo == 0)
18372     {
18373         for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
18374         {
18375             const float f = tcu::Float16(result[idx]).asFloat();
18376 
18377             if (f > 4.0f)
18378                 result[idx] = tcu::Float16(-f).bits();
18379         }
18380     }
18381 
18382     if (argNo == 1)
18383     {
18384         for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
18385         {
18386             const float f = tcu::Float16(result[idx]).asFloat();
18387 
18388             if (f < 4.0f)
18389                 result[idx] = tcu::Float16(-f).bits();
18390         }
18391     }
18392 
18393     return result;
18394 }
18395 
18396 // Generates normalized vectors for arguments 0 and 1
getInputDataN(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18397 vector<deFloat16> getInputDataN(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18398                                 size_t argNo)
18399 {
18400     DE_UNREF(compCount);
18401     DE_UNREF(argCount);
18402 
18403     de::Random rnd(seed << argNo);
18404     vector<deFloat16> result;
18405 
18406     if (argNo == 0 || argNo == 1)
18407     {
18408         // The input parameters for the incident vector I and the surface normal N must already be normalized
18409         for (size_t numIdx = 0; numIdx < count; numIdx += stride)
18410         {
18411             vector<float> unnormolized;
18412             float sum = 0;
18413 
18414             for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18415                 unnormolized.push_back(float((rnd.getUint16() % 16) - 8));
18416 
18417             for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18418                 sum += unnormolized[compIdx] * unnormolized[compIdx];
18419 
18420             sum = deFloatSqrt(sum);
18421             if (sum == 0.0f)
18422                 unnormolized[0] = sum = 1.0f;
18423 
18424             for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18425                 result.push_back(tcu::Float16(unnormolized[compIdx] / sum).bits());
18426 
18427             for (size_t compIdx = compCount; compIdx < stride; ++compIdx)
18428                 result.push_back(0);
18429         }
18430     }
18431     else
18432     {
18433         // Input parameter eta
18434         for (uint32_t numIdx = 0; numIdx < count; ++numIdx)
18435         {
18436             int num = (rnd.getUint16() % 16) - 8;
18437 
18438             result.push_back(tcu::Float16(float(num)).bits());
18439         }
18440     }
18441 
18442     DE_ASSERT(result.size() == count);
18443 
18444     return result;
18445 }
18446 
18447 // Data generator for complex matrix functions like determinant and inverse
getInputDataC(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18448 vector<deFloat16> getInputDataC(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18449                                 size_t argNo)
18450 {
18451     DE_UNREF(compCount);
18452     DE_UNREF(stride);
18453     DE_UNREF(argCount);
18454 
18455     de::Random rnd(seed << argNo);
18456     vector<deFloat16> result;
18457 
18458     for (uint32_t numIdx = 0; numIdx < count; ++numIdx)
18459     {
18460         int num = (rnd.getUint16() % 16) - 8;
18461 
18462         result.push_back(tcu::Float16(float(num)).bits());
18463     }
18464 
18465     DE_ASSERT(result.size() == count);
18466 
18467     return result;
18468 }
18469 
// Describes one data type handled by the 16-bit float math tests. A table of these, indexed by
// Math16DataTypes, is built in createFloat16ArithmeticFuncTest().
struct Math16TestType
{
    // Type name prefix such as "v2" or "m3x4"; empty for the scalar type.
    // NOTE(review): presumably combined with the component type name (e.g. "v2" + "f16") - confirm.
    const char *typePrefix;
    // Component count for scalar and vector types (1..4); the table uses 0 for matrix types.
    const size_t typeComponents;
    // Array stride; the table expresses it as a multiple of sizeof(deFloat16).
    const size_t typeArrayStride;
    // Struct stride; the table expresses it as a multiple of sizeof(deFloat16).
    const size_t typeStructStride;
    // Storage type name such as "u32_ndp_2".
    // NOTE(review): presumably selects one of the %SSBO_<name> SPIR-V types - confirm against usage.
    const char *storage_type;
};
18478 
// Identifiers of the data types used by the 16-bit float math tests.
// For SCALAR and VEC2..VEC4 the enumerator value equals the component count; the matrix types
// continue the numbering after VEC4.
enum Math16DataTypes
{
    NONE   = 0,
    SCALAR = 1,
    VEC2   = 2,
    VEC3   = 3,
    VEC4   = 4,
    MAT2X2,
    MAT2X3,
    MAT2X4,
    MAT3X2,
    MAT3X3,
    MAT3X4,
    MAT4X2,
    MAT4X3,
    MAT4X4,
    // Number of enumerators above; used as the size of the type table.
    MATH16_TYPE_LAST
};
18497 
// Bundle of four text fragments belonging to one test argument.
// NOTE(review): presumably these are SPIR-V assembly snippets spliced into the shader template
// (function bodies, global variables, decorations and function-local variables) - the users of
// this struct are outside this part of the file, confirm there.
struct Math16ArgFragments
{
    const char *bodies;
    const char *variables;
    const char *decorations;
    const char *funcVariables;
};
18505 
// Function type shared by the getInputData*() generators above: given a seed, the number of
// values to produce, the component count, the stride, the number of function arguments and the
// index of the argument being generated, returns the input values as 16-bit float bit patterns.
typedef vector<deFloat16> Math16GetInputData(uint32_t seed, size_t count, size_t compCount, size_t stride,
                                             size_t argCount, size_t argNo);
18508 
// Describes one function covered by the 16-bit float math tests.
// NOTE(review): the field meanings below are inferred from the names and types; the code that
// fills and reads this struct is outside this part of the file - confirm there.
struct Math16TestFunc
{
    const char *funcName;   // Name of the tested function.
    const char *funcSuffix; // Suffix distinguishing variants of the same function (presumably).
    size_t funcArgsCount;   // Number of arguments the function takes.
    size_t typeResult;      // Result type; presumably a Math16DataTypes value.
    size_t typeArg0;        // Type of argument 0; presumably a Math16DataTypes value.
    size_t typeArg1;        // Type of argument 1; presumably a Math16DataTypes value.
    size_t typeArg2;        // Type of argument 2; presumably a Math16DataTypes value.
    Math16GetInputData *getInputDataFunc; // Generator producing the input data for each argument.
    VerifyIOFunc verifyFunc;              // Callback used to verify the outputs.
};
18521 
18522 template <class SpecResource>
createFloat16ArithmeticFuncTest(tcu::TestContext & testCtx,tcu::TestCaseGroup & testGroup,const size_t testTypeIdx,const Math16TestFunc & testFunc)18523 void createFloat16ArithmeticFuncTest(tcu::TestContext &testCtx, tcu::TestCaseGroup &testGroup, const size_t testTypeIdx,
18524                                      const Math16TestFunc &testFunc)
18525 {
18526     const int testSpecificSeed                       = deStringHash(testGroup.getName());
18527     const int seed                                   = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
18528     const size_t numDataPointsByAxis                 = 32;
18529     const size_t numDataPoints                       = numDataPointsByAxis * numDataPointsByAxis;
18530     const char *componentType                        = "f16";
18531     const Math16TestType testTypes[MATH16_TYPE_LAST] = {
18532         {"", 0, 0, 0, ""},
18533         {"", 1, 1 * sizeof(deFloat16), 2 * sizeof(deFloat16), "u32_half_ndp"},
18534         {"v2", 2, 2 * sizeof(deFloat16), 2 * sizeof(deFloat16), "u32_ndp"},
18535         {"v3", 3, 4 * sizeof(deFloat16), 4 * sizeof(deFloat16), "u32_ndp_2"},
18536         {"v4", 4, 4 * sizeof(deFloat16), 4 * sizeof(deFloat16), "u32_ndp_2"},
18537         {"m2x2", 0, 4 * sizeof(deFloat16), 4 * sizeof(deFloat16), "u32_ndp_2"},
18538         {"m2x3", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_4"},
18539         {"m2x4", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_4"},
18540         {"m3x2", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_3"},
18541         {"m3x3", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6"},
18542         {"m3x4", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6"},
18543         {"m4x2", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_4"},
18544         {"m4x3", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8"},
18545         {"m4x4", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8"},
18546     };
18547 
18548     DE_ASSERT(testTypeIdx == testTypes[testTypeIdx].typeComponents);
18549 
18550     const StringTemplate preMain("     %c_i32_ndp  = OpConstant %i32 ${num_data_points}\n"
18551 
18552                                  "        %f16     = OpTypeFloat 16\n"
18553                                  "        %v2f16   = OpTypeVector %f16 2\n"
18554                                  "        %v3f16   = OpTypeVector %f16 3\n"
18555                                  "        %v4f16   = OpTypeVector %f16 4\n"
18556                                  "        %m2x2f16 = OpTypeMatrix %v2f16 2\n"
18557                                  "        %m2x3f16 = OpTypeMatrix %v3f16 2\n"
18558                                  "        %m2x4f16 = OpTypeMatrix %v4f16 2\n"
18559                                  "        %m3x2f16 = OpTypeMatrix %v2f16 3\n"
18560                                  "        %m3x3f16 = OpTypeMatrix %v3f16 3\n"
18561                                  "        %m3x4f16 = OpTypeMatrix %v4f16 3\n"
18562                                  "        %m4x2f16 = OpTypeMatrix %v2f16 4\n"
18563                                  "        %m4x3f16 = OpTypeMatrix %v3f16 4\n"
18564                                  "        %m4x4f16 = OpTypeMatrix %v4f16 4\n"
18565 
18566                                  "       %fp_v2i32 = OpTypePointer Function %v2i32\n"
18567                                  "       %fp_v3i32 = OpTypePointer Function %v3i32\n"
18568                                  "       %fp_v4i32 = OpTypePointer Function %v4i32\n"
18569 
18570                                  "      %c_u32_ndp = OpConstant %u32 ${num_data_points}\n"
18571                                  " %c_u32_half_ndp = OpSpecConstantOp %u32 UDiv %c_i32_ndp %c_u32_2\n"
18572                                  "        %c_u32_5 = OpConstant %u32 5\n"
18573                                  "        %c_u32_6 = OpConstant %u32 6\n"
18574                                  "        %c_u32_7 = OpConstant %u32 7\n"
18575                                  "        %c_u32_8 = OpConstant %u32 8\n"
18576                                  "        %c_f16_0 = OpConstant %f16 0\n"
18577                                  "        %c_f16_1 = OpConstant %f16 1\n"
18578                                  "      %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
18579                                  "         %up_u32 = OpTypePointer Uniform %u32\n"
18580                                  "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
18581                                  " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
18582 
18583                                  "    %ra_u32_half_ndp = OpTypeArray %u32 %c_u32_half_ndp\n"
18584                                  "  %SSBO_u32_half_ndp = OpTypeStruct %ra_u32_half_ndp\n"
18585                                  "%up_SSBO_u32_half_ndp = OpTypePointer Uniform %SSBO_u32_half_ndp\n"
18586                                  "         %ra_u32_ndp = OpTypeArray %u32 %c_u32_ndp\n"
18587                                  "       %SSBO_u32_ndp = OpTypeStruct %ra_u32_ndp\n"
18588                                  "    %up_SSBO_u32_ndp = OpTypePointer Uniform %SSBO_u32_ndp\n"
18589                                  "           %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
18590                                  "        %up_ra_u32_2 = OpTypePointer Uniform %ra_u32_2\n"
18591                                  "      %ra_ra_u32_ndp = OpTypeArray %ra_u32_2 %c_u32_ndp\n"
18592                                  "     %SSBO_u32_ndp_2 = OpTypeStruct %ra_ra_u32_ndp\n"
18593                                  "  %up_SSBO_u32_ndp_2 = OpTypePointer Uniform %SSBO_u32_ndp_2\n"
18594                                  "           %ra_u32_4 = OpTypeArray %u32 %c_u32_4\n"
18595                                  "        %up_ra_u32_4 = OpTypePointer Uniform %ra_u32_4\n"
18596                                  "        %ra_ra_u32_4 = OpTypeArray %ra_u32_4 %c_u32_ndp\n"
18597                                  "     %SSBO_u32_ndp_4 = OpTypeStruct %ra_ra_u32_4\n"
18598                                  "  %up_SSBO_u32_ndp_4 = OpTypePointer Uniform %SSBO_u32_ndp_4\n"
18599                                  "           %ra_u32_3 = OpTypeArray %u32 %c_u32_3\n"
18600                                  "        %up_ra_u32_3 = OpTypePointer Uniform %ra_u32_3\n"
18601                                  "        %ra_ra_u32_3 = OpTypeArray %ra_u32_3 %c_u32_ndp\n"
18602                                  "     %SSBO_u32_ndp_3 = OpTypeStruct %ra_ra_u32_3\n"
18603                                  "  %up_SSBO_u32_ndp_3 = OpTypePointer Uniform %SSBO_u32_ndp_3\n"
18604                                  "           %ra_u32_6 = OpTypeArray %u32 %c_u32_6\n"
18605                                  "        %up_ra_u32_6 = OpTypePointer Uniform %ra_u32_6\n"
18606                                  "        %ra_ra_u32_6 = OpTypeArray %ra_u32_6 %c_u32_ndp\n"
18607                                  "     %SSBO_u32_ndp_6 = OpTypeStruct %ra_ra_u32_6\n"
18608                                  "  %up_SSBO_u32_ndp_6 = OpTypePointer Uniform %SSBO_u32_ndp_6\n"
18609                                  "           %ra_u32_8 = OpTypeArray %u32 %c_u32_8\n"
18610                                  "        %up_ra_u32_8 = OpTypePointer Uniform %ra_u32_8\n"
18611                                  "        %ra_ra_u32_8 = OpTypeArray %ra_u32_8 %c_u32_ndp\n"
18612                                  "     %SSBO_u32_ndp_8 = OpTypeStruct %ra_ra_u32_8\n"
18613                                  "  %up_SSBO_u32_ndp_8 = OpTypePointer Uniform %SSBO_u32_ndp_8\n"
18614 
18615                                  "         %f16_i32_fn = OpTypeFunction %f16 %i32\n"
18616                                  "       %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
18617                                  "       %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
18618                                  "       %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
18619                                  "     %m2x2f16_i32_fn = OpTypeFunction %m2x2f16 %i32\n"
18620                                  "     %m2x3f16_i32_fn = OpTypeFunction %m2x3f16 %i32\n"
18621                                  "     %m2x4f16_i32_fn = OpTypeFunction %m2x4f16 %i32\n"
18622                                  "     %m3x2f16_i32_fn = OpTypeFunction %m3x2f16 %i32\n"
18623                                  "     %m3x3f16_i32_fn = OpTypeFunction %m3x3f16 %i32\n"
18624                                  "     %m3x4f16_i32_fn = OpTypeFunction %m3x4f16 %i32\n"
18625                                  "     %m4x2f16_i32_fn = OpTypeFunction %m4x2f16 %i32\n"
18626                                  "     %m4x3f16_i32_fn = OpTypeFunction %m4x3f16 %i32\n"
18627                                  "     %m4x4f16_i32_fn = OpTypeFunction %m4x4f16 %i32\n"
18628                                  "    %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
18629                                  "  %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
18630                                  "  %void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
18631                                  "  %void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
18632                                  "%void_m2x2f16_i32_fn = OpTypeFunction %void %m2x2f16 %i32\n"
18633                                  "%void_m2x3f16_i32_fn = OpTypeFunction %void %m2x3f16 %i32\n"
18634                                  "%void_m2x4f16_i32_fn = OpTypeFunction %void %m2x4f16 %i32\n"
18635                                  "%void_m3x2f16_i32_fn = OpTypeFunction %void %m3x2f16 %i32\n"
18636                                  "%void_m3x3f16_i32_fn = OpTypeFunction %void %m3x3f16 %i32\n"
18637                                  "%void_m3x4f16_i32_fn = OpTypeFunction %void %m3x4f16 %i32\n"
18638                                  "%void_m4x2f16_i32_fn = OpTypeFunction %void %m4x2f16 %i32\n"
18639                                  "%void_m4x3f16_i32_fn = OpTypeFunction %void %m4x3f16 %i32\n"
18640                                  "%void_m4x4f16_i32_fn = OpTypeFunction %void %m4x4f16 %i32\n"
18641                                  "${arg_vars}");
18642 
18643     const StringTemplate decoration("OpDecorate %ra_u32_half_ndp ArrayStride 4\n"
18644                                     "OpMemberDecorate %SSBO_u32_half_ndp 0 Offset 0\n"
18645                                     "OpDecorate %SSBO_u32_half_ndp BufferBlock\n"
18646 
18647                                     "OpDecorate %ra_u32_ndp ArrayStride 4\n"
18648                                     "OpMemberDecorate %SSBO_u32_ndp 0 Offset 0\n"
18649                                     "OpDecorate %SSBO_u32_ndp BufferBlock\n"
18650 
18651                                     "OpDecorate %ra_u32_2 ArrayStride 4\n"
18652                                     "OpDecorate %ra_ra_u32_ndp ArrayStride 8\n"
18653                                     "OpMemberDecorate %SSBO_u32_ndp_2 0 Offset 0\n"
18654                                     "OpDecorate %SSBO_u32_ndp_2 BufferBlock\n"
18655 
18656                                     "OpDecorate %ra_u32_4 ArrayStride 4\n"
18657                                     "OpDecorate %ra_ra_u32_4 ArrayStride 16\n"
18658                                     "OpMemberDecorate %SSBO_u32_ndp_4 0 Offset 0\n"
18659                                     "OpDecorate %SSBO_u32_ndp_4 BufferBlock\n"
18660 
18661                                     "OpDecorate %ra_u32_3 ArrayStride 4\n"
18662                                     "OpDecorate %ra_ra_u32_3 ArrayStride 16\n"
18663                                     "OpMemberDecorate %SSBO_u32_ndp_3 0 Offset 0\n"
18664                                     "OpDecorate %SSBO_u32_ndp_3 BufferBlock\n"
18665 
18666                                     "OpDecorate %ra_u32_6 ArrayStride 4\n"
18667                                     "OpDecorate %ra_ra_u32_6 ArrayStride 32\n"
18668                                     "OpMemberDecorate %SSBO_u32_ndp_6 0 Offset 0\n"
18669                                     "OpDecorate %SSBO_u32_ndp_6 BufferBlock\n"
18670 
18671                                     "OpDecorate %ra_u32_8 ArrayStride 4\n"
18672                                     "OpDecorate %ra_ra_u32_8 ArrayStride 32\n"
18673                                     "OpMemberDecorate %SSBO_u32_ndp_8 0 Offset 0\n"
18674                                     "OpDecorate %SSBO_u32_ndp_8 BufferBlock\n"
18675 
18676                                     "${arg_decorations}");
18677 
18678     const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
18679                                  "    %param = OpFunctionParameter %v4f32\n"
18680                                  "    %entry = OpLabel\n"
18681 
18682                                  "        %i = OpVariable %fp_i32 Function\n"
18683                                  "${arg_infunc_vars}"
18684                                  "             OpStore %i %c_i32_0\n"
18685                                  "             OpBranch %loop\n"
18686 
18687                                  "     %loop = OpLabel\n"
18688                                  "    %i_cmp = OpLoad %i32 %i\n"
18689                                  "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
18690                                  "             OpLoopMerge %merge %next None\n"
18691                                  "             OpBranchConditional %lt %write %merge\n"
18692 
18693                                  "    %write = OpLabel\n"
18694                                  "      %ndx = OpLoad %i32 %i\n"
18695 
18696                                  "${arg_func_call}"
18697 
18698                                  "             OpBranch %next\n"
18699 
18700                                  "     %next = OpLabel\n"
18701                                  "    %i_cur = OpLoad %i32 %i\n"
18702                                  "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
18703                                  "             OpStore %i %i_new\n"
18704                                  "             OpBranch %loop\n"
18705 
18706                                  "    %merge = OpLabel\n"
18707                                  "             OpReturnValue %param\n"
18708                                  "             OpFunctionEnd\n");
18709 
18710     const Math16ArgFragments argFragment1 = {
18711         "     %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18712         "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0\n"
18713         "     %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18714         "",
18715         "",
18716         "",
18717     };
18718 
18719     const Math16ArgFragments argFragment2 = {
18720         " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18721         " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18722         "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1\n"
18723         "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18724         "",
18725         "",
18726         "",
18727     };
18728 
18729     const Math16ArgFragments argFragment3 = {
18730         " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18731         " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18732         " %val_src2 = OpFunctionCall %${t2} %ld_arg_ssbo_src2 %ndx\n"
18733         "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1 %val_src2\n"
18734         "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18735         "",
18736         "",
18737         "",
18738     };
18739 
18740     const Math16ArgFragments argFragmentLdExp = {
18741         " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18742         " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18743         "%val_src1i = OpConvertFToS %${dr}i32 %val_src1\n"
18744         "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1i\n"
18745         "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18746 
18747         "",
18748 
18749         "",
18750 
18751         "",
18752     };
18753 
18754     const Math16ArgFragments argFragmentModfFrac = {
18755         " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18756         "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18757         "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18758 
18759         "   %fp_tmp = OpTypePointer Function %${tr}\n",
18760 
18761         "",
18762 
18763         "      %tmp = OpVariable %fp_tmp Function\n",
18764     };
18765 
18766     const Math16ArgFragments argFragmentModfInt = {
18767         " %val_src0  = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18768         "%val_unused = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18769         "     %tmp0  = OpAccessChain %fp_tmp %tmp\n"
18770         "  %val_dst  = OpLoad %${tr} %tmp0\n"
18771         "      %dst  = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18772 
18773         "   %fp_tmp  = OpTypePointer Function %${tr}\n",
18774 
18775         "",
18776 
18777         "      %tmp  = OpVariable %fp_tmp Function\n",
18778     };
18779 
18780     const Math16ArgFragments argFragmentModfStruct = {
18781         " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18782         "  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18783         "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18784         "             OpStore %tmp_ptr_s %val_tmp\n"
18785         "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_${struct_member}\n"
18786         "  %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18787         "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18788 
18789         "  %fp_${tr} = OpTypePointer Function %${tr}\n"
18790         "   %st_tmp = OpTypeStruct %${tr} %${tr}\n"
18791         "   %fp_tmp = OpTypePointer Function %st_tmp\n"
18792         "   %c_frac = OpConstant %i32 0\n"
18793         "    %c_int = OpConstant %i32 1\n",
18794 
18795         "OpMemberDecorate %st_tmp 0 Offset 0\n"
18796         "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18797 
18798         "      %tmp = OpVariable %fp_tmp Function\n",
18799     };
18800 
18801     const Math16ArgFragments argFragmentFrexpStructS = {
18802         " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18803         "  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18804         "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18805         "             OpStore %tmp_ptr_s %val_tmp\n"
18806         "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_i32_0\n"
18807         "  %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18808         "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18809 
18810         "  %fp_${tr} = OpTypePointer Function %${tr}\n"
18811         "   %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18812         "   %fp_tmp = OpTypePointer Function %st_tmp\n",
18813 
18814         "OpMemberDecorate %st_tmp 0 Offset 0\n"
18815         "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18816 
18817         "      %tmp = OpVariable %fp_tmp Function\n",
18818     };
18819 
18820     const Math16ArgFragments argFragmentFrexpStructE = {
18821         " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18822         "  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18823         "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18824         "             OpStore %tmp_ptr_s %val_tmp\n"
18825         "%tmp_ptr_l = OpAccessChain %fp_${dr}i32 %tmp %c_i32_1\n"
18826         "%val_dst_i = OpLoad %${dr}i32 %tmp_ptr_l\n"
18827         "  %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
18828         "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18829 
18830         "   %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18831         "   %fp_tmp = OpTypePointer Function %st_tmp\n",
18832 
18833         "OpMemberDecorate %st_tmp 0 Offset 0\n"
18834         "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18835 
18836         "      %tmp = OpVariable %fp_tmp Function\n",
18837     };
18838 
18839     const Math16ArgFragments argFragmentFrexpS = {
18840         " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18841         "  %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
18842         "  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18843         "      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18844 
18845         "",
18846 
18847         "",
18848 
18849         "      %tmp = OpVariable %fp_${dr}i32 Function\n",
18850     };
18851 
18852     const Math16ArgFragments argFragmentFrexpE = {
18853         " %val_src0  = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18854         "  %out_exp  = OpAccessChain %fp_${dr}i32 %tmp\n"
18855         "%val_unused = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18856         "%val_dst_i  = OpLoad %${dr}i32 %out_exp\n"
18857         "  %val_dst  = OpConvertSToF %${tr} %val_dst_i\n"
18858         "      %dst  = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18859 
18860         "",
18861 
18862         "",
18863 
18864         "      %tmp  = OpVariable %fp_${dr}i32 Function\n",
18865     };
18866 
18867     string load_funcs[MATH16_TYPE_LAST];
18868     load_funcs[SCALAR] = loadScalarF16FromUint;
18869     load_funcs[VEC2]   = loadV2F16FromUint;
18870     load_funcs[VEC3]   = loadV3F16FromUints;
18871     load_funcs[VEC4]   = loadV4F16FromUints;
18872     load_funcs[MAT2X2] = loadM2x2F16FromUints;
18873     load_funcs[MAT2X3] = loadM2x3F16FromUints;
18874     load_funcs[MAT2X4] = loadM2x4F16FromUints;
18875     load_funcs[MAT3X2] = loadM3x2F16FromUints;
18876     load_funcs[MAT3X3] = loadM3x3F16FromUints;
18877     load_funcs[MAT3X4] = loadM3x4F16FromUints;
18878     load_funcs[MAT4X2] = loadM4x2F16FromUints;
18879     load_funcs[MAT4X3] = loadM4x3F16FromUints;
18880     load_funcs[MAT4X4] = loadM4x4F16FromUints;
18881 
18882     string store_funcs[MATH16_TYPE_LAST];
18883     store_funcs[SCALAR] = storeScalarF16AsUint;
18884     store_funcs[VEC2]   = storeV2F16AsUint;
18885     store_funcs[VEC3]   = storeV3F16AsUints;
18886     store_funcs[VEC4]   = storeV4F16AsUints;
18887     store_funcs[MAT2X2] = storeM2x2F16AsUints;
18888     store_funcs[MAT2X3] = storeM2x3F16AsUints;
18889     store_funcs[MAT2X4] = storeM2x4F16AsUints;
18890     store_funcs[MAT3X2] = storeM3x2F16AsUints;
18891     store_funcs[MAT3X3] = storeM3x3F16AsUints;
18892     store_funcs[MAT3X4] = storeM3x4F16AsUints;
18893     store_funcs[MAT4X2] = storeM4x2F16AsUints;
18894     store_funcs[MAT4X3] = storeM4x3F16AsUints;
18895     store_funcs[MAT4X4] = storeM4x4F16AsUints;
18896 
18897     const Math16TestType &testType         = testTypes[testTypeIdx];
18898     const string funcNameString            = string(testFunc.funcName) + string(testFunc.funcSuffix);
18899     const string testName                  = de::toLower(funcNameString);
18900     const Math16ArgFragments *argFragments = DE_NULL;
18901     const size_t typeStructStride          = testType.typeStructStride;
18902     const bool extInst                     = !(testFunc.funcName[0] == 'O' && testFunc.funcName[1] == 'p');
18903     const size_t numFloatsPerArg0Type      = testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16);
18904     const size_t iterations                = numDataPoints / numFloatsPerArg0Type;
18905     const size_t numFloatsPerResultType    = testTypes[testFunc.typeResult].typeArrayStride / sizeof(deFloat16);
18906     const vector<deFloat16> float16UnusedOutput(iterations * numFloatsPerResultType, 0);
18907     VulkanFeatures features;
18908     SpecResource specResource;
18909     map<string, string> specs;
18910     map<string, string> fragments;
18911     vector<string> extensions;
18912     string funcCall;
18913     string funcVariables;
18914     string variables;
18915     string declarations;
18916     string decorations;
18917     string functions;
18918 
18919     switch (testFunc.funcArgsCount)
18920     {
18921     case 1:
18922     {
18923         argFragments = &argFragment1;
18924 
18925         if (funcNameString == "ModfFrac")
18926             argFragments = &argFragmentModfFrac;
18927         if (funcNameString == "ModfInt")
18928             argFragments = &argFragmentModfInt;
18929         if (funcNameString == "ModfStructFrac")
18930             argFragments = &argFragmentModfStruct;
18931         if (funcNameString == "ModfStructInt")
18932             argFragments = &argFragmentModfStruct;
18933         if (funcNameString == "FrexpS")
18934             argFragments = &argFragmentFrexpS;
18935         if (funcNameString == "FrexpE")
18936             argFragments = &argFragmentFrexpE;
18937         if (funcNameString == "FrexpStructS")
18938             argFragments = &argFragmentFrexpStructS;
18939         if (funcNameString == "FrexpStructE")
18940             argFragments = &argFragmentFrexpStructE;
18941 
18942         break;
18943     }
18944     case 2:
18945     {
18946         argFragments = &argFragment2;
18947 
18948         if (funcNameString == "Ldexp")
18949             argFragments = &argFragmentLdExp;
18950 
18951         break;
18952     }
18953     case 3:
18954     {
18955         argFragments = &argFragment3;
18956 
18957         break;
18958     }
18959     default:
18960     {
18961         TCU_THROW(InternalError, "Invalid number of arguments");
18962     }
18963     }
18964 
18965     functions = StringTemplate(store_funcs[testFunc.typeResult]).specialize({{"var", "ssbo_dst"}});
18966     if (testFunc.funcArgsCount == 1)
18967     {
18968         functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18969         variables += " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18970                      "  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18971 
18972         decorations += "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18973                        "OpDecorate %ssbo_src0 Binding 0\n"
18974                        "OpDecorate %ssbo_dst DescriptorSet 0\n"
18975                        "OpDecorate %ssbo_dst Binding 1\n";
18976     }
18977     else if (testFunc.funcArgsCount == 2)
18978     {
18979         functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18980         functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18981         variables += " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18982                      " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18983                      "  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18984 
18985         decorations += "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18986                        "OpDecorate %ssbo_src0 Binding 0\n"
18987                        "OpDecorate %ssbo_src1 DescriptorSet 0\n"
18988                        "OpDecorate %ssbo_src1 Binding 1\n"
18989                        "OpDecorate %ssbo_dst DescriptorSet 0\n"
18990                        "OpDecorate %ssbo_dst Binding 2\n";
18991     }
18992     else if (testFunc.funcArgsCount == 3)
18993     {
18994         functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18995         functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18996         functions += StringTemplate(load_funcs[testFunc.typeArg2]).specialize({{"var", "ssbo_src2"}});
18997         variables += " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18998                      " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18999                      " %ssbo_src2 = OpVariable %up_SSBO_${store_t2} Uniform\n"
19000                      "  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
19001 
19002         decorations += "OpDecorate %ssbo_src0 DescriptorSet 0\n"
19003                        "OpDecorate %ssbo_src0 Binding 0\n"
19004                        "OpDecorate %ssbo_src1 DescriptorSet 0\n"
19005                        "OpDecorate %ssbo_src1 Binding 1\n"
19006                        "OpDecorate %ssbo_src2 DescriptorSet 0\n"
19007                        "OpDecorate %ssbo_src2 Binding 2\n"
19008                        "OpDecorate %ssbo_dst DescriptorSet 0\n"
19009                        "OpDecorate %ssbo_dst Binding 3\n";
19010     }
19011     else
19012     {
19013         TCU_THROW(InternalError, "Invalid number of function arguments");
19014     }
19015 
19016     variables += argFragments->variables;
19017     decorations += argFragments->decorations;
19018 
19019     specs["dr"]            = testTypes[testFunc.typeResult].typePrefix;
19020     specs["d0"]            = testTypes[testFunc.typeArg0].typePrefix;
19021     specs["d1"]            = testTypes[testFunc.typeArg1].typePrefix;
19022     specs["d2"]            = testTypes[testFunc.typeArg2].typePrefix;
19023     specs["tr"]            = string(testTypes[testFunc.typeResult].typePrefix) + componentType;
19024     specs["t0"]            = string(testTypes[testFunc.typeArg0].typePrefix) + componentType;
19025     specs["t1"]            = string(testTypes[testFunc.typeArg1].typePrefix) + componentType;
19026     specs["t2"]            = string(testTypes[testFunc.typeArg2].typePrefix) + componentType;
19027     specs["store_tr"]      = string(testTypes[testFunc.typeResult].storage_type);
19028     specs["store_t0"]      = string(testTypes[testFunc.typeArg0].storage_type);
19029     specs["store_t1"]      = string(testTypes[testFunc.typeArg1].storage_type);
19030     specs["store_t2"]      = string(testTypes[testFunc.typeArg2].storage_type);
19031     specs["struct_stride"] = de::toString(typeStructStride);
19032     specs["op"]            = extInst ? "OpExtInst" : testFunc.funcName;
19033     specs["ext_inst"]      = extInst ? string("%ext_import ") + testFunc.funcName : "";
19034     specs["struct_member"] = de::toLower(testFunc.funcSuffix);
19035 
19036     variables     = StringTemplate(variables).specialize(specs);
19037     decorations   = StringTemplate(decorations).specialize(specs);
19038     funcVariables = StringTemplate(argFragments->funcVariables).specialize(specs);
19039     funcCall      = StringTemplate(argFragments->bodies).specialize(specs);
19040 
19041     specs["num_data_points"] = de::toString(iterations);
19042     specs["arg_vars"]        = variables;
19043     specs["arg_decorations"] = decorations;
19044     specs["arg_infunc_vars"] = funcVariables;
19045     specs["arg_func_call"]   = funcCall;
19046 
19047     fragments["extension"]  = "%ext_import = OpExtInstImport \"GLSL.std.450\"";
19048     fragments["capability"] = "OpCapability Matrix\nOpCapability Float16\n";
19049     fragments["decoration"] = decoration.specialize(specs);
19050     fragments["pre_main"]   = preMain.specialize(specs) + functions;
19051     fragments["testfun"]    = testFun.specialize(specs);
19052 
19053     for (size_t inputArgNdx = 0; inputArgNdx < testFunc.funcArgsCount; ++inputArgNdx)
19054     {
19055         const size_t numFloatsPerItem =
19056             (inputArgNdx == 0) ? testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16) :
19057             (inputArgNdx == 1) ? testTypes[testFunc.typeArg1].typeArrayStride / sizeof(deFloat16) :
19058             (inputArgNdx == 2) ? testTypes[testFunc.typeArg2].typeArrayStride / sizeof(deFloat16) :
19059                                  -1;
19060         const vector<deFloat16> inputData = testFunc.getInputDataFunc(
19061             seed, numFloatsPerItem * iterations, testTypeIdx, numFloatsPerItem, testFunc.funcArgsCount, inputArgNdx);
19062 
19063         specResource.inputs.push_back(
19064             Resource(BufferSp(new Float16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
19065     }
19066 
19067     specResource.outputs.push_back(
19068         Resource(BufferSp(new Float16Buffer(float16UnusedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
19069     specResource.verifyIO = testFunc.verifyFunc;
19070 
19071     extensions.push_back("VK_KHR_shader_float16_int8");
19072 
19073     features.extFloat16Int8.shaderFloat16 = true;
19074     if (specResource.graphicsFeaturesRequired)
19075         features.coreFeatures.vertexPipelineStoresAndAtomics = true;
19076 
19077     finalizeTestsCreation(specResource, fragments, testCtx, testGroup, testName, features, extensions, IVec3(1, 1, 1));
19078 }
19079 
19080 template <size_t C, class SpecResource>
createFloat16ArithmeticSet(tcu::TestContext & testCtx)19081 tcu::TestCaseGroup *createFloat16ArithmeticSet(tcu::TestContext &testCtx)
19082 {
19083     DE_STATIC_ASSERT(C >= 1 && C <= 4);
19084 
19085     const std::string testGroupName(string("arithmetic_") + de::toString(C));
19086     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, testGroupName.c_str()));
19087     const Math16TestFunc testFuncs[] = {
19088         {"OpFNegate", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16OpFNegate>},
19089         {"Round", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Round>},
19090         {"RoundEven", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16RoundEven>},
19091         {"Trunc", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Trunc>},
19092         {"FAbs", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16FAbs>},
19093         {"FSign", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16FSign>},
19094         {"Floor", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Floor>},
19095         {"Ceil", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Ceil>},
19096         {"Fract", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Fract>},
19097         {"Radians", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Radians>},
19098         {"Degrees", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Degrees>},
19099         {"Sin", "", 1, C, C, 0, 0, &getInputDataPI, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Sin>},
19100         {"Cos", "", 1, C, C, 0, 0, &getInputDataPI, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Cos>},
19101         {"Tan", "", 1, C, C, 0, 0, &getInputDataPI, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Tan>},
19102         {"Asin", "", 1, C, C, 0, 0, &getInputDataA, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Asin>},
19103         {"Acos", "", 1, C, C, 0, 0, &getInputDataA, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Acos>},
19104         {"Atan", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Atan>},
19105         {"Sinh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Sinh>},
19106         {"Cosh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Cosh>},
19107         {"Tanh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Tanh>},
19108         {"Asinh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Asinh>},
19109         {"Acosh", "", 1, C, C, 0, 0, &getInputDataAC, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Acosh>},
19110         {"Atanh", "", 1, C, C, 0, 0, &getInputDataA, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Atanh>},
19111         {"Exp", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Exp>},
19112         {"Log", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Log>},
19113         {"Exp2", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Exp2>},
19114         {"Log2", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Log2>},
19115         {"Sqrt", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Sqrt>},
19116         {"InverseSqrt", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc<C, C, 0, 0, fp16InverseSqrt>},
19117         {"Modf", "Frac", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16ModfFrac>},
19118         {"Modf", "Int", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16ModfInt>},
19119         {"ModfStruct", "Frac", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16ModfFrac>},
19120         {"ModfStruct", "Int", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16ModfInt>},
19121         {"Frexp", "S", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16FrexpS>},
19122         {"Frexp", "E", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16FrexpE>},
19123         {"FrexpStruct", "S", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16FrexpS>},
19124         {"FrexpStruct", "E", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16FrexpE>},
19125         {"OpFAdd", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16OpFAdd>},
19126         {"OpFSub", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16OpFSub>},
19127         {"OpFMul", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16OpFMul>},
19128         {"OpFDiv", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16OpFDiv>},
19129         {"Atan2", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16Atan2>},
19130         {"Pow", "", 2, C, C, C, 0, &getInputDataP, compareFP16ArithmeticFunc<C, C, C, 0, fp16Pow>},
19131         {"FMin", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16FMin>},
19132         {"FMax", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16FMax>},
19133         {"Step", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16Step>},
19134         {"Ldexp", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16Ldexp>},
19135         {"FClamp", "", 3, C, C, C, C, &getInputData, compareFP16ArithmeticFunc<C, C, C, C, fp16FClamp>},
19136         {"FMix", "", 3, C, C, C, C, &getInputDataD, compareFP16ArithmeticFunc<C, C, C, C, fp16FMix>},
19137         {"SmoothStep", "", 3, C, C, C, C, &getInputDataSS, compareFP16ArithmeticFunc<C, C, C, C, fp16SmoothStep>},
19138         {"Fma", "", 3, C, C, C, C, &getInputData, compareFP16ArithmeticFunc<C, C, C, C, fp16Fma>},
19139         {"Length", "", 1, 1, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<1, C, 0, 0, fp16Length>},
19140         {"Distance", "", 2, 1, C, C, 0, &getInputData, compareFP16ArithmeticFunc<1, C, C, 0, fp16Distance>},
19141         {"Cross", "", 2, C, C, C, 0, &getInputDataD, compareFP16ArithmeticFunc<C, C, C, 0, fp16Cross>},
19142         {"Normalize", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Normalize>},
19143         {"FaceForward", "", 3, C, C, C, C, &getInputDataD, compareFP16ArithmeticFunc<C, C, C, C, fp16FaceForward>},
19144         {"Reflect", "", 2, C, C, C, 0, &getInputDataD, compareFP16ArithmeticFunc<C, C, C, 0, fp16Reflect>},
19145         {"Refract", "", 3, C, C, C, 1, &getInputDataN, compareFP16ArithmeticFunc<C, C, C, 1, fp16Refract>},
19146         {"OpDot", "", 2, 1, C, C, 0, &getInputDataD, compareFP16ArithmeticFunc<1, C, C, 0, fp16Dot>},
19147         {"OpVectorTimesScalar", "", 2, C, C, 1, 0, &getInputDataV,
19148          compareFP16ArithmeticFunc<C, C, 1, 0, fp16VectorTimesScalar>},
19149     };
19150 
19151     for (uint32_t testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
19152     {
19153         const Math16TestFunc &testFunc = testFuncs[testFuncIdx];
19154         const string funcNameString    = testFunc.funcName;
19155 
19156         if ((C != 3) && funcNameString == "Cross")
19157             continue;
19158 
19159         if ((C < 2) && funcNameString == "OpDot")
19160             continue;
19161 
19162         if ((C < 2) && funcNameString == "OpVectorTimesScalar")
19163             continue;
19164 
19165         createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), C, testFunc);
19166     }
19167 
19168     return testGroup.release();
19169 }
19170 
19171 template <class SpecResource>
createFloat16ArithmeticSet(tcu::TestContext & testCtx)19172 tcu::TestCaseGroup *createFloat16ArithmeticSet(tcu::TestContext &testCtx)
19173 {
19174     const std::string testGroupName("arithmetic");
19175     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, testGroupName.c_str()));
19176     const Math16TestFunc testFuncs[] = {
19177         {"OpTranspose", "2x2", 1, MAT2X2, MAT2X2, 0, 0, &getInputDataM,
19178          compareFP16ArithmeticFunc<4, 4, 0, 0, fp16Transpose<2, 2>>},
19179         {"OpTranspose", "3x2", 1, MAT2X3, MAT3X2, 0, 0, &getInputDataM,
19180          compareFP16ArithmeticFunc<8, 8, 0, 0, fp16Transpose<3, 2>>},
19181         {"OpTranspose", "4x2", 1, MAT2X4, MAT4X2, 0, 0, &getInputDataM,
19182          compareFP16ArithmeticFunc<8, 8, 0, 0, fp16Transpose<4, 2>>},
19183         {"OpTranspose", "2x3", 1, MAT3X2, MAT2X3, 0, 0, &getInputDataM,
19184          compareFP16ArithmeticFunc<8, 8, 0, 0, fp16Transpose<2, 3>>},
19185         {"OpTranspose", "3x3", 1, MAT3X3, MAT3X3, 0, 0, &getInputDataM,
19186          compareFP16ArithmeticFunc<16, 16, 0, 0, fp16Transpose<3, 3>>},
19187         {"OpTranspose", "4x3", 1, MAT3X4, MAT4X3, 0, 0, &getInputDataM,
19188          compareFP16ArithmeticFunc<16, 16, 0, 0, fp16Transpose<4, 3>>},
19189         {"OpTranspose", "2x4", 1, MAT4X2, MAT2X4, 0, 0, &getInputDataM,
19190          compareFP16ArithmeticFunc<8, 8, 0, 0, fp16Transpose<2, 4>>},
19191         {"OpTranspose", "3x4", 1, MAT4X3, MAT3X4, 0, 0, &getInputDataM,
19192          compareFP16ArithmeticFunc<16, 16, 0, 0, fp16Transpose<3, 4>>},
19193         {"OpTranspose", "4x4", 1, MAT4X4, MAT4X4, 0, 0, &getInputDataM,
19194          compareFP16ArithmeticFunc<16, 16, 0, 0, fp16Transpose<4, 4>>},
19195         {"OpMatrixTimesScalar", "2x2", 2, MAT2X2, MAT2X2, 1, 0, &getInputDataD,
19196          compareFP16ArithmeticFunc<4, 4, 1, 0, fp16MatrixTimesScalar<2, 2>>},
19197         {"OpMatrixTimesScalar", "2x3", 2, MAT2X3, MAT2X3, 1, 0, &getInputDataD,
19198          compareFP16ArithmeticFunc<8, 8, 1, 0, fp16MatrixTimesScalar<2, 3>>},
19199         {"OpMatrixTimesScalar", "2x4", 2, MAT2X4, MAT2X4, 1, 0, &getInputDataD,
19200          compareFP16ArithmeticFunc<8, 8, 1, 0, fp16MatrixTimesScalar<2, 4>>},
19201         {"OpMatrixTimesScalar", "3x2", 2, MAT3X2, MAT3X2, 1, 0, &getInputDataD,
19202          compareFP16ArithmeticFunc<8, 8, 1, 0, fp16MatrixTimesScalar<3, 2>>},
19203         {"OpMatrixTimesScalar", "3x3", 2, MAT3X3, MAT3X3, 1, 0, &getInputDataD,
19204          compareFP16ArithmeticFunc<16, 16, 1, 0, fp16MatrixTimesScalar<3, 3>>},
19205         {"OpMatrixTimesScalar", "3x4", 2, MAT3X4, MAT3X4, 1, 0, &getInputDataD,
19206          compareFP16ArithmeticFunc<16, 16, 1, 0, fp16MatrixTimesScalar<3, 4>>},
19207         {"OpMatrixTimesScalar", "4x2", 2, MAT4X2, MAT4X2, 1, 0, &getInputDataD,
19208          compareFP16ArithmeticFunc<8, 8, 1, 0, fp16MatrixTimesScalar<4, 2>>},
19209         {"OpMatrixTimesScalar", "4x3", 2, MAT4X3, MAT4X3, 1, 0, &getInputDataD,
19210          compareFP16ArithmeticFunc<16, 16, 1, 0, fp16MatrixTimesScalar<4, 3>>},
19211         {"OpMatrixTimesScalar", "4x4", 2, MAT4X4, MAT4X4, 1, 0, &getInputDataD,
19212          compareFP16ArithmeticFunc<16, 16, 1, 0, fp16MatrixTimesScalar<4, 4>>},
19213         {"OpVectorTimesMatrix", "2x2", 2, VEC2, VEC2, MAT2X2, 0, &getInputDataD,
19214          compareFP16ArithmeticFunc<2, 2, 4, 0, fp16VectorTimesMatrix<2, 2>>},
19215         {"OpVectorTimesMatrix", "2x3", 2, VEC2, VEC3, MAT2X3, 0, &getInputDataD,
19216          compareFP16ArithmeticFunc<2, 3, 8, 0, fp16VectorTimesMatrix<2, 3>>},
19217         {"OpVectorTimesMatrix", "2x4", 2, VEC2, VEC4, MAT2X4, 0, &getInputDataD,
19218          compareFP16ArithmeticFunc<2, 4, 8, 0, fp16VectorTimesMatrix<2, 4>>},
19219         {"OpVectorTimesMatrix", "3x2", 2, VEC3, VEC2, MAT3X2, 0, &getInputDataD,
19220          compareFP16ArithmeticFunc<3, 2, 8, 0, fp16VectorTimesMatrix<3, 2>>},
19221         {"OpVectorTimesMatrix", "3x3", 2, VEC3, VEC3, MAT3X3, 0, &getInputDataD,
19222          compareFP16ArithmeticFunc<3, 3, 16, 0, fp16VectorTimesMatrix<3, 3>>},
19223         {"OpVectorTimesMatrix", "3x4", 2, VEC3, VEC4, MAT3X4, 0, &getInputDataD,
19224          compareFP16ArithmeticFunc<3, 4, 16, 0, fp16VectorTimesMatrix<3, 4>>},
19225         {"OpVectorTimesMatrix", "4x2", 2, VEC4, VEC2, MAT4X2, 0, &getInputDataD,
19226          compareFP16ArithmeticFunc<4, 2, 8, 0, fp16VectorTimesMatrix<4, 2>>},
19227         {"OpVectorTimesMatrix", "4x3", 2, VEC4, VEC3, MAT4X3, 0, &getInputDataD,
19228          compareFP16ArithmeticFunc<4, 3, 16, 0, fp16VectorTimesMatrix<4, 3>>},
19229         {"OpVectorTimesMatrix", "4x4", 2, VEC4, VEC4, MAT4X4, 0, &getInputDataD,
19230          compareFP16ArithmeticFunc<4, 4, 16, 0, fp16VectorTimesMatrix<4, 4>>},
19231         {"OpMatrixTimesVector", "2x2", 2, VEC2, MAT2X2, VEC2, 0, &getInputDataD,
19232          compareFP16ArithmeticFunc<2, 4, 2, 0, fp16MatrixTimesVector<2, 2>>},
19233         {"OpMatrixTimesVector", "2x3", 2, VEC3, MAT2X3, VEC2, 0, &getInputDataD,
19234          compareFP16ArithmeticFunc<3, 8, 2, 0, fp16MatrixTimesVector<2, 3>>},
19235         {"OpMatrixTimesVector", "2x4", 2, VEC4, MAT2X4, VEC2, 0, &getInputDataD,
19236          compareFP16ArithmeticFunc<4, 8, 2, 0, fp16MatrixTimesVector<2, 4>>},
19237         {"OpMatrixTimesVector", "3x2", 2, VEC2, MAT3X2, VEC3, 0, &getInputDataD,
19238          compareFP16ArithmeticFunc<2, 8, 3, 0, fp16MatrixTimesVector<3, 2>>},
19239         {"OpMatrixTimesVector", "3x3", 2, VEC3, MAT3X3, VEC3, 0, &getInputDataD,
19240          compareFP16ArithmeticFunc<3, 16, 3, 0, fp16MatrixTimesVector<3, 3>>},
19241         {"OpMatrixTimesVector", "3x4", 2, VEC4, MAT3X4, VEC3, 0, &getInputDataD,
19242          compareFP16ArithmeticFunc<4, 16, 3, 0, fp16MatrixTimesVector<3, 4>>},
19243         {"OpMatrixTimesVector", "4x2", 2, VEC2, MAT4X2, VEC4, 0, &getInputDataD,
19244          compareFP16ArithmeticFunc<2, 8, 4, 0, fp16MatrixTimesVector<4, 2>>},
19245         {"OpMatrixTimesVector", "4x3", 2, VEC3, MAT4X3, VEC4, 0, &getInputDataD,
19246          compareFP16ArithmeticFunc<3, 16, 4, 0, fp16MatrixTimesVector<4, 3>>},
19247         {"OpMatrixTimesVector", "4x4", 2, VEC4, MAT4X4, VEC4, 0, &getInputDataD,
19248          compareFP16ArithmeticFunc<4, 16, 4, 0, fp16MatrixTimesVector<4, 4>>},
19249         {"OpMatrixTimesMatrix", "2x2_2x2", 2, MAT2X2, MAT2X2, MAT2X2, 0, &getInputDataD,
19250          compareFP16ArithmeticFunc<4, 4, 4, 0, fp16MatrixTimesMatrix<2, 2, 2, 2>>},
19251         {"OpMatrixTimesMatrix", "2x2_3x2", 2, MAT3X2, MAT2X2, MAT3X2, 0, &getInputDataD,
19252          compareFP16ArithmeticFunc<8, 4, 8, 0, fp16MatrixTimesMatrix<2, 2, 3, 2>>},
19253         {"OpMatrixTimesMatrix", "2x2_4x2", 2, MAT4X2, MAT2X2, MAT4X2, 0, &getInputDataD,
19254          compareFP16ArithmeticFunc<8, 4, 8, 0, fp16MatrixTimesMatrix<2, 2, 4, 2>>},
19255         {"OpMatrixTimesMatrix", "2x3_2x2", 2, MAT2X3, MAT2X3, MAT2X2, 0, &getInputDataD,
19256          compareFP16ArithmeticFunc<8, 8, 4, 0, fp16MatrixTimesMatrix<2, 3, 2, 2>>},
19257         {"OpMatrixTimesMatrix", "2x3_3x2", 2, MAT3X3, MAT2X3, MAT3X2, 0, &getInputDataD,
19258          compareFP16ArithmeticFunc<16, 8, 8, 0, fp16MatrixTimesMatrix<2, 3, 3, 2>>},
19259         {"OpMatrixTimesMatrix", "2x3_4x2", 2, MAT4X3, MAT2X3, MAT4X2, 0, &getInputDataD,
19260          compareFP16ArithmeticFunc<16, 8, 8, 0, fp16MatrixTimesMatrix<2, 3, 4, 2>>},
19261         {"OpMatrixTimesMatrix", "2x4_2x2", 2, MAT2X4, MAT2X4, MAT2X2, 0, &getInputDataD,
19262          compareFP16ArithmeticFunc<8, 8, 4, 0, fp16MatrixTimesMatrix<2, 4, 2, 2>>},
19263         {"OpMatrixTimesMatrix", "2x4_3x2", 2, MAT3X4, MAT2X4, MAT3X2, 0, &getInputDataD,
19264          compareFP16ArithmeticFunc<16, 8, 8, 0, fp16MatrixTimesMatrix<2, 4, 3, 2>>},
19265         {"OpMatrixTimesMatrix", "2x4_4x2", 2, MAT4X4, MAT2X4, MAT4X2, 0, &getInputDataD,
19266          compareFP16ArithmeticFunc<16, 8, 8, 0, fp16MatrixTimesMatrix<2, 4, 4, 2>>},
19267         {"OpMatrixTimesMatrix", "3x2_2x3", 2, MAT2X2, MAT3X2, MAT2X3, 0, &getInputDataD,
19268          compareFP16ArithmeticFunc<4, 8, 8, 0, fp16MatrixTimesMatrix<3, 2, 2, 3>>},
19269         {"OpMatrixTimesMatrix", "3x2_3x3", 2, MAT3X2, MAT3X2, MAT3X3, 0, &getInputDataD,
19270          compareFP16ArithmeticFunc<8, 8, 16, 0, fp16MatrixTimesMatrix<3, 2, 3, 3>>},
19271         {"OpMatrixTimesMatrix", "3x2_4x3", 2, MAT4X2, MAT3X2, MAT4X3, 0, &getInputDataD,
19272          compareFP16ArithmeticFunc<8, 8, 16, 0, fp16MatrixTimesMatrix<3, 2, 4, 3>>},
19273         {"OpMatrixTimesMatrix", "3x3_2x3", 2, MAT2X3, MAT3X3, MAT2X3, 0, &getInputDataD,
19274          compareFP16ArithmeticFunc<8, 16, 8, 0, fp16MatrixTimesMatrix<3, 3, 2, 3>>},
19275         {"OpMatrixTimesMatrix", "3x3_3x3", 2, MAT3X3, MAT3X3, MAT3X3, 0, &getInputDataD,
19276          compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<3, 3, 3, 3>>},
19277         {"OpMatrixTimesMatrix", "3x3_4x3", 2, MAT4X3, MAT3X3, MAT4X3, 0, &getInputDataD,
19278          compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<3, 3, 4, 3>>},
19279         {"OpMatrixTimesMatrix", "3x4_2x3", 2, MAT2X4, MAT3X4, MAT2X3, 0, &getInputDataD,
19280          compareFP16ArithmeticFunc<8, 16, 8, 0, fp16MatrixTimesMatrix<3, 4, 2, 3>>},
19281         {"OpMatrixTimesMatrix", "3x4_3x3", 2, MAT3X4, MAT3X4, MAT3X3, 0, &getInputDataD,
19282          compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<3, 4, 3, 3>>},
19283         {"OpMatrixTimesMatrix", "3x4_4x3", 2, MAT4X4, MAT3X4, MAT4X3, 0, &getInputDataD,
19284          compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<3, 4, 4, 3>>},
19285         {"OpMatrixTimesMatrix", "4x2_2x4", 2, MAT2X2, MAT4X2, MAT2X4, 0, &getInputDataD,
19286          compareFP16ArithmeticFunc<4, 8, 8, 0, fp16MatrixTimesMatrix<4, 2, 2, 4>>},
19287         {"OpMatrixTimesMatrix", "4x2_3x4", 2, MAT3X2, MAT4X2, MAT3X4, 0, &getInputDataD,
19288          compareFP16ArithmeticFunc<8, 8, 16, 0, fp16MatrixTimesMatrix<4, 2, 3, 4>>},
19289         {"OpMatrixTimesMatrix", "4x2_4x4", 2, MAT4X2, MAT4X2, MAT4X4, 0, &getInputDataD,
19290          compareFP16ArithmeticFunc<8, 8, 16, 0, fp16MatrixTimesMatrix<4, 2, 4, 4>>},
19291         {"OpMatrixTimesMatrix", "4x3_2x4", 2, MAT2X3, MAT4X3, MAT2X4, 0, &getInputDataD,
19292          compareFP16ArithmeticFunc<8, 16, 8, 0, fp16MatrixTimesMatrix<4, 3, 2, 4>>},
19293         {"OpMatrixTimesMatrix", "4x3_3x4", 2, MAT3X3, MAT4X3, MAT3X4, 0, &getInputDataD,
19294          compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<4, 3, 3, 4>>},
19295         {"OpMatrixTimesMatrix", "4x3_4x4", 2, MAT4X3, MAT4X3, MAT4X4, 0, &getInputDataD,
19296          compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<4, 3, 4, 4>>},
19297         {"OpMatrixTimesMatrix", "4x4_2x4", 2, MAT2X4, MAT4X4, MAT2X4, 0, &getInputDataD,
19298          compareFP16ArithmeticFunc<8, 16, 8, 0, fp16MatrixTimesMatrix<4, 4, 2, 4>>},
19299         {"OpMatrixTimesMatrix", "4x4_3x4", 2, MAT3X4, MAT4X4, MAT3X4, 0, &getInputDataD,
19300          compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<4, 4, 3, 4>>},
19301         {"OpMatrixTimesMatrix", "4x4_4x4", 2, MAT4X4, MAT4X4, MAT4X4, 0, &getInputDataD,
19302          compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<4, 4, 4, 4>>},
19303         {"OpOuterProduct", "2x2", 2, MAT2X2, VEC2, VEC2, 0, &getInputDataD,
19304          compareFP16ArithmeticFunc<4, 2, 2, 0, fp16OuterProduct<2, 2>>},
19305         {"OpOuterProduct", "2x3", 2, MAT2X3, VEC3, VEC2, 0, &getInputDataD,
19306          compareFP16ArithmeticFunc<8, 3, 2, 0, fp16OuterProduct<2, 3>>},
19307         {"OpOuterProduct", "2x4", 2, MAT2X4, VEC4, VEC2, 0, &getInputDataD,
19308          compareFP16ArithmeticFunc<8, 4, 2, 0, fp16OuterProduct<2, 4>>},
19309         {"OpOuterProduct", "3x2", 2, MAT3X2, VEC2, VEC3, 0, &getInputDataD,
19310          compareFP16ArithmeticFunc<8, 2, 3, 0, fp16OuterProduct<3, 2>>},
19311         {"OpOuterProduct", "3x3", 2, MAT3X3, VEC3, VEC3, 0, &getInputDataD,
19312          compareFP16ArithmeticFunc<16, 3, 3, 0, fp16OuterProduct<3, 3>>},
19313         {"OpOuterProduct", "3x4", 2, MAT3X4, VEC4, VEC3, 0, &getInputDataD,
19314          compareFP16ArithmeticFunc<16, 4, 3, 0, fp16OuterProduct<3, 4>>},
19315         {"OpOuterProduct", "4x2", 2, MAT4X2, VEC2, VEC4, 0, &getInputDataD,
19316          compareFP16ArithmeticFunc<8, 2, 4, 0, fp16OuterProduct<4, 2>>},
19317         {"OpOuterProduct", "4x3", 2, MAT4X3, VEC3, VEC4, 0, &getInputDataD,
19318          compareFP16ArithmeticFunc<16, 3, 4, 0, fp16OuterProduct<4, 3>>},
19319         {"OpOuterProduct", "4x4", 2, MAT4X4, VEC4, VEC4, 0, &getInputDataD,
19320          compareFP16ArithmeticFunc<16, 4, 4, 0, fp16OuterProduct<4, 4>>},
19321         {"Determinant", "2x2", 1, SCALAR, MAT2X2, NONE, 0, &getInputDataC,
19322          compareFP16ArithmeticFunc<1, 4, 0, 0, fp16Determinant<2>>},
19323         {"Determinant", "3x3", 1, SCALAR, MAT3X3, NONE, 0, &getInputDataC,
19324          compareFP16ArithmeticFunc<1, 16, 0, 0, fp16Determinant<3>>},
19325         {"Determinant", "4x4", 1, SCALAR, MAT4X4, NONE, 0, &getInputDataC,
19326          compareFP16ArithmeticFunc<1, 16, 0, 0, fp16Determinant<4>>},
19327         {"MatrixInverse", "2x2", 1, MAT2X2, MAT2X2, NONE, 0, &getInputDataC,
19328          compareFP16ArithmeticFunc<4, 4, 0, 0, fp16Inverse<2>>},
19329     };
19330 
19331     for (uint32_t testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
19332     {
19333         const Math16TestFunc &testFunc = testFuncs[testFuncIdx];
19334 
19335         createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), 0, testFunc);
19336     }
19337 
19338     return testGroup.release();
19339 }
19340 
// One Amber-backed comparison case.
struct ComparisonCase
{
    std::string name; // Case name; also the leading part of the Amber script file name.
    std::string desc; // Human-readable text appended to "Compare output of ".
};
19346 
19347 template <size_t C>
createFloat32ComparisonComputeSet(tcu::TestContext & testCtx)19348 tcu::TestCaseGroup *createFloat32ComparisonComputeSet(tcu::TestContext &testCtx)
19349 {
19350     const string testGroupName("comparison_" + de::toString(C));
19351     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, testGroupName.c_str()));
19352 #ifndef CTS_USES_VULKANSC
19353     const char *dataDir = "spirv_assembly/instruction/float32/comparison";
19354 
19355     const ComparisonCase amberTests[] = {{"modfstruct", "modf and modfStruct"},
19356                                          {"frexpstruct", "frexp and frexpStruct"}};
19357 
19358     for (ComparisonCase test : amberTests)
19359     {
19360         const string caseDesc("Compare output of " + test.desc);
19361         const string fileName(test.name + "_" + de::toString(C) + "_comp.amber");
19362 
19363         testGroup->addChild(
19364             cts_amber::createAmberTestCase(testCtx, test.name.c_str(), caseDesc.c_str(), dataDir, fileName));
19365     }
19366 #endif
19367     return testGroup.release();
19368 }
19369 
// A shader stage together with the requirement strings attached to its tests.
struct ShaderStage
{
    std::string name;                     // Short stage tag used in case and file names (e.g. "vert").
    std::vector<std::string> requirement; // Requirement strings handed to the Amber test case; may be empty.
};
19375 
19376 template <size_t C>
createFloat32ComparisonGraphicsSet(tcu::TestContext & testCtx)19377 tcu::TestCaseGroup *createFloat32ComparisonGraphicsSet(tcu::TestContext &testCtx)
19378 {
19379     const string testGroupName("comparison_" + de::toString(C));
19380     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, testGroupName.c_str()));
19381 #ifndef CTS_USES_VULKANSC
19382     const char *dataDir = "spirv_assembly/instruction/float32/comparison";
19383 
19384     const ShaderStage stages[] = {
19385         {"vert", vector<string>(1, "Features.vertexPipelineStoresAndAtomics")},
19386         {"tesc", vector<string>({"Features.vertexPipelineStoresAndAtomics", "Features.tessellationShader"})},
19387         {"tese", vector<string>({"Features.vertexPipelineStoresAndAtomics", "Features.tessellationShader"})},
19388         {"geom", vector<string>({"Features.vertexPipelineStoresAndAtomics", "Features.geometryShader"})},
19389         {"frag", vector<string>(0)}};
19390 
19391     const ComparisonCase amberTests[] = {{"modfstruct", "modf and modfStruct"},
19392                                          {"frexpstruct", "frexp and frexpStruct"}};
19393 
19394     for (ComparisonCase test : amberTests)
19395         for (ShaderStage stage : stages)
19396         {
19397             const string caseName(test.name + "_" + stage.name);
19398             const string caseDesc("Compare output of " + test.desc);
19399             const string fileName(test.name + "_" + de::toString(C) + "_" + stage.name + ".amber");
19400 
19401             testGroup->addChild(cts_amber::createAmberTestCase(testCtx, caseName.c_str(), caseDesc.c_str(), dataDir,
19402                                                                fileName, stage.requirement));
19403         }
19404 #endif
19405 
19406     return testGroup.release();
19407 }
19408 
getNumberTypeName(const NumberType type)19409 const string getNumberTypeName(const NumberType type)
19410 {
19411     if (type == NUMBERTYPE_INT32)
19412     {
19413         return "int";
19414     }
19415     else if (type == NUMBERTYPE_UINT32)
19416     {
19417         return "uint";
19418     }
19419     else if (type == NUMBERTYPE_FLOAT32)
19420     {
19421         return "float";
19422     }
19423     else
19424     {
19425         DE_ASSERT(false);
19426         return "";
19427     }
19428 }
19429 
getInt(de::Random & rnd)19430 int32_t getInt(de::Random &rnd)
19431 {
19432     return rnd.getInt(std::numeric_limits<int>::min(), std::numeric_limits<int>::max());
19433 }
19434 
// Returns str concatenated with itself `times` times; a count of zero or less
// produces an empty string.
const string repeatString(const string &str, int times)
{
    string repeated;

    for (int remaining = times; remaining > 0; --remaining)
        repeated.append(str);

    return repeated;
}
19444 
getRandomConstantString(const NumberType type,de::Random & rnd)19445 const string getRandomConstantString(const NumberType type, de::Random &rnd)
19446 {
19447     if (type == NUMBERTYPE_INT32)
19448     {
19449         return numberToString<int32_t>(getInt(rnd));
19450     }
19451     else if (type == NUMBERTYPE_UINT32)
19452     {
19453         return numberToString<uint32_t>(rnd.getUint32());
19454     }
19455     else if (type == NUMBERTYPE_FLOAT32)
19456     {
19457         return numberToString<float>(rnd.getFloat());
19458     }
19459     else
19460     {
19461         DE_ASSERT(false);
19462         return "";
19463     }
19464 }
19465 
createVectorCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19466 void createVectorCompositeCases(vector<map<string, string>> &testCases, de::Random &rnd, const NumberType type)
19467 {
19468     map<string, string> params;
19469 
19470     // Vec2 to Vec4
19471     for (int width = 2; width <= 4; ++width)
19472     {
19473         const string randomConst    = numberToString(getInt(rnd));
19474         const string widthStr       = numberToString(width);
19475         const string composite_type = "${customType}vec" + widthStr;
19476         const int index             = rnd.getInt(0, width - 1);
19477 
19478         params["type"]          = "vec";
19479         params["name"]          = params["type"] + "_" + widthStr;
19480         params["compositeDecl"] = composite_type + " = OpTypeVector ${customType} " + widthStr + "\n";
19481         params["compositeType"] = composite_type;
19482         params["filler"] = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19483         params["compositeConstruct"] =
19484             "%instance  = OpCompositeConstruct " + composite_type + repeatString(" %filler", width) + "\n";
19485         params["indexes"] = numberToString(index);
19486         testCases.push_back(params);
19487     }
19488 }
19489 
createArrayCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19490 void createArrayCompositeCases(vector<map<string, string>> &testCases, de::Random &rnd, const NumberType type)
19491 {
19492     const int limit = 10;
19493     map<string, string> params;
19494 
19495     for (int width = 2; width <= limit; ++width)
19496     {
19497         string randomConst = numberToString(getInt(rnd));
19498         string widthStr    = numberToString(width);
19499         int index          = rnd.getInt(0, width - 1);
19500 
19501         params["type"]          = "array";
19502         params["name"]          = params["type"] + "_" + widthStr;
19503         params["compositeDecl"] = string("%arraywidth = OpConstant %u32 " + widthStr + "\n") +
19504                                   "%composite = OpTypeArray ${customType} %arraywidth\n";
19505         params["compositeType"] = "%composite";
19506         params["filler"] = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19507         params["compositeConstruct"] =
19508             "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19509         params["indexes"] = numberToString(index);
19510         testCases.push_back(params);
19511     }
19512 }
19513 
createStructCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19514 void createStructCompositeCases(vector<map<string, string>> &testCases, de::Random &rnd, const NumberType type)
19515 {
19516     const int limit = 10;
19517     map<string, string> params;
19518 
19519     for (int width = 2; width <= limit; ++width)
19520     {
19521         string randomConst = numberToString(getInt(rnd));
19522         int index          = rnd.getInt(0, width - 1);
19523 
19524         params["type"]          = "struct";
19525         params["name"]          = params["type"] + "_" + numberToString(width);
19526         params["compositeDecl"] = "%composite = OpTypeStruct" + repeatString(" ${customType}", width) + "\n";
19527         params["compositeType"] = "%composite";
19528         params["filler"] = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19529         params["compositeConstruct"] =
19530             "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19531         params["indexes"] = numberToString(index);
19532         testCases.push_back(params);
19533     }
19534 }
19535 
createMatrixCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19536 void createMatrixCompositeCases(vector<map<string, string>> &testCases, de::Random &rnd, const NumberType type)
19537 {
19538     map<string, string> params;
19539 
19540     // Vec2 to Vec4
19541     for (int width = 2; width <= 4; ++width)
19542     {
19543         string widthStr = numberToString(width);
19544 
19545         for (int column = 2; column <= 4; ++column)
19546         {
19547             int index_0      = rnd.getInt(0, column - 1);
19548             int index_1      = rnd.getInt(0, width - 1);
19549             string columnStr = numberToString(column);
19550 
19551             params["type"]          = "matrix";
19552             params["name"]          = params["type"] + "_" + widthStr + "x" + columnStr;
19553             params["compositeDecl"] = string("%vectype   = OpTypeVector ${customType} " + widthStr + "\n") +
19554                                       "%composite = OpTypeMatrix %vectype " + columnStr + "\n";
19555             params["compositeType"] = "%composite";
19556 
19557             params["filler"] = string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) +
19558                                "\n" + "%fillerVec = OpConstantComposite %vectype" + repeatString(" %filler", width) +
19559                                "\n";
19560 
19561             params["compositeConstruct"] =
19562                 "%instance  = OpCompositeConstruct %composite" + repeatString(" %fillerVec", column) + "\n";
19563             params["indexes"] = numberToString(index_0) + " " + numberToString(index_1);
19564             testCases.push_back(params);
19565         }
19566     }
19567 }
19568 
createCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19569 void createCompositeCases(vector<map<string, string>> &testCases, de::Random &rnd, const NumberType type)
19570 {
19571     createVectorCompositeCases(testCases, rnd, type);
19572     createArrayCompositeCases(testCases, rnd, type);
19573     createStructCompositeCases(testCases, rnd, type);
19574     // Matrix only supports float types
19575     if (type == NUMBERTYPE_FLOAT32)
19576     {
19577         createMatrixCompositeCases(testCases, rnd, type);
19578     }
19579 }
19580 
getAssemblyTypeDeclaration(const NumberType type)19581 const string getAssemblyTypeDeclaration(const NumberType type)
19582 {
19583     switch (type)
19584     {
19585     case NUMBERTYPE_INT32:
19586         return "OpTypeInt 32 1";
19587     case NUMBERTYPE_UINT32:
19588         return "OpTypeInt 32 0";
19589     case NUMBERTYPE_FLOAT32:
19590         return "OpTypeFloat 32";
19591     default:
19592         DE_ASSERT(false);
19593         return "";
19594     }
19595 }
19596 
getAssemblyTypeName(const NumberType type)19597 const string getAssemblyTypeName(const NumberType type)
19598 {
19599     switch (type)
19600     {
19601     case NUMBERTYPE_INT32:
19602         return "%i32";
19603     case NUMBERTYPE_UINT32:
19604         return "%u32";
19605     case NUMBERTYPE_FLOAT32:
19606         return "%f32";
19607     default:
19608         DE_ASSERT(false);
19609         return "";
19610     }
19611 }
19612 
specializeCompositeInsertShaderTemplate(const NumberType type,const map<string,string> & params)19613 const string specializeCompositeInsertShaderTemplate(const NumberType type, const map<string, string> &params)
19614 {
19615     map<string, string> parameters(params);
19616 
19617     const string customType = getAssemblyTypeName(type);
19618     map<string, string> substCustomType;
19619     substCustomType["customType"]    = customType;
19620     parameters["compositeDecl"]      = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
19621     parameters["compositeType"]      = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
19622     parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
19623     parameters["filler"]             = StringTemplate(parameters.at("filler")).specialize(substCustomType);
19624     parameters["customType"]         = customType;
19625     parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
19626 
19627     if (parameters.at("compositeType") != "%u32vec3")
19628     {
19629         parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
19630     }
19631 
19632     return StringTemplate("OpCapability Shader\n"
19633                           "OpCapability Matrix\n"
19634                           "OpMemoryModel Logical GLSL450\n"
19635                           "OpEntryPoint GLCompute %main \"main\" %id\n"
19636                           "OpExecutionMode %main LocalSize 1 1 1\n"
19637 
19638                           "OpSource GLSL 430\n"
19639                           "OpName %main           \"main\"\n"
19640                           "OpName %id             \"gl_GlobalInvocationID\"\n"
19641 
19642                           // Decorators
19643                           "OpDecorate %id BuiltIn GlobalInvocationId\n"
19644                           "OpDecorate %buf BufferBlock\n"
19645                           "OpDecorate %indata DescriptorSet 0\n"
19646                           "OpDecorate %indata Binding 0\n"
19647                           "OpDecorate %outdata DescriptorSet 0\n"
19648                           "OpDecorate %outdata Binding 1\n"
19649                           "OpDecorate %customarr ArrayStride 4\n"
19650                           "${compositeDecorator}"
19651                           "OpMemberDecorate %buf 0 Offset 0\n"
19652 
19653                           // General types
19654                           "%void      = OpTypeVoid\n"
19655                           "%voidf     = OpTypeFunction %void\n"
19656                           "%u32       = OpTypeInt 32 0\n"
19657                           "%i32       = OpTypeInt 32 1\n"
19658                           "%f32       = OpTypeFloat 32\n"
19659 
19660                           // Composite declaration
19661                           "${compositeDecl}"
19662 
19663                           // Constants
19664                           "${filler}"
19665 
19666                           "${u32vec3Decl:opt}"
19667                           "%uvec3ptr  = OpTypePointer Input %u32vec3\n"
19668 
19669                           // Inherited from custom
19670                           "%customptr = OpTypePointer Uniform ${customType}\n"
19671                           "%customarr = OpTypeRuntimeArray ${customType}\n"
19672                           "%buf       = OpTypeStruct %customarr\n"
19673                           "%bufptr    = OpTypePointer Uniform %buf\n"
19674 
19675                           "%indata    = OpVariable %bufptr Uniform\n"
19676                           "%outdata   = OpVariable %bufptr Uniform\n"
19677 
19678                           "%id        = OpVariable %uvec3ptr Input\n"
19679                           "%zero      = OpConstant %i32 0\n"
19680 
19681                           "%main      = OpFunction %void None %voidf\n"
19682                           "%label     = OpLabel\n"
19683                           "%idval     = OpLoad %u32vec3 %id\n"
19684                           "%x         = OpCompositeExtract %u32 %idval 0\n"
19685 
19686                           "%inloc     = OpAccessChain %customptr %indata %zero %x\n"
19687                           "%outloc    = OpAccessChain %customptr %outdata %zero %x\n"
19688                           // Read the input value
19689                           "%inval     = OpLoad ${customType} %inloc\n"
19690                           // Create the composite and fill it
19691                           "${compositeConstruct}"
19692                           // Insert the input value to a place
19693                           "%instance2 = OpCompositeInsert ${compositeType} %inval %instance ${indexes}\n"
19694                           // Read back the value from the position
19695                           "%out_val   = OpCompositeExtract ${customType} %instance2 ${indexes}\n"
19696                           // Store it in the output position
19697                           "             OpStore %outloc %out_val\n"
19698                           "             OpReturn\n"
19699                           "             OpFunctionEnd\n")
19700         .specialize(parameters);
19701 }
19702 
19703 template <typename T>
createCompositeBuffer(T number)19704 BufferSp createCompositeBuffer(T number)
19705 {
19706     return BufferSp(new Buffer<T>(vector<T>(1, number)));
19707 }
19708 
createOpCompositeInsertGroup(tcu::TestContext & testCtx)19709 tcu::TestCaseGroup *createOpCompositeInsertGroup(tcu::TestContext &testCtx)
19710 {
19711     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opcompositeinsert"));
19712     de::Random rnd(deStringHash(group->getName()));
19713 
19714     for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19715     {
19716         NumberType numberType = NumberType(type);
19717         const string typeName = getNumberTypeName(numberType);
19718         de::MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(testCtx, typeName.c_str()));
19719         vector<map<string, string>> testCases;
19720 
19721         createCompositeCases(testCases, rnd, numberType);
19722 
19723         for (vector<map<string, string>>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19724         {
19725             ComputeShaderSpec spec;
19726 
19727             spec.assembly = specializeCompositeInsertShaderTemplate(numberType, *test);
19728 
19729             switch (numberType)
19730             {
19731             case NUMBERTYPE_INT32:
19732             {
19733                 int32_t number = getInt(rnd);
19734                 spec.inputs.push_back(createCompositeBuffer<int32_t>(number));
19735                 spec.outputs.push_back(createCompositeBuffer<int32_t>(number));
19736                 break;
19737             }
19738             case NUMBERTYPE_UINT32:
19739             {
19740                 uint32_t number = rnd.getUint32();
19741                 spec.inputs.push_back(createCompositeBuffer<uint32_t>(number));
19742                 spec.outputs.push_back(createCompositeBuffer<uint32_t>(number));
19743                 break;
19744             }
19745             case NUMBERTYPE_FLOAT32:
19746             {
19747                 float number = rnd.getFloat();
19748                 spec.inputs.push_back(createCompositeBuffer<float>(number));
19749                 spec.outputs.push_back(createCompositeBuffer<float>(number));
19750                 break;
19751             }
19752             default:
19753                 DE_ASSERT(false);
19754             }
19755 
19756             spec.numWorkGroups = IVec3(1, 1, 1);
19757             subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), spec));
19758         }
19759         group->addChild(subGroup.release());
19760     }
19761     return group.release();
19762 }
19763 
// Describes the struct used by the OpInBoundsAccessChain shader template:
// how many components it has and which component index is addressed.
struct AssemblyStructInfo
{
    // numComponents is stored in 'components', componentIndex in 'index'.
    AssemblyStructInfo(const uint32_t numComponents, const uint32_t componentIndex)
        : components(numComponents)
        , index(componentIndex)
    {
    }

    // Number of components inside of the struct
    uint32_t components;
    // Index of the struct component that is accessed
    uint32_t index;
};
19773 
specializeInBoundsShaderTemplate(const NumberType type,const AssemblyStructInfo & structInfo,const map<string,string> & params)19774 const string specializeInBoundsShaderTemplate(const NumberType type, const AssemblyStructInfo &structInfo,
19775                                               const map<string, string> &params)
19776 {
19777     // Create the full index string
19778     string fullIndex = numberToString(structInfo.index) + " " + params.at("indexes");
19779     // Convert it to list of indexes
19780     vector<string> indexes = de::splitString(fullIndex, ' ');
19781 
19782     map<string, string> parameters(params);
19783     parameters["structType"]      = repeatString(" ${compositeType}", structInfo.components);
19784     parameters["structConstruct"] = repeatString(" %instance", structInfo.components);
19785     parameters["insertIndexes"]   = fullIndex;
19786 
19787     // In matrix cases the last two index is the CompositeExtract indexes
19788     const uint32_t extractIndexes = (parameters["type"] == "matrix") ? 2 : 1;
19789 
19790     // Construct the extractIndex
19791     for (vector<string>::const_iterator index = indexes.end() - extractIndexes; index != indexes.end(); ++index)
19792     {
19793         parameters["extractIndexes"] += " " + *index;
19794     }
19795 
19796     // Remove the last 1 or 2 element depends on matrix case or not
19797     indexes.erase(indexes.end() - extractIndexes, indexes.end());
19798 
19799     uint32_t id = 0;
19800     // Generate AccessChain index expressions (except for the last one, because we use ptr to the composite)
19801     for (vector<string>::const_iterator index = indexes.begin(); index != indexes.end(); ++index)
19802     {
19803         string indexId = "%index_" + numberToString(id++);
19804         parameters["accessChainConstDeclaration"] += indexId + "   = OpConstant %u32 " + *index + "\n";
19805         parameters["accessChainIndexes"] += " " + indexId;
19806     }
19807 
19808     parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
19809 
19810     const string customType = getAssemblyTypeName(type);
19811     map<string, string> substCustomType;
19812     substCustomType["customType"]    = customType;
19813     parameters["compositeDecl"]      = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
19814     parameters["compositeType"]      = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
19815     parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
19816     parameters["filler"]             = StringTemplate(parameters.at("filler")).specialize(substCustomType);
19817     parameters["customType"]         = customType;
19818 
19819     const string compositeType = parameters.at("compositeType");
19820     map<string, string> substCompositeType;
19821     substCompositeType["compositeType"] = compositeType;
19822     parameters["structType"]            = StringTemplate(parameters.at("structType")).specialize(substCompositeType);
19823     if (compositeType != "%u32vec3")
19824     {
19825         parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
19826     }
19827 
19828     return StringTemplate("OpCapability Shader\n"
19829                           "OpCapability Matrix\n"
19830                           "OpMemoryModel Logical GLSL450\n"
19831                           "OpEntryPoint GLCompute %main \"main\" %id\n"
19832                           "OpExecutionMode %main LocalSize 1 1 1\n"
19833 
19834                           "OpSource GLSL 430\n"
19835                           "OpName %main           \"main\"\n"
19836                           "OpName %id             \"gl_GlobalInvocationID\"\n"
19837                           // Decorators
19838                           "OpDecorate %id BuiltIn GlobalInvocationId\n"
19839                           "OpDecorate %buf BufferBlock\n"
19840                           "OpDecorate %indata DescriptorSet 0\n"
19841                           "OpDecorate %indata Binding 0\n"
19842                           "OpDecorate %outdata DescriptorSet 0\n"
19843                           "OpDecorate %outdata Binding 1\n"
19844                           "OpDecorate %customarr ArrayStride 4\n"
19845                           "${compositeDecorator}"
19846                           "OpMemberDecorate %buf 0 Offset 0\n"
19847                           // General types
19848                           "%void      = OpTypeVoid\n"
19849                           "%voidf     = OpTypeFunction %void\n"
19850                           "%i32       = OpTypeInt 32 1\n"
19851                           "%u32       = OpTypeInt 32 0\n"
19852                           "%f32       = OpTypeFloat 32\n"
19853                           // Custom types
19854                           "${compositeDecl}"
19855                           // %u32vec3 if not already declared in ${compositeDecl}
19856                           "${u32vec3Decl:opt}"
19857                           "%uvec3ptr  = OpTypePointer Input %u32vec3\n"
19858                           // Inherited from composite
19859                           "%composite_p = OpTypePointer Function ${compositeType}\n"
19860                           "%struct_t  = OpTypeStruct${structType}\n"
19861                           "%struct_p  = OpTypePointer Function %struct_t\n"
19862                           // Constants
19863                           "${filler}"
19864                           "${accessChainConstDeclaration}"
19865                           // Inherited from custom
19866                           "%customptr = OpTypePointer Uniform ${customType}\n"
19867                           "%customarr = OpTypeRuntimeArray ${customType}\n"
19868                           "%buf       = OpTypeStruct %customarr\n"
19869                           "%bufptr    = OpTypePointer Uniform %buf\n"
19870                           "%indata    = OpVariable %bufptr Uniform\n"
19871                           "%outdata   = OpVariable %bufptr Uniform\n"
19872 
19873                           "%id        = OpVariable %uvec3ptr Input\n"
19874                           "%zero      = OpConstant %u32 0\n"
19875                           "%main      = OpFunction %void None %voidf\n"
19876                           "%label     = OpLabel\n"
19877                           "%struct_v  = OpVariable %struct_p Function\n"
19878                           "%idval     = OpLoad %u32vec3 %id\n"
19879                           "%x         = OpCompositeExtract %u32 %idval 0\n"
19880                           // Create the input/output type
19881                           "%inloc     = OpInBoundsAccessChain %customptr %indata %zero %x\n"
19882                           "%outloc    = OpInBoundsAccessChain %customptr %outdata %zero %x\n"
19883                           // Read the input value
19884                           "%inval     = OpLoad ${customType} %inloc\n"
19885                           // Create the composite and fill it
19886                           "${compositeConstruct}"
19887                           // Create the struct and fill it with the composite
19888                           "%struct    = OpCompositeConstruct %struct_t${structConstruct}\n"
19889                           // Insert the value
19890                           "%comp_obj  = OpCompositeInsert %struct_t %inval %struct ${insertIndexes}\n"
19891                           // Store the object
19892                           "             OpStore %struct_v %comp_obj\n"
19893                           // Get deepest possible composite pointer
19894                           "%inner_ptr = OpInBoundsAccessChain %composite_p %struct_v${accessChainIndexes}\n"
19895                           "%read_obj  = OpLoad ${compositeType} %inner_ptr\n"
19896                           // Read back the stored value
19897                           "%read_val  = OpCompositeExtract ${customType} %read_obj${extractIndexes}\n"
19898                           "             OpStore %outloc %read_val\n"
19899                           "             OpReturn\n"
19900                           "             OpFunctionEnd\n")
19901         .specialize(parameters);
19902 }
19903 
createOpInBoundsAccessChainGroup(tcu::TestContext & testCtx)19904 tcu::TestCaseGroup *createOpInBoundsAccessChainGroup(tcu::TestContext &testCtx)
19905 {
19906     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opinboundsaccesschain"));
19907     de::Random rnd(deStringHash(group->getName()));
19908 
19909     for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19910     {
19911         NumberType numberType = NumberType(type);
19912         const string typeName = getNumberTypeName(numberType);
19913         de::MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(testCtx, typeName.c_str()));
19914 
19915         vector<map<string, string>> testCases;
19916         createCompositeCases(testCases, rnd, numberType);
19917 
19918         for (vector<map<string, string>>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19919         {
19920             ComputeShaderSpec spec;
19921 
19922             // Number of components inside of a struct
19923             uint32_t structComponents = rnd.getInt(2, 8);
19924             // Component index value
19925             uint32_t structIndex = rnd.getInt(0, structComponents - 1);
19926             AssemblyStructInfo structInfo(structComponents, structIndex);
19927 
19928             spec.assembly = specializeInBoundsShaderTemplate(numberType, structInfo, *test);
19929 
19930             switch (numberType)
19931             {
19932             case NUMBERTYPE_INT32:
19933             {
19934                 int32_t number = getInt(rnd);
19935                 spec.inputs.push_back(createCompositeBuffer<int32_t>(number));
19936                 spec.outputs.push_back(createCompositeBuffer<int32_t>(number));
19937                 break;
19938             }
19939             case NUMBERTYPE_UINT32:
19940             {
19941                 uint32_t number = rnd.getUint32();
19942                 spec.inputs.push_back(createCompositeBuffer<uint32_t>(number));
19943                 spec.outputs.push_back(createCompositeBuffer<uint32_t>(number));
19944                 break;
19945             }
19946             case NUMBERTYPE_FLOAT32:
19947             {
19948                 float number = rnd.getFloat();
19949                 spec.inputs.push_back(createCompositeBuffer<float>(number));
19950                 spec.outputs.push_back(createCompositeBuffer<float>(number));
19951                 break;
19952             }
19953             default:
19954                 DE_ASSERT(false);
19955             }
19956             spec.numWorkGroups = IVec3(1, 1, 1);
19957             subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), spec));
19958         }
19959         group->addChild(subGroup.release());
19960     }
19961     return group.release();
19962 }
19963 
19964 // If the params missing, uninitialized case
19965 const string specializeDefaultOutputShaderTemplate(const NumberType type,
19966                                                    const map<string, string> &params = map<string, string>())
19967 {
19968     map<string, string> parameters(params);
19969 
19970     parameters["customType"] = getAssemblyTypeName(type);
19971 
19972     // Declare the const value, and use it in the initializer
19973     if (params.find("constValue") != params.end())
19974     {
19975         parameters["variableInitializer"] = " %const";
19976     }
19977     // Uninitialized case
19978     else
19979     {
19980         parameters["commentDecl"] = ";";
19981     }
19982 
19983     return StringTemplate("OpCapability Shader\n"
19984                           "OpMemoryModel Logical GLSL450\n"
19985                           "OpEntryPoint GLCompute %main \"main\" %id\n"
19986                           "OpExecutionMode %main LocalSize 1 1 1\n"
19987                           "OpSource GLSL 430\n"
19988                           "OpName %main           \"main\"\n"
19989                           "OpName %id             \"gl_GlobalInvocationID\"\n"
19990                           // Decorators
19991                           "OpDecorate %id BuiltIn GlobalInvocationId\n"
19992                           "OpDecorate %indata DescriptorSet 0\n"
19993                           "OpDecorate %indata Binding 0\n"
19994                           "OpDecorate %outdata DescriptorSet 0\n"
19995                           "OpDecorate %outdata Binding 1\n"
19996                           "OpDecorate %in_arr ArrayStride 4\n"
19997                           "OpDecorate %in_buf BufferBlock\n"
19998                           "OpMemberDecorate %in_buf 0 Offset 0\n"
19999                           // Base types
20000                           "%void       = OpTypeVoid\n"
20001                           "%voidf      = OpTypeFunction %void\n"
20002                           "%u32        = OpTypeInt 32 0\n"
20003                           "%i32        = OpTypeInt 32 1\n"
20004                           "%f32        = OpTypeFloat 32\n"
20005                           "%uvec3      = OpTypeVector %u32 3\n"
20006                           "%uvec3ptr   = OpTypePointer Input %uvec3\n"
20007                           "${commentDecl:opt}%const      = OpConstant ${customType} ${constValue:opt}\n"
20008                           // Derived types
20009                           "%in_ptr     = OpTypePointer Uniform ${customType}\n"
20010                           "%in_arr     = OpTypeRuntimeArray ${customType}\n"
20011                           "%in_buf     = OpTypeStruct %in_arr\n"
20012                           "%in_bufptr  = OpTypePointer Uniform %in_buf\n"
20013                           "%indata     = OpVariable %in_bufptr Uniform\n"
20014                           "%outdata    = OpVariable %in_bufptr Uniform\n"
20015                           "%id         = OpVariable %uvec3ptr Input\n"
20016                           "%var_ptr    = OpTypePointer Function ${customType}\n"
20017                           // Constants
20018                           "%zero       = OpConstant %i32 0\n"
20019                           // Main function
20020                           "%main       = OpFunction %void None %voidf\n"
20021                           "%label      = OpLabel\n"
20022                           "%out_var    = OpVariable %var_ptr Function${variableInitializer:opt}\n"
20023                           "%idval      = OpLoad %uvec3 %id\n"
20024                           "%x          = OpCompositeExtract %u32 %idval 0\n"
20025                           "%inloc      = OpAccessChain %in_ptr %indata %zero %x\n"
20026                           "%outloc     = OpAccessChain %in_ptr %outdata %zero %x\n"
20027 
20028                           "%outval     = OpLoad ${customType} %out_var\n"
20029                           "              OpStore %outloc %outval\n"
20030                           "              OpReturn\n"
20031                           "              OpFunctionEnd\n")
20032         .specialize(parameters);
20033 }
20034 
compareFloats(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)20035 bool compareFloats(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
20036                    const std::vector<Resource> &expectedOutputs, TestLog &log)
20037 {
20038     DE_ASSERT(outputAllocs.size() != 0);
20039     DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
20040 
20041     // Use custom epsilon because of the float->string conversion
20042     const float epsilon = 0.00001f;
20043 
20044     for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
20045     {
20046         vector<uint8_t> expectedBytes;
20047         float expected;
20048         float actual;
20049 
20050         expectedOutputs[outputNdx].getBytes(expectedBytes);
20051         memcpy(&expected, &expectedBytes.front(), expectedBytes.size());
20052         memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size());
20053 
20054         // Test with epsilon
20055         if (fabs(expected - actual) > epsilon)
20056         {
20057             log << TestLog::Message << "Error: The actual and expected values not matching."
20058                 << " Expected: " << expected << " Actual: " << actual << " Epsilon: " << epsilon << TestLog::EndMessage;
20059             return false;
20060         }
20061     }
20062     return true;
20063 }
20064 
20065 // Checks if the driver crash with uninitialized cases
passthruVerify(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)20066 bool passthruVerify(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
20067                     const std::vector<Resource> &expectedOutputs, TestLog &)
20068 {
20069     DE_ASSERT(outputAllocs.size() != 0);
20070     DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
20071 
20072     // Copy and discard the result.
20073     for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
20074     {
20075         vector<uint8_t> expectedBytes;
20076         expectedOutputs[outputNdx].getBytes(expectedBytes);
20077 
20078         const size_t width = expectedBytes.size();
20079         vector<char> data(width);
20080 
20081         memcpy(&data[0], outputAllocs[outputNdx]->getHostPtr(), width);
20082     }
20083     return true;
20084 }
20085 
createShaderDefaultOutputGroup(tcu::TestContext & testCtx)20086 tcu::TestCaseGroup *createShaderDefaultOutputGroup(tcu::TestContext &testCtx)
20087 {
20088     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "shader_default_output"));
20089     de::Random rnd(deStringHash(group->getName()));
20090 
20091     for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
20092     {
20093         NumberType numberType = NumberType(type);
20094         const string typeName = getNumberTypeName(numberType);
20095         de::MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(testCtx, typeName.c_str()));
20096 
20097         // 2 similar subcases (initialized and uninitialized)
20098         for (int subCase = 0; subCase < 2; ++subCase)
20099         {
20100             ComputeShaderSpec spec;
20101             spec.numWorkGroups = IVec3(1, 1, 1);
20102 
20103             map<string, string> params;
20104 
20105             switch (numberType)
20106             {
20107             case NUMBERTYPE_INT32:
20108             {
20109                 int32_t number = getInt(rnd);
20110                 spec.inputs.push_back(createCompositeBuffer<int32_t>(number));
20111                 spec.outputs.push_back(createCompositeBuffer<int32_t>(number));
20112                 params["constValue"] = numberToString(number);
20113                 break;
20114             }
20115             case NUMBERTYPE_UINT32:
20116             {
20117                 uint32_t number = rnd.getUint32();
20118                 spec.inputs.push_back(createCompositeBuffer<uint32_t>(number));
20119                 spec.outputs.push_back(createCompositeBuffer<uint32_t>(number));
20120                 params["constValue"] = numberToString(number);
20121                 break;
20122             }
20123             case NUMBERTYPE_FLOAT32:
20124             {
20125                 float number = rnd.getFloat();
20126                 spec.inputs.push_back(createCompositeBuffer<float>(number));
20127                 spec.outputs.push_back(createCompositeBuffer<float>(number));
20128                 spec.verifyIO        = &compareFloats;
20129                 params["constValue"] = numberToString(number);
20130                 break;
20131             }
20132             default:
20133                 DE_ASSERT(false);
20134             }
20135 
20136             // Initialized subcase
20137             if (!subCase)
20138             {
20139                 spec.assembly = specializeDefaultOutputShaderTemplate(numberType, params);
20140                 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "initialized", spec));
20141             }
20142             // Uninitialized subcase
20143             else
20144             {
20145                 spec.assembly = specializeDefaultOutputShaderTemplate(numberType);
20146                 spec.verifyIO = &passthruVerify;
20147                 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "uninitialized", spec));
20148             }
20149         }
20150         group->addChild(subGroup.release());
20151     }
20152     return group.release();
20153 }
20154 
createOpNopTests(tcu::TestContext & testCtx)20155 tcu::TestCaseGroup *createOpNopTests(tcu::TestContext &testCtx)
20156 {
20157     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "opnop"));
20158     RGBA defaultColors[4];
20159     map<string, string> opNopFragments;
20160 
20161     getDefaultColors(defaultColors);
20162 
20163     opNopFragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20164                                 "%param1 = OpFunctionParameter %v4f32\n"
20165                                 "%label_testfun = OpLabel\n"
20166                                 "OpNop\n"
20167                                 "OpNop\n"
20168                                 "OpNop\n"
20169                                 "OpNop\n"
20170                                 "OpNop\n"
20171                                 "OpNop\n"
20172                                 "OpNop\n"
20173                                 "OpNop\n"
20174                                 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20175                                 "%b = OpFAdd %f32 %a %a\n"
20176                                 "OpNop\n"
20177                                 "%c = OpFSub %f32 %b %a\n"
20178                                 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
20179                                 "OpNop\n"
20180                                 "OpNop\n"
20181                                 "OpReturnValue %ret\n"
20182                                 "OpFunctionEnd\n";
20183 
20184     createTestsForAllStages("opnop", defaultColors, defaultColors, opNopFragments, testGroup.get());
20185 
20186     return testGroup.release();
20187 }
20188 
createOpNameTests(tcu::TestContext & testCtx)20189 tcu::TestCaseGroup *createOpNameTests(tcu::TestContext &testCtx)
20190 {
20191     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "opname"));
20192     RGBA defaultColors[4];
20193     map<string, string> opNameFragments;
20194 
20195     getDefaultColors(defaultColors);
20196 
20197     opNameFragments["testfun"] = "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20198                                  "%param1     = OpFunctionParameter %v4f32\n"
20199                                  "%label_func = OpLabel\n"
20200                                  "%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20201                                  "%b          = OpFAdd %f32 %a %a\n"
20202                                  "%c          = OpFSub %f32 %b %a\n"
20203                                  "%ret        = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
20204                                  "OpReturnValue %ret\n"
20205                                  "OpFunctionEnd\n";
20206 
20207     opNameFragments["debug"] = "OpName %BP_main \"not_main\"";
20208 
20209     createTestsForAllStages("opname", defaultColors, defaultColors, opNameFragments, testGroup.get());
20210 
20211     return testGroup.release();
20212 }
20213 
createFloat16Tests(tcu::TestContext & testCtx)20214 tcu::TestCaseGroup *createFloat16Tests(tcu::TestContext &testCtx)
20215 {
20216     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "float16"));
20217 
20218     testGroup->addChild(createOpConstantFloat16Tests(testCtx));
20219     testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITH_NAN));
20220     testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITHOUT_NAN));
20221     testGroup->addChild(createFloat16FuncSet<GraphicsResources>(testCtx));
20222     testGroup->addChild(createFloat16VectorExtractSet<GraphicsResources>(testCtx));
20223     testGroup->addChild(createFloat16VectorInsertSet<GraphicsResources>(testCtx));
20224     testGroup->addChild(createFloat16VectorShuffleSet<GraphicsResources>(testCtx));
20225     testGroup->addChild(createFloat16CompositeConstructSet<GraphicsResources>(testCtx));
20226     testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeExtract"));
20227     testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeInsert"));
20228     testGroup->addChild(createFloat16ArithmeticSet<GraphicsResources>(testCtx));
20229     testGroup->addChild(createFloat16ArithmeticSet<1, GraphicsResources>(testCtx));
20230     testGroup->addChild(createFloat16ArithmeticSet<2, GraphicsResources>(testCtx));
20231     testGroup->addChild(createFloat16ArithmeticSet<3, GraphicsResources>(testCtx));
20232     testGroup->addChild(createFloat16ArithmeticSet<4, GraphicsResources>(testCtx));
20233 
20234     return testGroup.release();
20235 }
20236 
createFloat32Tests(tcu::TestContext & testCtx)20237 tcu::TestCaseGroup *createFloat32Tests(tcu::TestContext &testCtx)
20238 {
20239     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "float32"));
20240 
20241     testGroup->addChild(createFloat32ComparisonGraphicsSet<1>(testCtx));
20242     testGroup->addChild(createFloat32ComparisonGraphicsSet<2>(testCtx));
20243     testGroup->addChild(createFloat32ComparisonGraphicsSet<3>(testCtx));
20244     testGroup->addChild(createFloat32ComparisonGraphicsSet<4>(testCtx));
20245 
20246     return testGroup.release();
20247 }
20248 
createFloat16Group(tcu::TestContext & testCtx)20249 tcu::TestCaseGroup *createFloat16Group(tcu::TestContext &testCtx)
20250 {
20251     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "float16"));
20252 
20253     testGroup->addChild(createFloat16OpConstantCompositeGroup(testCtx));
20254     testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITH_NAN));
20255     testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITHOUT_NAN));
20256     testGroup->addChild(createFloat16FuncSet<ComputeShaderSpec>(testCtx));
20257     testGroup->addChild(createFloat16VectorExtractSet<ComputeShaderSpec>(testCtx));
20258     testGroup->addChild(createFloat16VectorInsertSet<ComputeShaderSpec>(testCtx));
20259     testGroup->addChild(createFloat16VectorShuffleSet<ComputeShaderSpec>(testCtx));
20260     testGroup->addChild(createFloat16CompositeConstructSet<ComputeShaderSpec>(testCtx));
20261     testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeExtract"));
20262     testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeInsert"));
20263     testGroup->addChild(createFloat16ArithmeticSet<ComputeShaderSpec>(testCtx));
20264     testGroup->addChild(createFloat16ArithmeticSet<1, ComputeShaderSpec>(testCtx));
20265     testGroup->addChild(createFloat16ArithmeticSet<2, ComputeShaderSpec>(testCtx));
20266     testGroup->addChild(createFloat16ArithmeticSet<3, ComputeShaderSpec>(testCtx));
20267     testGroup->addChild(createFloat16ArithmeticSet<4, ComputeShaderSpec>(testCtx));
20268 
20269     return testGroup.release();
20270 }
20271 
createFloat32Group(tcu::TestContext & testCtx)20272 tcu::TestCaseGroup *createFloat32Group(tcu::TestContext &testCtx)
20273 {
20274     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "float32"));
20275 
20276     testGroup->addChild(createFloat32ComparisonComputeSet<1>(testCtx));
20277     testGroup->addChild(createFloat32ComparisonComputeSet<2>(testCtx));
20278     testGroup->addChild(createFloat32ComparisonComputeSet<3>(testCtx));
20279     testGroup->addChild(createFloat32ComparisonComputeSet<4>(testCtx));
20280 
20281     return testGroup.release();
20282 }
20283 
createBoolMixedBitSizeGroup(tcu::TestContext & testCtx)20284 tcu::TestCaseGroup *createBoolMixedBitSizeGroup(tcu::TestContext &testCtx)
20285 {
20286     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "mixed_bitsize"));
20287 
20288     de::Random rnd(deStringHash(group->getName()));
20289     const int numElements = 100;
20290     vector<float> inputData(numElements, 0);
20291     vector<float> outputData(numElements, 0);
20292     fillRandomScalars(rnd, 0.0f, 100.0f, &inputData[0], 100);
20293 
20294     const StringTemplate shaderTemplate("${CAPS}\n"
20295                                         "OpMemoryModel Logical GLSL450\n"
20296                                         "OpEntryPoint GLCompute %main \"main\" %id\n"
20297                                         "OpExecutionMode %main LocalSize 1 1 1\n"
20298                                         "OpSource GLSL 430\n"
20299                                         "OpName %main           \"main\"\n"
20300                                         "OpName %id             \"gl_GlobalInvocationID\"\n"
20301 
20302                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
20303 
20304                                         + string(getComputeAsmInputOutputBufferTraits()) +
20305                                         string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
20306 
20307                                         "%id        = OpVariable %uvec3ptr Input\n"
20308                                         "${CONST}\n"
20309                                         "%main      = OpFunction %void None %voidf\n"
20310                                         "%label     = OpLabel\n"
20311                                         "%idval     = OpLoad %uvec3 %id\n"
20312                                         "%x         = OpCompositeExtract %u32 %idval 0\n"
20313                                         "%inloc     = OpAccessChain %f32ptr %indata %c0i32 %x\n"
20314 
20315                                         "${TEST}\n"
20316 
20317                                         "%outloc    = OpAccessChain %f32ptr %outdata %c0i32 %x\n"
20318                                         "             OpStore %outloc %res\n"
20319                                         "             OpReturn\n"
20320                                         "             OpFunctionEnd\n");
20321 
20322     // Each test case produces 4 boolean values, and we want each of these values
20323     // to come froma different combination of the available bit-sizes, so compute
20324     // all possible combinations here.
20325     vector<uint32_t> widths;
20326     widths.push_back(32);
20327     widths.push_back(16);
20328     widths.push_back(8);
20329 
20330     vector<IVec4> cases;
20331     for (size_t width0 = 0; width0 < widths.size(); width0++)
20332     {
20333         for (size_t width1 = 0; width1 < widths.size(); width1++)
20334         {
20335             for (size_t width2 = 0; width2 < widths.size(); width2++)
20336             {
20337                 for (size_t width3 = 0; width3 < widths.size(); width3++)
20338                 {
20339                     cases.push_back(IVec4(widths[width0], widths[width1], widths[width2], widths[width3]));
20340                 }
20341             }
20342         }
20343     }
20344 
20345     for (size_t caseNdx = 0; caseNdx < cases.size(); caseNdx++)
20346     {
20347         /// Skip cases where all bitsizes are the same, we are only interested in testing booleans produced from instructions with different native bit-sizes
20348         if (cases[caseNdx][0] == cases[caseNdx][1] && cases[caseNdx][0] == cases[caseNdx][2] &&
20349             cases[caseNdx][0] == cases[caseNdx][3])
20350             continue;
20351 
20352         map<string, string> specializations;
20353         ComputeShaderSpec spec;
20354 
20355         // Inject appropriate capabilities and reference constants depending
20356         // on the bit-sizes required by this test case
20357         bool hasFloat32 =
20358             cases[caseNdx][0] == 32 || cases[caseNdx][1] == 32 || cases[caseNdx][2] == 32 || cases[caseNdx][3] == 32;
20359         bool hasFloat16 =
20360             cases[caseNdx][0] == 16 || cases[caseNdx][1] == 16 || cases[caseNdx][2] == 16 || cases[caseNdx][3] == 16;
20361         bool hasInt8 =
20362             cases[caseNdx][0] == 8 || cases[caseNdx][1] == 8 || cases[caseNdx][2] == 8 || cases[caseNdx][3] == 8;
20363 
20364         string capsStr  = "OpCapability Shader\n";
20365         string constStr = "%c0i32     = OpConstant %i32 0\n"
20366                           "%c1f32     = OpConstant %f32 1.0\n"
20367                           "%c0f32     = OpConstant %f32 0.0\n";
20368 
20369         if (hasFloat32)
20370         {
20371             constStr += "%c10f32    = OpConstant %f32 10.0\n"
20372                         "%c25f32    = OpConstant %f32 25.0\n"
20373                         "%c50f32    = OpConstant %f32 50.0\n"
20374                         "%c90f32    = OpConstant %f32 90.0\n";
20375         }
20376 
20377         if (hasFloat16)
20378         {
20379             capsStr += "OpCapability Float16\n";
20380             constStr += "%f16       = OpTypeFloat 16\n"
20381                         "%c10f16    = OpConstant %f16 10.0\n"
20382                         "%c25f16    = OpConstant %f16 25.0\n"
20383                         "%c50f16    = OpConstant %f16 50.0\n"
20384                         "%c90f16    = OpConstant %f16 90.0\n";
20385         }
20386 
20387         if (hasInt8)
20388         {
20389             capsStr += "OpCapability Int8\n";
20390             constStr += "%i8        = OpTypeInt 8 1\n"
20391                         "%c10i8     = OpConstant %i8 10\n"
20392                         "%c25i8     = OpConstant %i8 25\n"
20393                         "%c50i8     = OpConstant %i8 50\n"
20394                         "%c90i8     = OpConstant %i8 90\n";
20395         }
20396 
20397         // Each invocation reads a different float32 value as input. Depending on
20398         // the bit-sizes required by the particular test case, we also produce
20399         // float16 and/or and int8 values by converting from the 32-bit float.
20400         string testStr = "";
20401         testStr += "%inval32   = OpLoad %f32 %inloc\n";
20402         if (hasFloat16)
20403             testStr += "%inval16   = OpFConvert %f16 %inval32\n";
20404         if (hasInt8)
20405             testStr += "%inval8    = OpConvertFToS %i8 %inval32\n";
20406 
20407         // Because conversions from Float to Int round towards 0 we want our "greater" comparisons to be >=,
20408         // that way a float32/float16 comparison such as 50.6f >= 50.0f will preserve its result
20409         // when converted to int8, since FtoS(50.6f) results in 50. For "less" comparisons, it is the
20410         // other way around, so in this case we want < instead of <=.
20411         if (cases[caseNdx][0] == 32)
20412             testStr += "%cmp1      = OpFOrdGreaterThanEqual %bool %inval32 %c25f32\n";
20413         else if (cases[caseNdx][0] == 16)
20414             testStr += "%cmp1      = OpFOrdGreaterThanEqual %bool %inval16 %c25f16\n";
20415         else
20416             testStr += "%cmp1      = OpSGreaterThanEqual %bool %inval8 %c25i8\n";
20417 
20418         if (cases[caseNdx][1] == 32)
20419             testStr += "%cmp2      = OpFOrdLessThan %bool %inval32 %c50f32\n";
20420         else if (cases[caseNdx][1] == 16)
20421             testStr += "%cmp2      = OpFOrdLessThan %bool %inval16 %c50f16\n";
20422         else
20423             testStr += "%cmp2      = OpSLessThan %bool %inval8 %c50i8\n";
20424 
20425         if (cases[caseNdx][2] == 32)
20426             testStr += "%cmp3      = OpFOrdLessThan %bool %inval32 %c10f32\n";
20427         else if (cases[caseNdx][2] == 16)
20428             testStr += "%cmp3      = OpFOrdLessThan %bool %inval16 %c10f16\n";
20429         else
20430             testStr += "%cmp3      = OpSLessThan %bool %inval8 %c10i8\n";
20431 
20432         if (cases[caseNdx][3] == 32)
20433             testStr += "%cmp4      = OpFOrdGreaterThanEqual %bool %inval32 %c90f32\n";
20434         else if (cases[caseNdx][3] == 16)
20435             testStr += "%cmp4      = OpFOrdGreaterThanEqual %bool %inval16 %c90f16\n";
20436         else
20437             testStr += "%cmp4      = OpSGreaterThanEqual %bool %inval8 %c90i8\n";
20438 
20439         testStr += "%and1      = OpLogicalAnd %bool %cmp1 %cmp2\n";
20440         testStr += "%or1       = OpLogicalOr %bool %cmp3 %cmp4\n";
20441         testStr += "%or2       = OpLogicalOr %bool %and1 %or1\n";
20442         testStr += "%not1      = OpLogicalNot %bool %or2\n";
20443         testStr += "%res       = OpSelect %f32 %not1 %c1f32 %c0f32\n";
20444 
20445         specializations["CAPS"]  = capsStr;
20446         specializations["CONST"] = constStr;
20447         specializations["TEST"]  = testStr;
20448 
20449         // Compute expected result by evaluating the boolean expression computed in the shader for each input value
20450         for (size_t ndx = 0; ndx < numElements; ++ndx)
20451             outputData[ndx] = !((inputData[ndx] >= 25.0f && inputData[ndx] < 50.0f) ||
20452                                 (inputData[ndx] < 10.0f || inputData[ndx] >= 90.0f));
20453 
20454         spec.assembly = shaderTemplate.specialize(specializations);
20455         spec.inputs.push_back(BufferSp(new Float32Buffer(inputData)));
20456         spec.outputs.push_back(BufferSp(new Float32Buffer(outputData)));
20457         spec.numWorkGroups = IVec3(numElements, 1, 1);
20458         if (hasFloat16)
20459             spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
20460         if (hasInt8)
20461             spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
20462         spec.extensions.push_back("VK_KHR_shader_float16_int8");
20463 
20464         string testName = "b" + de::toString(cases[caseNdx][0]) + "b" + de::toString(cases[caseNdx][1]) + "b" +
20465                           de::toString(cases[caseNdx][2]) + "b" + de::toString(cases[caseNdx][3]);
20466         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
20467     }
20468 
20469     return group.release();
20470 }
20471 
createBoolGroup(tcu::TestContext & testCtx)20472 tcu::TestCaseGroup *createBoolGroup(tcu::TestContext &testCtx)
20473 {
20474     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "bool"));
20475 
20476     testGroup->addChild(createBoolMixedBitSizeGroup(testCtx));
20477 
20478     return testGroup.release();
20479 }
20480 
createOpNameAbuseTests(tcu::TestContext & testCtx)20481 tcu::TestCaseGroup *createOpNameAbuseTests(tcu::TestContext &testCtx)
20482 {
20483     de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opname_abuse"));
20484     vector<CaseParameter> abuseCases;
20485     RGBA defaultColors[4];
20486     map<string, string> opNameFragments;
20487 
20488     getOpNameAbuseCases(abuseCases);
20489     getDefaultColors(defaultColors);
20490 
20491     opNameFragments["testfun"] = "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20492                                  "%param1     = OpFunctionParameter %v4f32\n"
20493                                  "%label_func = OpLabel\n"
20494                                  "%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20495                                  "%b          = OpFAdd %f32 %a %a\n"
20496                                  "%c          = OpFSub %f32 %b %a\n"
20497                                  "%ret        = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
20498                                  "OpReturnValue %ret\n"
20499                                  "OpFunctionEnd\n";
20500 
20501     for (unsigned int i = 0; i < abuseCases.size(); i++)
20502     {
20503         string casename;
20504         casename = string("main") + abuseCases[i].name;
20505 
20506         opNameFragments["debug"] = "OpName %BP_main \"" + abuseCases[i].param + "\"";
20507 
20508         createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20509     }
20510 
20511     for (unsigned int i = 0; i < abuseCases.size(); i++)
20512     {
20513         string casename;
20514         casename = string("b") + abuseCases[i].name;
20515 
20516         opNameFragments["debug"] = "OpName %b \"" + abuseCases[i].param + "\"";
20517 
20518         createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20519     }
20520 
20521     {
20522         opNameFragments["debug"] = "OpName %test_code \"name1\"\n"
20523                                    "OpName %param1    \"name2\"\n"
20524                                    "OpName %a         \"name3\"\n"
20525                                    "OpName %b         \"name4\"\n"
20526                                    "OpName %c         \"name5\"\n"
20527                                    "OpName %ret       \"name6\"\n";
20528 
20529         createTestsForAllStages("everything_named", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20530     }
20531 
20532     {
20533         opNameFragments["debug"] = "OpName %test_code \"the_same\"\n"
20534                                    "OpName %param1    \"the_same\"\n"
20535                                    "OpName %a         \"the_same\"\n"
20536                                    "OpName %b         \"the_same\"\n"
20537                                    "OpName %c         \"the_same\"\n"
20538                                    "OpName %ret       \"the_same\"\n";
20539 
20540         createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opNameFragments,
20541                                 abuseGroup.get());
20542     }
20543 
20544     {
20545         opNameFragments["debug"] = "OpName %BP_main \"to_be\"\n"
20546                                    "OpName %BP_main \"or_not\"\n"
20547                                    "OpName %BP_main \"to_be\"\n";
20548 
20549         createTestsForAllStages("main_has_multiple_names", defaultColors, defaultColors, opNameFragments,
20550                                 abuseGroup.get());
20551     }
20552 
20553     {
20554         opNameFragments["debug"] = "OpName %b \"to_be\"\n"
20555                                    "OpName %b \"or_not\"\n"
20556                                    "OpName %b \"to_be\"\n";
20557 
20558         createTestsForAllStages("b_has_multiple_names", defaultColors, defaultColors, opNameFragments,
20559                                 abuseGroup.get());
20560     }
20561 
20562     return abuseGroup.release();
20563 }
20564 
createOpMemberNameAbuseTests(tcu::TestContext & testCtx)20565 tcu::TestCaseGroup *createOpMemberNameAbuseTests(tcu::TestContext &testCtx)
20566 {
20567     de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opmembername_abuse"));
20568     vector<CaseParameter> abuseCases;
20569     RGBA defaultColors[4];
20570     map<string, string> opMemberNameFragments;
20571 
20572     getOpNameAbuseCases(abuseCases);
20573     getDefaultColors(defaultColors);
20574 
20575     opMemberNameFragments["pre_main"] = "%f3str = OpTypeStruct %f32 %f32 %f32\n";
20576 
20577     opMemberNameFragments["testfun"] = "%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20578                                        "%param1     = OpFunctionParameter %v4f32\n"
20579                                        "%label_func = OpLabel\n"
20580                                        "%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20581                                        "%b          = OpFAdd %f32 %a %a\n"
20582                                        "%c          = OpFSub %f32 %b %a\n"
20583                                        "%cstr       = OpCompositeConstruct %f3str %c %c %c\n"
20584                                        "%d          = OpCompositeExtract %f32 %cstr 0\n"
20585                                        "%ret        = OpVectorInsertDynamic %v4f32 %param1 %d %c_i32_0\n"
20586                                        "OpReturnValue %ret\n"
20587                                        "OpFunctionEnd\n";
20588 
20589     for (unsigned int i = 0; i < abuseCases.size(); i++)
20590     {
20591         string casename;
20592         casename = string("f3str_x") + abuseCases[i].name;
20593 
20594         opMemberNameFragments["debug"] = "OpMemberName %f3str 0 \"" + abuseCases[i].param + "\"";
20595 
20596         createTestsForAllStages(casename, defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20597     }
20598 
20599     {
20600         opMemberNameFragments["debug"] = "OpMemberName %f3str 0 \"name1\"\n"
20601                                          "OpMemberName %f3str 1 \"name2\"\n"
20602                                          "OpMemberName %f3str 2 \"name3\"\n";
20603 
20604         createTestsForAllStages("everything_named", defaultColors, defaultColors, opMemberNameFragments,
20605                                 abuseGroup.get());
20606     }
20607 
20608     {
20609         opMemberNameFragments["debug"] = "OpMemberName %f3str 0 \"the_same\"\n"
20610                                          "OpMemberName %f3str 1 \"the_same\"\n"
20611                                          "OpMemberName %f3str 2 \"the_same\"\n";
20612 
20613         createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opMemberNameFragments,
20614                                 abuseGroup.get());
20615     }
20616 
20617     {
20618         opMemberNameFragments["debug"] = "OpMemberName %f3str 0 \"to_be\"\n"
20619                                          "OpMemberName %f3str 1 \"or_not\"\n"
20620                                          "OpMemberName %f3str 0 \"to_be\"\n"
20621                                          "OpMemberName %f3str 2 \"makes_no\"\n"
20622                                          "OpMemberName %f3str 0 \"difference\"\n"
20623                                          "OpMemberName %f3str 0 \"to_me\"\n";
20624 
20625         createTestsForAllStages("f3str_x_has_multiple_names", defaultColors, defaultColors, opMemberNameFragments,
20626                                 abuseGroup.get());
20627     }
20628 
20629     return abuseGroup.release();
20630 }
20631 
getSparseIdsAbuseData(const uint32_t numDataPoints,const uint32_t seed)20632 vector<uint32_t> getSparseIdsAbuseData(const uint32_t numDataPoints, const uint32_t seed)
20633 {
20634     vector<uint32_t> result;
20635     de::Random rnd(seed);
20636 
20637     result.reserve(numDataPoints);
20638 
20639     for (uint32_t dataPointNdx = 0; dataPointNdx < numDataPoints; ++dataPointNdx)
20640         result.push_back(rnd.getUint32());
20641 
20642     return result;
20643 }
20644 
// Returns the element-wise sum inData1[i] + inData2[i] (with the usual unsigned
// 32-bit wrap-around) for every index of inData1.
// inData2 must hold at least as many elements as inData1.
vector<uint32_t> getSparseIdsAbuseResults(const vector<uint32_t> &inData1, const vector<uint32_t> &inData2)
{
    const size_t count = inData1.size();
    vector<uint32_t> sums(count);

    for (size_t ndx = 0; ndx != count; ++ndx)
        sums[ndx] = inData1[ndx] + inData2[ndx];

    return sums;
}
20656 
20657 template <class SpecResource>
createSparseIdsAbuseTest(tcu::TestContext & testCtx,de::MovePtr<tcu::TestCaseGroup> & testGroup)20658 void createSparseIdsAbuseTest(tcu::TestContext &testCtx, de::MovePtr<tcu::TestCaseGroup> &testGroup)
20659 {
20660     const uint32_t numDataPoints = 16;
20661     const std::string testName("sparse_ids");
20662     const uint32_t seed(deStringHash(testName.c_str()));
20663     const vector<uint32_t> inData1(getSparseIdsAbuseData(numDataPoints, seed + 1));
20664     const vector<uint32_t> inData2(getSparseIdsAbuseData(numDataPoints, seed + 2));
20665     const vector<uint32_t> outData(getSparseIdsAbuseResults(inData1, inData2));
20666     const StringTemplate preMain("%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20667                                  "   %up_u32 = OpTypePointer Uniform %u32\n"
20668                                  "   %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20669                                  "   %SSBO32 = OpTypeStruct %ra_u32\n"
20670                                  "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20671                                  "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20672                                  "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20673                                  " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n");
20674     const StringTemplate decoration("OpDecorate %ra_u32 ArrayStride 4\n"
20675                                     "OpMemberDecorate %SSBO32 0 Offset 0\n"
20676                                     "OpDecorate %SSBO32 BufferBlock\n"
20677                                     "OpDecorate %ssbo_src0 DescriptorSet 0\n"
20678                                     "OpDecorate %ssbo_src0 Binding 0\n"
20679                                     "OpDecorate %ssbo_src1 DescriptorSet 0\n"
20680                                     "OpDecorate %ssbo_src1 Binding 1\n"
20681                                     "OpDecorate %ssbo_dst DescriptorSet 0\n"
20682                                     "OpDecorate %ssbo_dst Binding 2\n");
20683     const StringTemplate testFun(
20684         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20685         "    %param = OpFunctionParameter %v4f32\n"
20686 
20687         "    %entry = OpLabel\n"
20688         "        %i = OpVariable %fp_i32 Function\n"
20689         "             OpStore %i %c_i32_0\n"
20690         "             OpBranch %loop\n"
20691 
20692         "     %loop = OpLabel\n"
20693         "    %i_cmp = OpLoad %i32 %i\n"
20694         "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20695         "             OpLoopMerge %merge %next None\n"
20696         "             OpBranchConditional %lt %write %merge\n"
20697 
20698         "    %write = OpLabel\n"
20699         "      %ndx = OpLoad %i32 %i\n"
20700 
20701         "      %127 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20702         "      %128 = OpLoad %u32 %127\n"
20703 
20704         // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20705         "  %4194000 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20706         "  %4194001 = OpLoad %u32 %4194000\n"
20707 
20708         "  %2097151 = OpIAdd %u32 %128 %4194001\n"
20709         "  %2097152 = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20710         "             OpStore %2097152 %2097151\n"
20711         "             OpBranch %next\n"
20712 
20713         "     %next = OpLabel\n"
20714         "    %i_cur = OpLoad %i32 %i\n"
20715         "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20716         "             OpStore %i %i_new\n"
20717         "             OpBranch %loop\n"
20718 
20719         "    %merge = OpLabel\n"
20720         "             OpReturnValue %param\n"
20721 
20722         "             OpFunctionEnd\n");
20723     SpecResource specResource;
20724     map<string, string> specs;
20725     VulkanFeatures features;
20726     map<string, string> fragments;
20727     vector<string> extensions;
20728 
20729     specs["num_data_points"] = de::toString(numDataPoints);
20730 
20731     fragments["decoration"] = decoration.specialize(specs);
20732     fragments["pre_main"]   = preMain.specialize(specs);
20733     fragments["testfun"]    = testFun.specialize(specs);
20734 
20735     specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20736     specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20737     specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20738 
20739     if (std::is_base_of<GraphicsResources, SpecResource>::value)
20740     {
20741         features.coreFeatures.vertexPipelineStoresAndAtomics = true;
20742         features.coreFeatures.fragmentStoresAndAtomics       = true;
20743     }
20744 
20745     finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
20746                           IVec3(1, 1, 1));
20747 }
20748 
getLotsIdsAbuseData(const uint32_t numDataPoints,const uint32_t seed)20749 vector<uint32_t> getLotsIdsAbuseData(const uint32_t numDataPoints, const uint32_t seed)
20750 {
20751     vector<uint32_t> result;
20752     de::Random rnd(seed);
20753 
20754     result.reserve(numDataPoints);
20755 
20756     // Fixed value
20757     result.push_back(1u);
20758 
20759     // Random values
20760     for (uint32_t dataPointNdx = 1; dataPointNdx < numDataPoints; ++dataPointNdx)
20761         result.push_back(rnd.getUint8());
20762 
20763     return result;
20764 }
20765 
// Returns inData1[i] + count * inData2[i] (unsigned 32-bit arithmetic) for
// every index of inData1.
// inData2 must hold at least as many elements as inData1.
vector<uint32_t> getLotsIdsAbuseResults(const vector<uint32_t> &inData1, const vector<uint32_t> &inData2,
                                        const uint32_t count)
{
    const size_t numValues = inData1.size();
    vector<uint32_t> accumulated(numValues);

    for (size_t ndx = 0; ndx != numValues; ++ndx)
    {
        const uint32_t base = inData1[ndx];
        const uint32_t step = inData2[ndx];

        accumulated[ndx] = base + count * step;
    }

    return accumulated;
}
20778 
20779 template <class SpecResource>
createLotsIdsAbuseTest(tcu::TestContext & testCtx,de::MovePtr<tcu::TestCaseGroup> & testGroup)20780 void createLotsIdsAbuseTest(tcu::TestContext &testCtx, de::MovePtr<tcu::TestCaseGroup> &testGroup)
20781 {
20782     const uint32_t numDataPoints = 16;
20783     const uint32_t firstNdx      = 100u;
20784     const uint32_t sequenceCount = 10000u;
20785     const std::string testName("lots_ids");
20786     const uint32_t seed(deStringHash(testName.c_str()));
20787     const vector<uint32_t> inData1(getLotsIdsAbuseData(numDataPoints, seed + 1));
20788     const vector<uint32_t> inData2(getLotsIdsAbuseData(numDataPoints, seed + 2));
20789     const vector<uint32_t> outData(getLotsIdsAbuseResults(inData1, inData2, sequenceCount));
20790     const StringTemplate preMain("%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20791                                  "   %up_u32 = OpTypePointer Uniform %u32\n"
20792                                  "   %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20793                                  "   %SSBO32 = OpTypeStruct %ra_u32\n"
20794                                  "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20795                                  "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20796                                  "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20797                                  " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n");
20798     const StringTemplate decoration("OpDecorate %ra_u32 ArrayStride 4\n"
20799                                     "OpMemberDecorate %SSBO32 0 Offset 0\n"
20800                                     "OpDecorate %SSBO32 BufferBlock\n"
20801                                     "OpDecorate %ssbo_src0 DescriptorSet 0\n"
20802                                     "OpDecorate %ssbo_src0 Binding 0\n"
20803                                     "OpDecorate %ssbo_src1 DescriptorSet 0\n"
20804                                     "OpDecorate %ssbo_src1 Binding 1\n"
20805                                     "OpDecorate %ssbo_dst DescriptorSet 0\n"
20806                                     "OpDecorate %ssbo_dst Binding 2\n");
20807     const StringTemplate testFun(
20808         "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20809         "    %param = OpFunctionParameter %v4f32\n"
20810 
20811         "    %entry = OpLabel\n"
20812         "        %i = OpVariable %fp_i32 Function\n"
20813         "             OpStore %i %c_i32_0\n"
20814         "             OpBranch %loop\n"
20815 
20816         "     %loop = OpLabel\n"
20817         "    %i_cmp = OpLoad %i32 %i\n"
20818         "       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20819         "             OpLoopMerge %merge %next None\n"
20820         "             OpBranchConditional %lt %write %merge\n"
20821 
20822         "    %write = OpLabel\n"
20823         "      %ndx = OpLoad %i32 %i\n"
20824 
20825         "       %90 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20826         "       %91 = OpLoad %u32 %90\n"
20827 
20828         "       %98 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20829         "       %${zeroth_id} = OpLoad %u32 %98\n"
20830 
20831         "${seq}\n"
20832 
20833         // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20834         "      %dst = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20835         "             OpStore %dst %${last_id}\n"
20836         "             OpBranch %next\n"
20837 
20838         "     %next = OpLabel\n"
20839         "    %i_cur = OpLoad %i32 %i\n"
20840         "    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20841         "             OpStore %i %i_new\n"
20842         "             OpBranch %loop\n"
20843 
20844         "    %merge = OpLabel\n"
20845         "             OpReturnValue %param\n"
20846 
20847         "             OpFunctionEnd\n");
20848     uint32_t lastId = firstNdx;
20849     SpecResource specResource;
20850     map<string, string> specs;
20851     VulkanFeatures features;
20852     map<string, string> fragments;
20853     vector<string> extensions;
20854     std::string sequence;
20855 
20856     for (uint32_t sequenceNdx = 0; sequenceNdx < sequenceCount; ++sequenceNdx)
20857     {
20858         const uint32_t sequenceId       = sequenceNdx + firstNdx;
20859         const std::string sequenceIdStr = de::toString(sequenceId);
20860 
20861         sequence += "%" + sequenceIdStr + " = OpIAdd %u32 %91 %" + de::toString(sequenceId - 1) + "\n";
20862         lastId = sequenceId;
20863 
20864         if (sequenceNdx == 0)
20865             sequence.reserve((10 + sequence.length()) * sequenceCount);
20866     }
20867 
20868     specs["num_data_points"] = de::toString(numDataPoints);
20869     specs["zeroth_id"]       = de::toString(firstNdx - 1);
20870     specs["last_id"]         = de::toString(lastId);
20871     specs["seq"]             = sequence;
20872 
20873     fragments["decoration"] = decoration.specialize(specs);
20874     fragments["pre_main"]   = preMain.specialize(specs);
20875     fragments["testfun"]    = testFun.specialize(specs);
20876 
20877     specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20878     specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20879     specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20880 
20881     if (std::is_base_of<GraphicsResources, SpecResource>::value)
20882     {
20883         features.coreFeatures.vertexPipelineStoresAndAtomics = true;
20884         features.coreFeatures.fragmentStoresAndAtomics       = true;
20885     }
20886 
20887     finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
20888                           IVec3(1, 1, 1));
20889 }
20890 
createSpirvIdsAbuseTests(tcu::TestContext & testCtx)20891 tcu::TestCaseGroup *createSpirvIdsAbuseTests(tcu::TestContext &testCtx)
20892 {
20893     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse"));
20894 
20895     createSparseIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20896     createLotsIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20897 
20898     return testGroup.release();
20899 }
20900 
createSpirvIdsAbuseGroup(tcu::TestContext & testCtx)20901 tcu::TestCaseGroup *createSpirvIdsAbuseGroup(tcu::TestContext &testCtx)
20902 {
20903     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse"));
20904 
20905     createSparseIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20906     createLotsIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20907 
20908     return testGroup.release();
20909 }
20910 
createFunctionParamsGroup(tcu::TestContext & testCtx)20911 tcu::TestCaseGroup *createFunctionParamsGroup(tcu::TestContext &testCtx)
20912 {
20913     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "function_params"));
20914 #ifndef CTS_USES_VULKANSC
20915     static const char data_dir[] = "spirv_assembly/instruction/function_params";
20916 
20917     static const struct
20918     {
20919         const std::string name;
20920         const std::string desc;
20921     } cases[] = {
20922         {"sampler_param", "Test combined image sampler as function parameter"},
20923     };
20924 
20925     for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
20926     {
20927         cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(
20928             testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), data_dir, cases[i].name + ".amber");
20929         testGroup->addChild(testCase);
20930     }
20931 #endif
20932     return testGroup.release();
20933 }
20934 
createEarlyFragmentTests(tcu::TestContext & testCtx)20935 tcu::TestCaseGroup *createEarlyFragmentTests(tcu::TestContext &testCtx)
20936 {
20937     de::MovePtr<tcu::TestCaseGroup> earlyFragTests(new tcu::TestCaseGroup(testCtx, "early_fragment"));
20938 
20939 #ifndef CTS_USES_VULKANSC
20940     static const char dataDir[] = "spirv_assembly/instruction/graphics/early_fragment";
20941 
20942     static const struct Case
20943     {
20944         const string name;
20945         const string desc;
20946     } cases[] = {// Overwriting the gl_FragDepth should be ignored, when Early Fragment Test Mode is enabled.
20947                  {"depth_less", "gl_FragDepth > CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."},
20948                  {"depth_greater", "gl_FragDepth < CLEAR_DEPTH. Polygon depth > CLEAR_DEPTH."},
20949                  {"depth_less_or_equal", "gl_FragDepth > CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."},
20950                  {"depth_greater_or_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."},
20951                  {"depth_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."},
20952                  {"depth_not_equal", "gl_FragDepth == CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."}};
20953 
20954     for (const auto &tCase : cases)
20955     {
20956         cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(
20957             testCtx, tCase.name.c_str(), tCase.desc.c_str(), dataDir, tCase.name + ".amber");
20958 
20959         earlyFragTests->addChild(testCase);
20960     }
20961 #endif // CTS_USES_VULKANSC
20962 
20963     return earlyFragTests.release();
20964 }
20965 
createEarlyAndLateFragmentTests(tcu::TestContext & testCtx)20966 tcu::TestCaseGroup *createEarlyAndLateFragmentTests(tcu::TestContext &testCtx)
20967 {
20968     de::MovePtr<tcu::TestCaseGroup> earlyLateFragTests(new tcu::TestCaseGroup(testCtx, "early_and_late_fragment"));
20969 #ifndef CTS_USES_VULKANSC
20970     static const char dataDir[] = "spirv_assembly/instruction/graphics/early_and_late_fragment";
20971 
20972     static const struct Case
20973     {
20974         const string name;
20975         const string desc;
20976     } cases[] = {{"depth_less", "gl_FragDepth < CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."},
20977                  {"depth_greater", "gl_FragDepth > CLEAR_DEPTH. Polygon depth > CLEAR_DEPTH."},
20978                  {"depth_less_or_equal", "gl_FragDepth > CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."},
20979                  {"depth_greater_or_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."},
20980                  {"depth_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."},
20981                  {"depth_not_equal", "gl_FragDepth == CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."}};
20982 
20983     for (const auto &tCase : cases)
20984     {
20985         cts_amber::AmberTestCase *testCase =
20986             cts_amber::createAmberTestCase(testCtx, tCase.name.c_str(), tCase.desc.c_str(), dataDir,
20987                                            tCase.name + ".amber", {"VK_AMD_shader_early_and_late_fragment_tests"});
20988 
20989         earlyLateFragTests->addChild(testCase);
20990     }
20991 #endif
20992 
20993     return earlyLateFragTests.release();
20994 }
20995 
createOpExecutionModeTests(tcu::TestContext & testCtx)20996 tcu::TestCaseGroup *createOpExecutionModeTests(tcu::TestContext &testCtx)
20997 {
20998     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "execution_mode"));
20999 
21000 #ifndef CTS_USES_VULKANSC
21001     static const char dataDir[] = "spirv_assembly/instruction/graphics/execution_mode";
21002 
21003     static const struct Case
21004     {
21005         const string name;
21006         const string desc;
21007     } cases[] = {
21008         {"depthless_0", "FragDepth < Polygon depth: depth test should pass."},
21009         {"depthless_1", "FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit "
21010                         "depth, but the depth test should pass."},
21011         {"depthless_2", "FragDepth < Polygon depth: depth test should fail."},
21012         {"depthless_3", "FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit "
21013                         "depth, the depth test should fail."},
21014         {"depthless_4", "FragDepth < Polygon depth: depth test should pass."},
21015         {"depthgreater_0", "FragDepth > Polygon depth: depth test should pass."},
21016         {"depthgreater_1", "FragDepth < Polygon depth: violates the promise that FragDepth is greater than the "
21017                            "implicit depth, but the depth test should pass."},
21018         {"depthgreater_2", "FragDepth > Polygon depth: depth test should fail."},
21019         {"depthgreater_3", "FragDepth > Polygon depth: violates the promise that FragDepth is greater than the "
21020                            "implicit depth, the depth test should fail."},
21021         {"depthgreater_4", "FragDepth > Polygon depth: depth test should pass."},
21022         {"depthunchanged_0", "FragDepth == Polygon depth: depth test should pass."},
21023         {"depthunchanged_1", "FragDepth == Polygon depth: depth test should fail."},
21024         {"depthunchanged_2", "FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit "
21025                              "depth, the depth test should pass."},
21026         {"depthunchanged_3", "FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit "
21027                              "depth, the depth test should fail."},
21028     };
21029 
21030     for (const auto &case_ : cases)
21031     {
21032         cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(
21033             testCtx, case_.name.c_str(), case_.desc.c_str(), dataDir, case_.name + ".amber");
21034         testGroup->addChild(testCase);
21035     }
21036 #endif // CTS_USES_VULKANSC
21037 
21038     return testGroup.release();
21039 }
21040 
createOpMulExtendedGroup(tcu::TestContext & testCtx)21041 tcu::TestCaseGroup *createOpMulExtendedGroup(tcu::TestContext &testCtx)
21042 {
21043     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "mul_extended"));
21044 
21045 #ifndef CTS_USES_VULKANSC
21046     static const char dataDir[] = "spirv_assembly/instruction/compute/mul_extended";
21047 
21048     static const struct Case
21049     {
21050         const string name;
21051         const vector<string> features;
21052     } cases[] = {{"signed_16bit", {"Features.shaderInt16", "Storage16BitFeatures.storageBuffer16BitAccess"}},
21053                  {"signed_32bit", {}},
21054                  {"signed_64bit", {"Features.shaderInt64"}},
21055                  {"signed_8bit", {"Float16Int8Features.shaderInt8", "Storage8BitFeatures.storageBuffer8BitAccess"}},
21056                  {"unsigned_16bit", {"Features.shaderInt16", "Storage16BitFeatures.storageBuffer16BitAccess"}},
21057                  {"unsigned_32bit", {}},
21058                  {"unsigned_64bit", {"Features.shaderInt64"}},
21059                  {"unsigned_8bit", {"Float16Int8Features.shaderInt8", "Storage8BitFeatures.storageBuffer8BitAccess"}}};
21060 
21061     for (const auto &test : cases)
21062     {
21063         cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx, test.name.c_str(), "", dataDir,
21064                                                                             test.name + ".amber", test.features);
21065         testGroup->addChild(testCase);
21066     }
21067 #endif // CTS_USES_VULKANSC
21068 
21069     return testGroup.release();
21070 }
21071 
createQueryGroup(tcu::TestContext & testCtx)21072 tcu::TestCaseGroup *createQueryGroup(tcu::TestContext &testCtx)
21073 {
21074     de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "image_query"));
21075 
21076 #ifndef CTS_USES_VULKANSC
21077     static const char data_dir[] = "spirv_assembly/instruction/image_query";
21078 
21079     static const struct
21080     {
21081         const std::string name;
21082         const std::string desc;
21083     } cases[] = {
21084         {"samples_storage", "Test samples query can be used on storage images"},
21085     };
21086 
21087     vector<string> requirements(1, "Features.shaderStorageImageMultisample");
21088 
21089     for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
21090     {
21091         cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(
21092             testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), data_dir, cases[i].name + ".amber", requirements);
21093         testGroup->addChild(testCase);
21094     }
21095 #endif // CTS_USES_VULKANSC
21096 
21097     return testGroup.release();
21098 }
21099 
createInstructionTests(tcu::TestContext & testCtx)21100 tcu::TestCaseGroup *createInstructionTests(tcu::TestContext &testCtx)
21101 {
21102     const bool testComputePipeline = true;
21103 
21104     de::MovePtr<tcu::TestCaseGroup> instructionTests(new tcu::TestCaseGroup(testCtx, "instruction"));
21105     de::MovePtr<tcu::TestCaseGroup> computeTests(new tcu::TestCaseGroup(testCtx, "compute"));
21106     de::MovePtr<tcu::TestCaseGroup> graphicsTests(new tcu::TestCaseGroup(testCtx, "graphics"));
21107 
21108     computeTests->addChild(createSpivVersionCheckTests(testCtx, testComputePipeline));
21109     computeTests->addChild(createLocalSizeGroup(testCtx, false));
21110     computeTests->addChild(createLocalSizeGroup(testCtx, true));
21111     computeTests->addChild(createNonSemanticInfoGroup(testCtx));
21112     computeTests->addChild(createOpNopGroup(testCtx));
21113     computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITHOUT_NAN));
21114     computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITH_NAN));
21115     computeTests->addChild(createOpAtomicGroup(testCtx, false));
21116     computeTests->addChild(createOpAtomicGroup(testCtx, true));              // Using new StorageBuffer decoration
21117     computeTests->addChild(createOpAtomicGroup(testCtx, false, 1024, true)); // Return value validation
21118     computeTests->addChild(createOpAtomicGroup(testCtx, true, 65535, false, true)); // volatile atomics
21119     computeTests->addChild(createOpLineGroup(testCtx));
21120     computeTests->addChild(createOpModuleProcessedGroup(testCtx));
21121     computeTests->addChild(createOpNoLineGroup(testCtx));
21122     computeTests->addChild(createOpConstantNullGroup(testCtx));
21123     computeTests->addChild(createOpConstantCompositeGroup(testCtx));
21124     computeTests->addChild(createOpConstantUsageGroup(testCtx));
21125     computeTests->addChild(createSpecConstantGroup(testCtx));
21126     computeTests->addChild(createOpSourceGroup(testCtx));
21127     computeTests->addChild(createOpSourceExtensionGroup(testCtx));
21128     computeTests->addChild(createDecorationGroupGroup(testCtx));
21129     computeTests->addChild(createOpPhiGroup(testCtx));
21130     computeTests->addChild(createLoopControlGroup(testCtx));
21131     computeTests->addChild(createFunctionControlGroup(testCtx));
21132     computeTests->addChild(createSelectionControlGroup(testCtx));
21133     computeTests->addChild(createBlockOrderGroup(testCtx));
21134     computeTests->addChild(createMultipleShaderGroup(testCtx));
21135     computeTests->addChild(createMultipleShaderExtendedGroup(testCtx));
21136     computeTests->addChild(createMemoryAccessGroup(testCtx));
21137     computeTests->addChild(createOpCopyMemoryGroup(testCtx));
21138     computeTests->addChild(createOpCopyObjectGroup(testCtx));
21139     computeTests->addChild(createNoContractionGroup(testCtx));
21140     computeTests->addChild(createOpUndefGroup(testCtx));
21141     computeTests->addChild(createOpUnreachableGroup(testCtx));
21142     computeTests->addChild(createOpQuantizeToF16Group(testCtx));
21143     computeTests->addChild(createOpFRemGroup(testCtx));
21144     computeTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_PASS));
21145     computeTests->addChild(createOpSRemComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
21146     computeTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_PASS));
21147     computeTests->addChild(createOpSModComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
21148 #ifndef CTS_USES_VULKANSC
21149     computeTests->addChild(createOpSDotKHRComputeGroup(testCtx));
21150     computeTests->addChild(createOpUDotKHRComputeGroup(testCtx));
21151     computeTests->addChild(createOpSUDotKHRComputeGroup(testCtx));
21152     computeTests->addChild(createOpSDotAccSatKHRComputeGroup(testCtx));
21153     computeTests->addChild(createOpUDotAccSatKHRComputeGroup(testCtx));
21154     computeTests->addChild(createOpSUDotAccSatKHRComputeGroup(testCtx));
21155 #endif // CTS_USES_VULKANSC
21156     computeTests->addChild(createConvertComputeTests(testCtx, "OpSConvert", "sconvert"));
21157     computeTests->addChild(createConvertComputeTests(testCtx, "OpUConvert", "uconvert"));
21158     computeTests->addChild(createConvertComputeTests(testCtx, "OpFConvert", "fconvert"));
21159     computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertSToF", "convertstof"));
21160     computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToS", "convertftos"));
21161     computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertUToF", "convertutof"));
21162     computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToU", "convertftou"));
21163     computeTests->addChild(createOpCompositeInsertGroup(testCtx));
21164     computeTests->addChild(createOpInBoundsAccessChainGroup(testCtx));
21165     computeTests->addChild(createShaderDefaultOutputGroup(testCtx));
21166     computeTests->addChild(createOpNMinGroup(testCtx));
21167     computeTests->addChild(createOpNMaxGroup(testCtx));
21168     computeTests->addChild(createOpNClampGroup(testCtx));
21169     computeTests->addChild(createFloatControlsExtensionlessGroup(testCtx));
21170     {
21171         de::MovePtr<tcu::TestCaseGroup> computeAndroidTests(new tcu::TestCaseGroup(testCtx, "android"));
21172 
21173         computeAndroidTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
21174         computeAndroidTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
21175 
21176         computeTests->addChild(computeAndroidTests.release());
21177     }
21178 
21179     computeTests->addChild(create8BitStorageComputeGroup(testCtx));
21180     computeTests->addChild(create16BitStorageComputeGroup(testCtx));
21181     computeTests->addChild(createFloatControlsComputeGroup(testCtx));
21182 #ifndef CTS_USES_VULKANSC
21183     computeTests->addChild(createFloatControls2ComputeGroup(testCtx));
21184 #endif // CTS_USES_VULKANSC
21185     computeTests->addChild(createUboMatrixPaddingComputeGroup(testCtx));
21186     computeTests->addChild(createCompositeInsertComputeGroup(testCtx));
21187     computeTests->addChild(createVariableInitComputeGroup(testCtx));
21188     computeTests->addChild(createConditionalBranchComputeGroup(testCtx));
21189     computeTests->addChild(createIndexingComputeGroup(testCtx));
21190     computeTests->addChild(createVariablePointersComputeGroup(testCtx));
21191     computeTests->addChild(createPhysicalPointersComputeGroup(testCtx));
21192     computeTests->addChild(createImageSamplerComputeGroup(testCtx));
21193     computeTests->addChild(createOpNameGroup(testCtx));
21194     computeTests->addChild(createOpMemberNameGroup(testCtx));
21195     computeTests->addChild(createPointerParameterComputeGroup(testCtx));
21196     computeTests->addChild(createFloat16Group(testCtx));
21197 #ifndef CTS_USES_VULKANSC
21198     computeTests->addChild(createFloat32Group(testCtx));
21199 #endif // CTS_USES_VULKANSC
21200     computeTests->addChild(createBoolGroup(testCtx));
21201     computeTests->addChild(createWorkgroupMemoryComputeGroup(testCtx));
21202     computeTests->addChild(createSpirvIdsAbuseGroup(testCtx));
21203 #ifndef CTS_USES_VULKANSC
21204     computeTests->addChild(createSignedIntCompareGroup(testCtx));
21205     computeTests->addChild(createSignedOpTestsGroup(testCtx));
21206 #endif // CTS_USES_VULKANSC
21207     computeTests->addChild(createUnusedVariableComputeTests(testCtx));
21208 #ifndef CTS_USES_VULKANSC
21209     computeTests->addChild(createPtrAccessChainGroup(testCtx));
21210     computeTests->addChild(createVectorShuffleGroup(testCtx));
21211 #endif // CTS_USES_VULKANSC
21212     computeTests->addChild(createHlslComputeGroup(testCtx));
21213     computeTests->addChild(createEmptyStructComputeGroup(testCtx));
21214     computeTests->addChild(create64bitCompareComputeGroup(testCtx));
21215 #ifndef CTS_USES_VULKANSC
21216     computeTests->addChild(createOpArrayLengthComputeGroup(testCtx));
21217 #endif // CTS_USES_VULKANSC
21218     computeTests->addChild(createPhysicalStorageBufferTestGroup(testCtx));
21219     computeTests->addChild(createOpMulExtendedGroup(testCtx));
21220 
21221     graphicsTests->addChild(createCrossStageInterfaceTests(testCtx));
21222     graphicsTests->addChild(createSpivVersionCheckTests(testCtx, !testComputePipeline));
21223     graphicsTests->addChild(createOpNopTests(testCtx));
21224     graphicsTests->addChild(createOpSourceTests(testCtx));
21225     graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
21226     graphicsTests->addChild(createOpModuleProcessedTests(testCtx));
21227     graphicsTests->addChild(createOpLineTests(testCtx));
21228     graphicsTests->addChild(createOpNoLineTests(testCtx));
21229     graphicsTests->addChild(createOpConstantNullTests(testCtx));
21230     graphicsTests->addChild(createOpConstantCompositeTests(testCtx));
21231     graphicsTests->addChild(createMemoryAccessTests(testCtx));
21232     graphicsTests->addChild(createOpUndefTests(testCtx));
21233     graphicsTests->addChild(createSelectionBlockOrderTests(testCtx));
21234     graphicsTests->addChild(createModuleTests(testCtx));
21235     graphicsTests->addChild(createUnusedVariableTests(testCtx));
21236     graphicsTests->addChild(createSwitchBlockOrderTests(testCtx));
21237     graphicsTests->addChild(createOpPhiTests(testCtx));
21238     graphicsTests->addChild(createNoContractionTests(testCtx));
21239     graphicsTests->addChild(createOpQuantizeTests(testCtx));
21240     graphicsTests->addChild(createLoopTests(testCtx));
21241     graphicsTests->addChild(createSpecConstantTests(testCtx));
21242     graphicsTests->addChild(createSpecConstantOpQuantizeToF16Group(testCtx));
21243     graphicsTests->addChild(createBarrierTests(testCtx));
21244     graphicsTests->addChild(createDecorationGroupTests(testCtx));
21245     graphicsTests->addChild(createFRemTests(testCtx));
21246     graphicsTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
21247     graphicsTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
21248 
21249     {
21250         de::MovePtr<tcu::TestCaseGroup> graphicsAndroidTests(new tcu::TestCaseGroup(testCtx, "android"));
21251 
21252         graphicsAndroidTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
21253         graphicsAndroidTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
21254 
21255         graphicsTests->addChild(graphicsAndroidTests.release());
21256     }
21257 
21258     graphicsTests->addChild(createOpNameTests(testCtx));
21259     graphicsTests->addChild(createOpNameAbuseTests(testCtx));
21260     graphicsTests->addChild(createOpMemberNameAbuseTests(testCtx));
21261 
21262     graphicsTests->addChild(create8BitStorageGraphicsGroup(testCtx));
21263     graphicsTests->addChild(create16BitStorageGraphicsGroup(testCtx));
21264     graphicsTests->addChild(createFloatControlsGraphicsGroup(testCtx));
21265 #ifndef CTS_USES_VULKANSC
21266     graphicsTests->addChild(createFloatControls2GraphicsGroup(testCtx));
21267 #endif // CTS_USES_VULKANSC
21268     graphicsTests->addChild(createUboMatrixPaddingGraphicsGroup(testCtx));
21269     graphicsTests->addChild(createCompositeInsertGraphicsGroup(testCtx));
21270     graphicsTests->addChild(createVariableInitGraphicsGroup(testCtx));
21271     graphicsTests->addChild(createConditionalBranchGraphicsGroup(testCtx));
21272     graphicsTests->addChild(createIndexingGraphicsGroup(testCtx));
21273     graphicsTests->addChild(createVariablePointersGraphicsGroup(testCtx));
21274     graphicsTests->addChild(createImageSamplerGraphicsGroup(testCtx));
21275     graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpSConvert", "sconvert"));
21276     graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpUConvert", "uconvert"));
21277     graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpFConvert", "fconvert"));
21278     graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertSToF", "convertstof"));
21279     graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToS", "convertftos"));
21280     graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertUToF", "convertutof"));
21281     graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToU", "convertftou"));
21282     graphicsTests->addChild(createPointerParameterGraphicsGroup(testCtx));
21283     graphicsTests->addChild(createVaryingNameGraphicsGroup(testCtx));
21284     graphicsTests->addChild(createFloat16Tests(testCtx));
21285 #ifndef CTS_USES_VULKANSC
21286     graphicsTests->addChild(createFloat32Tests(testCtx));
21287 #endif // CTS_USES_VULKANSC
21288     graphicsTests->addChild(createSpirvIdsAbuseTests(testCtx));
21289     graphicsTests->addChild(create64bitCompareGraphicsGroup(testCtx));
21290     graphicsTests->addChild(createEarlyFragmentTests(testCtx));
21291     graphicsTests->addChild(createEarlyAndLateFragmentTests(testCtx));
21292     graphicsTests->addChild(createOpExecutionModeTests(testCtx));
21293 
21294     instructionTests->addChild(computeTests.release());
21295     instructionTests->addChild(graphicsTests.release());
21296 #ifndef CTS_USES_VULKANSC
21297     instructionTests->addChild(createSpirvVersion1p4Group(testCtx));
21298     instructionTests->addChild(createFunctionParamsGroup(testCtx));
21299 #endif // CTS_USES_VULKANSC
21300     instructionTests->addChild(createQueryGroup(testCtx));
21301     instructionTests->addChild(createTrinaryMinMaxGroup(testCtx));
21302     instructionTests->addChild(createTerminateInvocationGroup(testCtx));
21303 
21304     return instructionTests.release();
21305 }
21306 
21307 } // namespace SpirVAssembly
21308 } // namespace vkt
21309