1 /*-------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2015 Google Inc.
6 * Copyright (c) 2016 The Khronos Group Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief SPIR-V Assembly Tests for Instructions (special opcode/operand)
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktSpvAsmInstructionTests.hpp"
26 #include "vktAmberTestCase.hpp"
27
28 #include "tcuCommandLine.hpp"
29 #include "tcuFormatUtil.hpp"
30 #include "tcuFloat.hpp"
31 #include "tcuFloatFormat.hpp"
32 #include "tcuRGBA.hpp"
33 #include "tcuStringTemplate.hpp"
34 #include "tcuTestLog.hpp"
35 #include "tcuVectorUtil.hpp"
36 #include "tcuInterval.hpp"
37
38 #include "vkDefs.hpp"
39 #include "vkDeviceUtil.hpp"
40 #include "vkMemUtil.hpp"
41 #include "vkPlatform.hpp"
42 #include "vkPrograms.hpp"
43 #include "vkQueryUtil.hpp"
44 #include "vkRef.hpp"
45 #include "vkRefUtil.hpp"
46 #include "vkStrUtil.hpp"
47 #include "vkTypeUtil.hpp"
48
49 #include "deStringUtil.hpp"
50 #include "deUniquePtr.hpp"
51 #include "deMath.h"
52 #include "deRandom.hpp"
53 #include "tcuStringTemplate.hpp"
54
55 #include "vktSpvAsmCrossStageInterfaceTests.hpp"
56 #include "vktSpvAsm8bitStorageTests.hpp"
57 #include "vktSpvAsm16bitStorageTests.hpp"
58 #include "vktSpvAsmUboMatrixPaddingTests.hpp"
59 #include "vktSpvAsmConditionalBranchTests.hpp"
60 #include "vktSpvAsmIndexingTests.hpp"
61 #include "vktSpvAsmImageSamplerTests.hpp"
62 #include "vktSpvAsmComputeShaderCase.hpp"
63 #include "vktSpvAsmComputeShaderTestUtil.hpp"
64 #include "vktSpvAsmFloatControlsTests.hpp"
65 #include "vktSpvAsmFromHlslTests.hpp"
66 #include "vktSpvAsmEmptyStructTests.hpp"
67 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
68 #include "vktSpvAsmVariablePointersTests.hpp"
69 #include "vktSpvAsmVariableInitTests.hpp"
70 #include "vktSpvAsmPointerParameterTests.hpp"
71 #include "vktSpvAsmSpirvVersion1p4Tests.hpp"
72 #include "vktSpvAsmSpirvVersionTests.hpp"
73 #include "vktTestCaseUtil.hpp"
74 #include "vktSpvAsmLoopDepLenTests.hpp"
75 #include "vktSpvAsmLoopDepInfTests.hpp"
76 #include "vktSpvAsmCompositeInsertTests.hpp"
77 #include "vktSpvAsmVaryingNameTests.hpp"
78 #include "vktSpvAsmWorkgroupMemoryTests.hpp"
79 #include "vktSpvAsmSignedIntCompareTests.hpp"
80 #include "vktSpvAsmSignedOpTests.hpp"
81 #include "vktSpvAsmPtrAccessChainTests.hpp"
82 #include "vktSpvAsmVectorShuffleTests.hpp"
83 #include "vktSpvAsmFloatControlsExtensionlessTests.hpp"
84 #include "vktSpvAsmNonSemanticInfoTests.hpp"
85 #include "vktSpvAsm64bitCompareTests.hpp"
86 #include "vktSpvAsmTrinaryMinMaxTests.hpp"
87 #include "vktSpvAsmTerminateInvocationTests.hpp"
88 #include "vktSpvAsmMultipleShadersTests.hpp"
89 #ifndef CTS_USES_VULKANSC
90 #include "vktSpvAsmFloatControls2Tests.hpp"
91 #include "vktSpvAsmIntegerDotProductTests.hpp"
92 #endif // CTS_USES_VULKANSC
93 #include "vktSpvAsmPhysicalStorageBufferPointerTests.hpp"
94
95 #include <cmath>
96 #include <limits>
97 #include <map>
98 #include <string>
99 #include <sstream>
100 #include <utility>
101 #include <stack>
102 #include <cassert>
103
104 namespace vkt
105 {
106 namespace SpirVAssembly
107 {
108
109 namespace
110 {
111
112 using namespace vk;
113 using de::UniquePtr;
114 using std::map;
115 using std::string;
116 using std::vector;
117 using tcu::IVec3;
118 using tcu::IVec4;
119 using tcu::RGBA;
120 using tcu::StringTemplate;
121 using tcu::TestLog;
122 using tcu::TestStatus;
123 using tcu::Vec4;
124
// Named spellings of a boolean switch so that call sites read as
// TEST_WITH_NAN / TEST_WITHOUT_NAN rather than bare true / false.
// NOTE(review): the consumers are outside this excerpt; presumably the flag
// selects whether NaN inputs are part of a test -- confirm at the call sites.
constexpr bool TEST_WITH_NAN    = true;
constexpr bool TEST_WITHOUT_NAN = false;
127
// SPIR-V assembly template for a helper function %ld_arg_${var}(i32 idx) -> f16.
// Two f16 values share one u32 word of %${var} (member 0): the function reads
// word idx / 2, reinterprets it as %v2f16 and dynamically extracts component
// (idx & 1). ${var} is a placeholder filled in before assembly; all other ids
// (%f16_i32_fn, %up_u32, %c_u32_*, ...) must be declared by the enclosing module.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string loadScalarF16FromUint = R"(%ld_arg_${var} = OpFunction %f16 None %f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_conv = OpBitcast %u32 %ld_arg_${var}_param
%ld_arg_${var}_div = OpUDiv %u32 %ld_arg_${var}_conv %c_u32_2
%ld_arg_${var}_and_low = OpBitwiseAnd %u32 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_div
%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep
%ld_arg_${var}_unpack = OpBitcast %v2f16 %ld_arg_${var}_ld
%ld_arg_${var}_ex = OpVectorExtractDynamic %f16 %ld_arg_${var}_unpack %ld_arg_${var}_and_low
OpReturnValue %ld_arg_${var}_ex
OpFunctionEnd
)";
141
// SPIR-V assembly template for %ld_arg_${var}(i32 idx) -> v2f16.
// A v2f16 occupies exactly one u32 word, so the function loads word idx of
// %${var} (member 0) and bitcasts it. ${var} is a placeholder filled in before
// assembly; the remaining ids come from the enclosing module.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string loadV2F16FromUint = R"(%ld_arg_${var} = OpFunction %v2f16 None %v2f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param
%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep
%ld_arg_${var}_cast = OpBitcast %v2f16 %ld_arg_${var}_ld
OpReturnValue %ld_arg_${var}_cast
OpFunctionEnd
)";
150
// SPIR-V assembly template for %ld_arg_${var}(i32 idx) -> v3f16.
// Storage is allocated as a vec4 worth of values, so this is nearly the vec4
// loader: words [idx][0] and [idx][1] of %${var} (member 0) are loaded as two
// %v2f16 halves and the shuffle keeps only the first three components.
// ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string loadV3F16FromUints = R"(%ld_arg_${var} = OpFunction %v3f16 None %v3f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0
%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0
%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1
%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1
%ld_arg_${var}_shuffle = OpVectorShuffle %v3f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2
OpReturnValue %ld_arg_${var}_shuffle
OpFunctionEnd
)";
166
// SPIR-V assembly template for %ld_arg_${var}(i32 idx) -> v4f16.
// Words [idx][0] and [idx][1] of %${var} (member 0) are loaded, each is
// bitcast to %v2f16, and the two halves are shuffled together into a %v4f16.
// ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string loadV4F16FromUints = R"(%ld_arg_${var} = OpFunction %v4f16 None %v4f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0
%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0
%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1
%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1
%ld_arg_${var}_shuffle = OpVectorShuffle %v4f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2 3
OpReturnValue %ld_arg_${var}_shuffle
OpFunctionEnd
)";
180
// SPIR-V assembly template for %ld_arg_${var}(i32 idx) -> m2x2f16.
// Each %v2f16 constituent of the matrix is exactly one u32 word: words
// [idx][0] and [idx][1] of %${var} (member 0) are bitcast and passed to
// OpCompositeConstruct. ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string loadM2x2F16FromUints = R"(%ld_arg_${var} = OpFunction %m2x2f16 None %m2x2f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0
%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0
%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1
%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1
%ld_arg_${var}_cons = OpCompositeConstruct %m2x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1
OpReturnValue %ld_arg_${var}_cons
OpFunctionEnd
)";
194
// SPIR-V assembly template for %ld_arg_${var}(i32 idx) -> m2x3f16.
// Each %v3f16 constituent occupies two u32 words: words [idx][0..1] build the
// first vector and words [idx][2..3] the second. Every word is bitcast to
// %v2f16 and each pair is shuffled down to three components (the fourth
// stored f16 is dropped). ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string loadM2x3F16FromUints = R"(%ld_arg_${var} = OpFunction %m2x3f16 None %m2x3f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2
%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2
%ld_arg_${var}_mat = OpCompositeConstruct %m2x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
216
// SPIR-V assembly template for %ld_arg_${var}(i32 idx) -> m2x4f16.
// Each %v4f16 constituent occupies two u32 words: words [idx][0..1] build the
// first vector and words [idx][2..3] the second. Every word is bitcast to
// %v2f16 and each pair is shuffled into a full %v4f16.
// ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string loadM2x4F16FromUints = R"(%ld_arg_${var} = OpFunction %m2x4f16 None %m2x4f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3
%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3
%ld_arg_${var}_mat = OpCompositeConstruct %m2x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
238
// SPIR-V assembly template for %ld_arg_${var}(i32 idx) -> m3x2f16.
// Each of the three %v2f16 constituents is exactly one u32 word: words
// [idx][0..2] of %${var} (member 0) are loaded, bitcast and handed to
// OpCompositeConstruct. ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string loadM3x2F16FromUints = R"(%ld_arg_${var} = OpFunction %m3x2f16 None %m3x2f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0
%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1
%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2
%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0
%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1
%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2
%ld_arg_${var}_mat = OpCompositeConstruct %m3x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
255
// SPIR-V assembly template for %ld_arg_${var}(i32 idx) -> m3x3f16.
// Each %v3f16 constituent occupies two u32 words, so words [idx][0..5] of
// %${var} (member 0) are read pairwise. Every word is bitcast to %v2f16 and
// each pair is shuffled down to three components (the fourth stored f16 is
// dropped). ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string loadM3x3F16FromUints = R"(%ld_arg_${var} = OpFunction %m3x3f16 None %m3x3f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4
%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20
%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20
%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21
%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2
%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2
%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2
%ld_arg_${var}_mat = OpCompositeConstruct %m3x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
284
// SPIR-V assembly template for %ld_arg_${var}(i32 idx) -> m3x4f16.
// Each %v4f16 constituent occupies two u32 words, so words [idx][0..5] of
// %${var} (member 0) are read pairwise. Every word is bitcast to %v2f16 and
// each pair is shuffled into a full %v4f16.
// ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string loadM3x4F16FromUints = R"(%ld_arg_${var} = OpFunction %m3x4f16 None %m3x4f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4
%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20
%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20
%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21
%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3
%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3
%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3
%ld_arg_${var}_mat = OpCompositeConstruct %m3x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
313
// SPIR-V assembly template for %ld_arg_${var}(i32 idx) -> m4x2f16.
// Each of the four %v2f16 constituents is exactly one u32 word: words
// [idx][0..3] of %${var} (member 0) are loaded, bitcast and handed to
// OpCompositeConstruct. ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string loadM4x2F16FromUints = R"(%ld_arg_${var} = OpFunction %m4x2f16 None %m4x2f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0
%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1
%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2
%ld_arg_${var}_ld3 = OpLoad %u32 %ld_arg_${var}_gep3
%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0
%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1
%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2
%ld_arg_${var}_bc3 = OpBitcast %v2f16 %ld_arg_${var}_ld3
%ld_arg_${var}_mat = OpCompositeConstruct %m4x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2 %ld_arg_${var}_bc3
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
334
// SPIR-V assembly template for %ld_arg_${var}(i32 idx) -> m4x3f16.
// Each %v3f16 constituent occupies two u32 words, so words [idx][0..7] of
// %${var} (member 0) are read pairwise. Every word is bitcast to %v2f16 and
// each pair is shuffled down to three components (the fourth stored f16 is
// dropped). ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string loadM4x3F16FromUints = R"(%ld_arg_${var} = OpFunction %m4x3f16 None %m4x3f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4
%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5
%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6
%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20
%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21
%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30
%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20
%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21
%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30
%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31
%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2
%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2
%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2
%ld_arg_${var}_vec3 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2
%ld_arg_${var}_mat = OpCompositeConstruct %m4x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
371
// SPIR-V assembly template for %ld_arg_${var}(i32 idx) -> m4x4f16.
// Each %v4f16 constituent occupies two u32 words, so words [idx][0..7] of
// %${var} (member 0) are read pairwise. Every word is bitcast to %v2f16 and
// each pair is shuffled into a full %v4f16.
// ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string loadM4x4F16FromUints = R"(%ld_arg_${var} = OpFunction %m4x4f16 None %m4x4f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4
%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5
%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6
%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20
%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21
%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30
%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20
%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21
%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30
%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31
%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3
%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3
%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3
%ld_arg_${var}_vec3 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2 3
%ld_arg_${var}_mat = OpCompositeConstruct %m4x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)";
408
// SPIR-V assembly template for %st_fn_${var}(f16 value, i32 idx) -> void.
// Two f16 values share one u32 word of %${var} (member 0); word idx / 2 is
// updated and (idx & 1) selects the half that receives the value.
//
// The result depends on the output buffer's initial contents: the test
// infrastructure fills the buffer with all-one bits before the shader runs,
// so the function ANDs the new word in atomically. The half that belongs to
// the neighbouring value is forced to ones first and is therefore left
// untouched, while the written half gets its zero bits from the AND.
// ${var} is a placeholder filled in before assembly.
// The raw literals start at column 0 on purpose: indentation would become part
// of the string. The two adjacent literals are concatenated by the compiler.
const string storeScalarF16AsUint =
    R"(%st_fn_${var} = OpFunction %void None %void_f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_and_low = OpBitwiseAnd %u32 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_zero_vec = OpBitcast %v2f16 %c_u32_0
%st_fn_${var}_insert = OpVectorInsertDynamic %v2f16 %st_fn_${var}_zero_vec %st_fn_${var}_param1 %st_fn_${var}_and_low
%st_fn_${var}_odd = OpIEqual %bool %st_fn_${var}_and_low %c_u32_1
)"
    // OR sixteen one-bits into whichever half did not receive the value.
    R"(%st_fn_${var}_sel = OpSelect %u32 %st_fn_${var}_odd %c_u32_low_ones %c_u32_high_ones
%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_insert
%st_fn_${var}_or = OpBitwiseOr %u32 %st_fn_${var}_cast %st_fn_${var}_sel
%st_fn_${var}_conv = OpBitcast %u32 %st_fn_${var}_param2
%st_fn_${var}_div = OpUDiv %u32 %st_fn_${var}_conv %c_u32_2
%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_div
%st_fn_${var}_and = OpAtomicAnd %u32 %st_fn_${var}_gep %c_u32_1 %c_u32_0 %st_fn_${var}_or
OpReturn
OpFunctionEnd
)";
433
// SPIR-V assembly template for %st_fn_${var}(v2f16 value, i32 idx) -> void.
// A v2f16 is exactly one u32 word, so the value is bitcast and stored to word
// idx of %${var} (member 0). ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string storeV2F16AsUint = R"(%st_fn_${var} = OpFunction %void None %void_v2f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %v2f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_param1
%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2
OpStore %st_fn_${var}_gep %st_fn_${var}_cast
OpReturn
OpFunctionEnd
)";
443
// SPIR-V assembly template for %st_fn_${var}(v3f16 value, i32 idx) -> void.
// Storage is allocated as a vec4 worth of values, so this mirrors the vec4
// store: the value is split into two %v2f16 halves that are written to words
// [idx][0] and [idx][1] of %${var} (member 0). Shuffle index 3 selects from
// the second shuffle operand (the same vector again), so the second word
// carries one extra f16 that must not be compared.
// ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string storeV3F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_v3f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %v3f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1
%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3
%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0
%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1
%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0
OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1
OpReturn
OpFunctionEnd
)";
462
// SPIR-V assembly template for %st_fn_${var}(v4f16 value, i32 idx) -> void.
// The value is split into two %v2f16 halves (components 0-1 and 2-3), each is
// bitcast to u32 and stored to words [idx][0] and [idx][1] of %${var}
// (member 0). ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string storeV4F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_v4f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %v4f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1
%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3
%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0
%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1
%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0
OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1
OpReturn
OpFunctionEnd
)";
478
// SPIR-V assembly template for %st_fn_${var}(m2x2f16 value, i32 idx) -> void.
// Each %v2f16 constituent of the matrix is exactly one u32 word: constituents
// 0 and 1 are extracted, bitcast and stored to words [idx][0] and [idx][1] of
// %${var} (member 0). ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string storeM2x2F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_m2x2f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m2x2f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1
%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0
%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1
%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0
OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1
OpReturn
OpFunctionEnd
)";
494
// SPIR-V assembly template for %st_fn_${var}(m2x3f16 value, i32 idx) -> void.
// Each %v3f16 constituent is split into two %v2f16 pieces and written to two
// u32 words, giving words [idx][0..3] of %${var} (member 0). In the second
// piece of each constituent (ele01 and ele11) the second element does not
// matter: shuffle index 3 merely picks a component from the second shuffle
// operand to fill the slot.
// ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string storeM2x3F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_m2x3f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m2x3f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1
%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1
%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3
%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1
%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3
%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00
%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01
%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10
%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11
%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2
%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3
OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00
OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01
OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10
OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11
OpReturn
OpFunctionEnd
)";
522
// SPIR-V assembly template for %st_fn_${var}(m2x4f16 value, i32 idx) -> void.
// Each %v4f16 constituent is split into two %v2f16 pieces (components 0-1 and
// 2-3); the four pieces are bitcast to u32 and stored to words [idx][0..3] of
// %${var} (member 0). ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string storeM2x4F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_m2x4f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m2x4f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1
%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1
%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3
%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1
%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3
%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00
%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01
%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10
%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11
%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2
%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3
OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00
OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01
OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10
OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11
OpReturn
OpFunctionEnd
)";
548
// SPIR-V assembly template for %st_fn_${var}(m3x2f16 value, i32 idx) -> void.
// Each of the three %v2f16 constituents is exactly one u32 word: they are
// extracted, bitcast and stored to words [idx][0..2] of %${var} (member 0).
// ${var} is a placeholder filled in before assembly.
// The raw literal starts at column 0 on purpose: indentation would become part
// of the string.
const string storeM3x2F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_m3x2f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m3x2f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1
%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2
%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0
%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1
%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2
%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2
OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0
OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1
OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2
OpReturn
OpFunctionEnd
)";
568
// SPIR-V assembly template for a function that stores a %m3x3f16 matrix as 32-bit uints.
// ${var} is a template placeholder naming the destination variable.
// The function takes the matrix (param1) and an %i32 index (param2). Each %v3f16 column is split
// into two %v2f16 halves, each half is bitcast to a %u32, and the six resulting words are stored
// through the access chain ${var} -> %c_u32_0 -> param2 -> word index (0..5).
const string storeM3x3F16AsUints =
    // Each v3f16 column is broken down into two v2f16 halves. The second half is built with shuffle
    // indices "2 3"; index 3 lies past the three components of the first operand and therefore picks
    // from the second operand (the same column). That element is only padding: its value doesn't matter.
    "%st_fn_${var} = OpFunction %void None %void_m3x3f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m3x3f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    // Extract the three v3f16 columns.
    "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
    // Split every column into two v2f16 halves.
    "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
    "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
    "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
    "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
    "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
    "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
    // Reinterpret each half as one u32.
    "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
    "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
    "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
    "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
    "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
    "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
    // Destination pointers, two u32 slots per column.
    "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
    "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
    "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
    "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
    "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
    "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
    "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
    "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
    "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
604
// SPIR-V assembly template for a function that stores a %m3x4f16 matrix as 32-bit uints.
// ${var} is a template placeholder naming the destination variable.
// The function takes the matrix (param1) and an %i32 index (param2). Each %v4f16 column is split
// into its (0,1) and (2,3) component pairs, each pair is bitcast to a %u32, and the six resulting
// words are stored through the access chain ${var} -> %c_u32_0 -> param2 -> word index (0..5).
const string storeM3x4F16AsUints =
    "%st_fn_${var} = OpFunction %void None %void_m3x4f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m3x4f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    // Extract the three v4f16 columns.
    "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
    // Split every column into two v2f16 halves.
    "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
    "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
    "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
    "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
    "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
    "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
    // Reinterpret each half as one u32.
    "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
    "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
    "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
    "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
    "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
    "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
    // Destination pointers, two u32 slots per column.
    "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
    "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
    "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
    "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
    "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
    "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
    "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
    "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
    "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
639
// SPIR-V assembly template for a function that stores a %m4x2f16 matrix as 32-bit uints.
// ${var} is a template placeholder naming the destination variable.
// The function takes the matrix (param1) and an %i32 index (param2). Each of the four %v2f16
// columns is bitcast to a single %u32 and stored through the access chain
// ${var} -> %c_u32_0 -> param2 -> column index (0..3).
const string storeM4x2F16AsUints =
    "%st_fn_${var} = OpFunction %void None %void_m4x2f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m4x2f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    // Extract the four v2f16 columns.
    "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
    "%st_fn_${var}_ex3 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 3\n"
    // A v2f16 is 32 bits wide, so each column maps onto exactly one u32.
    "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
    "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
    "%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
    "%st_fn_${var}_bc3 = OpBitcast %u32 %st_fn_${var}_ex3\n"
    // Destination pointers, one u32 slot per column.
    "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    "%st_fn_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
    "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
    "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
    "OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
    "OpStore %st_fn_${var}_gep3 %st_fn_${var}_bc3\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
663
// SPIR-V assembly template for a function that stores a %m4x3f16 matrix as 32-bit uints.
// ${var} is a template placeholder naming the destination variable.
// The function takes the matrix (param1) and an %i32 index (param2). Each %v3f16 column is split
// into two %v2f16 halves, each half is bitcast to a %u32, and the eight resulting words are stored
// through the access chain ${var} -> %c_u32_0 -> param2 -> word index (0..7).
const string storeM4x3F16AsUints =
    // Each v3f16 column is decomposed into two v2f16 halves. The last element of the second half
    // (shuffle index 3, taken from the second shuffle operand) is only padding: its value doesn't matter.
    "%st_fn_${var} = OpFunction %void None %void_m4x3f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m4x3f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    // Extract the four v3f16 columns.
    "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
    "%st_fn_${var}_ex3 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 3\n"
    // Split every column into two v2f16 halves.
    "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
    "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
    "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
    "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
    "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
    "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
    "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
    "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
    // Reinterpret each half as one u32.
    "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
    "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
    "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
    "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
    "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
    "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
    "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
    "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
    // Destination pointers, two u32 slots per column.
    "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
    "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
    "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
    "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
    "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
    "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
    "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
    "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
    "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
    "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
    "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
    "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
    "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
708
// SPIR-V assembly template for a function that stores a %m4x4f16 matrix as 32-bit uints.
// ${var} is a template placeholder naming the destination variable.
// The function takes the matrix (param1) and an %i32 index (param2). Each %v4f16 column is split
// into its (0,1) and (2,3) component pairs, each pair is bitcast to a %u32, and the eight resulting
// words are stored through the access chain ${var} -> %c_u32_0 -> param2 -> word index (0..7).
const string storeM4x4F16AsUints =
    "%st_fn_${var} = OpFunction %void None %void_m4x4f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m4x4f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    // Extract the four v4f16 columns.
    "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
    "%st_fn_${var}_ex3 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 3\n"
    // Split every column into two v2f16 halves.
    "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
    "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
    "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
    "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
    "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
    "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
    "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
    "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
    // Reinterpret each half as one u32.
    "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
    "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
    "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
    "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
    "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
    "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
    "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
    "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
    // Destination pointers, two u32 slots per column.
    "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
    "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
    "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
    "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
    "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
    "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
    "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
    "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
    "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
    "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
    "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
    "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
    "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
752
753 template <typename T>
fillRandomScalars(de::Random & rnd,T minValue,T maxValue,void * dst,int numValues,int offset=0)754 static void fillRandomScalars(de::Random &rnd, T minValue, T maxValue, void *dst, int numValues, int offset = 0)
755 {
756 T *const typedPtr = (T *)dst;
757 for (int ndx = 0; ndx < numValues; ndx++)
758 typedPtr[offset + ndx] = de::randomScalar<T>(rnd, minValue, maxValue);
759 }
760
761 // Filter is a function that returns true if a value should pass, false otherwise.
762 template <typename T, typename FilterT>
fillRandomScalars(de::Random & rnd,T minValue,T maxValue,void * dst,int numValues,FilterT filter,int offset=0)763 static void fillRandomScalars(de::Random &rnd, T minValue, T maxValue, void *dst, int numValues, FilterT filter,
764 int offset = 0)
765 {
766 T *const typedPtr = (T *)dst;
767 T value;
768 for (int ndx = 0; ndx < numValues; ndx++)
769 {
770 do
771 value = de::randomScalar<T>(rnd, minValue, maxValue);
772 while (!filter(value));
773
774 typedPtr[offset + ndx] = value;
775 }
776 }
777
778 // Gets a 64-bit integer with a more logarithmic distribution
randomInt64LogDistributed(de::Random & rnd)779 int64_t randomInt64LogDistributed(de::Random &rnd)
780 {
781 int64_t val = rnd.getUint64();
782 val &= (1ull << rnd.getInt(1, 63)) - 1;
783 if (rnd.getBool())
784 val = -val;
785 return val;
786 }
787
fillRandomInt64sLogDistributed(de::Random & rnd,vector<int64_t> & dst,int numValues)788 static void fillRandomInt64sLogDistributed(de::Random &rnd, vector<int64_t> &dst, int numValues)
789 {
790 for (int ndx = 0; ndx < numValues; ndx++)
791 dst[ndx] = randomInt64LogDistributed(rnd);
792 }
793
794 template <typename FilterT>
fillRandomInt64sLogDistributed(de::Random & rnd,vector<int64_t> & dst,int numValues,FilterT filter)795 static void fillRandomInt64sLogDistributed(de::Random &rnd, vector<int64_t> &dst, int numValues, FilterT filter)
796 {
797 for (int ndx = 0; ndx < numValues; ndx++)
798 {
799 int64_t value;
800 do
801 {
802 value = randomInt64LogDistributed(rnd);
803 } while (!filter(value));
804 dst[ndx] = value;
805 }
806 }
807
// Filter predicate: accepts zero and every positive value.
inline bool filterNonNegative(const int64_t value)
{
    return !(value < 0);
}
812
// Filter predicate: accepts strictly positive values only (zero is rejected).
inline bool filterPositive(const int64_t value)
{
    return 0 < value;
}
817
// Filter predicate: accepts every value except zero.
inline bool filterNotZero(const int64_t value)
{
    return !(value == 0);
}
822
floorAll(vector<float> & values)823 static void floorAll(vector<float> &values)
824 {
825 for (size_t i = 0; i < values.size(); i++)
826 values[i] = deFloatFloor(values[i]);
827 }
828
floorAll(vector<Vec4> & values)829 static void floorAll(vector<Vec4> &values)
830 {
831 for (size_t i = 0; i < values.size(); i++)
832 values[i] = floor(values[i]);
833 }
834
// Pairs a test-case name with the string parameter for that case.
struct CaseParameter
{
    const char *name; // Case name; the pointer is stored as-is, so the pointed-to string must outlive this object.
    string param;     // Parameter text, stored as an owned copy.

    // case_:  case name (pointer is stored, not copied).
    // param_: parameter text (copied into `param`).
    CaseParameter(const char *case_, const string &param_) : name(case_), param(param_)
    {
    }
};
844
845 // Assembly code used for testing LocalSize, OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
846 //
847 // #version 430
848 //
849 // layout(std140, set = 0, binding = 0) readonly buffer Input {
850 // float elements[];
851 // } input_data;
852 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
853 // float elements[];
854 // } output_data;
855 //
856 // layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
857 //
858 // void main() {
859 // uint x = gl_GlobalInvocationID.x;
860 // output_data.elements[x] = -input_data.elements[x];
861 // }
862
// How a local-size value is supplied in the generated shader.
// The numeric values are significant: createLocalSizeGroup() produces them by casting loop counters 0..2.
enum LocalSizeValueType
{
    LSV_NONE       = 0, // Not specified at all.
    LSV_LITERAL    = 1, // Given as a literal / plain constant.
    LSV_SPEC_CONST = 2  // Given as a specialization constant.
};
869
getAsmForLocalSizeTest(bool useLocalSizeId,LocalSizeValueType execModeType,LocalSizeValueType workgroupSizeType,IVec3 workGroupSize,uint32_t ndx)870 static string getAsmForLocalSizeTest(bool useLocalSizeId, LocalSizeValueType execModeType,
871 LocalSizeValueType workgroupSizeType, IVec3 workGroupSize, uint32_t ndx)
872 {
873 std::ostringstream out;
874 out << "OpCapability Shader\n"
875 "OpMemoryModel Logical GLSL450\n";
876
877 // LocalsizeId tests require SPIR-V 1.5, so the interface specification is different
878 if (useLocalSizeId)
879 out << "OpEntryPoint GLCompute %main \"main\" %id %indata %outdata\n";
880 else
881 out << "OpEntryPoint GLCompute %main \"main\" %id\n";
882
883 // If using workgroup size then this overrides the execution mode, so use nonsense values.
884 IVec3 nonsense(9, 13, 106);
885 IVec3 execModeValue = (workgroupSizeType != LSV_NONE) ? nonsense : workGroupSize;
886
887 if (execModeType != LSV_NONE)
888 {
889 if (useLocalSizeId)
890 out << "OpExecutionModeId %main LocalSizeId %emv_0 %emv_1 %emv_2\n";
891 else
892 out << "OpExecutionMode %main LocalSize " << execModeValue.x() << " " << execModeValue.y() << " "
893 << execModeValue.z() << "\n";
894 }
895
896 out << "OpSource GLSL 430\n"
897 "OpName %main \"main\"\n"
898 "OpName %id \"gl_GlobalInvocationID\"\n"
899 "OpDecorate %id BuiltIn GlobalInvocationId\n";
900
901 if (execModeType == LSV_SPEC_CONST)
902 {
903 out << "OpDecorate %emv_0 SpecId 100\n"
904 "OpDecorate %emv_1 SpecId 101\n"
905 "OpDecorate %emv_2 SpecId 102\n";
906 }
907 if (workgroupSizeType == LSV_SPEC_CONST)
908 {
909 out << "OpDecorate %wgs_0 SpecId 200\n"
910 "OpDecorate %wgs_1 SpecId 201\n"
911 "OpDecorate %wgs_2 SpecId 202\n";
912 }
913
914 if (workgroupSizeType != LSV_NONE)
915 out << "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n";
916
917 // SPIR-V 1.0 uses Uniform/BufferBlock, 1.5 uses StorageBuffer/Block
918 string blockDec = useLocalSizeId ? "Block" : "BufferBlock";
919 string blockSC = useLocalSizeId ? "StorageBuffer" : "Uniform";
920 out << getComputeAsmInputOutputBufferTraits(blockDec) << getComputeAsmCommonTypes(blockSC)
921 << getComputeAsmInputOutputBuffer(blockSC);
922
923 assert(useLocalSizeId || execModeType != LSV_SPEC_CONST);
924 if (useLocalSizeId)
925 {
926 switch (execModeType)
927 {
928 case LSV_NONE: /* Do nothing */
929 break;
930 case LSV_LITERAL:
931 out << "%emv_0 = OpConstant %u32 " << execModeValue.x()
932 << "\n"
933 "%emv_1 = OpConstant %u32 "
934 << execModeValue.y()
935 << "\n"
936 "%emv_2 = OpConstant %u32 "
937 << execModeValue.z() << "\n";
938 break;
939 case LSV_SPEC_CONST:
940 out << "%emv_0 = OpSpecConstant %u32 " << execModeValue.x()
941 << "\n"
942 "%emv_1 = OpSpecConstant %u32 "
943 << execModeValue.y()
944 << "\n"
945 "%emv_2 = OpSpecConstant %u32 "
946 << execModeValue.z() << "\n";
947 break;
948 }
949 }
950
951 out << "%id = OpVariable %uvec3ptr Input\n"
952 "%zero = OpConstant %i32 0 \n";
953
954 switch (workgroupSizeType)
955 {
956 case LSV_NONE: /* Do nothing */
957 break;
958 case LSV_LITERAL:
959 out << "%wgs_0 = OpConstant %u32 " << workGroupSize.x()
960 << "\n"
961 "%wgs_1 = OpConstant %u32 "
962 << workGroupSize.y()
963 << "\n"
964 "%wgs_2 = OpConstant %u32 "
965 << workGroupSize.z()
966 << "\n"
967 "%gl_WorkGroupSize = OpConstantComposite %uvec3 %wgs_0 %wgs_1 %wgs_2\n";
968 break;
969 case LSV_SPEC_CONST:
970 out << "%wgs_0 = OpSpecConstant %u32 " << workGroupSize.x()
971 << "\n"
972 "%wgs_1 = OpSpecConstant %u32 "
973 << workGroupSize.y()
974 << "\n"
975 "%wgs_2 = OpSpecConstant %u32 "
976 << workGroupSize.z()
977 << "\n"
978 "%gl_WorkGroupSize = OpSpecConstantComposite %uvec3 %wgs_0 %wgs_1 %wgs_2\n";
979 break;
980 }
981
982 out << "%main = OpFunction %void None %voidf\n"
983 "%label = OpLabel\n"
984 "%idval = OpLoad %uvec3 %id\n"
985 "%ndx = OpCompositeExtract %u32 %idval "
986 << ndx
987 << "\n"
988
989 "%inloc = OpAccessChain %f32ptr %indata %zero %ndx\n"
990 "%inval = OpLoad %f32 %inloc\n"
991 "%neg = OpFNegate %f32 %inval\n"
992 "%outloc = OpAccessChain %f32ptr %outdata %zero %ndx\n"
993 " OpStore %outloc %neg\n"
994 " OpReturn\n"
995 " OpFunctionEnd\n";
996
997 return out.str();
998 }
999
localSizeModeToString(LocalSizeValueType t)1000 static string localSizeModeToString(LocalSizeValueType t)
1001 {
1002 switch (t)
1003 {
1004 case LSV_NONE:
1005 return "none";
1006 case LSV_LITERAL:
1007 return "literal";
1008 case LSV_SPEC_CONST:
1009 return "specid";
1010 default:
1011 assert(0);
1012 return "INVALID";
1013 }
1014 }
1015
createLocalSizeGroup(tcu::TestContext & testCtx,bool useLocalSizeId)1016 tcu::TestCaseGroup *createLocalSizeGroup(tcu::TestContext &testCtx, bool useLocalSizeId)
1017 {
1018 const char *groupName[]{"localsize", "localsize_id"};
1019
1020 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, groupName[useLocalSizeId]));
1021 ComputeShaderSpec spec;
1022 de::Random rnd(deStringHash(group->getName()));
1023 const uint32_t numElements = 64u;
1024 vector<float> positiveFloats(numElements, 0);
1025 vector<float> negativeFloats(numElements, 0);
1026
1027 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1028
1029 for (size_t ndx = 0; ndx < numElements; ++ndx)
1030 negativeFloats[ndx] = -positiveFloats[ndx];
1031
1032 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1033 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1034
1035 if (useLocalSizeId)
1036 {
1037 spec.spirvVersion = SPIRV_VERSION_1_5;
1038 spec.extensions.push_back("VK_KHR_maintenance4");
1039 }
1040
1041 struct testCase
1042 {
1043 std::string nameSuffix;
1044 IVec3 numWorkGroups;
1045 IVec3 localSize;
1046 uint32_t ndx;
1047 } cases[] = {{"", IVec3(numElements, 1, 1), IVec3(1, 1, 1), 0u},
1048 {"_x", IVec3(1, 1, 1), IVec3(numElements, 1, 1), 0u},
1049 {"_y", IVec3(1, 1, 1), IVec3(1, numElements, 1), 1u},
1050 {"_z", IVec3(1, 1, 1), IVec3(1, 1, numElements), 2u}};
1051
1052 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); i++)
1053 {
1054 for (int j = 0; j < 3; j++)
1055 {
1056 for (int k = 0; k < 3; k++)
1057 {
1058 LocalSizeValueType execModeType = (LocalSizeValueType)j;
1059 LocalSizeValueType wgSizeType = (LocalSizeValueType)k;
1060
1061 // Something has to specify the local size.
1062 if (execModeType == LSV_NONE && wgSizeType == LSV_NONE)
1063 continue;
1064 // Spec constants not allowed for LocalSize (must use the Id variant)
1065 if (execModeType == LSV_SPEC_CONST && !useLocalSizeId)
1066 continue;
1067
1068 string testName = localSizeModeToString(execModeType) + "_wgsize_" + localSizeModeToString(wgSizeType) +
1069 cases[i].nameSuffix;
1070
1071 spec.numWorkGroups = cases[i].numWorkGroups;
1072
1073 spec.assembly =
1074 getAsmForLocalSizeTest(useLocalSizeId, execModeType, wgSizeType, cases[i].localSize, cases[i].ndx);
1075 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
1076 }
1077 }
1078 }
1079
1080 return group.release();
1081 }
1082
createOpNopGroup(tcu::TestContext & testCtx)1083 tcu::TestCaseGroup *createOpNopGroup(tcu::TestContext &testCtx)
1084 {
1085 // Test the OpNop instruction
1086 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnop"));
1087 ComputeShaderSpec spec;
1088 de::Random rnd(deStringHash(group->getName()));
1089 const int numElements = 100;
1090 vector<float> positiveFloats(numElements, 0);
1091 vector<float> negativeFloats(numElements, 0);
1092
1093 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1094
1095 for (size_t ndx = 0; ndx < numElements; ++ndx)
1096 negativeFloats[ndx] = -positiveFloats[ndx];
1097
1098 spec.assembly = string(getComputeAsmShaderPreamble()) +
1099
1100 "OpSource GLSL 430\n"
1101 "OpName %main \"main\"\n"
1102 "OpName %id \"gl_GlobalInvocationID\"\n"
1103
1104 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1105
1106 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1107
1108 + string(getComputeAsmInputOutputBuffer()) +
1109
1110 "%id = OpVariable %uvec3ptr Input\n"
1111 "%zero = OpConstant %i32 0\n"
1112
1113 "%main = OpFunction %void None %voidf\n"
1114 "%label = OpLabel\n"
1115 "%idval = OpLoad %uvec3 %id\n"
1116 "%x = OpCompositeExtract %u32 %idval 0\n"
1117
1118 " OpNop\n" // Inside a function body
1119
1120 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1121 "%inval = OpLoad %f32 %inloc\n"
1122 "%neg = OpFNegate %f32 %inval\n"
1123 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1124 " OpStore %outloc %neg\n"
1125 " OpReturn\n"
1126 " OpFunctionEnd\n";
1127 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1128 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1129 spec.numWorkGroups = IVec3(numElements, 1, 1);
1130
1131 // OpNop appearing at different places
1132 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
1133
1134 return group.release();
1135 }
1136
createUnusedVariableComputeTests(tcu::TestContext & testCtx)1137 tcu::TestCaseGroup *createUnusedVariableComputeTests(tcu::TestContext &testCtx)
1138 {
1139 // Compute shaders with unused variables
1140 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "unused_variables"));
1141 de::Random rnd(deStringHash(group->getName()));
1142 const int numElements = 100;
1143 vector<float> positiveFloats(numElements, 0);
1144 vector<float> negativeFloats(numElements, 0);
1145
1146 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1147
1148 for (size_t ndx = 0; ndx < numElements; ++ndx)
1149 negativeFloats[ndx] = -positiveFloats[ndx];
1150
1151 const VariableLocation testLocations[] = {
1152 // Set Binding
1153 {0, 5},
1154 {5, 5},
1155 };
1156
1157 for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
1158 {
1159 const VariableLocation &location = testLocations[locationNdx];
1160
1161 // Unused variable.
1162 {
1163 ComputeShaderSpec spec;
1164
1165 spec.assembly = string(getComputeAsmShaderPreamble()) +
1166
1167 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1168
1169 + getUnusedDecorations(location)
1170
1171 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1172
1173 + getUnusedTypesAndConstants()
1174
1175 + string(getComputeAsmInputOutputBuffer())
1176
1177 + getUnusedBuffer() +
1178
1179 "%id = OpVariable %uvec3ptr Input\n"
1180 "%zero = OpConstant %i32 0\n"
1181
1182 "%main = OpFunction %void None %voidf\n"
1183 "%label = OpLabel\n"
1184 "%idval = OpLoad %uvec3 %id\n"
1185 "%x = OpCompositeExtract %u32 %idval 0\n"
1186
1187 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1188 "%inval = OpLoad %f32 %inloc\n"
1189 "%neg = OpFNegate %f32 %inval\n"
1190 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1191 " OpStore %outloc %neg\n"
1192 " OpReturn\n"
1193 " OpFunctionEnd\n";
1194 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1195 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1196 spec.numWorkGroups = IVec3(numElements, 1, 1);
1197
1198 std::string testName = "variable_" + location.toString();
1199
1200 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
1201 }
1202
1203 // Unused function.
1204 {
1205 ComputeShaderSpec spec;
1206
1207 spec.assembly = string(getComputeAsmShaderPreamble("", "", "", getUnusedEntryPoint())) +
1208
1209 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1210
1211 + getUnusedDecorations(location)
1212
1213 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1214
1215 + getUnusedTypesAndConstants() +
1216
1217 "%c_i32_0 = OpConstant %i32 0\n"
1218 "%c_i32_1 = OpConstant %i32 1\n"
1219
1220 + string(getComputeAsmInputOutputBuffer())
1221
1222 + getUnusedBuffer() +
1223
1224 "%id = OpVariable %uvec3ptr Input\n"
1225 "%zero = OpConstant %i32 0\n"
1226
1227 "%main = OpFunction %void None %voidf\n"
1228 "%label = OpLabel\n"
1229 "%idval = OpLoad %uvec3 %id\n"
1230 "%x = OpCompositeExtract %u32 %idval 0\n"
1231
1232 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1233 "%inval = OpLoad %f32 %inloc\n"
1234 "%neg = OpFNegate %f32 %inval\n"
1235 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1236 " OpStore %outloc %neg\n"
1237 " OpReturn\n"
1238 " OpFunctionEnd\n"
1239
1240 + getUnusedFunctionBody();
1241
1242 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1243 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1244 spec.numWorkGroups = IVec3(numElements, 1, 1);
1245
1246 std::string testName = "function_" + location.toString();
1247
1248 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
1249 }
1250 }
1251
1252 return group.release();
1253 }
1254
1255 template <bool nanSupported>
compareFUnord(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)1256 bool compareFUnord(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
1257 const std::vector<Resource> &expectedOutputs, TestLog &log)
1258 {
1259 if (outputAllocs.size() != 1)
1260 return false;
1261
1262 vector<uint8_t> input1Bytes;
1263 vector<uint8_t> input2Bytes;
1264 vector<uint8_t> expectedBytes;
1265
1266 inputs[0].getBytes(input1Bytes);
1267 inputs[1].getBytes(input2Bytes);
1268 expectedOutputs[0].getBytes(expectedBytes);
1269
1270 const int32_t *const expectedOutputAsInt = reinterpret_cast<const int32_t *>(&expectedBytes.front());
1271 const int32_t *const outputAsInt = static_cast<const int32_t *>(outputAllocs[0]->getHostPtr());
1272 const float *const input1AsFloat = reinterpret_cast<const float *>(&input1Bytes.front());
1273 const float *const input2AsFloat = reinterpret_cast<const float *>(&input2Bytes.front());
1274 bool returnValue = true;
1275
1276 for (size_t idx = 0; idx < expectedBytes.size() / sizeof(int32_t); ++idx)
1277 {
1278 if (!nanSupported && (tcu::Float32(input1AsFloat[idx]).isNaN() || tcu::Float32(input2AsFloat[idx]).isNaN()))
1279 continue;
1280
1281 if (outputAsInt[idx] != expectedOutputAsInt[idx])
1282 {
1283 log << TestLog::Message << "ERROR: Sub-case failed. inputs: " << input1AsFloat[idx] << ","
1284 << input2AsFloat[idx] << " output: " << outputAsInt[idx]
1285 << " expected output: " << expectedOutputAsInt[idx] << TestLog::EndMessage;
1286 returnValue = false;
1287 }
1288 }
1289 return returnValue;
1290 }
1291
1292 typedef VkBool32 (*compareFuncType)(float, float);
1293
1294 struct OpFUnordCase
1295 {
1296 const char *name;
1297 const char *opCode;
1298 compareFuncType compareFunc;
1299
OpFUnordCasevkt::SpirVAssembly::__anon8834af5b0111::OpFUnordCase1300 OpFUnordCase(const char *_name, const char *_opCode, compareFuncType _compareFunc)
1301 : name(_name)
1302 , opCode(_opCode)
1303 , compareFunc(_compareFunc)
1304 {
1305 }
1306 };
1307
// Appends one OpFUnordCase to a container named `cases`, which must be in scope at the expansion site.
//   NAME     - identifier; stringized into the case name and pasted into a local struct name.
//   OPCODE   - opcode string passed through to OpFUnordCase.
//   OPERATOR - C++ comparison operator token; the generated reference function returns VK_TRUE
//              when (x OPERATOR y) holds and VK_FALSE otherwise.
// A local struct with a static member function is used to obtain a plain function pointer;
// the do { } while (false) wrapper makes the expansion behave as a single statement.
#define ADD_OPFUNORD_CASE(NAME, OPCODE, OPERATOR)                              \
    do                                                                         \
    {                                                                          \
        struct compare_##NAME                                                  \
        {                                                                      \
            static VkBool32 compare(float x, float y)                          \
            {                                                                  \
                return (x OPERATOR y) ? VK_TRUE : VK_FALSE;                    \
            }                                                                  \
        };                                                                     \
        cases.push_back(OpFUnordCase(#NAME, OPCODE, compare_##NAME::compare)); \
    } while (false)
1320
createOpFUnordGroup(tcu::TestContext & testCtx,const bool testWithNan)1321 tcu::TestCaseGroup *createOpFUnordGroup(tcu::TestContext &testCtx, const bool testWithNan)
1322 {
1323 const string nan = testWithNan ? "_nan" : "";
1324 const string groupName = "opfunord" + nan;
1325 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, groupName.c_str()));
1326 de::Random rnd(deStringHash(group->getName()));
1327 const int numElements = 100;
1328 vector<OpFUnordCase> cases;
1329 string extensions = testWithNan ? "OpExtension \"SPV_KHR_float_controls\"\n" : "";
1330 string capabilities = testWithNan ? "OpCapability SignedZeroInfNanPreserve\n" : "";
1331 string exeModes = testWithNan ? "OpExecutionMode %main SignedZeroInfNanPreserve 32\n" : "";
1332 const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble(capabilities, extensions, exeModes)) +
1333 "OpSource GLSL 430\n"
1334 "OpName %main \"main\"\n"
1335 "OpName %id \"gl_GlobalInvocationID\"\n"
1336
1337 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1338
1339 "OpDecorate %buf BufferBlock\n"
1340 "OpDecorate %buf2 BufferBlock\n"
1341 "OpDecorate %indata1 DescriptorSet 0\n"
1342 "OpDecorate %indata1 Binding 0\n"
1343 "OpDecorate %indata2 DescriptorSet 0\n"
1344 "OpDecorate %indata2 Binding 1\n"
1345 "OpDecorate %outdata DescriptorSet 0\n"
1346 "OpDecorate %outdata Binding 2\n"
1347 "OpDecorate %f32arr ArrayStride 4\n"
1348 "OpDecorate %i32arr ArrayStride 4\n"
1349 "OpMemberDecorate %buf 0 Offset 0\n"
1350 "OpMemberDecorate %buf2 0 Offset 0\n"
1351
1352 + string(getComputeAsmCommonTypes()) +
1353
1354 "%buf = OpTypeStruct %f32arr\n"
1355 "%bufptr = OpTypePointer Uniform %buf\n"
1356 "%indata1 = OpVariable %bufptr Uniform\n"
1357 "%indata2 = OpVariable %bufptr Uniform\n"
1358
1359 "%buf2 = OpTypeStruct %i32arr\n"
1360 "%buf2ptr = OpTypePointer Uniform %buf2\n"
1361 "%outdata = OpVariable %buf2ptr Uniform\n"
1362
1363 "%id = OpVariable %uvec3ptr Input\n"
1364 "%zero = OpConstant %i32 0\n"
1365 "%consti1 = OpConstant %i32 1\n"
1366 "%constf1 = OpConstant %f32 1.0\n"
1367
1368 "%main = OpFunction %void None %voidf\n"
1369 "%label = OpLabel\n"
1370 "%idval = OpLoad %uvec3 %id\n"
1371 "%x = OpCompositeExtract %u32 %idval 0\n"
1372
1373 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
1374 "%inval1 = OpLoad %f32 %inloc1\n"
1375 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
1376 "%inval2 = OpLoad %f32 %inloc2\n"
1377 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
1378
1379 "%result = ${OPCODE} %bool %inval1 %inval2\n"
1380 "%int_res = OpSelect %i32 %result %consti1 %zero\n"
1381 " OpStore %outloc %int_res\n"
1382
1383 " OpReturn\n"
1384 " OpFunctionEnd\n");
1385
1386 ADD_OPFUNORD_CASE(equal, "OpFUnordEqual", ==);
1387 ADD_OPFUNORD_CASE(less, "OpFUnordLessThan", <);
1388 ADD_OPFUNORD_CASE(lessequal, "OpFUnordLessThanEqual", <=);
1389 ADD_OPFUNORD_CASE(greater, "OpFUnordGreaterThan", >);
1390 ADD_OPFUNORD_CASE(greaterequal, "OpFUnordGreaterThanEqual", >=);
1391 ADD_OPFUNORD_CASE(notequal, "OpFUnordNotEqual", !=);
1392
1393 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1394 {
1395 map<string, string> specializations;
1396 ComputeShaderSpec spec;
1397 const float NaN = std::numeric_limits<float>::quiet_NaN();
1398 vector<float> inputFloats1(numElements, 0);
1399 vector<float> inputFloats2(numElements, 0);
1400 vector<int32_t> expectedInts(numElements, 0);
1401
1402 specializations["OPCODE"] = cases[caseNdx].opCode;
1403 spec.assembly = shaderTemplate.specialize(specializations);
1404
1405 fillRandomScalars(rnd, 1.f, 100.f, &inputFloats1[0], numElements);
1406 for (size_t ndx = 0; ndx < numElements; ++ndx)
1407 {
1408 switch (ndx % 6)
1409 {
1410 case 0:
1411 inputFloats2[ndx] = inputFloats1[ndx] + 1.0f;
1412 break;
1413 case 1:
1414 inputFloats2[ndx] = inputFloats1[ndx] - 1.0f;
1415 break;
1416 case 2:
1417 inputFloats2[ndx] = inputFloats1[ndx];
1418 break;
1419 case 3:
1420 inputFloats2[ndx] = NaN;
1421 break;
1422 case 4:
1423 inputFloats2[ndx] = inputFloats1[ndx];
1424 inputFloats1[ndx] = NaN;
1425 break;
1426 case 5:
1427 inputFloats2[ndx] = NaN;
1428 inputFloats1[ndx] = NaN;
1429 break;
1430 }
1431 expectedInts[ndx] = tcu::Float32(inputFloats1[ndx]).isNaN() || tcu::Float32(inputFloats2[ndx]).isNaN() ||
1432 cases[caseNdx].compareFunc(inputFloats1[ndx], inputFloats2[ndx]);
1433 }
1434
1435 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1436 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1437 spec.outputs.push_back(BufferSp(new Int32Buffer(expectedInts)));
1438 spec.numWorkGroups = IVec3(numElements, 1, 1);
1439 spec.verifyIO = testWithNan ? &compareFUnord<true> : &compareFUnord<false>;
1440
1441 if (testWithNan)
1442 {
1443 spec.extensions.push_back("VK_KHR_shader_float_controls");
1444 spec.requestedVulkanFeatures.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat32 = true;
1445 }
1446
1447 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
1448 }
1449
1450 return group.release();
1451 }
1452
1453 struct OpAtomicCase
1454 {
1455 const char *name;
1456 const char *assembly;
1457 const char *retValAssembly;
1458 OpAtomicType opAtomic;
1459 int32_t numOutputElements;
1460
OpAtomicCasevkt::SpirVAssembly::__anon8834af5b0111::OpAtomicCase1461 OpAtomicCase(const char *_name, const char *_assembly, const char *_retValAssembly, OpAtomicType _opAtomic,
1462 int32_t _numOutputElements)
1463 : name(_name)
1464 , assembly(_assembly)
1465 , retValAssembly(_retValAssembly)
1466 , opAtomic(_opAtomic)
1467 , numOutputElements(_numOutputElements)
1468 {
1469 }
1470 };
1471
createOpAtomicGroup(tcu::TestContext & testCtx,bool useStorageBuffer,int numElements=65535,bool verifyReturnValues=false,bool volatileAtomic=false)1472 tcu::TestCaseGroup *createOpAtomicGroup(tcu::TestContext &testCtx, bool useStorageBuffer, int numElements = 65535,
1473 bool verifyReturnValues = false, bool volatileAtomic = false)
1474 {
1475 std::string groupName("opatomic");
1476 if (useStorageBuffer)
1477 groupName += "_storage_buffer";
1478 if (verifyReturnValues)
1479 groupName += "_return_values";
1480 if (volatileAtomic)
1481 groupName += "_volatile";
1482 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, groupName.c_str()));
1483 vector<OpAtomicCase> cases;
1484
1485 const StringTemplate shaderTemplate(
1486
1487 string("OpCapability Shader\n") + (volatileAtomic ? "OpCapability VulkanMemoryModelKHR\n" : "") +
1488 (useStorageBuffer ? "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n" : "") +
1489 (volatileAtomic ? "OpExtension \"SPV_KHR_vulkan_memory_model\"\n" : "") +
1490 (volatileAtomic ? "OpMemoryModel Logical VulkanKHR\n" : "OpMemoryModel Logical GLSL450\n") +
1491 "OpEntryPoint GLCompute %main \"main\" %id\n"
1492 "OpExecutionMode %main LocalSize 1 1 1\n" +
1493
1494 "OpSource GLSL 430\n"
1495 "OpName %main \"main\"\n"
1496 "OpName %id \"gl_GlobalInvocationID\"\n"
1497
1498 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1499
1500 "OpDecorate %buf ${BLOCK_DECORATION}\n"
1501 "OpDecorate %indata DescriptorSet 0\n"
1502 "OpDecorate %indata Binding 0\n"
1503 "OpDecorate %i32arr ArrayStride 4\n"
1504 "OpMemberDecorate %buf 0 Offset 0\n"
1505
1506 "OpDecorate %sumbuf ${BLOCK_DECORATION}\n"
1507 "OpDecorate %sum DescriptorSet 0\n"
1508 "OpDecorate %sum Binding 1\n"
1509 "OpMemberDecorate %sumbuf 0 Offset 0\n"
1510
1511 "${RETVAL_BUF_DECORATE}"
1512
1513 + getComputeAsmCommonTypes("${BLOCK_POINTER_TYPE}") +
1514
1515 "%buf = OpTypeStruct %i32arr\n"
1516 "%bufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %buf\n"
1517 "%indata = OpVariable %bufptr ${BLOCK_POINTER_TYPE}\n"
1518
1519 "%sumbuf = OpTypeStruct %i32arr\n"
1520 "%sumbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %sumbuf\n"
1521 "%sum = OpVariable %sumbufptr ${BLOCK_POINTER_TYPE}\n"
1522
1523 "${RETVAL_BUF_DECL}"
1524
1525 "%id = OpVariable %uvec3ptr Input\n"
1526 "%minusone = OpConstant %i32 -1\n"
1527 "%zero = OpConstant %i32 0\n"
1528 "%one = OpConstant %u32 1\n"
1529 "%two = OpConstant %i32 2\n"
1530 "%five = OpConstant %i32 5\n"
1531 "%volbit = OpConstant %i32 32768\n"
1532
1533 "%main = OpFunction %void None %voidf\n"
1534 "%label = OpLabel\n"
1535 "%idval = OpLoad %uvec3 %id\n"
1536 "%x = OpCompositeExtract %u32 %idval 0\n"
1537
1538 "%inloc = OpAccessChain %i32ptr %indata %zero %x\n"
1539 "%inval = OpLoad %i32 %inloc\n"
1540
1541 "%outloc = OpAccessChain %i32ptr %sum %zero ${INDEX}\n"
1542 "${INSTRUCTION}"
1543 "${RETVAL_ASSEMBLY}"
1544
1545 " OpReturn\n"
1546 " OpFunctionEnd\n");
1547
1548 #define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS) \
1549 do \
1550 { \
1551 cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS)); \
1552 } while (false)
1553 #define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) \
1554 ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, 1)
1555 #define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) \
1556 ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, numElements)
1557
1558 ADD_OPATOMIC_CASE_1(iadd, "%retv = OpAtomicIAdd %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1559 " OpStore %retloc %retv\n", OPATOMIC_IADD);
1560 ADD_OPATOMIC_CASE_1(isub, "%retv = OpAtomicISub %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1561 " OpStore %retloc %retv\n", OPATOMIC_ISUB);
1562 ADD_OPATOMIC_CASE_1(iinc, "%retv = OpAtomicIIncrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1563 " OpStore %retloc %retv\n", OPATOMIC_IINC);
1564 ADD_OPATOMIC_CASE_1(idec, "%retv = OpAtomicIDecrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1565 " OpStore %retloc %retv\n", OPATOMIC_IDEC);
1566 if (!verifyReturnValues)
1567 {
1568 ADD_OPATOMIC_CASE_N(load,
1569 "%inval2 = OpAtomicLoad %i32 %inloc ${SCOPE} ${SEMANTICS}\n"
1570 " OpStore %outloc %inval2\n",
1571 "", OPATOMIC_LOAD);
1572 ADD_OPATOMIC_CASE_N(store, " OpAtomicStore %outloc ${SCOPE} ${SEMANTICS} %inval\n", "",
1573 OPATOMIC_STORE);
1574 }
1575
1576 ADD_OPATOMIC_CASE_N(
1577 compex,
1578 "%even = OpSMod %i32 %inval %two\n"
1579 " OpStore %outloc %even\n"
1580 "%retv = OpAtomicCompareExchange %i32 %outloc ${SCOPE} ${SEMANTICS} ${SEMANTICS} %minusone %zero\n",
1581 " OpStore %retloc %retv\n", OPATOMIC_COMPEX);
1582
1583 #undef ADD_OPATOMIC_CASE
1584 #undef ADD_OPATOMIC_CASE_1
1585 #undef ADD_OPATOMIC_CASE_N
1586
1587 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1588 {
1589 map<string, string> specializations;
1590 ComputeShaderSpec spec;
1591 vector<int32_t> inputInts(numElements, 0);
1592 vector<int32_t> expected(cases[caseNdx].numOutputElements, -1);
1593
1594 if (volatileAtomic)
1595 {
1596 spec.extensions.push_back("VK_KHR_vulkan_memory_model");
1597 spec.requestedVulkanFeatures.extVulkanMemoryModel.vulkanMemoryModel = true;
1598
1599 // volatile, queuefamily scope
1600 specializations["SEMANTICS"] = "%volbit";
1601 specializations["SCOPE"] = "%five";
1602 }
1603 else
1604 {
1605 // non-volatile, device scope
1606 specializations["SEMANTICS"] = "%zero";
1607 specializations["SCOPE"] = "%one";
1608 }
1609 specializations["INDEX"] = (cases[caseNdx].numOutputElements == 1) ? "%zero" : "%x";
1610 specializations["INSTRUCTION"] = cases[caseNdx].assembly;
1611 specializations["BLOCK_DECORATION"] = useStorageBuffer ? "Block" : "BufferBlock";
1612 specializations["BLOCK_POINTER_TYPE"] = useStorageBuffer ? "StorageBuffer" : "Uniform";
1613
1614 if (verifyReturnValues)
1615 {
1616 const StringTemplate blockDecoration("\n"
1617 "OpDecorate %retbuf ${BLOCK_DECORATION}\n"
1618 "OpDecorate %ret DescriptorSet 0\n"
1619 "OpDecorate %ret Binding 2\n"
1620 "OpMemberDecorate %retbuf 0 Offset 0\n\n");
1621
1622 const StringTemplate blockDeclaration("\n"
1623 "%retbuf = OpTypeStruct %i32arr\n"
1624 "%retbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %retbuf\n"
1625 "%ret = OpVariable %retbufptr ${BLOCK_POINTER_TYPE}\n\n");
1626
1627 specializations["RETVAL_ASSEMBLY"] =
1628 "%retloc = OpAccessChain %i32ptr %ret %zero %x\n" + std::string(cases[caseNdx].retValAssembly);
1629
1630 specializations["RETVAL_BUF_DECORATE"] = blockDecoration.specialize(specializations);
1631 specializations["RETVAL_BUF_DECL"] = blockDeclaration.specialize(specializations);
1632 }
1633 else
1634 {
1635 specializations["RETVAL_ASSEMBLY"] = "";
1636 specializations["RETVAL_BUF_DECORATE"] = "";
1637 specializations["RETVAL_BUF_DECL"] = "";
1638 }
1639
1640 spec.assembly = shaderTemplate.specialize(specializations);
1641
1642 // Specialize one more time, to catch things that were in a template parameter
1643 const StringTemplate assemblyTemplate(spec.assembly);
1644 spec.assembly = assemblyTemplate.specialize(specializations);
1645
1646 if (useStorageBuffer)
1647 spec.extensions.push_back("VK_KHR_storage_buffer_storage_class");
1648
1649 spec.inputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements,
1650 cases[caseNdx].opAtomic, BUFFERTYPE_INPUT)));
1651 spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements,
1652 cases[caseNdx].opAtomic, BUFFERTYPE_EXPECTED)));
1653 if (verifyReturnValues)
1654 spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements,
1655 cases[caseNdx].opAtomic, BUFFERTYPE_ATOMIC_RET)));
1656 spec.numWorkGroups = IVec3(numElements, 1, 1);
1657
1658 if (verifyReturnValues)
1659 {
1660 switch (cases[caseNdx].opAtomic)
1661 {
1662 case OPATOMIC_IADD:
1663 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IADD>;
1664 break;
1665 case OPATOMIC_ISUB:
1666 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_ISUB>;
1667 break;
1668 case OPATOMIC_IINC:
1669 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IINC>;
1670 break;
1671 case OPATOMIC_IDEC:
1672 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IDEC>;
1673 break;
1674 case OPATOMIC_COMPEX:
1675 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_COMPEX>;
1676 break;
1677 default:
1678 DE_FATAL("Unsupported OpAtomic type for return value verification");
1679 }
1680 }
1681 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
1682 }
1683
1684 return group.release();
1685 }
1686
createOpLineGroup(tcu::TestContext & testCtx)1687 tcu::TestCaseGroup *createOpLineGroup(tcu::TestContext &testCtx)
1688 {
1689 // Test the OpLine instruction
1690 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opline"));
1691 ComputeShaderSpec spec;
1692 de::Random rnd(deStringHash(group->getName()));
1693 const int numElements = 100;
1694 vector<float> positiveFloats(numElements, 0);
1695 vector<float> negativeFloats(numElements, 0);
1696
1697 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1698
1699 for (size_t ndx = 0; ndx < numElements; ++ndx)
1700 negativeFloats[ndx] = -positiveFloats[ndx];
1701
1702 spec.assembly = string(getComputeAsmShaderPreamble()) +
1703
1704 "%fname1 = OpString \"negateInputs.comp\"\n"
1705 "%fname2 = OpString \"negateInputs\"\n"
1706
1707 "OpSource GLSL 430\n"
1708 "OpName %main \"main\"\n"
1709 "OpName %id \"gl_GlobalInvocationID\"\n"
1710
1711 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1712
1713 + string(getComputeAsmInputOutputBufferTraits()) +
1714
1715 "OpLine %fname1 0 0\n" // At the earliest possible position
1716
1717 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1718
1719 "OpLine %fname1 0 1\n" // Multiple OpLines in sequence
1720 "OpLine %fname2 1 0\n" // Different filenames
1721 "OpLine %fname1 1000 100000\n"
1722
1723 "%id = OpVariable %uvec3ptr Input\n"
1724 "%zero = OpConstant %i32 0\n"
1725
1726 "OpLine %fname1 1 1\n" // Before a function
1727
1728 "%main = OpFunction %void None %voidf\n"
1729 "%label = OpLabel\n"
1730
1731 "OpLine %fname1 1 1\n" // In a function
1732
1733 "%idval = OpLoad %uvec3 %id\n"
1734 "%x = OpCompositeExtract %u32 %idval 0\n"
1735 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1736 "%inval = OpLoad %f32 %inloc\n"
1737 "%neg = OpFNegate %f32 %inval\n"
1738 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1739 " OpStore %outloc %neg\n"
1740 " OpReturn\n"
1741 " OpFunctionEnd\n";
1742 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1743 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1744 spec.numWorkGroups = IVec3(numElements, 1, 1);
1745
1746 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
1747
1748 return group.release();
1749 }
1750
veryfiBinaryShader(const ProgramBinary & binary)1751 bool veryfiBinaryShader(const ProgramBinary &binary)
1752 {
1753 const size_t paternCount = 3u;
1754 bool paternsCheck[paternCount] = {false, false, false};
1755 const string patersns[paternCount] = {"VULKAN CTS", "Negative values", "Date: 2017/09/21"};
1756 size_t paternNdx = 0u;
1757
1758 for (size_t ndx = 0u; ndx < binary.getSize(); ++ndx)
1759 {
1760 if (false == paternsCheck[paternNdx] && patersns[paternNdx][0] == static_cast<char>(binary.getBinary()[ndx]) &&
1761 deMemoryEqual((const char *)&binary.getBinary()[ndx], &patersns[paternNdx][0],
1762 patersns[paternNdx].length()))
1763 {
1764 paternsCheck[paternNdx] = true;
1765 paternNdx++;
1766 if (paternNdx == paternCount)
1767 break;
1768 }
1769 }
1770
1771 for (size_t ndx = 0u; ndx < paternCount; ++ndx)
1772 {
1773 if (!paternsCheck[ndx])
1774 return false;
1775 }
1776
1777 return true;
1778 }
1779
createOpModuleProcessedGroup(tcu::TestContext & testCtx)1780 tcu::TestCaseGroup *createOpModuleProcessedGroup(tcu::TestContext &testCtx)
1781 {
1782 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opmoduleprocessed"));
1783 ComputeShaderSpec spec;
1784 de::Random rnd(deStringHash(group->getName()));
1785 const int numElements = 10;
1786 vector<float> positiveFloats(numElements, 0);
1787 vector<float> negativeFloats(numElements, 0);
1788
1789 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1790
1791 for (size_t ndx = 0; ndx < numElements; ++ndx)
1792 negativeFloats[ndx] = -positiveFloats[ndx];
1793
1794 spec.assembly = string(getComputeAsmShaderPreamble()) +
1795 "%fname = OpString \"negateInputs.comp\"\n"
1796
1797 "OpSource GLSL 430\n"
1798 "OpName %main \"main\"\n"
1799 "OpName %id \"gl_GlobalInvocationID\"\n"
1800 "OpModuleProcessed \"VULKAN CTS\"\n" //OpModuleProcessed;
1801 "OpModuleProcessed \"Negative values\"\n"
1802 "OpModuleProcessed \"Date: 2017/09/21\"\n"
1803 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1804
1805 + string(getComputeAsmInputOutputBufferTraits())
1806
1807 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1808
1809 "OpLine %fname 0 1\n"
1810
1811 "OpLine %fname 1000 1\n"
1812
1813 "%id = OpVariable %uvec3ptr Input\n"
1814 "%zero = OpConstant %i32 0\n"
1815 "%main = OpFunction %void None %voidf\n"
1816
1817 "%label = OpLabel\n"
1818 "%idval = OpLoad %uvec3 %id\n"
1819 "%x = OpCompositeExtract %u32 %idval 0\n"
1820
1821 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1822 "%inval = OpLoad %f32 %inloc\n"
1823 "%neg = OpFNegate %f32 %inval\n"
1824 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1825 " OpStore %outloc %neg\n"
1826 " OpReturn\n"
1827 " OpFunctionEnd\n";
1828 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1829 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1830 spec.numWorkGroups = IVec3(numElements, 1, 1);
1831 spec.verifyBinary = veryfiBinaryShader;
1832 spec.spirvVersion = SPIRV_VERSION_1_3;
1833
1834 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
1835
1836 return group.release();
1837 }
1838
createOpNoLineGroup(tcu::TestContext & testCtx)1839 tcu::TestCaseGroup *createOpNoLineGroup(tcu::TestContext &testCtx)
1840 {
1841 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnoline"));
1842 ComputeShaderSpec spec;
1843 de::Random rnd(deStringHash(group->getName()));
1844 const int numElements = 100;
1845 vector<float> positiveFloats(numElements, 0);
1846 vector<float> negativeFloats(numElements, 0);
1847
1848 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1849
1850 for (size_t ndx = 0; ndx < numElements; ++ndx)
1851 negativeFloats[ndx] = -positiveFloats[ndx];
1852
1853 spec.assembly = string(getComputeAsmShaderPreamble()) +
1854
1855 "%fname = OpString \"negateInputs.comp\"\n"
1856
1857 "OpSource GLSL 430\n"
1858 "OpName %main \"main\"\n"
1859 "OpName %id \"gl_GlobalInvocationID\"\n"
1860
1861 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1862
1863 + string(getComputeAsmInputOutputBufferTraits()) +
1864
1865 "OpNoLine\n" // At the earliest possible position, without preceding OpLine
1866
1867 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1868
1869 "OpLine %fname 0 1\n"
1870 "OpNoLine\n" // Immediately following a preceding OpLine
1871
1872 "OpLine %fname 1000 1\n"
1873
1874 "%id = OpVariable %uvec3ptr Input\n"
1875 "%zero = OpConstant %i32 0\n"
1876
1877 "OpNoLine\n" // Contents after the previous OpLine
1878
1879 "%main = OpFunction %void None %voidf\n"
1880 "%label = OpLabel\n"
1881 "%idval = OpLoad %uvec3 %id\n"
1882 "%x = OpCompositeExtract %u32 %idval 0\n"
1883
1884 "OpNoLine\n" // Multiple OpNoLine
1885 "OpNoLine\n"
1886 "OpNoLine\n"
1887
1888 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1889 "%inval = OpLoad %f32 %inloc\n"
1890 "%neg = OpFNegate %f32 %inval\n"
1891 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1892 " OpStore %outloc %neg\n"
1893 " OpReturn\n"
1894 " OpFunctionEnd\n";
1895 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1896 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1897 spec.numWorkGroups = IVec3(numElements, 1, 1);
1898
1899 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
1900
1901 return group.release();
1902 }
1903
1904 // Compare instruction for the contraction compute case.
1905 // Returns true if the output is what is expected from the test case.
compareNoContractCase(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)1906 bool compareNoContractCase(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
1907 const std::vector<Resource> &expectedOutputs, TestLog &)
1908 {
1909 if (outputAllocs.size() != 1)
1910 return false;
1911
1912 // Only size is needed because we are not comparing the exact values.
1913 size_t byteSize = expectedOutputs[0].getByteSize();
1914
1915 const float *outputAsFloat = static_cast<const float *>(outputAllocs[0]->getHostPtr());
1916
1917 for (size_t i = 0; i < byteSize / sizeof(float); ++i)
1918 {
1919 if (outputAsFloat[i] != 0.f && outputAsFloat[i] != -ldexp(1, -24))
1920 {
1921 return false;
1922 }
1923 }
1924
1925 return true;
1926 }
1927
createNoContractionGroup(tcu::TestContext & testCtx)1928 tcu::TestCaseGroup *createNoContractionGroup(tcu::TestContext &testCtx)
1929 {
1930 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "nocontraction"));
1931 vector<CaseParameter> cases;
1932 const int numElements = 100;
1933 vector<float> inputFloats1(numElements, 0);
1934 vector<float> inputFloats2(numElements, 0);
1935 vector<float> outputFloats(numElements, 0);
1936 const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
1937
1938 "OpName %main \"main\"\n"
1939 "OpName %id \"gl_GlobalInvocationID\"\n"
1940
1941 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1942
1943 "${DECORATION}\n"
1944
1945 "OpDecorate %buf BufferBlock\n"
1946 "OpDecorate %indata1 DescriptorSet 0\n"
1947 "OpDecorate %indata1 Binding 0\n"
1948 "OpDecorate %indata2 DescriptorSet 0\n"
1949 "OpDecorate %indata2 Binding 1\n"
1950 "OpDecorate %outdata DescriptorSet 0\n"
1951 "OpDecorate %outdata Binding 2\n"
1952 "OpDecorate %f32arr ArrayStride 4\n"
1953 "OpMemberDecorate %buf 0 Offset 0\n"
1954
1955 + string(getComputeAsmCommonTypes()) +
1956
1957 "%buf = OpTypeStruct %f32arr\n"
1958 "%bufptr = OpTypePointer Uniform %buf\n"
1959 "%indata1 = OpVariable %bufptr Uniform\n"
1960 "%indata2 = OpVariable %bufptr Uniform\n"
1961 "%outdata = OpVariable %bufptr Uniform\n"
1962
1963 "%id = OpVariable %uvec3ptr Input\n"
1964 "%zero = OpConstant %i32 0\n"
1965 "%c_f_m1 = OpConstant %f32 -1.\n"
1966
1967 "%main = OpFunction %void None %voidf\n"
1968 "%label = OpLabel\n"
1969 "%idval = OpLoad %uvec3 %id\n"
1970 "%x = OpCompositeExtract %u32 %idval 0\n"
1971 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
1972 "%inval1 = OpLoad %f32 %inloc1\n"
1973 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
1974 "%inval2 = OpLoad %f32 %inloc2\n"
1975 "%mul = OpFMul %f32 %inval1 %inval2\n"
1976 "%add = OpFAdd %f32 %mul %c_f_m1\n"
1977 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1978 " OpStore %outloc %add\n"
1979 " OpReturn\n"
1980 " OpFunctionEnd\n");
1981
1982 cases.push_back(CaseParameter("multiplication", "OpDecorate %mul NoContraction"));
1983 cases.push_back(CaseParameter("addition", "OpDecorate %add NoContraction"));
1984 cases.push_back(CaseParameter("both", "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"));
1985
1986 for (size_t ndx = 0; ndx < numElements; ++ndx)
1987 {
1988 inputFloats1[ndx] = 1.f + std::ldexp(1.f, -23); // 1 + 2^-23.
1989 inputFloats2[ndx] = 1.f - std::ldexp(1.f, -23); // 1 - 2^-23.
1990 // Result for (1 + 2^-23) * (1 - 2^-23) - 1. With NoContraction, the multiplication will be
1991 // conducted separately and the result is rounded to 1, or 0x1.fffffcp-1
1992 // So the final result will be 0.f or 0x1p-24.
1993 // If the operation is combined into a precise fused multiply-add, then the result would be
1994 // 2^-46 (0xa8800000).
1995 outputFloats[ndx] = 0.f;
1996 }
1997
1998 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1999 {
2000 map<string, string> specializations;
2001 ComputeShaderSpec spec;
2002
2003 specializations["DECORATION"] = cases[caseNdx].param;
2004 spec.assembly = shaderTemplate.specialize(specializations);
2005 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2006 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2007 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2008 spec.numWorkGroups = IVec3(numElements, 1, 1);
2009 // Check against the two possible answers based on rounding mode.
2010 spec.verifyIO = &compareNoContractCase;
2011
2012 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
2013 }
2014 return group.release();
2015 }
2016
compareFRem(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2017 bool compareFRem(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
2018 const std::vector<Resource> &expectedOutputs, TestLog &)
2019 {
2020 if (outputAllocs.size() != 1)
2021 return false;
2022
2023 vector<uint8_t> expectedBytes;
2024 expectedOutputs[0].getBytes(expectedBytes);
2025
2026 const float *expectedOutputAsFloat = reinterpret_cast<const float *>(&expectedBytes.front());
2027 const float *outputAsFloat = static_cast<const float *>(outputAllocs[0]->getHostPtr());
2028
2029 for (size_t idx = 0; idx < expectedBytes.size() / sizeof(float); ++idx)
2030 {
2031 const float f0 = expectedOutputAsFloat[idx];
2032 const float f1 = outputAsFloat[idx];
2033 // \todo relative error needs to be fairly high because FRem may be implemented as
2034 // (roughly) frac(a/b)*b, so LSB errors can be magnified. But this should be fine for now.
2035 if (deFloatAbs((f1 - f0) / f0) > 0.02)
2036 return false;
2037 }
2038
2039 return true;
2040 }
2041
createOpFRemGroup(tcu::TestContext & testCtx)2042 tcu::TestCaseGroup *createOpFRemGroup(tcu::TestContext &testCtx)
2043 {
2044 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opfrem"));
2045 ComputeShaderSpec spec;
2046 de::Random rnd(deStringHash(group->getName()));
2047 const int numElements = 200;
2048 vector<float> inputFloats1(numElements, 0);
2049 vector<float> inputFloats2(numElements, 0);
2050 vector<float> outputFloats(numElements, 0);
2051
2052 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2053 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats2[0], numElements);
2054
2055 for (size_t ndx = 0; ndx < numElements; ++ndx)
2056 {
2057 // Guard against divisors near zero.
2058 if (std::fabs(inputFloats2[ndx]) < 1e-3)
2059 inputFloats2[ndx] = 8.f;
2060
2061 // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2062 outputFloats[ndx] = std::fmod(inputFloats1[ndx], inputFloats2[ndx]);
2063 }
2064
2065 spec.assembly = string(getComputeAsmShaderPreamble()) +
2066
2067 "OpName %main \"main\"\n"
2068 "OpName %id \"gl_GlobalInvocationID\"\n"
2069
2070 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2071
2072 "OpDecorate %buf BufferBlock\n"
2073 "OpDecorate %indata1 DescriptorSet 0\n"
2074 "OpDecorate %indata1 Binding 0\n"
2075 "OpDecorate %indata2 DescriptorSet 0\n"
2076 "OpDecorate %indata2 Binding 1\n"
2077 "OpDecorate %outdata DescriptorSet 0\n"
2078 "OpDecorate %outdata Binding 2\n"
2079 "OpDecorate %f32arr ArrayStride 4\n"
2080 "OpMemberDecorate %buf 0 Offset 0\n"
2081
2082 + string(getComputeAsmCommonTypes()) +
2083
2084 "%buf = OpTypeStruct %f32arr\n"
2085 "%bufptr = OpTypePointer Uniform %buf\n"
2086 "%indata1 = OpVariable %bufptr Uniform\n"
2087 "%indata2 = OpVariable %bufptr Uniform\n"
2088 "%outdata = OpVariable %bufptr Uniform\n"
2089
2090 "%id = OpVariable %uvec3ptr Input\n"
2091 "%zero = OpConstant %i32 0\n"
2092
2093 "%main = OpFunction %void None %voidf\n"
2094 "%label = OpLabel\n"
2095 "%idval = OpLoad %uvec3 %id\n"
2096 "%x = OpCompositeExtract %u32 %idval 0\n"
2097 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2098 "%inval1 = OpLoad %f32 %inloc1\n"
2099 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2100 "%inval2 = OpLoad %f32 %inloc2\n"
2101 "%rem = OpFRem %f32 %inval1 %inval2\n"
2102 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2103 " OpStore %outloc %rem\n"
2104 " OpReturn\n"
2105 " OpFunctionEnd\n";
2106
2107 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2108 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2109 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2110 spec.numWorkGroups = IVec3(numElements, 1, 1);
2111 spec.verifyIO = &compareFRem;
2112
2113 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
2114
2115 return group.release();
2116 }
2117
compareNMin(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2118 bool compareNMin(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
2119 const std::vector<Resource> &expectedOutputs, TestLog &)
2120 {
2121 if (outputAllocs.size() != 1)
2122 return false;
2123
2124 const BufferSp &expectedOutput(expectedOutputs[0].getBuffer());
2125 std::vector<uint8_t> data;
2126 expectedOutput->getBytes(data);
2127
2128 const float *const expectedOutputAsFloat = reinterpret_cast<const float *>(&data.front());
2129 const float *const outputAsFloat = static_cast<const float *>(outputAllocs[0]->getHostPtr());
2130
2131 for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2132 {
2133 const float f0 = expectedOutputAsFloat[idx];
2134 const float f1 = outputAsFloat[idx];
2135
2136 // For NMin, we accept NaN as output if both inputs were NaN.
2137 // Otherwise the NaN is the wrong choise, as on architectures that
2138 // do not handle NaN, those are huge values.
2139 if (!(tcu::Float32(f1).isNaN() && tcu::Float32(f0).isNaN()) && deFloatAbs(f1 - f0) > 0.00001f)
2140 return false;
2141 }
2142
2143 return true;
2144 }
2145
createOpNMinGroup(tcu::TestContext & testCtx)2146 tcu::TestCaseGroup *createOpNMinGroup(tcu::TestContext &testCtx)
2147 {
2148 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnmin"));
2149 ComputeShaderSpec spec;
2150 de::Random rnd(deStringHash(group->getName()));
2151 const int numElements = 200;
2152 vector<float> inputFloats1(numElements, 0);
2153 vector<float> inputFloats2(numElements, 0);
2154 vector<float> outputFloats(numElements, 0);
2155
2156 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2157 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2158
2159 // Make the first case a full-NAN case.
2160 inputFloats1[0] = TCU_NAN;
2161 inputFloats2[0] = TCU_NAN;
2162
2163 for (size_t ndx = 0; ndx < numElements; ++ndx)
2164 {
2165 // By default, pick the smallest
2166 outputFloats[ndx] = std::min(inputFloats1[ndx], inputFloats2[ndx]);
2167
2168 // Make half of the cases NaN cases
2169 if ((ndx & 1) == 0)
2170 {
2171 // Alternate between the NaN operand
2172 if ((ndx & 2) == 0)
2173 {
2174 outputFloats[ndx] = inputFloats2[ndx];
2175 inputFloats1[ndx] = TCU_NAN;
2176 }
2177 else
2178 {
2179 outputFloats[ndx] = inputFloats1[ndx];
2180 inputFloats2[ndx] = TCU_NAN;
2181 }
2182 }
2183 }
2184
2185 spec.assembly = "OpCapability Shader\n"
2186 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2187 "OpMemoryModel Logical GLSL450\n"
2188 "OpEntryPoint GLCompute %main \"main\" %id\n"
2189 "OpExecutionMode %main LocalSize 1 1 1\n"
2190
2191 "OpName %main \"main\"\n"
2192 "OpName %id \"gl_GlobalInvocationID\"\n"
2193
2194 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2195
2196 "OpDecorate %buf BufferBlock\n"
2197 "OpDecorate %indata1 DescriptorSet 0\n"
2198 "OpDecorate %indata1 Binding 0\n"
2199 "OpDecorate %indata2 DescriptorSet 0\n"
2200 "OpDecorate %indata2 Binding 1\n"
2201 "OpDecorate %outdata DescriptorSet 0\n"
2202 "OpDecorate %outdata Binding 2\n"
2203 "OpDecorate %f32arr ArrayStride 4\n"
2204 "OpMemberDecorate %buf 0 Offset 0\n"
2205
2206 + string(getComputeAsmCommonTypes()) +
2207
2208 "%buf = OpTypeStruct %f32arr\n"
2209 "%bufptr = OpTypePointer Uniform %buf\n"
2210 "%indata1 = OpVariable %bufptr Uniform\n"
2211 "%indata2 = OpVariable %bufptr Uniform\n"
2212 "%outdata = OpVariable %bufptr Uniform\n"
2213
2214 "%id = OpVariable %uvec3ptr Input\n"
2215 "%zero = OpConstant %i32 0\n"
2216
2217 "%main = OpFunction %void None %voidf\n"
2218 "%label = OpLabel\n"
2219 "%idval = OpLoad %uvec3 %id\n"
2220 "%x = OpCompositeExtract %u32 %idval 0\n"
2221 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2222 "%inval1 = OpLoad %f32 %inloc1\n"
2223 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2224 "%inval2 = OpLoad %f32 %inloc2\n"
2225 "%rem = OpExtInst %f32 %std450 NMin %inval1 %inval2\n"
2226 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2227 " OpStore %outloc %rem\n"
2228 " OpReturn\n"
2229 " OpFunctionEnd\n";
2230
2231 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2232 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2233 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2234 spec.numWorkGroups = IVec3(numElements, 1, 1);
2235 spec.verifyIO = &compareNMin;
2236
2237 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
2238
2239 return group.release();
2240 }
2241
compareNMax(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2242 bool compareNMax(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
2243 const std::vector<Resource> &expectedOutputs, TestLog &)
2244 {
2245 if (outputAllocs.size() != 1)
2246 return false;
2247
2248 const BufferSp &expectedOutput = expectedOutputs[0].getBuffer();
2249 std::vector<uint8_t> data;
2250 expectedOutput->getBytes(data);
2251
2252 const float *const expectedOutputAsFloat = reinterpret_cast<const float *>(&data.front());
2253 const float *const outputAsFloat = static_cast<const float *>(outputAllocs[0]->getHostPtr());
2254
2255 for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2256 {
2257 const float f0 = expectedOutputAsFloat[idx];
2258 const float f1 = outputAsFloat[idx];
2259
2260 // For NMax, NaN is considered acceptable result, since in
2261 // architectures that do not handle NaNs, those are huge values.
2262 if (!tcu::Float32(f1).isNaN() && deFloatAbs(f1 - f0) > 0.00001f)
2263 return false;
2264 }
2265
2266 return true;
2267 }
2268
createOpNMaxGroup(tcu::TestContext & testCtx)2269 tcu::TestCaseGroup *createOpNMaxGroup(tcu::TestContext &testCtx)
2270 {
2271 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnmax"));
2272 ComputeShaderSpec spec;
2273 de::Random rnd(deStringHash(group->getName()));
2274 const int numElements = 200;
2275 vector<float> inputFloats1(numElements, 0);
2276 vector<float> inputFloats2(numElements, 0);
2277 vector<float> outputFloats(numElements, 0);
2278
2279 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2280 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2281
2282 // Make the first case a full-NAN case.
2283 inputFloats1[0] = TCU_NAN;
2284 inputFloats2[0] = TCU_NAN;
2285
2286 for (size_t ndx = 0; ndx < numElements; ++ndx)
2287 {
2288 // By default, pick the biggest
2289 outputFloats[ndx] = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2290
2291 // Make half of the cases NaN cases
2292 if ((ndx & 1) == 0)
2293 {
2294 // Alternate between the NaN operand
2295 if ((ndx & 2) == 0)
2296 {
2297 outputFloats[ndx] = inputFloats2[ndx];
2298 inputFloats1[ndx] = TCU_NAN;
2299 }
2300 else
2301 {
2302 outputFloats[ndx] = inputFloats1[ndx];
2303 inputFloats2[ndx] = TCU_NAN;
2304 }
2305 }
2306 }
2307
2308 spec.assembly = "OpCapability Shader\n"
2309 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2310 "OpMemoryModel Logical GLSL450\n"
2311 "OpEntryPoint GLCompute %main \"main\" %id\n"
2312 "OpExecutionMode %main LocalSize 1 1 1\n"
2313
2314 "OpName %main \"main\"\n"
2315 "OpName %id \"gl_GlobalInvocationID\"\n"
2316
2317 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2318
2319 "OpDecorate %buf BufferBlock\n"
2320 "OpDecorate %indata1 DescriptorSet 0\n"
2321 "OpDecorate %indata1 Binding 0\n"
2322 "OpDecorate %indata2 DescriptorSet 0\n"
2323 "OpDecorate %indata2 Binding 1\n"
2324 "OpDecorate %outdata DescriptorSet 0\n"
2325 "OpDecorate %outdata Binding 2\n"
2326 "OpDecorate %f32arr ArrayStride 4\n"
2327 "OpMemberDecorate %buf 0 Offset 0\n"
2328
2329 + string(getComputeAsmCommonTypes()) +
2330
2331 "%buf = OpTypeStruct %f32arr\n"
2332 "%bufptr = OpTypePointer Uniform %buf\n"
2333 "%indata1 = OpVariable %bufptr Uniform\n"
2334 "%indata2 = OpVariable %bufptr Uniform\n"
2335 "%outdata = OpVariable %bufptr Uniform\n"
2336
2337 "%id = OpVariable %uvec3ptr Input\n"
2338 "%zero = OpConstant %i32 0\n"
2339
2340 "%main = OpFunction %void None %voidf\n"
2341 "%label = OpLabel\n"
2342 "%idval = OpLoad %uvec3 %id\n"
2343 "%x = OpCompositeExtract %u32 %idval 0\n"
2344 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2345 "%inval1 = OpLoad %f32 %inloc1\n"
2346 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2347 "%inval2 = OpLoad %f32 %inloc2\n"
2348 "%rem = OpExtInst %f32 %std450 NMax %inval1 %inval2\n"
2349 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2350 " OpStore %outloc %rem\n"
2351 " OpReturn\n"
2352 " OpFunctionEnd\n";
2353
2354 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2355 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2356 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2357 spec.numWorkGroups = IVec3(numElements, 1, 1);
2358 spec.verifyIO = &compareNMax;
2359
2360 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
2361
2362 return group.release();
2363 }
2364
compareNClamp(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2365 bool compareNClamp(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
2366 const std::vector<Resource> &expectedOutputs, TestLog &)
2367 {
2368 if (outputAllocs.size() != 1)
2369 return false;
2370
2371 const BufferSp &expectedOutput = expectedOutputs[0].getBuffer();
2372 std::vector<uint8_t> data;
2373 expectedOutput->getBytes(data);
2374
2375 const float *const expectedOutputAsFloat = reinterpret_cast<const float *>(&data.front());
2376 const float *const outputAsFloat = static_cast<const float *>(outputAllocs[0]->getHostPtr());
2377
2378 for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float) / 2; ++idx)
2379 {
2380 const float e0 = expectedOutputAsFloat[idx * 2];
2381 const float e1 = expectedOutputAsFloat[idx * 2 + 1];
2382 const float res = outputAsFloat[idx];
2383
2384 // For NClamp, we have two possible outcomes based on
2385 // whether NaNs are handled or not.
2386 // If either min or max value is NaN, the result is undefined,
2387 // so this test doesn't stress those. If the clamped value is
2388 // NaN, and NaNs are handled, the result is min; if NaNs are not
2389 // handled, they are big values that result in max.
2390 // If all three parameters are NaN, the result should be NaN.
2391 if (!((tcu::Float32(e0).isNaN() && tcu::Float32(res).isNaN()) || (deFloatAbs(e0 - res) < 0.00001f) ||
2392 (deFloatAbs(e1 - res) < 0.00001f)))
2393 return false;
2394 }
2395
2396 return true;
2397 }
2398
createOpNClampGroup(tcu::TestContext & testCtx)2399 tcu::TestCaseGroup *createOpNClampGroup(tcu::TestContext &testCtx)
2400 {
2401 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnclamp"));
2402 ComputeShaderSpec spec;
2403 de::Random rnd(deStringHash(group->getName()));
2404 const int numElements = 200;
2405 vector<float> inputFloats1(numElements, 0);
2406 vector<float> inputFloats2(numElements, 0);
2407 vector<float> inputFloats3(numElements, 0);
2408 vector<float> outputFloats(numElements * 2, 0);
2409
2410 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2411 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2412 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats3[0], numElements);
2413
2414 for (size_t ndx = 0; ndx < numElements; ++ndx)
2415 {
2416 // Results are only defined if max value is bigger than min value.
2417 if (inputFloats2[ndx] > inputFloats3[ndx])
2418 {
2419 float t = inputFloats2[ndx];
2420 inputFloats2[ndx] = inputFloats3[ndx];
2421 inputFloats3[ndx] = t;
2422 }
2423
2424 // By default, do the clamp, setting both possible answers
2425 float defaultRes = std::min(std::max(inputFloats1[ndx], inputFloats2[ndx]), inputFloats3[ndx]);
2426
2427 float maxResA = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2428 float maxResB = maxResA;
2429
2430 // Alternate between the NaN cases
2431 if (ndx & 1)
2432 {
2433 inputFloats1[ndx] = TCU_NAN;
2434 // If NaN is handled, the result should be same as the clamp minimum.
2435 // If NaN is not handled, the result should clamp to the clamp maximum.
2436 maxResA = inputFloats2[ndx];
2437 maxResB = inputFloats3[ndx];
2438 }
2439 else
2440 {
2441 // Not a NaN case - only one legal result.
2442 maxResA = defaultRes;
2443 maxResB = defaultRes;
2444 }
2445
2446 outputFloats[ndx * 2] = maxResA;
2447 outputFloats[ndx * 2 + 1] = maxResB;
2448 }
2449
2450 // Make the first case a full-NAN case.
2451 inputFloats1[0] = TCU_NAN;
2452 inputFloats2[0] = TCU_NAN;
2453 inputFloats3[0] = TCU_NAN;
2454 outputFloats[0] = TCU_NAN;
2455 outputFloats[1] = TCU_NAN;
2456
2457 spec.assembly = "OpCapability Shader\n"
2458 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2459 "OpMemoryModel Logical GLSL450\n"
2460 "OpEntryPoint GLCompute %main \"main\" %id\n"
2461 "OpExecutionMode %main LocalSize 1 1 1\n"
2462
2463 "OpName %main \"main\"\n"
2464 "OpName %id \"gl_GlobalInvocationID\"\n"
2465
2466 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2467
2468 "OpDecorate %buf BufferBlock\n"
2469 "OpDecorate %indata1 DescriptorSet 0\n"
2470 "OpDecorate %indata1 Binding 0\n"
2471 "OpDecorate %indata2 DescriptorSet 0\n"
2472 "OpDecorate %indata2 Binding 1\n"
2473 "OpDecorate %indata3 DescriptorSet 0\n"
2474 "OpDecorate %indata3 Binding 2\n"
2475 "OpDecorate %outdata DescriptorSet 0\n"
2476 "OpDecorate %outdata Binding 3\n"
2477 "OpDecorate %f32arr ArrayStride 4\n"
2478 "OpMemberDecorate %buf 0 Offset 0\n"
2479
2480 + string(getComputeAsmCommonTypes()) +
2481
2482 "%buf = OpTypeStruct %f32arr\n"
2483 "%bufptr = OpTypePointer Uniform %buf\n"
2484 "%indata1 = OpVariable %bufptr Uniform\n"
2485 "%indata2 = OpVariable %bufptr Uniform\n"
2486 "%indata3 = OpVariable %bufptr Uniform\n"
2487 "%outdata = OpVariable %bufptr Uniform\n"
2488
2489 "%id = OpVariable %uvec3ptr Input\n"
2490 "%zero = OpConstant %i32 0\n"
2491
2492 "%main = OpFunction %void None %voidf\n"
2493 "%label = OpLabel\n"
2494 "%idval = OpLoad %uvec3 %id\n"
2495 "%x = OpCompositeExtract %u32 %idval 0\n"
2496 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2497 "%inval1 = OpLoad %f32 %inloc1\n"
2498 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2499 "%inval2 = OpLoad %f32 %inloc2\n"
2500 "%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
2501 "%inval3 = OpLoad %f32 %inloc3\n"
2502 "%rem = OpExtInst %f32 %std450 NClamp %inval1 %inval2 %inval3\n"
2503 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2504 " OpStore %outloc %rem\n"
2505 " OpReturn\n"
2506 " OpFunctionEnd\n";
2507
2508 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2509 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2510 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
2511 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2512 spec.numWorkGroups = IVec3(numElements, 1, 1);
2513 spec.verifyIO = &compareNClamp;
2514
2515 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
2516
2517 return group.release();
2518 }
2519
createOpSRemComputeGroup(tcu::TestContext & testCtx,qpTestResult negFailResult)2520 tcu::TestCaseGroup *createOpSRemComputeGroup(tcu::TestContext &testCtx, qpTestResult negFailResult)
2521 {
2522 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsrem"));
2523 de::Random rnd(deStringHash(group->getName()));
2524 const int numElements = 200;
2525
2526 const struct CaseParams
2527 {
2528 const char *name;
2529 const char *failMessage; // customized status message
2530 qpTestResult failResult; // override status on failure
2531 int op1Min, op1Max; // operand ranges
2532 int op2Min, op2Max;
2533 } cases[] = {
2534 {"positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, 0, 65536, 0, 100},
2535 {"all", "Inconsistent results, but within specification", negFailResult, -65536, 65536, -100, 100}, // see below
2536 };
2537 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2538
2539 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2540 {
2541 const CaseParams ¶ms = cases[caseNdx];
2542 ComputeShaderSpec spec;
2543 vector<int32_t> inputInts1(numElements, 0);
2544 vector<int32_t> inputInts2(numElements, 0);
2545 vector<int32_t> outputInts(numElements, 0);
2546
2547 fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2548 fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2549
2550 for (int ndx = 0; ndx < numElements; ++ndx)
2551 {
2552 // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2553 outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2554 }
2555
2556 spec.assembly = string(getComputeAsmShaderPreamble()) +
2557
2558 "OpName %main \"main\"\n"
2559 "OpName %id \"gl_GlobalInvocationID\"\n"
2560
2561 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2562
2563 "OpDecorate %buf BufferBlock\n"
2564 "OpDecorate %indata1 DescriptorSet 0\n"
2565 "OpDecorate %indata1 Binding 0\n"
2566 "OpDecorate %indata2 DescriptorSet 0\n"
2567 "OpDecorate %indata2 Binding 1\n"
2568 "OpDecorate %outdata DescriptorSet 0\n"
2569 "OpDecorate %outdata Binding 2\n"
2570 "OpDecorate %i32arr ArrayStride 4\n"
2571 "OpMemberDecorate %buf 0 Offset 0\n"
2572
2573 + string(getComputeAsmCommonTypes()) +
2574
2575 "%buf = OpTypeStruct %i32arr\n"
2576 "%bufptr = OpTypePointer Uniform %buf\n"
2577 "%indata1 = OpVariable %bufptr Uniform\n"
2578 "%indata2 = OpVariable %bufptr Uniform\n"
2579 "%outdata = OpVariable %bufptr Uniform\n"
2580
2581 "%id = OpVariable %uvec3ptr Input\n"
2582 "%zero = OpConstant %i32 0\n"
2583
2584 "%main = OpFunction %void None %voidf\n"
2585 "%label = OpLabel\n"
2586 "%idval = OpLoad %uvec3 %id\n"
2587 "%x = OpCompositeExtract %u32 %idval 0\n"
2588 "%inloc1 = OpAccessChain %i32ptr %indata1 %zero %x\n"
2589 "%inval1 = OpLoad %i32 %inloc1\n"
2590 "%inloc2 = OpAccessChain %i32ptr %indata2 %zero %x\n"
2591 "%inval2 = OpLoad %i32 %inloc2\n"
2592 "%rem = OpSRem %i32 %inval1 %inval2\n"
2593 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
2594 " OpStore %outloc %rem\n"
2595 " OpReturn\n"
2596 " OpFunctionEnd\n";
2597
2598 spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts1)));
2599 spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts2)));
2600 spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts)));
2601 spec.numWorkGroups = IVec3(numElements, 1, 1);
2602 spec.failResult = params.failResult;
2603 spec.failMessage = params.failMessage;
2604
2605 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, spec));
2606 }
2607
2608 return group.release();
2609 }
2610
createOpSRemComputeGroup64(tcu::TestContext & testCtx,qpTestResult negFailResult)2611 tcu::TestCaseGroup *createOpSRemComputeGroup64(tcu::TestContext &testCtx, qpTestResult negFailResult)
2612 {
2613 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsrem64"));
2614 de::Random rnd(deStringHash(group->getName()));
2615 const int numElements = 200;
2616
2617 const struct CaseParams
2618 {
2619 const char *name;
2620 const char *failMessage; // customized status message
2621 qpTestResult failResult; // override status on failure
2622 bool positive;
2623 } cases[] = {
2624 {"positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, true},
2625 {"all", "Inconsistent results, but within specification", negFailResult, false}, // see below
2626 };
2627 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2628
2629 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2630 {
2631 const CaseParams ¶ms = cases[caseNdx];
2632 ComputeShaderSpec spec;
2633 vector<int64_t> inputInts1(numElements, 0);
2634 vector<int64_t> inputInts2(numElements, 0);
2635 vector<int64_t> outputInts(numElements, 0);
2636
2637 if (params.positive)
2638 {
2639 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2640 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2641 }
2642 else
2643 {
2644 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2645 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2646 }
2647
2648 for (int ndx = 0; ndx < numElements; ++ndx)
2649 {
2650 // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2651 outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2652 }
2653
2654 spec.assembly = "OpCapability Int64\n"
2655
2656 + string(getComputeAsmShaderPreamble()) +
2657
2658 "OpName %main \"main\"\n"
2659 "OpName %id \"gl_GlobalInvocationID\"\n"
2660
2661 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2662
2663 "OpDecorate %buf BufferBlock\n"
2664 "OpDecorate %indata1 DescriptorSet 0\n"
2665 "OpDecorate %indata1 Binding 0\n"
2666 "OpDecorate %indata2 DescriptorSet 0\n"
2667 "OpDecorate %indata2 Binding 1\n"
2668 "OpDecorate %outdata DescriptorSet 0\n"
2669 "OpDecorate %outdata Binding 2\n"
2670 "OpDecorate %i64arr ArrayStride 8\n"
2671 "OpMemberDecorate %buf 0 Offset 0\n"
2672
2673 + string(getComputeAsmCommonTypes()) + string(getComputeAsmCommonInt64Types()) +
2674
2675 "%buf = OpTypeStruct %i64arr\n"
2676 "%bufptr = OpTypePointer Uniform %buf\n"
2677 "%indata1 = OpVariable %bufptr Uniform\n"
2678 "%indata2 = OpVariable %bufptr Uniform\n"
2679 "%outdata = OpVariable %bufptr Uniform\n"
2680
2681 "%id = OpVariable %uvec3ptr Input\n"
2682 "%zero = OpConstant %i64 0\n"
2683
2684 "%main = OpFunction %void None %voidf\n"
2685 "%label = OpLabel\n"
2686 "%idval = OpLoad %uvec3 %id\n"
2687 "%x = OpCompositeExtract %u32 %idval 0\n"
2688 "%inloc1 = OpAccessChain %i64ptr %indata1 %zero %x\n"
2689 "%inval1 = OpLoad %i64 %inloc1\n"
2690 "%inloc2 = OpAccessChain %i64ptr %indata2 %zero %x\n"
2691 "%inval2 = OpLoad %i64 %inloc2\n"
2692 "%rem = OpSRem %i64 %inval1 %inval2\n"
2693 "%outloc = OpAccessChain %i64ptr %outdata %zero %x\n"
2694 " OpStore %outloc %rem\n"
2695 " OpReturn\n"
2696 " OpFunctionEnd\n";
2697
2698 spec.inputs.push_back(BufferSp(new Int64Buffer(inputInts1)));
2699 spec.inputs.push_back(BufferSp(new Int64Buffer(inputInts2)));
2700 spec.outputs.push_back(BufferSp(new Int64Buffer(outputInts)));
2701 spec.numWorkGroups = IVec3(numElements, 1, 1);
2702 spec.failResult = params.failResult;
2703 spec.failMessage = params.failMessage;
2704
2705 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2706
2707 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, spec));
2708 }
2709
2710 return group.release();
2711 }
2712
createOpSModComputeGroup(tcu::TestContext & testCtx,qpTestResult negFailResult)2713 tcu::TestCaseGroup *createOpSModComputeGroup(tcu::TestContext &testCtx, qpTestResult negFailResult)
2714 {
2715 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsmod"));
2716 de::Random rnd(deStringHash(group->getName()));
2717 const int numElements = 200;
2718
2719 const struct CaseParams
2720 {
2721 const char *name;
2722 const char *failMessage; // customized status message
2723 qpTestResult failResult; // override status on failure
2724 int op1Min, op1Max; // operand ranges
2725 int op2Min, op2Max;
2726 } cases[] = {
2727 {"positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, 0, 65536, 0, 100},
2728 {"all", "Inconsistent results, but within specification", negFailResult, -65536, 65536, -100, 100}, // see below
2729 };
2730 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2731
2732 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2733 {
2734 const CaseParams ¶ms = cases[caseNdx];
2735
2736 ComputeShaderSpec spec;
2737 vector<int32_t> inputInts1(numElements, 0);
2738 vector<int32_t> inputInts2(numElements, 0);
2739 vector<int32_t> outputInts(numElements, 0);
2740
2741 fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2742 fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2743
2744 for (int ndx = 0; ndx < numElements; ++ndx)
2745 {
2746 int32_t rem = inputInts1[ndx] % inputInts2[ndx];
2747 if (rem == 0)
2748 {
2749 outputInts[ndx] = 0;
2750 }
2751 else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2752 {
2753 // They have the same sign
2754 outputInts[ndx] = rem;
2755 }
2756 else
2757 {
2758 // They have opposite sign. The remainder operation takes the
2759 // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2760 // of inputInts2[ndx]. Adding inputInts2[ndx] will ensure that
2761 // the result has the correct sign and that it is still
2762 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2763 //
2764 // See also http://mathforum.org/library/drmath/view/52343.html
2765 outputInts[ndx] = rem + inputInts2[ndx];
2766 }
2767 }
2768
2769 spec.assembly = string(getComputeAsmShaderPreamble()) +
2770
2771 "OpName %main \"main\"\n"
2772 "OpName %id \"gl_GlobalInvocationID\"\n"
2773
2774 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2775
2776 "OpDecorate %buf BufferBlock\n"
2777 "OpDecorate %indata1 DescriptorSet 0\n"
2778 "OpDecorate %indata1 Binding 0\n"
2779 "OpDecorate %indata2 DescriptorSet 0\n"
2780 "OpDecorate %indata2 Binding 1\n"
2781 "OpDecorate %outdata DescriptorSet 0\n"
2782 "OpDecorate %outdata Binding 2\n"
2783 "OpDecorate %i32arr ArrayStride 4\n"
2784 "OpMemberDecorate %buf 0 Offset 0\n"
2785
2786 + string(getComputeAsmCommonTypes()) +
2787
2788 "%buf = OpTypeStruct %i32arr\n"
2789 "%bufptr = OpTypePointer Uniform %buf\n"
2790 "%indata1 = OpVariable %bufptr Uniform\n"
2791 "%indata2 = OpVariable %bufptr Uniform\n"
2792 "%outdata = OpVariable %bufptr Uniform\n"
2793
2794 "%id = OpVariable %uvec3ptr Input\n"
2795 "%zero = OpConstant %i32 0\n"
2796
2797 "%main = OpFunction %void None %voidf\n"
2798 "%label = OpLabel\n"
2799 "%idval = OpLoad %uvec3 %id\n"
2800 "%x = OpCompositeExtract %u32 %idval 0\n"
2801 "%inloc1 = OpAccessChain %i32ptr %indata1 %zero %x\n"
2802 "%inval1 = OpLoad %i32 %inloc1\n"
2803 "%inloc2 = OpAccessChain %i32ptr %indata2 %zero %x\n"
2804 "%inval2 = OpLoad %i32 %inloc2\n"
2805 "%rem = OpSMod %i32 %inval1 %inval2\n"
2806 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
2807 " OpStore %outloc %rem\n"
2808 " OpReturn\n"
2809 " OpFunctionEnd\n";
2810
2811 spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts1)));
2812 spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts2)));
2813 spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts)));
2814 spec.numWorkGroups = IVec3(numElements, 1, 1);
2815 spec.failResult = params.failResult;
2816 spec.failMessage = params.failMessage;
2817
2818 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, spec));
2819 }
2820
2821 return group.release();
2822 }
2823
createOpSModComputeGroup64(tcu::TestContext & testCtx,qpTestResult negFailResult)2824 tcu::TestCaseGroup *createOpSModComputeGroup64(tcu::TestContext &testCtx, qpTestResult negFailResult)
2825 {
2826 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsmod64"));
2827 de::Random rnd(deStringHash(group->getName()));
2828 const int numElements = 200;
2829
2830 const struct CaseParams
2831 {
2832 const char *name;
2833 const char *failMessage; // customized status message
2834 qpTestResult failResult; // override status on failure
2835 bool positive;
2836 } cases[] = {
2837 {"positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, true},
2838 {"all", "Inconsistent results, but within specification", negFailResult, false}, // see below
2839 };
2840 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2841
2842 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2843 {
2844 const CaseParams ¶ms = cases[caseNdx];
2845
2846 ComputeShaderSpec spec;
2847 vector<int64_t> inputInts1(numElements, 0);
2848 vector<int64_t> inputInts2(numElements, 0);
2849 vector<int64_t> outputInts(numElements, 0);
2850
2851 if (params.positive)
2852 {
2853 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2854 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2855 }
2856 else
2857 {
2858 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2859 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2860 }
2861
2862 for (int ndx = 0; ndx < numElements; ++ndx)
2863 {
2864 int64_t rem = inputInts1[ndx] % inputInts2[ndx];
2865 if (rem == 0)
2866 {
2867 outputInts[ndx] = 0;
2868 }
2869 else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2870 {
2871 // They have the same sign
2872 outputInts[ndx] = rem;
2873 }
2874 else
2875 {
2876 // They have opposite sign. The remainder operation takes the
2877 // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2878 // of inputInts2[ndx]. Adding inputInts2[ndx] will ensure that
2879 // the result has the correct sign and that it is still
2880 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2881 //
2882 // See also http://mathforum.org/library/drmath/view/52343.html
2883 outputInts[ndx] = rem + inputInts2[ndx];
2884 }
2885 }
2886
2887 spec.assembly = "OpCapability Int64\n"
2888
2889 + string(getComputeAsmShaderPreamble()) +
2890
2891 "OpName %main \"main\"\n"
2892 "OpName %id \"gl_GlobalInvocationID\"\n"
2893
2894 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2895
2896 "OpDecorate %buf BufferBlock\n"
2897 "OpDecorate %indata1 DescriptorSet 0\n"
2898 "OpDecorate %indata1 Binding 0\n"
2899 "OpDecorate %indata2 DescriptorSet 0\n"
2900 "OpDecorate %indata2 Binding 1\n"
2901 "OpDecorate %outdata DescriptorSet 0\n"
2902 "OpDecorate %outdata Binding 2\n"
2903 "OpDecorate %i64arr ArrayStride 8\n"
2904 "OpMemberDecorate %buf 0 Offset 0\n"
2905
2906 + string(getComputeAsmCommonTypes()) + string(getComputeAsmCommonInt64Types()) +
2907
2908 "%buf = OpTypeStruct %i64arr\n"
2909 "%bufptr = OpTypePointer Uniform %buf\n"
2910 "%indata1 = OpVariable %bufptr Uniform\n"
2911 "%indata2 = OpVariable %bufptr Uniform\n"
2912 "%outdata = OpVariable %bufptr Uniform\n"
2913
2914 "%id = OpVariable %uvec3ptr Input\n"
2915 "%zero = OpConstant %i64 0\n"
2916
2917 "%main = OpFunction %void None %voidf\n"
2918 "%label = OpLabel\n"
2919 "%idval = OpLoad %uvec3 %id\n"
2920 "%x = OpCompositeExtract %u32 %idval 0\n"
2921 "%inloc1 = OpAccessChain %i64ptr %indata1 %zero %x\n"
2922 "%inval1 = OpLoad %i64 %inloc1\n"
2923 "%inloc2 = OpAccessChain %i64ptr %indata2 %zero %x\n"
2924 "%inval2 = OpLoad %i64 %inloc2\n"
2925 "%rem = OpSMod %i64 %inval1 %inval2\n"
2926 "%outloc = OpAccessChain %i64ptr %outdata %zero %x\n"
2927 " OpStore %outloc %rem\n"
2928 " OpReturn\n"
2929 " OpFunctionEnd\n";
2930
2931 spec.inputs.push_back(BufferSp(new Int64Buffer(inputInts1)));
2932 spec.inputs.push_back(BufferSp(new Int64Buffer(inputInts2)));
2933 spec.outputs.push_back(BufferSp(new Int64Buffer(outputInts)));
2934 spec.numWorkGroups = IVec3(numElements, 1, 1);
2935 spec.failResult = params.failResult;
2936 spec.failMessage = params.failMessage;
2937
2938 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2939
2940 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, spec));
2941 }
2942
2943 return group.release();
2944 }
2945
2946 // Copy contents in the input buffer to the output buffer.
createOpCopyMemoryGroup(tcu::TestContext & testCtx)2947 tcu::TestCaseGroup *createOpCopyMemoryGroup(tcu::TestContext &testCtx)
2948 {
2949 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opcopymemory"));
2950 de::Random rnd(deStringHash(group->getName()));
2951 const int numElements = 100;
2952
2953 // The following case adds vec4(0., 0.5, 1.5, 2.5) to each of the elements in the input buffer and writes output to the output buffer.
2954 ComputeShaderSpec spec1;
2955 vector<Vec4> inputFloats1(numElements);
2956 vector<Vec4> outputFloats1(numElements);
2957
2958 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats1[0], numElements * 4);
2959
2960 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
2961 floorAll(inputFloats1);
2962
2963 for (size_t ndx = 0; ndx < numElements; ++ndx)
2964 outputFloats1[ndx] = inputFloats1[ndx] + Vec4(0.f, 0.5f, 1.5f, 2.5f);
2965
2966 spec1.assembly = string(getComputeAsmShaderPreamble()) +
2967
2968 "OpName %main \"main\"\n"
2969 "OpName %id \"gl_GlobalInvocationID\"\n"
2970
2971 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2972 "OpDecorate %vec4arr ArrayStride 16\n"
2973
2974 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2975
2976 "%vec4 = OpTypeVector %f32 4\n"
2977 "%vec4ptr_u = OpTypePointer Uniform %vec4\n"
2978 "%vec4ptr_f = OpTypePointer Function %vec4\n"
2979 "%vec4arr = OpTypeRuntimeArray %vec4\n"
2980 "%buf = OpTypeStruct %vec4arr\n"
2981 "%bufptr = OpTypePointer Uniform %buf\n"
2982 "%indata = OpVariable %bufptr Uniform\n"
2983 "%outdata = OpVariable %bufptr Uniform\n"
2984
2985 "%id = OpVariable %uvec3ptr Input\n"
2986 "%zero = OpConstant %i32 0\n"
2987 "%c_f_0 = OpConstant %f32 0.\n"
2988 "%c_f_0_5 = OpConstant %f32 0.5\n"
2989 "%c_f_1_5 = OpConstant %f32 1.5\n"
2990 "%c_f_2_5 = OpConstant %f32 2.5\n"
2991 "%c_vec4 = OpConstantComposite %vec4 %c_f_0 %c_f_0_5 %c_f_1_5 %c_f_2_5\n"
2992
2993 "%main = OpFunction %void None %voidf\n"
2994 "%label = OpLabel\n"
2995 "%v_vec4 = OpVariable %vec4ptr_f Function\n"
2996 "%idval = OpLoad %uvec3 %id\n"
2997 "%x = OpCompositeExtract %u32 %idval 0\n"
2998 "%inloc = OpAccessChain %vec4ptr_u %indata %zero %x\n"
2999 "%outloc = OpAccessChain %vec4ptr_u %outdata %zero %x\n"
3000 " OpCopyMemory %v_vec4 %inloc\n"
3001 "%v_vec4_val = OpLoad %vec4 %v_vec4\n"
3002 "%add = OpFAdd %vec4 %v_vec4_val %c_vec4\n"
3003 " OpStore %outloc %add\n"
3004 " OpReturn\n"
3005 " OpFunctionEnd\n";
3006
3007 spec1.inputs.push_back(BufferSp(new Vec4Buffer(inputFloats1)));
3008 spec1.outputs.push_back(BufferSp(new Vec4Buffer(outputFloats1)));
3009 spec1.numWorkGroups = IVec3(numElements, 1, 1);
3010
3011 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector", spec1));
3012
3013 // The following case copies a float[100] variable from the input buffer to the output buffer.
3014 ComputeShaderSpec spec2;
3015 vector<float> inputFloats2(numElements);
3016 vector<float> outputFloats2(numElements);
3017
3018 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats2[0], numElements);
3019
3020 for (size_t ndx = 0; ndx < numElements; ++ndx)
3021 outputFloats2[ndx] = inputFloats2[ndx];
3022
3023 spec2.assembly = string(getComputeAsmShaderPreamble()) +
3024
3025 "OpName %main \"main\"\n"
3026 "OpName %id \"gl_GlobalInvocationID\"\n"
3027
3028 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3029 "OpDecorate %f32arr100 ArrayStride 4\n"
3030
3031 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3032
3033 "%hundred = OpConstant %u32 100\n"
3034 "%f32arr100 = OpTypeArray %f32 %hundred\n"
3035 "%f32arr100ptr_f = OpTypePointer Function %f32arr100\n"
3036 "%f32arr100ptr_u = OpTypePointer Uniform %f32arr100\n"
3037 "%buf = OpTypeStruct %f32arr100\n"
3038 "%bufptr = OpTypePointer Uniform %buf\n"
3039 "%indata = OpVariable %bufptr Uniform\n"
3040 "%outdata = OpVariable %bufptr Uniform\n"
3041
3042 "%id = OpVariable %uvec3ptr Input\n"
3043 "%zero = OpConstant %i32 0\n"
3044
3045 "%main = OpFunction %void None %voidf\n"
3046 "%label = OpLabel\n"
3047 "%var = OpVariable %f32arr100ptr_f Function\n"
3048 "%inarr = OpAccessChain %f32arr100ptr_u %indata %zero\n"
3049 "%outarr = OpAccessChain %f32arr100ptr_u %outdata %zero\n"
3050 " OpCopyMemory %var %inarr\n"
3051 " OpCopyMemory %outarr %var\n"
3052 " OpReturn\n"
3053 " OpFunctionEnd\n";
3054
3055 spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
3056 spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
3057 spec2.numWorkGroups = IVec3(1, 1, 1);
3058
3059 group->addChild(new SpvAsmComputeShaderCase(testCtx, "array", spec2));
3060
3061 // The following case copies a struct{vec4, vec4, vec4, vec4} variable from the input buffer to the output buffer.
3062 ComputeShaderSpec spec3;
3063 vector<float> inputFloats3(16);
3064 vector<float> outputFloats3(16);
3065
3066 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats3[0], 16);
3067
3068 for (size_t ndx = 0; ndx < 16; ++ndx)
3069 outputFloats3[ndx] = inputFloats3[ndx];
3070
3071 spec3.assembly = string(getComputeAsmShaderPreamble()) +
3072
3073 "OpName %main \"main\"\n"
3074 "OpName %id \"gl_GlobalInvocationID\"\n"
3075
3076 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3077 //"OpMemberDecorate %buf 0 Offset 0\n" - exists in getComputeAsmInputOutputBufferTraits
3078 "OpMemberDecorate %buf 1 Offset 16\n"
3079 "OpMemberDecorate %buf 2 Offset 32\n"
3080 "OpMemberDecorate %buf 3 Offset 48\n"
3081
3082 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3083
3084 "%vec4 = OpTypeVector %f32 4\n"
3085 "%buf = OpTypeStruct %vec4 %vec4 %vec4 %vec4\n"
3086 "%bufptr = OpTypePointer Uniform %buf\n"
3087 "%indata = OpVariable %bufptr Uniform\n"
3088 "%outdata = OpVariable %bufptr Uniform\n"
3089 "%vec4stptr = OpTypePointer Function %buf\n"
3090
3091 "%id = OpVariable %uvec3ptr Input\n"
3092 "%zero = OpConstant %i32 0\n"
3093
3094 "%main = OpFunction %void None %voidf\n"
3095 "%label = OpLabel\n"
3096 "%var = OpVariable %vec4stptr Function\n"
3097 " OpCopyMemory %var %indata\n"
3098 " OpCopyMemory %outdata %var\n"
3099 " OpReturn\n"
3100 " OpFunctionEnd\n";
3101
3102 spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3103 spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
3104 spec3.numWorkGroups = IVec3(1, 1, 1);
3105
3106 group->addChild(new SpvAsmComputeShaderCase(testCtx, "struct", spec3));
3107
3108 // The following case negates multiple float variables from the input buffer and stores the results to the output buffer.
3109 ComputeShaderSpec spec4;
3110 vector<float> inputFloats4(numElements);
3111 vector<float> outputFloats4(numElements);
3112
3113 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats4[0], numElements);
3114
3115 for (size_t ndx = 0; ndx < numElements; ++ndx)
3116 outputFloats4[ndx] = -inputFloats4[ndx];
3117
3118 spec4.assembly = string(getComputeAsmShaderPreamble()) +
3119
3120 "OpName %main \"main\"\n"
3121 "OpName %id \"gl_GlobalInvocationID\"\n"
3122
3123 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3124
3125 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3126 string(getComputeAsmInputOutputBuffer()) +
3127
3128 "%f32ptr_f = OpTypePointer Function %f32\n"
3129 "%id = OpVariable %uvec3ptr Input\n"
3130 "%zero = OpConstant %i32 0\n"
3131
3132 "%main = OpFunction %void None %voidf\n"
3133 "%label = OpLabel\n"
3134 "%var = OpVariable %f32ptr_f Function\n"
3135 "%idval = OpLoad %uvec3 %id\n"
3136 "%x = OpCompositeExtract %u32 %idval 0\n"
3137 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
3138 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3139 " OpCopyMemory %var %inloc\n"
3140 "%val = OpLoad %f32 %var\n"
3141 "%neg = OpFNegate %f32 %val\n"
3142 " OpStore %outloc %neg\n"
3143 " OpReturn\n"
3144 " OpFunctionEnd\n";
3145
3146 spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3147 spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
3148 spec4.numWorkGroups = IVec3(numElements, 1, 1);
3149
3150 group->addChild(new SpvAsmComputeShaderCase(testCtx, "float", spec4));
3151
3152 return group.release();
3153 }
3154
createOpCopyObjectGroup(tcu::TestContext & testCtx)3155 tcu::TestCaseGroup *createOpCopyObjectGroup(tcu::TestContext &testCtx)
3156 {
3157 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opcopyobject"));
3158 ComputeShaderSpec spec;
3159 de::Random rnd(deStringHash(group->getName()));
3160 const int numElements = 100;
3161 vector<float> inputFloats(numElements, 0);
3162 vector<float> outputFloats(numElements, 0);
3163
3164 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
3165
3166 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3167 floorAll(inputFloats);
3168
3169 for (size_t ndx = 0; ndx < numElements; ++ndx)
3170 outputFloats[ndx] = inputFloats[ndx] + 7.5f;
3171
3172 spec.assembly = string(getComputeAsmShaderPreamble()) +
3173
3174 "OpName %main \"main\"\n"
3175 "OpName %id \"gl_GlobalInvocationID\"\n"
3176
3177 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3178
3179 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3180
3181 "%fmat = OpTypeMatrix %fvec3 3\n"
3182 "%three = OpConstant %u32 3\n"
3183 "%farr = OpTypeArray %f32 %three\n"
3184 "%fst = OpTypeStruct %f32 %f32\n"
3185
3186 + string(getComputeAsmInputOutputBuffer()) +
3187
3188 "%id = OpVariable %uvec3ptr Input\n"
3189 "%zero = OpConstant %i32 0\n"
3190 "%c_f = OpConstant %f32 1.5\n"
3191 "%c_fvec3 = OpConstantComposite %fvec3 %c_f %c_f %c_f\n"
3192 "%c_fmat = OpConstantComposite %fmat %c_fvec3 %c_fvec3 %c_fvec3\n"
3193 "%c_farr = OpConstantComposite %farr %c_f %c_f %c_f\n"
3194 "%c_fst = OpConstantComposite %fst %c_f %c_f\n"
3195
3196 "%main = OpFunction %void None %voidf\n"
3197 "%label = OpLabel\n"
3198 "%c_f_copy = OpCopyObject %f32 %c_f\n"
3199 "%c_fvec3_copy = OpCopyObject %fvec3 %c_fvec3\n"
3200 "%c_fmat_copy = OpCopyObject %fmat %c_fmat\n"
3201 "%c_farr_copy = OpCopyObject %farr %c_farr\n"
3202 "%c_fst_copy = OpCopyObject %fst %c_fst\n"
3203 "%fvec3_elem = OpCompositeExtract %f32 %c_fvec3_copy 0\n"
3204 "%fmat_elem = OpCompositeExtract %f32 %c_fmat_copy 1 2\n"
3205 "%farr_elem = OpCompositeExtract %f32 %c_farr_copy 2\n"
3206 "%fst_elem = OpCompositeExtract %f32 %c_fst_copy 1\n"
3207 // Add up. 1.5 * 5 = 7.5.
3208 "%add1 = OpFAdd %f32 %c_f_copy %fvec3_elem\n"
3209 "%add2 = OpFAdd %f32 %add1 %fmat_elem\n"
3210 "%add3 = OpFAdd %f32 %add2 %farr_elem\n"
3211 "%add4 = OpFAdd %f32 %add3 %fst_elem\n"
3212
3213 "%idval = OpLoad %uvec3 %id\n"
3214 "%x = OpCompositeExtract %u32 %idval 0\n"
3215 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
3216 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3217 "%inval = OpLoad %f32 %inloc\n"
3218 "%add = OpFAdd %f32 %add4 %inval\n"
3219 " OpStore %outloc %add\n"
3220 " OpReturn\n"
3221 " OpFunctionEnd\n";
3222 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3223 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3224 spec.numWorkGroups = IVec3(numElements, 1, 1);
3225
3226 group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", spec));
3227
3228 return group.release();
3229 }
3230 // Assembly code used for testing OpUnreachable is based on GLSL source code:
3231 //
3232 // #version 430
3233 //
3234 // layout(std140, set = 0, binding = 0) readonly buffer Input {
3235 // float elements[];
3236 // } input_data;
3237 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
3238 // float elements[];
3239 // } output_data;
3240 //
3241 // void not_called_func() {
3242 // // place OpUnreachable here
3243 // }
3244 //
3245 // uint modulo4(uint val) {
3246 // switch (val % uint(4)) {
3247 // case 0: return 3;
3248 // case 1: return 2;
3249 // case 2: return 1;
3250 // case 3: return 0;
3251 // default: return 100; // place OpUnreachable here
3252 // }
3253 // }
3254 //
3255 // uint const5() {
3256 // return 5;
3257 // // place OpUnreachable here
3258 // }
3259 //
3260 // void main() {
3261 // uint x = gl_GlobalInvocationID.x;
3262 // if (const5() > modulo4(1000)) {
3263 // output_data.elements[x] = -input_data.elements[x];
3264 // } else {
3265 // // place OpUnreachable here
3266 // output_data.elements[x] = input_data.elements[x];
3267 // }
3268 // }
3269
addOpUnreachableAmberTests(tcu::TestCaseGroup & group,tcu::TestContext & testCtx)3270 void addOpUnreachableAmberTests(tcu::TestCaseGroup &group, tcu::TestContext &testCtx)
3271 {
3272 #ifndef CTS_USES_VULKANSC
3273 static const char dataDir[] = "spirv_assembly/instruction/compute/unreachable";
3274
3275 struct Case
3276 {
3277 string name;
3278 string desc;
3279 };
3280
3281 static const Case cases[] = {
3282 {"unreachable-switch-merge-in-loop",
3283 "Test containing an unreachable switch merge block inside an infinite loop"},
3284 };
3285
3286 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3287 {
3288 const string fileName = cases[i].name + ".amber";
3289 group.addChild(
3290 cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3291 }
3292 #else
3293 DE_UNREF(group);
3294 DE_UNREF(testCtx);
3295 #endif
3296 }
3297
addOpSwitchAmberTests(tcu::TestCaseGroup & group,tcu::TestContext & testCtx)3298 void addOpSwitchAmberTests(tcu::TestCaseGroup &group, tcu::TestContext &testCtx)
3299 {
3300 #ifndef CTS_USES_VULKANSC
3301 static const char dataDir[] = "spirv_assembly/instruction/compute/switch";
3302
3303 struct Case
3304 {
3305 string name;
3306 string desc;
3307 };
3308
3309 static const Case cases[] = {
3310 {"switch-case-to-merge-block", "Test switch containing a case that jumps directly to the merge block"},
3311 };
3312
3313 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3314 {
3315 const string fileName = cases[i].name + ".amber";
3316 group.addChild(
3317 cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3318 }
3319 #else
3320 DE_UNREF(group);
3321 DE_UNREF(testCtx);
3322 #endif
3323 }
3324
3325 #ifndef CTS_USES_VULKANSC
createOpArrayLengthComputeGroup(tcu::TestContext & testCtx)3326 tcu::TestCaseGroup *createOpArrayLengthComputeGroup(tcu::TestContext &testCtx)
3327 {
3328 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "oparraylength"));
3329 static const char dataDir[] = "spirv_assembly/instruction/compute/arraylength";
3330
3331 struct Case
3332 {
3333 string name;
3334 string desc;
3335 };
3336
3337 static const Case cases[] = {{"array-stride-larger-than-element-size",
3338 "Test using an unsized array with stride larger than the element size"}};
3339
3340 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3341 {
3342 const string fileName = cases[i].name + ".amber";
3343 group->addChild(
3344 cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3345 }
3346
3347 return group.release();
3348 }
3349 #endif
3350
createOpUnreachableGroup(tcu::TestContext & testCtx)3351 tcu::TestCaseGroup *createOpUnreachableGroup(tcu::TestContext &testCtx)
3352 {
3353 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opunreachable"));
3354 ComputeShaderSpec spec;
3355 de::Random rnd(deStringHash(group->getName()));
3356 const int numElements = 100;
3357 vector<float> positiveFloats(numElements, 0);
3358 vector<float> negativeFloats(numElements, 0);
3359
3360 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
3361
3362 for (size_t ndx = 0; ndx < numElements; ++ndx)
3363 negativeFloats[ndx] = -positiveFloats[ndx];
3364
3365 spec.assembly = string(getComputeAsmShaderPreamble()) +
3366
3367 "OpSource GLSL 430\n"
3368 "OpName %main \"main\"\n"
3369 "OpName %func_not_called_func \"not_called_func(\"\n"
3370 "OpName %func_modulo4 \"modulo4(u1;\"\n"
3371 "OpName %func_const5 \"const5(\"\n"
3372 "OpName %id \"gl_GlobalInvocationID\"\n"
3373
3374 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3375
3376 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3377
3378 "%u32ptr = OpTypePointer Function %u32\n"
3379 "%uintfuint = OpTypeFunction %u32 %u32ptr\n"
3380 "%unitf = OpTypeFunction %u32\n"
3381
3382 "%id = OpVariable %uvec3ptr Input\n"
3383 "%zero = OpConstant %u32 0\n"
3384 "%one = OpConstant %u32 1\n"
3385 "%two = OpConstant %u32 2\n"
3386 "%three = OpConstant %u32 3\n"
3387 "%four = OpConstant %u32 4\n"
3388 "%five = OpConstant %u32 5\n"
3389 "%hundred = OpConstant %u32 100\n"
3390 "%thousand = OpConstant %u32 1000\n"
3391
3392 + string(getComputeAsmInputOutputBuffer()) +
3393
3394 // Main()
3395 "%main = OpFunction %void None %voidf\n"
3396 "%main_entry = OpLabel\n"
3397 "%v_thousand = OpVariable %u32ptr Function %thousand\n"
3398 "%idval = OpLoad %uvec3 %id\n"
3399 "%x = OpCompositeExtract %u32 %idval 0\n"
3400 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
3401 "%inval = OpLoad %f32 %inloc\n"
3402 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3403 "%ret_const5 = OpFunctionCall %u32 %func_const5\n"
3404 "%ret_modulo4 = OpFunctionCall %u32 %func_modulo4 %v_thousand\n"
3405 "%cmp_gt = OpUGreaterThan %bool %ret_const5 %ret_modulo4\n"
3406 " OpSelectionMerge %if_end None\n"
3407 " OpBranchConditional %cmp_gt %if_true %if_false\n"
3408 "%if_true = OpLabel\n"
3409 "%negate = OpFNegate %f32 %inval\n"
3410 " OpStore %outloc %negate\n"
3411 " OpBranch %if_end\n"
3412 "%if_false = OpLabel\n"
3413 " OpUnreachable\n" // Unreachable else branch for if statement
3414 "%if_end = OpLabel\n"
3415 " OpReturn\n"
3416 " OpFunctionEnd\n"
3417
3418 // not_called_function()
3419 "%func_not_called_func = OpFunction %void None %voidf\n"
3420 "%not_called_func_entry = OpLabel\n"
3421 " OpUnreachable\n" // Unreachable entry block in not called static function
3422 " OpFunctionEnd\n"
3423
3424 // modulo4()
3425 "%func_modulo4 = OpFunction %u32 None %uintfuint\n"
3426 "%valptr = OpFunctionParameter %u32ptr\n"
3427 "%modulo4_entry = OpLabel\n"
3428 "%val = OpLoad %u32 %valptr\n"
3429 "%modulo = OpUMod %u32 %val %four\n"
3430 " OpSelectionMerge %switch_merge None\n"
3431 " OpSwitch %modulo %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
3432 "%case0 = OpLabel\n"
3433 " OpReturnValue %three\n"
3434 "%case1 = OpLabel\n"
3435 " OpReturnValue %two\n"
3436 "%case2 = OpLabel\n"
3437 " OpReturnValue %one\n"
3438 "%case3 = OpLabel\n"
3439 " OpReturnValue %zero\n"
3440 "%default = OpLabel\n"
3441 " OpUnreachable\n" // Unreachable default case for switch statement
3442 "%switch_merge = OpLabel\n"
3443 " OpUnreachable\n" // Unreachable merge block for switch statement
3444 " OpFunctionEnd\n"
3445
3446 // const5()
3447 "%func_const5 = OpFunction %u32 None %unitf\n"
3448 "%const5_entry = OpLabel\n"
3449 " OpReturnValue %five\n"
3450 "%unreachable = OpLabel\n"
3451 " OpUnreachable\n" // Unreachable block in function
3452 " OpFunctionEnd\n";
3453 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
3454 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
3455 spec.numWorkGroups = IVec3(numElements, 1, 1);
3456
3457 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
3458
3459 addOpUnreachableAmberTests(*group, testCtx);
3460
3461 return group.release();
3462 }
3463
3464 // Assembly code used for testing decoration group is based on GLSL source code:
3465 //
3466 // #version 430
3467 //
3468 // layout(std140, set = 0, binding = 0) readonly buffer Input0 {
3469 // float elements[];
3470 // } input_data0;
3471 // layout(std140, set = 0, binding = 1) readonly buffer Input1 {
3472 // float elements[];
3473 // } input_data1;
3474 // layout(std140, set = 0, binding = 2) readonly buffer Input2 {
3475 // float elements[];
3476 // } input_data2;
3477 // layout(std140, set = 0, binding = 3) readonly buffer Input3 {
3478 // float elements[];
3479 // } input_data3;
3480 // layout(std140, set = 0, binding = 4) readonly buffer Input4 {
3481 // float elements[];
3482 // } input_data4;
3483 // layout(std140, set = 0, binding = 5) writeonly buffer Output {
3484 // float elements[];
3485 // } output_data;
3486 //
3487 // void main() {
3488 // uint x = gl_GlobalInvocationID.x;
3489 // output_data.elements[x] = input_data0.elements[x] + input_data1.elements[x] + input_data2.elements[x] + input_data3.elements[x] + input_data4.elements[x];
3490 // }
createDecorationGroupGroup(tcu::TestContext & testCtx)3491 tcu::TestCaseGroup *createDecorationGroupGroup(tcu::TestContext &testCtx)
3492 {
3493 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "decoration_group"));
3494 ComputeShaderSpec spec;
3495 de::Random rnd(deStringHash(group->getName()));
3496 const int numElements = 100;
3497 vector<float> inputFloats0(numElements, 0);
3498 vector<float> inputFloats1(numElements, 0);
3499 vector<float> inputFloats2(numElements, 0);
3500 vector<float> inputFloats3(numElements, 0);
3501 vector<float> inputFloats4(numElements, 0);
3502 vector<float> outputFloats(numElements, 0);
3503
3504 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats0[0], numElements);
3505 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats1[0], numElements);
3506 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats2[0], numElements);
3507 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats3[0], numElements);
3508 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats4[0], numElements);
3509
3510 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3511 floorAll(inputFloats0);
3512 floorAll(inputFloats1);
3513 floorAll(inputFloats2);
3514 floorAll(inputFloats3);
3515 floorAll(inputFloats4);
3516
3517 for (size_t ndx = 0; ndx < numElements; ++ndx)
3518 outputFloats[ndx] =
3519 inputFloats0[ndx] + inputFloats1[ndx] + inputFloats2[ndx] + inputFloats3[ndx] + inputFloats4[ndx];
3520
3521 spec.assembly = string(getComputeAsmShaderPreamble()) +
3522
3523 "OpSource GLSL 430\n"
3524 "OpName %main \"main\"\n"
3525 "OpName %id \"gl_GlobalInvocationID\"\n"
3526
3527 // Not using group decoration on variable.
3528 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3529 // Not using group decoration on type.
3530 "OpDecorate %f32arr ArrayStride 4\n"
3531
3532 "OpDecorate %groups BufferBlock\n"
3533 "OpDecorate %groupm Offset 0\n"
3534 "%groups = OpDecorationGroup\n"
3535 "%groupm = OpDecorationGroup\n"
3536
3537 // Group decoration on multiple structs.
3538 "OpGroupDecorate %groups %outbuf %inbuf0 %inbuf1 %inbuf2 %inbuf3 %inbuf4\n"
3539 // Group decoration on multiple struct members.
3540 "OpGroupMemberDecorate %groupm %outbuf 0 %inbuf0 0 %inbuf1 0 %inbuf2 0 %inbuf3 0 %inbuf4 0\n"
3541
3542 "OpDecorate %group1 DescriptorSet 0\n"
3543 "OpDecorate %group3 DescriptorSet 0\n"
3544 "OpDecorate %group3 NonWritable\n"
3545 "OpDecorate %group3 Restrict\n"
3546 "%group0 = OpDecorationGroup\n"
3547 "%group1 = OpDecorationGroup\n"
3548 "%group3 = OpDecorationGroup\n"
3549
3550 // Applying the same decoration group multiple times.
3551 "OpGroupDecorate %group1 %outdata\n"
3552 "OpGroupDecorate %group1 %outdata\n"
3553 "OpGroupDecorate %group1 %outdata\n"
3554 "OpDecorate %outdata DescriptorSet 0\n"
3555 "OpDecorate %outdata Binding 5\n"
3556 // Applying decoration group containing nothing.
3557 "OpGroupDecorate %group0 %indata0\n"
3558 "OpDecorate %indata0 DescriptorSet 0\n"
3559 "OpDecorate %indata0 Binding 0\n"
3560 // Applying decoration group containing one decoration.
3561 "OpGroupDecorate %group1 %indata1\n"
3562 "OpDecorate %indata1 Binding 1\n"
3563 // Applying decoration group containing multiple decorations.
3564 "OpGroupDecorate %group3 %indata2 %indata3\n"
3565 "OpDecorate %indata2 Binding 2\n"
3566 "OpDecorate %indata3 Binding 3\n"
3567 // Applying multiple decoration groups (with overlapping).
3568 "OpGroupDecorate %group0 %indata4\n"
3569 "OpGroupDecorate %group1 %indata4\n"
3570 "OpGroupDecorate %group3 %indata4\n"
3571 "OpDecorate %indata4 Binding 4\n"
3572
3573 + string(getComputeAsmCommonTypes()) +
3574
3575 "%id = OpVariable %uvec3ptr Input\n"
3576 "%zero = OpConstant %i32 0\n"
3577
3578 "%outbuf = OpTypeStruct %f32arr\n"
3579 "%outbufptr = OpTypePointer Uniform %outbuf\n"
3580 "%outdata = OpVariable %outbufptr Uniform\n"
3581 "%inbuf0 = OpTypeStruct %f32arr\n"
3582 "%inbuf0ptr = OpTypePointer Uniform %inbuf0\n"
3583 "%indata0 = OpVariable %inbuf0ptr Uniform\n"
3584 "%inbuf1 = OpTypeStruct %f32arr\n"
3585 "%inbuf1ptr = OpTypePointer Uniform %inbuf1\n"
3586 "%indata1 = OpVariable %inbuf1ptr Uniform\n"
3587 "%inbuf2 = OpTypeStruct %f32arr\n"
3588 "%inbuf2ptr = OpTypePointer Uniform %inbuf2\n"
3589 "%indata2 = OpVariable %inbuf2ptr Uniform\n"
3590 "%inbuf3 = OpTypeStruct %f32arr\n"
3591 "%inbuf3ptr = OpTypePointer Uniform %inbuf3\n"
3592 "%indata3 = OpVariable %inbuf3ptr Uniform\n"
3593 "%inbuf4 = OpTypeStruct %f32arr\n"
3594 "%inbufptr = OpTypePointer Uniform %inbuf4\n"
3595 "%indata4 = OpVariable %inbufptr Uniform\n"
3596
3597 "%main = OpFunction %void None %voidf\n"
3598 "%label = OpLabel\n"
3599 "%idval = OpLoad %uvec3 %id\n"
3600 "%x = OpCompositeExtract %u32 %idval 0\n"
3601 "%inloc0 = OpAccessChain %f32ptr %indata0 %zero %x\n"
3602 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
3603 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
3604 "%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
3605 "%inloc4 = OpAccessChain %f32ptr %indata4 %zero %x\n"
3606 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3607 "%inval0 = OpLoad %f32 %inloc0\n"
3608 "%inval1 = OpLoad %f32 %inloc1\n"
3609 "%inval2 = OpLoad %f32 %inloc2\n"
3610 "%inval3 = OpLoad %f32 %inloc3\n"
3611 "%inval4 = OpLoad %f32 %inloc4\n"
3612 "%add0 = OpFAdd %f32 %inval0 %inval1\n"
3613 "%add1 = OpFAdd %f32 %add0 %inval2\n"
3614 "%add2 = OpFAdd %f32 %add1 %inval3\n"
3615 "%add = OpFAdd %f32 %add2 %inval4\n"
3616 " OpStore %outloc %add\n"
3617 " OpReturn\n"
3618 " OpFunctionEnd\n";
3619 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats0)));
3620 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
3621 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
3622 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3623 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3624 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3625 spec.numWorkGroups = IVec3(numElements, 1, 1);
3626
3627 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
3628
3629 return group.release();
3630 }
3631
// Scalar types a specialization constant value can have. Used as the discriminator of
// SpecConstantValue.
enum SpecConstantType
{
    // Integer types, signed then unsigned for each width.
    SC_INT8 = 0,
    SC_UINT8,
    SC_INT16,
    SC_UINT16,
    SC_INT32,
    SC_UINT32,
    SC_INT64,
    SC_UINT64,
    // Floating-point types.
    SC_FLOAT16,
    SC_FLOAT32,
    SC_FLOAT64,
};
3646
3647 struct SpecConstantValue
3648 {
3649 SpecConstantType type;
3650 union ValueUnion
3651 {
3652 int8_t i8;
3653 uint8_t u8;
3654 int16_t i16;
3655 uint16_t u16;
3656 int32_t i32;
3657 uint32_t u32;
3658 int64_t i64;
3659 uint64_t u64;
3660 tcu::Float16 f16;
3661 tcu::Float32 f32;
3662 tcu::Float64 f64;
3663
ValueUnion(int8_t v)3664 ValueUnion(int8_t v) : i8(v)
3665 {
3666 }
ValueUnion(uint8_t v)3667 ValueUnion(uint8_t v) : u8(v)
3668 {
3669 }
ValueUnion(int16_t v)3670 ValueUnion(int16_t v) : i16(v)
3671 {
3672 }
ValueUnion(uint16_t v)3673 ValueUnion(uint16_t v) : u16(v)
3674 {
3675 }
ValueUnion(int32_t v)3676 ValueUnion(int32_t v) : i32(v)
3677 {
3678 }
ValueUnion(uint32_t v)3679 ValueUnion(uint32_t v) : u32(v)
3680 {
3681 }
ValueUnion(int64_t v)3682 ValueUnion(int64_t v) : i64(v)
3683 {
3684 }
ValueUnion(uint64_t v)3685 ValueUnion(uint64_t v) : u64(v)
3686 {
3687 }
ValueUnion(tcu::Float16 v)3688 ValueUnion(tcu::Float16 v) : f16(v)
3689 {
3690 }
ValueUnion(tcu::Float32 v)3691 ValueUnion(tcu::Float32 v) : f32(v)
3692 {
3693 }
ValueUnion(tcu::Float64 v)3694 ValueUnion(tcu::Float64 v) : f64(v)
3695 {
3696 }
3697 } value;
3698
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3699 SpecConstantValue(int8_t v) : type(SC_INT8), value(v)
3700 {
3701 }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3702 SpecConstantValue(uint8_t v) : type(SC_UINT8), value(v)
3703 {
3704 }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3705 SpecConstantValue(int16_t v) : type(SC_INT16), value(v)
3706 {
3707 }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3708 SpecConstantValue(uint16_t v) : type(SC_UINT16), value(v)
3709 {
3710 }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3711 SpecConstantValue(int32_t v) : type(SC_INT32), value(v)
3712 {
3713 }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3714 SpecConstantValue(uint32_t v) : type(SC_UINT32), value(v)
3715 {
3716 }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3717 SpecConstantValue(int64_t v) : type(SC_INT64), value(v)
3718 {
3719 }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3720 SpecConstantValue(uint64_t v) : type(SC_UINT64), value(v)
3721 {
3722 }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3723 SpecConstantValue(tcu::Float16 v) : type(SC_FLOAT16), value(v)
3724 {
3725 }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3726 SpecConstantValue(tcu::Float32 v) : type(SC_FLOAT32), value(v)
3727 {
3728 }
SpecConstantValuevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3729 SpecConstantValue(tcu::Float64 v) : type(SC_FLOAT64), value(v)
3730 {
3731 }
3732
appendTovkt::SpirVAssembly::__anon8834af5b0111::SpecConstantValue3733 void appendTo(vkt::SpirVAssembly::SpecConstants &specConstants)
3734 {
3735 switch (type)
3736 {
3737 case SC_INT8:
3738 specConstants.append(value.i8);
3739 break;
3740 case SC_UINT8:
3741 specConstants.append(value.u8);
3742 break;
3743 case SC_INT16:
3744 specConstants.append(value.i16);
3745 break;
3746 case SC_UINT16:
3747 specConstants.append(value.u16);
3748 break;
3749 case SC_INT32:
3750 specConstants.append(value.i32);
3751 break;
3752 case SC_UINT32:
3753 specConstants.append(value.u32);
3754 break;
3755 case SC_INT64:
3756 specConstants.append(value.i64);
3757 break;
3758 case SC_UINT64:
3759 specConstants.append(value.u64);
3760 break;
3761 case SC_FLOAT16:
3762 specConstants.append(value.f16);
3763 break;
3764 case SC_FLOAT32:
3765 specConstants.append(value.f32);
3766 break;
3767 case SC_FLOAT64:
3768 specConstants.append(value.f64);
3769 break;
3770 default:
3771 DE_ASSERT(false);
3772 }
3773 }
3774 };
3775
// Bit flags attached to a test case; combine them with bitwise OR into a CaseFlags value.
enum CaseFlagBits
{
    FLAG_NONE    = 0,
    FLAG_CONVERT = (1 << 0),
    FLAG_I8      = (1 << 1),
    FLAG_I16     = (1 << 2),
    FLAG_I64     = (1 << 3),
    FLAG_F16     = (1 << 4),
    FLAG_F64     = (1 << 5),
};

// Holds a combination of CaseFlagBits.
using CaseFlags = uint32_t;
3787
3788 struct SpecConstantTwoValCase
3789 {
3790 const std::string caseName;
3791 const std::string scDefinition0;
3792 const std::string scDefinition1;
3793 const std::string scResultType;
3794 const std::string scOperation;
3795 SpecConstantValue scActualValue0;
3796 SpecConstantValue scActualValue1;
3797 const std::string resultOperation;
3798 vector<int32_t> expectedOutput;
3799 CaseFlags caseFlags;
3800
SpecConstantTwoValCasevkt::SpirVAssembly::__anon8834af5b0111::SpecConstantTwoValCase3801 SpecConstantTwoValCase(const std::string &name, const std::string &definition0, const std::string &definition1,
3802 const std::string &resultType, const std::string &operation, SpecConstantValue value0,
3803 SpecConstantValue value1, const std::string &resultOp, const vector<int32_t> &output,
3804 CaseFlags flags = FLAG_NONE)
3805 : caseName(name)
3806 , scDefinition0(definition0)
3807 , scDefinition1(definition1)
3808 , scResultType(resultType)
3809 , scOperation(operation)
3810 , scActualValue0(value0)
3811 , scActualValue1(value1)
3812 , resultOperation(resultOp)
3813 , expectedOutput(output)
3814 , caseFlags(flags)
3815 {
3816 }
3817 };
3818
// Returns the SPIR-V declarations shared by the struct-based OpSpecConstantOp shaders: the
// integer constants 0..3, a 3-element int array, a 3x3 array-of-arrays and a struct
// wrapping it.
std::string getSpecConstantOpStructConstantsAndTypes()
{
    std::string decls;

    // Integer constants.
    decls += "%zero = OpConstant %i32 0\n";
    decls += "%one = OpConstant %i32 1\n";
    decls += "%two = OpConstant %i32 2\n";
    decls += "%three = OpConstant %i32 3\n";

    // Composite types.
    decls += "%iarr3 = OpTypeArray %i32 %three\n";
    decls += "%imat3 = OpTypeArray %iarr3 %three\n";
    decls += "%struct = OpTypeStruct %imat3\n";

    return decls;
}
3829
// Returns the SPIR-V constants holding all-zero instances of the array, the
// array-of-arrays and the struct type.
std::string getSpecConstantOpStructComposites()
{
    std::string composites;

    composites += "%iarr3_0 = OpConstantComposite %iarr3 %zero %zero %zero\n";
    composites += "%imat3_0 = OpConstantComposite %imat3 %iarr3_0 %iarr3_0 %iarr3_0\n";
    composites += "%struct_0 = OpConstantComposite %struct %imat3_0\n";

    return composites;
}
3836
// Returns the OpSpecConstantOp block that builds a struct holding a 3x3 array whose rows
// are rotations of (sc_0, sc_1, sc_2), extracts component pairs from it, compares them and
// selects the values %mustbe_0, %mustbe_1 and %mustbe_2 from the comparison results.
std::string getSpecConstantOpStructConstBlock()
{
    std::string block;

    // Row 0: (sc_0, sc_1, sc_2).
    block += "%iarr3_a = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_0 0\n"
             "%iarr3_b = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_a 1\n"
             "%iarr3_c = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_b 2\n";

    // Row 1: (sc_1, sc_2, sc_0).
    block += "%iarr3_d = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_0 0\n"
             "%iarr3_e = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_d 1\n"
             "%iarr3_f = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_e 2\n";

    // Row 2: (sc_2, sc_0, sc_1).
    block += "%iarr3_g = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_0 0\n"
             "%iarr3_h = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_g 1\n"
             "%iarr3_i = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_h 2\n";

    // Assemble the three rows into the 3x3 array.
    block += "%imat3_a = OpSpecConstantOp %imat3 CompositeInsert %iarr3_c %imat3_0 0\n"
             "%imat3_b = OpSpecConstantOp %imat3 CompositeInsert %iarr3_f %imat3_a 1\n"
             "%imat3_c = OpSpecConstantOp %imat3 CompositeInsert %iarr3_i %imat3_b 2\n";

    // Store the 3x3 array in the struct.
    block += "%struct_a = OpSpecConstantOp %struct CompositeInsert %imat3_c %struct_0 0\n";

    // Pull out the component pairs that are compared below.
    block += "%comp_0_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 0 0\n"
             "%comp_1_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 1 0\n"

             "%comp_0_1 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 0 1\n"
             "%comp_2_2 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 2 2\n"

             "%comp_2_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 2 0\n"
             "%comp_1_1 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 1 1\n";

    // Expected comparison results: false (sc_0 vs sc_1), true (sc_1 vs sc_1), true (sc_2 vs sc_2).
    block += "%cmpres_0 = OpSpecConstantOp %bool IEqual %comp_0_0 %comp_1_0\n"
             "%cmpres_1 = OpSpecConstantOp %bool IEqual %comp_0_1 %comp_2_2\n"
             "%cmpres_2 = OpSpecConstantOp %bool IEqual %comp_2_0 %comp_1_1\n";

    // Expected selections: 0, 1 and 2 respectively.
    block += "%mustbe_0 = OpSpecConstantOp %i32 Select %cmpres_0 %one %zero\n"
             "%mustbe_1 = OpSpecConstantOp %i32 Select %cmpres_1 %one %zero\n"
             "%mustbe_2 = OpSpecConstantOp %i32 Select %cmpres_2 %two %one\n";

    return block;
}
3875
// Returns the SPIR-V function-body instructions that fold %mustbe_0, %mustbe_1 and %mustbe_2 into
// a single factor, (1 - mustbe_0) * mustbe_1 * (mustbe_2 - 1), and multiply %sc_factor by it to
// produce %sc_final. When the three values are 0, 1 and 2 respectively the factor is 1, so
// %sc_final equals %sc_factor; any other combination changes the result.
// The snippet refers to %i32, %one, %mustbe_0..%mustbe_2 and %sc_factor declared elsewhere.
std::string getSpecConstantOpStructInstructions()
{
    std::string instructions("%subf_a = OpISub %i32 %one %mustbe_0\n"); // 1 - mustbe_0

    instructions += "%subf_b = OpIMul %i32 %subf_a %mustbe_1\n";     // (1 - mustbe_0) * mustbe_1
    instructions += "%subf_c = OpISub %i32 %mustbe_2 %one\n";        // mustbe_2 - 1
    instructions += "%factor = OpIMul %i32 %subf_b %subf_c\n";       // product of the terms above
    instructions += "%sc_final = OpIMul %i32 %factor %sc_factor\n";  // scaled final value

    return instructions;
}
3887
createSpecConstantGroup(tcu::TestContext & testCtx)3888 tcu::TestCaseGroup *createSpecConstantGroup(tcu::TestContext &testCtx)
3889 {
3890 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opspecconstantop"));
3891 vector<SpecConstantTwoValCase> cases;
3892 de::Random rnd(deStringHash(group->getName()));
3893 const int numElements = 100;
3894 vector<int32_t> inputInts(numElements, 0);
3895 vector<int32_t> outputInts1(numElements, 0);
3896 vector<int32_t> outputInts2(numElements, 0);
3897 vector<int32_t> outputInts3(numElements, 0);
3898 vector<int32_t> outputInts4(numElements, 0);
3899 vector<int32_t> outputInts5(numElements, 0);
3900 const StringTemplate shaderTemplate("${CAPABILITIES:opt}" + string(getComputeAsmShaderPreamble()) +
3901
3902 "OpName %main \"main\"\n"
3903 "OpName %id \"gl_GlobalInvocationID\"\n"
3904
3905 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3906 "OpDecorate %sc_0 SpecId 0\n"
3907 "OpDecorate %sc_1 SpecId 1\n"
3908 "OpDecorate %i32arr ArrayStride 4\n"
3909
3910 + string(getComputeAsmInputOutputBufferTraits()) +
3911 string(getComputeAsmCommonTypes()) +
3912
3913 "${OPTYPE_DEFINITIONS:opt}"
3914 "%buf = OpTypeStruct %i32arr\n"
3915 "%bufptr = OpTypePointer Uniform %buf\n"
3916 "%indata = OpVariable %bufptr Uniform\n"
3917 "%outdata = OpVariable %bufptr Uniform\n"
3918
3919 "%id = OpVariable %uvec3ptr Input\n"
3920 "%zero = OpConstant %i32 0\n"
3921
3922 "%sc_0 = OpSpecConstant${SC_DEF0}\n"
3923 "%sc_1 = OpSpecConstant${SC_DEF1}\n"
3924 "%sc_final = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n"
3925
3926 "%main = OpFunction %void None %voidf\n"
3927 "%label = OpLabel\n"
3928 "${TYPE_CONVERT:opt}"
3929 "%idval = OpLoad %uvec3 %id\n"
3930 "%x = OpCompositeExtract %u32 %idval 0\n"
3931 "%inloc = OpAccessChain %i32ptr %indata %zero %x\n"
3932 "%inval = OpLoad %i32 %inloc\n"
3933 "%final = ${GEN_RESULT}\n"
3934 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
3935 " OpStore %outloc %final\n"
3936 " OpReturn\n"
3937 " OpFunctionEnd\n");
3938
3939 fillRandomScalars(rnd, -65536, 65536, &inputInts[0], numElements);
3940
3941 for (size_t ndx = 0; ndx < numElements; ++ndx)
3942 {
3943 outputInts1[ndx] = inputInts[ndx] + 42;
3944 outputInts2[ndx] = inputInts[ndx];
3945 outputInts3[ndx] = inputInts[ndx] - 11200;
3946 outputInts4[ndx] = inputInts[ndx] + 1;
3947 outputInts5[ndx] = inputInts[ndx] - 42;
3948 }
3949
3950 const char addScToInput[] = "OpIAdd %i32 %inval %sc_final";
3951 const char addSc32ToInput[] = "OpIAdd %i32 %inval %sc_final32";
3952 const char selectTrueUsingSc[] = "OpSelect %i32 %sc_final %inval %zero";
3953 const char selectFalseUsingSc[] = "OpSelect %i32 %sc_final %zero %inval";
3954
3955 cases.push_back(SpecConstantTwoValCase("iadd", " %i32 0", " %i32 0", "%i32", "IAdd %sc_0 %sc_1", 62,
3956 -20, addScToInput, outputInts1));
3957 cases.push_back(SpecConstantTwoValCase("isub", " %i32 0", " %i32 0", "%i32", "ISub %sc_0 %sc_1",
3958 100, 58, addScToInput, outputInts1));
3959 cases.push_back(SpecConstantTwoValCase("imul", " %i32 0", " %i32 0", "%i32", "IMul %sc_0 %sc_1", -2,
3960 -21, addScToInput, outputInts1));
3961 cases.push_back(SpecConstantTwoValCase("sdiv", " %i32 0", " %i32 0", "%i32", "SDiv %sc_0 %sc_1",
3962 -126, -3, addScToInput, outputInts1));
3963 cases.push_back(SpecConstantTwoValCase("udiv", " %i32 0", " %i32 0", "%i32", "UDiv %sc_0 %sc_1",
3964 126, 3, addScToInput, outputInts1));
3965 cases.push_back(SpecConstantTwoValCase("srem", " %i32 0", " %i32 0", "%i32", "SRem %sc_0 %sc_1", 7,
3966 3, addScToInput, outputInts4));
3967 cases.push_back(SpecConstantTwoValCase("smod", " %i32 0", " %i32 0", "%i32", "SMod %sc_0 %sc_1", 7,
3968 3, addScToInput, outputInts4));
3969 cases.push_back(SpecConstantTwoValCase("umod", " %i32 0", " %i32 0", "%i32", "UMod %sc_0 %sc_1",
3970 342, 50, addScToInput, outputInts1));
3971 cases.push_back(SpecConstantTwoValCase("bitwiseand", " %i32 0", " %i32 0", "%i32",
3972 "BitwiseAnd %sc_0 %sc_1", 42, 63, addScToInput, outputInts1));
3973 cases.push_back(SpecConstantTwoValCase("bitwiseor", " %i32 0", " %i32 0", "%i32",
3974 "BitwiseOr %sc_0 %sc_1", 34, 8, addScToInput, outputInts1));
3975 cases.push_back(SpecConstantTwoValCase("bitwisexor", " %i32 0", " %i32 0", "%i32",
3976 "BitwiseXor %sc_0 %sc_1", 18, 56, addScToInput, outputInts1));
3977 cases.push_back(SpecConstantTwoValCase("shiftrightlogical", " %i32 0", " %i32 0", "%i32",
3978 "ShiftRightLogical %sc_0 %sc_1", 168, 2, addScToInput, outputInts1));
3979 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic", " %i32 0", " %i32 0", "%i32",
3980 "ShiftRightArithmetic %sc_0 %sc_1", -168, 2, addScToInput, outputInts5));
3981 cases.push_back(SpecConstantTwoValCase("shiftleftlogical", " %i32 0", " %i32 0", "%i32",
3982 "ShiftLeftLogical %sc_0 %sc_1", 21, 1, addScToInput, outputInts1));
3983
3984 // Shifts for other integer sizes.
3985 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i64", " %i64 0", " %i64 0", "%i64",
3986 "ShiftRightLogical %sc_0 %sc_1", int64_t{168}, int64_t{2}, addSc32ToInput,
3987 outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3988 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i64", " %i64 0", " %i64 0", "%i64",
3989 "ShiftRightArithmetic %sc_0 %sc_1", int64_t{-168}, int64_t{2},
3990 addSc32ToInput, outputInts5, (FLAG_I64 | FLAG_CONVERT)));
3991 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i64", " %i64 0", " %i64 0", "%i64",
3992 "ShiftLeftLogical %sc_0 %sc_1", int64_t{21}, int64_t{1}, addSc32ToInput,
3993 outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3994 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i16", " %i16 0", " %i16 0", "%i16",
3995 "ShiftRightLogical %sc_0 %sc_1", int16_t{168}, int16_t{2}, addSc32ToInput,
3996 outputInts1, (FLAG_I16 | FLAG_CONVERT)));
3997 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i16", " %i16 0", " %i16 0", "%i16",
3998 "ShiftRightArithmetic %sc_0 %sc_1", int16_t{-168}, int16_t{2},
3999 addSc32ToInput, outputInts5, (FLAG_I16 | FLAG_CONVERT)));
4000 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i16", " %i16 0", " %i16 0", "%i16",
4001 "ShiftLeftLogical %sc_0 %sc_1", int16_t{21}, int16_t{1}, addSc32ToInput,
4002 outputInts1, (FLAG_I16 | FLAG_CONVERT)));
4003 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i8", " %i8 0", " %i8 0", "%i8",
4004 "ShiftRightLogical %sc_0 %sc_1", int8_t{84}, int8_t{1}, addSc32ToInput,
4005 outputInts1, (FLAG_I8 | FLAG_CONVERT)));
4006 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i8", " %i8 0", " %i8 0", "%i8",
4007 "ShiftRightArithmetic %sc_0 %sc_1", int8_t{-84}, int8_t{1}, addSc32ToInput,
4008 outputInts5, (FLAG_I8 | FLAG_CONVERT)));
4009 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i8", " %i8 0", " %i8 0", "%i8",
4010 "ShiftLeftLogical %sc_0 %sc_1", int8_t{21}, int8_t{1}, addSc32ToInput,
4011 outputInts1, (FLAG_I8 | FLAG_CONVERT)));
4012
4013 // Shifts for other integer sizes but only in the shift amount.
4014 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i64", " %i32 0", " %i64 0", "%i32",
4015 "ShiftRightLogical %sc_0 %sc_1", 168, int64_t{2}, addScToInput,
4016 outputInts1, (FLAG_I64)));
4017 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i64", " %i32 0", " %i64 0", "%i32",
4018 "ShiftRightArithmetic %sc_0 %sc_1", -168, int64_t{2}, addScToInput,
4019 outputInts5, (FLAG_I64)));
4020 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i64", " %i32 0", " %i64 0", "%i32",
4021 "ShiftLeftLogical %sc_0 %sc_1", 21, int64_t{1}, addScToInput,
4022 outputInts1, (FLAG_I64)));
4023 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i16", " %i32 0", " %i16 0", "%i32",
4024 "ShiftRightLogical %sc_0 %sc_1", 168, int16_t{2}, addScToInput,
4025 outputInts1, (FLAG_I16)));
4026 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i16", " %i32 0", " %i16 0", "%i32",
4027 "ShiftRightArithmetic %sc_0 %sc_1", -168, int16_t{2}, addScToInput,
4028 outputInts5, (FLAG_I16)));
4029 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i16", " %i32 0", " %i16 0", "%i32",
4030 "ShiftLeftLogical %sc_0 %sc_1", 21, int16_t{1}, addScToInput,
4031 outputInts1, (FLAG_I16)));
4032 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i8", " %i32 0", " %i8 0", "%i32",
4033 "ShiftRightLogical %sc_0 %sc_1", 84, int8_t{1}, addScToInput, outputInts1,
4034 (FLAG_I8)));
4035 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i8", " %i32 0", " %i8 0", "%i32",
4036 "ShiftRightArithmetic %sc_0 %sc_1", -84, int8_t{1}, addScToInput,
4037 outputInts5, (FLAG_I8)));
4038 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i8", " %i32 0", " %i8 0", "%i32",
4039 "ShiftLeftLogical %sc_0 %sc_1", 21, int8_t{1}, addScToInput, outputInts1,
4040 (FLAG_I8)));
4041
4042 cases.push_back(SpecConstantTwoValCase("slessthan", " %i32 0", " %i32 0", "%bool",
4043 "SLessThan %sc_0 %sc_1", -20, -10, selectTrueUsingSc,
4044 outputInts2));
4045 cases.push_back(SpecConstantTwoValCase("ulessthan", " %i32 0", " %i32 0", "%bool",
4046 "ULessThan %sc_0 %sc_1", 10, 20, selectTrueUsingSc, outputInts2));
4047 cases.push_back(SpecConstantTwoValCase("sgreaterthan", " %i32 0", " %i32 0", "%bool",
4048 "SGreaterThan %sc_0 %sc_1", -1000, 50, selectFalseUsingSc,
4049 outputInts2));
4050 cases.push_back(SpecConstantTwoValCase("ugreaterthan", " %i32 0", " %i32 0", "%bool",
4051 "UGreaterThan %sc_0 %sc_1", 10, 5, selectTrueUsingSc, outputInts2));
4052 cases.push_back(SpecConstantTwoValCase("slessthanequal", " %i32 0", " %i32 0", "%bool",
4053 "SLessThanEqual %sc_0 %sc_1", -10, -10, selectTrueUsingSc,
4054 outputInts2));
4055 cases.push_back(SpecConstantTwoValCase("ulessthanequal", " %i32 0", " %i32 0", "%bool",
4056 "ULessThanEqual %sc_0 %sc_1", 50, 100, selectTrueUsingSc,
4057 outputInts2));
4058 cases.push_back(SpecConstantTwoValCase("sgreaterthanequal", " %i32 0", " %i32 0", "%bool",
4059 "SGreaterThanEqual %sc_0 %sc_1", -1000, 50, selectFalseUsingSc,
4060 outputInts2));
4061 cases.push_back(SpecConstantTwoValCase("ugreaterthanequal", " %i32 0", " %i32 0", "%bool",
4062 "UGreaterThanEqual %sc_0 %sc_1", 10, 10, selectTrueUsingSc, outputInts2));
4063 cases.push_back(SpecConstantTwoValCase("iequal", " %i32 0", " %i32 0", "%bool", "IEqual %sc_0 %sc_1",
4064 42, 24, selectFalseUsingSc, outputInts2));
4065 cases.push_back(SpecConstantTwoValCase("inotequal", " %i32 0", " %i32 0", "%bool",
4066 "INotEqual %sc_0 %sc_1", 42, 24, selectTrueUsingSc, outputInts2));
4067 cases.push_back(SpecConstantTwoValCase("logicaland", "True %bool", "True %bool", "%bool",
4068 "LogicalAnd %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputInts2));
4069 cases.push_back(SpecConstantTwoValCase("logicalor", "False %bool", "False %bool", "%bool",
4070 "LogicalOr %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputInts2));
4071 cases.push_back(SpecConstantTwoValCase("logicalequal", "True %bool", "True %bool", "%bool",
4072 "LogicalEqual %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputInts2));
4073 cases.push_back(SpecConstantTwoValCase("logicalnotequal", "False %bool", "False %bool", "%bool",
4074 "LogicalNotEqual %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputInts2));
4075 cases.push_back(SpecConstantTwoValCase("snegate", " %i32 0", " %i32 0", "%i32", "SNegate %sc_0", -42,
4076 0, addScToInput, outputInts1));
4077 cases.push_back(SpecConstantTwoValCase("not", " %i32 0", " %i32 0", "%i32", "Not %sc_0", -43, 0,
4078 addScToInput, outputInts1));
4079 cases.push_back(SpecConstantTwoValCase("logicalnot", "False %bool", "False %bool", "%bool",
4080 "LogicalNot %sc_0", 1, 0, selectFalseUsingSc, outputInts2));
4081 cases.push_back(SpecConstantTwoValCase("select", "False %bool", " %i32 0", "%i32",
4082 "Select %sc_0 %sc_1 %zero", 1, 42, addScToInput, outputInts1));
4083 cases.push_back(SpecConstantTwoValCase("sconvert", " %i32 0", " %i32 0", "%i16", "SConvert %sc_0",
4084 -11200, 0, addSc32ToInput, outputInts3, (FLAG_I16 | FLAG_CONVERT)));
4085 cases.push_back(SpecConstantTwoValCase("fconvert", " %f32 0", " %f32 0", "%f64", "FConvert %sc_0",
4086 tcu::Float32{-11200.0}, tcu::Float32{0.0}, addSc32ToInput, outputInts3,
4087 (FLAG_F64 | FLAG_CONVERT)));
4088 cases.push_back(SpecConstantTwoValCase("fconvert16", " %f16 0", " %f16 0", "%f32", "FConvert %sc_0",
4089 tcu::Float16{1.0}, tcu::Float16{0.0}, addSc32ToInput, outputInts4,
4090 (FLAG_F16 | FLAG_CONVERT)));
4091
4092 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
4093 {
4094 map<string, string> specializations;
4095 ComputeShaderSpec spec;
4096
4097 specializations["SC_DEF0"] = cases[caseNdx].scDefinition0;
4098 specializations["SC_DEF1"] = cases[caseNdx].scDefinition1;
4099 specializations["SC_RESULT_TYPE"] = cases[caseNdx].scResultType;
4100 specializations["SC_OP"] = cases[caseNdx].scOperation;
4101 specializations["GEN_RESULT"] = cases[caseNdx].resultOperation;
4102
4103 // Special SPIR-V code when using 16-bit integers.
4104 if (cases[caseNdx].caseFlags & FLAG_I16)
4105 {
4106 spec.requestedVulkanFeatures.coreFeatures.shaderInt16 = VK_TRUE;
4107 specializations["CAPABILITIES"] += "OpCapability Int16\n"; // Adds 16-bit integer capability
4108 specializations["OPTYPE_DEFINITIONS"] += "%i16 = OpTypeInt 16 1\n"; // Adds 16-bit integer type
4109 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
4110 specializations["TYPE_CONVERT"] +=
4111 "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 16-bit integer to 32-bit integer
4112 }
4113
4114 // Special SPIR-V code when using 64-bit integers.
4115 if (cases[caseNdx].caseFlags & FLAG_I64)
4116 {
4117 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
4118 specializations["CAPABILITIES"] += "OpCapability Int64\n"; // Adds 64-bit integer capability
4119 specializations["OPTYPE_DEFINITIONS"] += "%i64 = OpTypeInt 64 1\n"; // Adds 64-bit integer type
4120 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
4121 specializations["TYPE_CONVERT"] +=
4122 "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 64-bit integer to 32-bit integer
4123 }
4124
4125 // Special SPIR-V code when using 64-bit floats.
4126 if (cases[caseNdx].caseFlags & FLAG_F64)
4127 {
4128 spec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
4129 specializations["CAPABILITIES"] += "OpCapability Float64\n"; // Adds 64-bit float capability
4130 specializations["OPTYPE_DEFINITIONS"] += "%f64 = OpTypeFloat 64\n"; // Adds 64-bit float type
4131 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
4132 specializations["TYPE_CONVERT"] +=
4133 "%sc_final32 = OpConvertFToS %i32 %sc_final\n"; // Converts 64-bit float to 32-bit integer
4134 }
4135
4136 // Extension needed for float16 and int8.
4137 if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
4138 spec.extensions.push_back("VK_KHR_shader_float16_int8");
4139
4140 // Special SPIR-V code when using 16-bit floats.
4141 if (cases[caseNdx].caseFlags & FLAG_F16)
4142 {
4143 spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
4144 specializations["CAPABILITIES"] += "OpCapability Float16\n"; // Adds 16-bit float capability
4145 specializations["OPTYPE_DEFINITIONS"] += "%f16 = OpTypeFloat 16\n"; // Adds 16-bit float type
4146 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
4147 specializations["TYPE_CONVERT"] +=
4148 "%sc_final32 = OpConvertFToS %i32 %sc_final\n"; // Converts 16-bit float to 32-bit integer
4149 }
4150
4151 // Special SPIR-V code when using 8-bit integers.
4152 if (cases[caseNdx].caseFlags & FLAG_I8)
4153 {
4154 spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
4155 specializations["CAPABILITIES"] += "OpCapability Int8\n"; // Adds 8-bit integer capability
4156 specializations["OPTYPE_DEFINITIONS"] += "%i8 = OpTypeInt 8 1\n"; // Adds 8-bit integer type
4157 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
4158 specializations["TYPE_CONVERT"] +=
4159 "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 8-bit integer to 32-bit integer
4160 }
4161
4162 spec.assembly = shaderTemplate.specialize(specializations);
4163 spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
4164 spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
4165 spec.numWorkGroups = IVec3(numElements, 1, 1);
4166 cases[caseNdx].scActualValue0.appendTo(spec.specConstants);
4167 cases[caseNdx].scActualValue1.appendTo(spec.specConstants);
4168
4169 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName.c_str(), spec));
4170 }
4171
4172 ComputeShaderSpec spec;
4173
4174 spec.assembly =
4175 string(getComputeAsmShaderPreamble()) +
4176
4177 "OpName %main \"main\"\n"
4178 "OpName %id \"gl_GlobalInvocationID\"\n"
4179
4180 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4181 "OpDecorate %sc_0 SpecId 0\n"
4182 "OpDecorate %sc_1 SpecId 1\n"
4183 "OpDecorate %sc_2 SpecId 2\n"
4184 "OpDecorate %i32arr ArrayStride 4\n"
4185
4186 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4187
4188 "%ivec3 = OpTypeVector %i32 3\n"
4189
4190 + getSpecConstantOpStructConstantsAndTypes() +
4191
4192 "%buf = OpTypeStruct %i32arr\n"
4193 "%bufptr = OpTypePointer Uniform %buf\n"
4194 "%indata = OpVariable %bufptr Uniform\n"
4195 "%outdata = OpVariable %bufptr Uniform\n"
4196
4197 "%id = OpVariable %uvec3ptr Input\n"
4198 "%ivec3_0 = OpConstantComposite %ivec3 %zero %zero %zero\n"
4199 "%vec3_undef = OpUndef %ivec3\n"
4200
4201 + getSpecConstantOpStructComposites() +
4202
4203 "%sc_0 = OpSpecConstant %i32 0\n"
4204 "%sc_1 = OpSpecConstant %i32 0\n"
4205 "%sc_2 = OpSpecConstant %i32 0\n"
4206
4207 + getSpecConstantOpStructConstBlock() +
4208
4209 "%sc_vec3_0 = OpSpecConstantOp %ivec3 CompositeInsert %sc_0 %ivec3_0 0\n" // (sc_0, 0, 0)
4210 "%sc_vec3_1 = OpSpecConstantOp %ivec3 CompositeInsert %sc_1 %ivec3_0 1\n" // (0, sc_1, 0)
4211 "%sc_vec3_2 = OpSpecConstantOp %ivec3 CompositeInsert %sc_2 %ivec3_0 2\n" // (0, 0, sc_2)
4212 "%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0 %vec3_undef 0 "
4213 "0xFFFFFFFF 2\n" // (sc_0, ???, 0)
4214 "%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_1 %vec3_undef 0xFFFFFFFF 1 "
4215 " 0\n" // (???, sc_1, 0)
4216 "%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle %vec3_undef %sc_vec3_2 5 "
4217 "0xFFFFFFFF 5\n" // (sc_2, ???, sc_2)
4218 "%sc_vec3_01 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n" // (0, sc_0, sc_1)
4219 "%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_01 %sc_vec3_2_s 5 1 2\n" // (sc_2, sc_0, sc_1)
4220 "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0\n" // sc_2
4221 "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1\n" // sc_0
4222 "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2\n" // sc_1
4223 "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1\n" // (sc_2 - sc_0)
4224 "%sc_factor = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2\n" // (sc_2 - sc_0) * sc_1
4225
4226 "%main = OpFunction %void None %voidf\n"
4227 "%label = OpLabel\n"
4228
4229 + getSpecConstantOpStructInstructions() +
4230
4231 "%idval = OpLoad %uvec3 %id\n"
4232 "%x = OpCompositeExtract %u32 %idval 0\n"
4233 "%inloc = OpAccessChain %i32ptr %indata %zero %x\n"
4234 "%inval = OpLoad %i32 %inloc\n"
4235 "%final = OpIAdd %i32 %inval %sc_final\n"
4236 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
4237 " OpStore %outloc %final\n"
4238 " OpReturn\n"
4239 " OpFunctionEnd\n";
4240 spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
4241 spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts3)));
4242 spec.numWorkGroups = IVec3(numElements, 1, 1);
4243 spec.specConstants.append<int32_t>(123);
4244 spec.specConstants.append<int32_t>(56);
4245 spec.specConstants.append<int32_t>(-77);
4246
4247 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", spec));
4248
4249 return group.release();
4250 }
4251
createOpPhiVartypeTests(de::MovePtr<tcu::TestCaseGroup> & group,tcu::TestContext & testCtx)4252 void createOpPhiVartypeTests(de::MovePtr<tcu::TestCaseGroup> &group, tcu::TestContext &testCtx)
4253 {
4254 ComputeShaderSpec specInt;
4255 ComputeShaderSpec specFloat;
4256 ComputeShaderSpec specFloat16;
4257 ComputeShaderSpec specVec3;
4258 ComputeShaderSpec specMat4;
4259 ComputeShaderSpec specArray;
4260 ComputeShaderSpec specStruct;
4261 de::Random rnd(deStringHash(group->getName()));
4262 const int numElements = 100;
4263 vector<float> inputFloats(numElements, 0);
4264 vector<float> outputFloats(numElements, 0);
4265 vector<uint32_t> inputUints(numElements, 0);
4266 vector<uint32_t> outputUints(numElements, 0);
4267
4268 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4269
4270 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4271 floorAll(inputFloats);
4272
4273 for (size_t ndx = 0; ndx < numElements; ++ndx)
4274 {
4275 // Just check if the value is positive or not
4276 outputFloats[ndx] = (inputFloats[ndx] > 0) ? 1.0f : -1.0f;
4277 }
4278
4279 for (size_t ndx = 0; ndx < numElements; ++ndx)
4280 {
4281 inputUints[ndx] = tcu::Float16(inputFloats[ndx]).bits();
4282 outputUints[ndx] = tcu::Float16(outputFloats[ndx]).bits();
4283 }
4284
4285 // All of the tests are of the form:
4286 //
4287 // testtype r
4288 //
4289 // if (inputdata > 0)
4290 // r = 1
4291 // else
4292 // r = -1
4293 //
4294 // return (float)r
4295
4296 specFloat.assembly = string(getComputeAsmShaderPreamble()) +
4297
4298 "OpSource GLSL 430\n"
4299 "OpName %main \"main\"\n"
4300 "OpName %id \"gl_GlobalInvocationID\"\n"
4301
4302 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4303
4304 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4305 string(getComputeAsmInputOutputBuffer()) +
4306
4307 "%id = OpVariable %uvec3ptr Input\n"
4308 "%zero = OpConstant %i32 0\n"
4309 "%float_0 = OpConstant %f32 0.0\n"
4310 "%float_1 = OpConstant %f32 1.0\n"
4311 "%float_n1 = OpConstant %f32 -1.0\n"
4312
4313 "%main = OpFunction %void None %voidf\n"
4314 "%entry = OpLabel\n"
4315 "%idval = OpLoad %uvec3 %id\n"
4316 "%x = OpCompositeExtract %u32 %idval 0\n"
4317 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4318 "%inval = OpLoad %f32 %inloc\n"
4319
4320 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4321 " OpSelectionMerge %cm None\n"
4322 " OpBranchConditional %comp %tb %fb\n"
4323 "%tb = OpLabel\n"
4324 " OpBranch %cm\n"
4325 "%fb = OpLabel\n"
4326 " OpBranch %cm\n"
4327 "%cm = OpLabel\n"
4328 "%res = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4329
4330 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4331 " OpStore %outloc %res\n"
4332 " OpReturn\n"
4333
4334 " OpFunctionEnd\n";
4335 specFloat.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4336 specFloat.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4337 specFloat.numWorkGroups = IVec3(numElements, 1, 1);
4338
4339 specFloat16.assembly = "OpCapability Shader\n"
4340 "OpCapability Float16\n"
4341 "OpMemoryModel Logical GLSL450\n"
4342 "OpEntryPoint GLCompute %main \"main\" %id\n"
4343 "OpExecutionMode %main LocalSize 1 1 1\n"
4344
4345 "OpSource GLSL 430\n"
4346 "OpName %main \"main\"\n"
4347 "OpName %id \"gl_GlobalInvocationID\"\n"
4348
4349 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4350
4351 "OpDecorate %buf BufferBlock\n"
4352 "OpDecorate %indata DescriptorSet 0\n"
4353 "OpDecorate %indata Binding 0\n"
4354 "OpDecorate %outdata DescriptorSet 0\n"
4355 "OpDecorate %outdata Binding 1\n"
4356 "OpDecorate %u32arr ArrayStride 4\n"
4357 "OpMemberDecorate %buf 0 Offset 0\n"
4358
4359 + string(getComputeAsmCommonTypes()) +
4360
4361 "%f16 = OpTypeFloat 16\n"
4362 "%f16vec2 = OpTypeVector %f16 2\n"
4363 "%fvec2 = OpTypeVector %f32 2\n"
4364 "%u32ptr = OpTypePointer Uniform %u32\n"
4365 "%u32arr = OpTypeRuntimeArray %u32\n"
4366 "%f16_0 = OpConstant %f16 0.0\n"
4367
4368 "%buf = OpTypeStruct %u32arr\n"
4369 "%bufptr = OpTypePointer Uniform %buf\n"
4370 "%indata = OpVariable %bufptr Uniform\n"
4371 "%outdata = OpVariable %bufptr Uniform\n"
4372
4373 "%id = OpVariable %uvec3ptr Input\n"
4374 "%zero = OpConstant %i32 0\n"
4375 "%float_0 = OpConstant %f32 0.0\n"
4376 "%float_1 = OpConstant %f32 1.0\n"
4377 "%float_n1 = OpConstant %f32 -1.0\n"
4378
4379 "%main = OpFunction %void None %voidf\n"
4380 "%entry = OpLabel\n"
4381 "%idval = OpLoad %uvec3 %id\n"
4382 "%x = OpCompositeExtract %u32 %idval 0\n"
4383 "%inloc = OpAccessChain %u32ptr %indata %zero %x\n"
4384 "%inval = OpLoad %u32 %inloc\n"
4385 "%f16_vec2_inval = OpBitcast %f16vec2 %inval\n"
4386 "%f16_inval = OpCompositeExtract %f16 %f16_vec2_inval 0\n"
4387 "%f32_inval = OpFConvert %f32 %f16_inval\n"
4388
4389 "%comp = OpFOrdGreaterThan %bool %f32_inval %float_0\n"
4390 " OpSelectionMerge %cm None\n"
4391 " OpBranchConditional %comp %tb %fb\n"
4392 "%tb = OpLabel\n"
4393 " OpBranch %cm\n"
4394 "%fb = OpLabel\n"
4395 " OpBranch %cm\n"
4396 "%cm = OpLabel\n"
4397 "%res = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4398 "%f16_res = OpFConvert %f16 %res\n"
4399
4400 "%f16vec2_res = OpCompositeConstruct %f16vec2 %f16_res %f16_0\n"
4401 "%u32_res = OpBitcast %u32 %f16vec2_res\n"
4402
4403 "%outloc = OpAccessChain %u32ptr %outdata %zero %x\n"
4404 " OpStore %outloc %u32_res\n"
4405 " OpReturn\n"
4406
4407 " OpFunctionEnd\n";
4408
4409 specFloat16.inputs.push_back(BufferSp(new Uint32Buffer(inputUints)));
4410 specFloat16.outputs.push_back(BufferSp(new Uint32Buffer(outputUints)));
4411 specFloat16.numWorkGroups = IVec3(numElements, 1, 1);
4412 specFloat16.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
4413
4414 specMat4.assembly = string(getComputeAsmShaderPreamble()) +
4415
4416 "OpSource GLSL 430\n"
4417 "OpName %main \"main\"\n"
4418 "OpName %id \"gl_GlobalInvocationID\"\n"
4419
4420 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4421
4422 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4423 string(getComputeAsmInputOutputBuffer()) +
4424
4425 "%id = OpVariable %uvec3ptr Input\n"
4426 "%v4f32 = OpTypeVector %f32 4\n"
4427 "%mat4v4f32 = OpTypeMatrix %v4f32 4\n"
4428 "%zero = OpConstant %i32 0\n"
4429 "%float_0 = OpConstant %f32 0.0\n"
4430 "%float_1 = OpConstant %f32 1.0\n"
4431 "%float_n1 = OpConstant %f32 -1.0\n"
4432 "%m11 = OpConstantComposite %v4f32 %float_1 %float_0 %float_0 %float_0\n"
4433 "%m12 = OpConstantComposite %v4f32 %float_0 %float_1 %float_0 %float_0\n"
4434 "%m13 = OpConstantComposite %v4f32 %float_0 %float_0 %float_1 %float_0\n"
4435 "%m14 = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_1\n"
4436 "%m1 = OpConstantComposite %mat4v4f32 %m11 %m12 %m13 %m14\n"
4437 "%m21 = OpConstantComposite %v4f32 %float_n1 %float_0 %float_0 %float_0\n"
4438 "%m22 = OpConstantComposite %v4f32 %float_0 %float_n1 %float_0 %float_0\n"
4439 "%m23 = OpConstantComposite %v4f32 %float_0 %float_0 %float_n1 %float_0\n"
4440 "%m24 = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_n1\n"
4441 "%m2 = OpConstantComposite %mat4v4f32 %m21 %m22 %m23 %m24\n"
4442
4443 "%main = OpFunction %void None %voidf\n"
4444 "%entry = OpLabel\n"
4445 "%idval = OpLoad %uvec3 %id\n"
4446 "%x = OpCompositeExtract %u32 %idval 0\n"
4447 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4448 "%inval = OpLoad %f32 %inloc\n"
4449
4450 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4451 " OpSelectionMerge %cm None\n"
4452 " OpBranchConditional %comp %tb %fb\n"
4453 "%tb = OpLabel\n"
4454 " OpBranch %cm\n"
4455 "%fb = OpLabel\n"
4456 " OpBranch %cm\n"
4457 "%cm = OpLabel\n"
4458 "%mres = OpPhi %mat4v4f32 %m1 %tb %m2 %fb\n"
4459 "%res = OpCompositeExtract %f32 %mres 2 2\n"
4460
4461 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4462 " OpStore %outloc %res\n"
4463 " OpReturn\n"
4464
4465 " OpFunctionEnd\n";
4466 specMat4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4467 specMat4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4468 specMat4.numWorkGroups = IVec3(numElements, 1, 1);
4469
4470 specVec3.assembly = string(getComputeAsmShaderPreamble()) +
4471
4472 "OpSource GLSL 430\n"
4473 "OpName %main \"main\"\n"
4474 "OpName %id \"gl_GlobalInvocationID\"\n"
4475
4476 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4477
4478 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4479 string(getComputeAsmInputOutputBuffer()) +
4480
4481 "%id = OpVariable %uvec3ptr Input\n"
4482 "%zero = OpConstant %i32 0\n"
4483 "%float_0 = OpConstant %f32 0.0\n"
4484 "%float_1 = OpConstant %f32 1.0\n"
4485 "%float_n1 = OpConstant %f32 -1.0\n"
4486 "%v1 = OpConstantComposite %fvec3 %float_1 %float_1 %float_1\n"
4487 "%v2 = OpConstantComposite %fvec3 %float_n1 %float_n1 %float_n1\n"
4488
4489 "%main = OpFunction %void None %voidf\n"
4490 "%entry = OpLabel\n"
4491 "%idval = OpLoad %uvec3 %id\n"
4492 "%x = OpCompositeExtract %u32 %idval 0\n"
4493 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4494 "%inval = OpLoad %f32 %inloc\n"
4495
4496 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4497 " OpSelectionMerge %cm None\n"
4498 " OpBranchConditional %comp %tb %fb\n"
4499 "%tb = OpLabel\n"
4500 " OpBranch %cm\n"
4501 "%fb = OpLabel\n"
4502 " OpBranch %cm\n"
4503 "%cm = OpLabel\n"
4504 "%vres = OpPhi %fvec3 %v1 %tb %v2 %fb\n"
4505 "%res = OpCompositeExtract %f32 %vres 2\n"
4506
4507 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4508 " OpStore %outloc %res\n"
4509 " OpReturn\n"
4510
4511 " OpFunctionEnd\n";
4512 specVec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4513 specVec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4514 specVec3.numWorkGroups = IVec3(numElements, 1, 1);
4515
4516 specInt.assembly = string(getComputeAsmShaderPreamble()) +
4517
4518 "OpSource GLSL 430\n"
4519 "OpName %main \"main\"\n"
4520 "OpName %id \"gl_GlobalInvocationID\"\n"
4521
4522 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4523
4524 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4525 string(getComputeAsmInputOutputBuffer()) +
4526
4527 "%id = OpVariable %uvec3ptr Input\n"
4528 "%zero = OpConstant %i32 0\n"
4529 "%float_0 = OpConstant %f32 0.0\n"
4530 "%i1 = OpConstant %i32 1\n"
4531 "%i2 = OpConstant %i32 -1\n"
4532
4533 "%main = OpFunction %void None %voidf\n"
4534 "%entry = OpLabel\n"
4535 "%idval = OpLoad %uvec3 %id\n"
4536 "%x = OpCompositeExtract %u32 %idval 0\n"
4537 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4538 "%inval = OpLoad %f32 %inloc\n"
4539
4540 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4541 " OpSelectionMerge %cm None\n"
4542 " OpBranchConditional %comp %tb %fb\n"
4543 "%tb = OpLabel\n"
4544 " OpBranch %cm\n"
4545 "%fb = OpLabel\n"
4546 " OpBranch %cm\n"
4547 "%cm = OpLabel\n"
4548 "%ires = OpPhi %i32 %i1 %tb %i2 %fb\n"
4549 "%res = OpConvertSToF %f32 %ires\n"
4550
4551 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4552 " OpStore %outloc %res\n"
4553 " OpReturn\n"
4554
4555 " OpFunctionEnd\n";
4556 specInt.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4557 specInt.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4558 specInt.numWorkGroups = IVec3(numElements, 1, 1);
4559
4560 specArray.assembly =
4561 string(getComputeAsmShaderPreamble()) +
4562
4563 "OpSource GLSL 430\n"
4564 "OpName %main \"main\"\n"
4565 "OpName %id \"gl_GlobalInvocationID\"\n"
4566
4567 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4568
4569 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4570 string(getComputeAsmInputOutputBuffer()) +
4571
4572 "%id = OpVariable %uvec3ptr Input\n"
4573 "%zero = OpConstant %i32 0\n"
4574 "%u7 = OpConstant %u32 7\n"
4575 "%float_0 = OpConstant %f32 0.0\n"
4576 "%float_1 = OpConstant %f32 1.0\n"
4577 "%float_n1 = OpConstant %f32 -1.0\n"
4578 "%f32a7 = OpTypeArray %f32 %u7\n"
4579 "%a1 = OpConstantComposite %f32a7 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1\n"
4580 "%a2 = OpConstantComposite %f32a7 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 "
4581 "%float_n1\n"
4582 "%main = OpFunction %void None %voidf\n"
4583 "%entry = OpLabel\n"
4584 "%idval = OpLoad %uvec3 %id\n"
4585 "%x = OpCompositeExtract %u32 %idval 0\n"
4586 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4587 "%inval = OpLoad %f32 %inloc\n"
4588
4589 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4590 " OpSelectionMerge %cm None\n"
4591 " OpBranchConditional %comp %tb %fb\n"
4592 "%tb = OpLabel\n"
4593 " OpBranch %cm\n"
4594 "%fb = OpLabel\n"
4595 " OpBranch %cm\n"
4596 "%cm = OpLabel\n"
4597 "%ares = OpPhi %f32a7 %a1 %tb %a2 %fb\n"
4598 "%res = OpCompositeExtract %f32 %ares 5\n"
4599
4600 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4601 " OpStore %outloc %res\n"
4602 " OpReturn\n"
4603
4604 " OpFunctionEnd\n";
4605 specArray.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4606 specArray.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4607 specArray.numWorkGroups = IVec3(numElements, 1, 1);
4608
4609 specStruct.assembly = string(getComputeAsmShaderPreamble()) +
4610
4611 "OpSource GLSL 430\n"
4612 "OpName %main \"main\"\n"
4613 "OpName %id \"gl_GlobalInvocationID\"\n"
4614
4615 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4616
4617 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4618 string(getComputeAsmInputOutputBuffer()) +
4619
4620 "%id = OpVariable %uvec3ptr Input\n"
4621 "%zero = OpConstant %i32 0\n"
4622 "%float_0 = OpConstant %f32 0.0\n"
4623 "%float_1 = OpConstant %f32 1.0\n"
4624 "%float_n1 = OpConstant %f32 -1.0\n"
4625
4626 "%v2f32 = OpTypeVector %f32 2\n"
4627 "%Data2 = OpTypeStruct %f32 %v2f32\n"
4628 "%Data = OpTypeStruct %Data2 %f32\n"
4629
4630 "%in1a = OpConstantComposite %v2f32 %float_1 %float_1\n"
4631 "%in1b = OpConstantComposite %Data2 %float_1 %in1a\n"
4632 "%s1 = OpConstantComposite %Data %in1b %float_1\n"
4633 "%in2a = OpConstantComposite %v2f32 %float_n1 %float_n1\n"
4634 "%in2b = OpConstantComposite %Data2 %float_n1 %in2a\n"
4635 "%s2 = OpConstantComposite %Data %in2b %float_n1\n"
4636
4637 "%main = OpFunction %void None %voidf\n"
4638 "%entry = OpLabel\n"
4639 "%idval = OpLoad %uvec3 %id\n"
4640 "%x = OpCompositeExtract %u32 %idval 0\n"
4641 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4642 "%inval = OpLoad %f32 %inloc\n"
4643
4644 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4645 " OpSelectionMerge %cm None\n"
4646 " OpBranchConditional %comp %tb %fb\n"
4647 "%tb = OpLabel\n"
4648 " OpBranch %cm\n"
4649 "%fb = OpLabel\n"
4650 " OpBranch %cm\n"
4651 "%cm = OpLabel\n"
4652 "%sres = OpPhi %Data %s1 %tb %s2 %fb\n"
4653 "%res = OpCompositeExtract %f32 %sres 0 0\n"
4654
4655 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4656 " OpStore %outloc %res\n"
4657 " OpReturn\n"
4658
4659 " OpFunctionEnd\n";
4660 specStruct.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4661 specStruct.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4662 specStruct.numWorkGroups = IVec3(numElements, 1, 1);
4663
4664 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_int", specInt));
4665 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float", specFloat));
4666 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float16", specFloat16));
4667 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_vec3", specVec3));
4668 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_mat4", specMat4));
4669 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_array", specArray));
4670 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_struct", specStruct));
4671 }
4672
// Emits `count` SPIR-V float constant declarations, one per line, followed by
// an empty line. The constants are named %cf5, %cf15, %cf25, ... and each one
// holds the value spelled in its name (5.0, 15.0, 25.0, ...).
// A non-positive `count` yields just the trailing empty line.
string generateConstantDefinitions(int count)
{
    std::ostringstream defs;
    for (int idx = 0; idx < count; ++idx)
    {
        const int value = idx * 10 + 5;
        defs << "%cf" << value << " = OpConstant %f32 " << value << ".0\n";
    }
    defs << "\n";
    return defs.str();
}
4681
// Builds the "<literal> <label>" operand list that follows the default label
// of an OpSwitch: literal N jumps to %caseN for N in [0, count). The list is
// terminated by a newline so it completes the OpSwitch line.
string generateSwitchCases(int count)
{
    std::ostringstream operands;
    int literal = 0;
    while (literal < count)
    {
        operands << " " << literal << " %case" << literal;
        ++literal;
    }
    operands << "\n";
    return operands.str();
}
4690
// Emits the target blocks for the generated OpSwitch: one block per case
// label %case0 .. %case<count-1>, each of which does nothing but branch to
// the %phi block. An empty line is appended after the last block.
string generateSwitchTargets(int count)
{
    std::ostringstream blocks;
    for (int target = 0; target < count; ++target)
    {
        blocks << "%case" << target << " = OpLabel\n";
        blocks << " OpBranch %phi\n";
    }
    blocks << "\n";
    return blocks.str();
}
4699
// Builds the "<value> <predecessor>" operand pairs of the wide OpPhi: the
// value arriving from block %caseN is the constant %cf<N*10+5>. The list is
// terminated by a newline so it completes the OpPhi line.
string generateOpPhiParams(int count)
{
    std::ostringstream pairs;
    for (int pred = 0; pred < count; ++pred)
    {
        const int constantId = pred * 10 + 5;
        pairs << " %cf" << constantId << " %case" << pred;
    }
    pairs << "\n";
    return pairs.str();
}
4708
// Returns the decimal text form of `value`, for splicing an integer into
// assembly source.
string generateIntWidth(int value)
{
    std::ostringstream decimal;
    decimal << value;
    return decimal.str();
}
4715
// Grows a code string by copying it character by character and inserting
// "ABC" after some of the characters.
//
// After every copied character `acc` is advanced by `add`; once it exceeds
// `treshold` it is reduced by `treshold` and an "ABC" is inserted. Gating the
// insertions this way keeps the result irregular and far shorter than
// inserting after every character would. `acc` is an in/out parameter so the
// running state carries over from one call to the next.
string expandOpPhiCase5(const string &s, int &acc, int add, int treshold)
{
    std::string expanded;

    for (const char *cursor = s.c_str(); *cursor != '\0'; ++cursor)
    {
        expanded.push_back(*cursor);

        acc += add;
        if (acc <= treshold)
            continue;

        acc -= treshold;
        expanded.append("ABC");
    }
    return expanded;
}
4738
4739 // Calculate expected result based on the code string
calcOpPhiCase5(float val,const string & s)4740 float calcOpPhiCase5(float val, const string &s)
4741 {
4742 const char *p = s.c_str();
4743 float x[8];
4744 bool b[8];
4745 const float tv[8] = {0.5f, 1.5f, 3.5f, 7.5f, 15.5f, 31.5f, 63.5f, 127.5f};
4746 const float v = deFloatAbs(val);
4747 float res = 0;
4748 int depth = -1;
4749 int skip = 0;
4750
4751 for (int i = 7; i >= 0; --i)
4752 x[i] = std::fmod((float)v, (float)(2 << i));
4753 for (int i = 7; i >= 0; --i)
4754 b[i] = x[i] > tv[i];
4755
4756 while (*p)
4757 {
4758 if (*p == 'A')
4759 {
4760 depth++;
4761 if (skip == 0 && b[depth])
4762 {
4763 res++;
4764 }
4765 else
4766 skip++;
4767 }
4768 if (*p == 'B')
4769 {
4770 if (skip)
4771 skip--;
4772 if (b[depth] || skip)
4773 skip++;
4774 }
4775 if (*p == 'C')
4776 {
4777 depth--;
4778 if (skip)
4779 skip--;
4780 }
4781 p++;
4782 }
4783 return res;
4784 }
4785
// Translates a code string into the SPIR-V instructions of the "nested" test.
//
// Every letter of the code string stands for one piece of an if/else
// construct:
//
//   A   if (flag for the current nesting depth)
//       {
//           result++;
//   B   } else {
//   C   }
//
// Examples:
//   AABCBC  ->  if(){r++;if(){r++;}else{}}else{}
//   ABABCC  ->  if(){r++;}else{if(){r++;}else{}}
//   ABCABC  ->  if(){r++;}else{}if(){r++;}else{}
//
// Only the then-side of a construct produces a new value (%rt<N>); the
// else-side merely forwards the value that was current before the construct.
// The generator therefore tracks the current value twice: as a bare id (the
// operand of the next OpFAdd) and as an "<id> <label>" pair naming the block it
// arrives from (the operand form OpPhi needs).
//
// N is the position within the code string of the 'A' that opened the
// construct. The condition of a construct at nesting depth D is %b<D>. Each
// 'C' emits an OpPhi named %rm<N>, except that a 'C' which is the very last
// letter of the string names its OpPhi %res.
string generateOpPhiCase5(const string &s)
{
    std::stack<int> openIds;                  // ids of the constructs that are still open
    std::stack<std::string> current;          // id of the value that is current at this point
    std::stack<std::string> currentWithLabel; // same value followed by the label it arrives from
    std::stack<std::string> thenOperands;     // OpPhi operand pairs saved when a then-side ends
    std::ostringstream body;
    const char *const code = s.c_str();
    int nesting            = -1;
    int id                 = 0;

    openIds.push(-1);
    current.push("%f32_0");
    currentWithLabel.push("%f32_0 %entry");

    for (int pos = 0; code[pos] != '\0'; ++pos)
    {
        switch (code[pos])
        {
        case 'A':
        {
            ++nesting;
            id = pos;
            openIds.push(id);

            body << "\tOpSelectionMerge %m" << id << " None\n";
            body << "\tOpBranchConditional %b" << nesting << " %t" << id << " %f" << id << "\n";
            body << "%t" << id << " = OpLabel\n";
            body << "%rt" << id << " = OpFAdd %f32 " << current.top() << " %f32_1\n";

            std::ostringstream thenValue;
            thenValue << "%rt" << id;
            current.push(thenValue.str());
            thenValue << " %t" << id;
            currentWithLabel.push(thenValue.str());
            break;
        }

        case 'B':
        {
            // The then-side ends here: remember what it contributes to the merge.
            thenOperands.push(currentWithLabel.top());
            current.pop();
            currentWithLabel.pop();

            body << "\tOpBranch %m" << id << "\n";
            body << "%f" << id << " = OpLabel\n";

            // The else-side forwards the pre-construct value, now arriving from %f<id>.
            std::ostringstream elseValue;
            elseValue << current.top() << " %f" << id;
            currentWithLabel.top() = elseValue.str();
            break;
        }

        case 'C':
        {
            const std::string elseOperand = currentWithLabel.top();

            body << "\tOpBranch %m" << id << "\n";
            body << "%m" << id << " = OpLabel\n";
            if (code[pos + 1] == '\0')
                body << "%res"; // the final merge delivers the shader result
            else
                body << "%rm" << id;
            body << " = OpPhi %f32 " << thenOperands.top() << " " << elseOperand << "\n";

            // From here on the merged value is the current one in the enclosing scope.
            std::ostringstream merged;
            merged << "%rm" << id;
            current.top() = merged.str();
            merged << " %m" << id;
            currentWithLabel.top() = merged.str();

            thenOperands.pop();
            --nesting;
            openIds.pop();
            id = openIds.top();
            break;
        }

        default:
            break;
        }
    }
    return body.str();
}
4884
createOpPhiGroup(tcu::TestContext & testCtx)4885 tcu::TestCaseGroup *createOpPhiGroup(tcu::TestContext &testCtx)
4886 {
4887 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opphi"));
4888 ComputeShaderSpec spec1;
4889 ComputeShaderSpec spec2;
4890 ComputeShaderSpec spec3;
4891 ComputeShaderSpec spec4;
4892 ComputeShaderSpec spec5;
4893 de::Random rnd(deStringHash(group->getName()));
4894 const int numElements = 100;
4895 vector<float> inputFloats(numElements, 0);
4896 vector<float> outputFloats1(numElements, 0);
4897 vector<float> outputFloats2(numElements, 0);
4898 vector<float> outputFloats3(numElements, 0);
4899 vector<float> outputFloats4(numElements, 0);
4900 vector<float> outputFloats5(numElements, 0);
4901 std::string codestring = "ABC";
4902 const int test4Width = 512;
4903
4904 // Build case 5 code string. Each iteration makes the hierarchy more complicated.
4905 // 9 iterations with (7, 24) parameters makes the hierarchy 8 deep with about 1500 lines of
4906 // shader code.
4907 for (int i = 0, acc = 0; i < 9; i++)
4908 codestring = expandOpPhiCase5(codestring, acc, 7, 24);
4909
4910 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4911
4912 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4913 floorAll(inputFloats);
4914
4915 for (size_t ndx = 0; ndx < numElements; ++ndx)
4916 {
4917 switch (ndx % 3)
4918 {
4919 case 0:
4920 outputFloats1[ndx] = inputFloats[ndx] + 5.5f;
4921 break;
4922 case 1:
4923 outputFloats1[ndx] = inputFloats[ndx] + 20.5f;
4924 break;
4925 case 2:
4926 outputFloats1[ndx] = inputFloats[ndx] + 1.75f;
4927 break;
4928 default:
4929 break;
4930 }
4931 outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
4932 outputFloats3[ndx] = 8.5f - inputFloats[ndx];
4933
4934 int index4 = (int)deFloor(deAbs((float)ndx * inputFloats[ndx]));
4935 outputFloats4[ndx] = (float)(index4 % test4Width) * 10.0f + 5.0f;
4936
4937 outputFloats5[ndx] = calcOpPhiCase5(inputFloats[ndx], codestring);
4938 }
4939
4940 spec1.assembly =
4941 string(getComputeAsmShaderPreamble()) +
4942
4943 "OpSource GLSL 430\n"
4944 "OpName %main \"main\"\n"
4945 "OpName %id \"gl_GlobalInvocationID\"\n"
4946
4947 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4948
4949 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
4950 string(getComputeAsmInputOutputBuffer()) +
4951
4952 "%id = OpVariable %uvec3ptr Input\n"
4953 "%zero = OpConstant %i32 0\n"
4954 "%three = OpConstant %u32 3\n"
4955 "%constf5p5 = OpConstant %f32 5.5\n"
4956 "%constf20p5 = OpConstant %f32 20.5\n"
4957 "%constf1p75 = OpConstant %f32 1.75\n"
4958 "%constf8p5 = OpConstant %f32 8.5\n"
4959 "%constf6p5 = OpConstant %f32 6.5\n"
4960
4961 "%main = OpFunction %void None %voidf\n"
4962 "%entry = OpLabel\n"
4963 "%idval = OpLoad %uvec3 %id\n"
4964 "%x = OpCompositeExtract %u32 %idval 0\n"
4965 "%selector = OpUMod %u32 %x %three\n"
4966 " OpSelectionMerge %phi None\n"
4967 " OpSwitch %selector %default 0 %case0 1 %case1 2 %case2\n"
4968
4969 // Case 1 before OpPhi.
4970 "%case1 = OpLabel\n"
4971 " OpBranch %phi\n"
4972
4973 "%default = OpLabel\n"
4974 " OpUnreachable\n"
4975
4976 "%phi = OpLabel\n"
4977 "%operand = OpPhi %f32 %constf1p75 %case2 %constf20p5 %case1 %constf5p5 %case0\n" // not in the order of blocks
4978 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4979 "%inval = OpLoad %f32 %inloc\n"
4980 "%add = OpFAdd %f32 %inval %operand\n"
4981 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4982 " OpStore %outloc %add\n"
4983 " OpReturn\n"
4984
4985 // Case 0 after OpPhi.
4986 "%case0 = OpLabel\n"
4987 " OpBranch %phi\n"
4988
4989 // Case 2 after OpPhi.
4990 "%case2 = OpLabel\n"
4991 " OpBranch %phi\n"
4992
4993 " OpFunctionEnd\n";
4994 spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4995 spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
4996 spec1.numWorkGroups = IVec3(numElements, 1, 1);
4997
4998 group->addChild(new SpvAsmComputeShaderCase(testCtx, "block", spec1));
4999
5000 spec2.assembly = string(getComputeAsmShaderPreamble()) +
5001
5002 "OpName %main \"main\"\n"
5003 "OpName %id \"gl_GlobalInvocationID\"\n"
5004
5005 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5006
5007 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5008 string(getComputeAsmInputOutputBuffer()) +
5009
5010 "%id = OpVariable %uvec3ptr Input\n"
5011 "%zero = OpConstant %i32 0\n"
5012 "%one = OpConstant %i32 1\n"
5013 "%three = OpConstant %i32 3\n"
5014 "%constf6p5 = OpConstant %f32 6.5\n"
5015
5016 "%main = OpFunction %void None %voidf\n"
5017 "%entry = OpLabel\n"
5018 "%idval = OpLoad %uvec3 %id\n"
5019 "%x = OpCompositeExtract %u32 %idval 0\n"
5020 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5021 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5022 "%inval = OpLoad %f32 %inloc\n"
5023 " OpBranch %phi\n"
5024
5025 "%phi = OpLabel\n"
5026 "%step = OpPhi %i32 %zero %entry %step_next %phi\n"
5027 "%accum = OpPhi %f32 %inval %entry %accum_next %phi\n"
5028 "%step_next = OpIAdd %i32 %step %one\n"
5029 "%accum_next = OpFAdd %f32 %accum %constf6p5\n"
5030 "%still_loop = OpSLessThan %bool %step %three\n"
5031 " OpLoopMerge %exit %phi None\n"
5032 " OpBranchConditional %still_loop %phi %exit\n"
5033
5034 "%exit = OpLabel\n"
5035 " OpStore %outloc %accum\n"
5036 " OpReturn\n"
5037 " OpFunctionEnd\n";
5038 spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5039 spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
5040 spec2.numWorkGroups = IVec3(numElements, 1, 1);
5041
5042 group->addChild(new SpvAsmComputeShaderCase(testCtx, "induction", spec2));
5043
5044 spec3.assembly = string(getComputeAsmShaderPreamble()) +
5045
5046 "OpName %main \"main\"\n"
5047 "OpName %id \"gl_GlobalInvocationID\"\n"
5048
5049 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5050
5051 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5052 string(getComputeAsmInputOutputBuffer()) +
5053
5054 "%f32ptr_f = OpTypePointer Function %f32\n"
5055 "%id = OpVariable %uvec3ptr Input\n"
5056 "%true = OpConstantTrue %bool\n"
5057 "%false = OpConstantFalse %bool\n"
5058 "%zero = OpConstant %i32 0\n"
5059 "%constf8p5 = OpConstant %f32 8.5\n"
5060
5061 "%main = OpFunction %void None %voidf\n"
5062 "%entry = OpLabel\n"
5063 "%b = OpVariable %f32ptr_f Function %constf8p5\n"
5064 "%idval = OpLoad %uvec3 %id\n"
5065 "%x = OpCompositeExtract %u32 %idval 0\n"
5066 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5067 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5068 "%a_init = OpLoad %f32 %inloc\n"
5069 "%b_init = OpLoad %f32 %b\n"
5070 " OpBranch %phi\n"
5071
5072 "%phi = OpLabel\n"
5073 "%still_loop = OpPhi %bool %true %entry %false %phi\n"
5074 "%a_next = OpPhi %f32 %a_init %entry %b_next %phi\n"
5075 "%b_next = OpPhi %f32 %b_init %entry %a_next %phi\n"
5076 " OpLoopMerge %exit %phi None\n"
5077 " OpBranchConditional %still_loop %phi %exit\n"
5078
5079 "%exit = OpLabel\n"
5080 "%sub = OpFSub %f32 %a_next %b_next\n"
5081 " OpStore %outloc %sub\n"
5082 " OpReturn\n"
5083 " OpFunctionEnd\n";
5084 spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5085 spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
5086 spec3.numWorkGroups = IVec3(numElements, 1, 1);
5087
5088 group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", spec3));
5089
5090 spec4.assembly = "OpCapability Shader\n"
5091 "%ext = OpExtInstImport \"GLSL.std.450\"\n"
5092 "OpMemoryModel Logical GLSL450\n"
5093 "OpEntryPoint GLCompute %main \"main\" %id\n"
5094 "OpExecutionMode %main LocalSize 1 1 1\n"
5095
5096 "OpSource GLSL 430\n"
5097 "OpName %main \"main\"\n"
5098 "OpName %id \"gl_GlobalInvocationID\"\n"
5099
5100 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5101
5102 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5103 string(getComputeAsmInputOutputBuffer()) +
5104
5105 "%id = OpVariable %uvec3ptr Input\n"
5106 "%zero = OpConstant %i32 0\n"
5107 "%cimod = OpConstant %u32 " +
5108 generateIntWidth(test4Width) + "\n"
5109
5110 + generateConstantDefinitions(test4Width) +
5111
5112 "%main = OpFunction %void None %voidf\n"
5113 "%entry = OpLabel\n"
5114 "%idval = OpLoad %uvec3 %id\n"
5115 "%x = OpCompositeExtract %u32 %idval 0\n"
5116 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5117 "%inval = OpLoad %f32 %inloc\n"
5118 "%xf = OpConvertUToF %f32 %x\n"
5119 "%xm = OpFMul %f32 %xf %inval\n"
5120 "%xa = OpExtInst %f32 %ext FAbs %xm\n"
5121 "%xi = OpConvertFToU %u32 %xa\n"
5122 "%selector = OpUMod %u32 %xi %cimod\n"
5123 " OpSelectionMerge %phi None\n"
5124 " OpSwitch %selector %default "
5125
5126 + generateSwitchCases(test4Width) +
5127
5128 "%default = OpLabel\n"
5129 " OpUnreachable\n"
5130
5131 + generateSwitchTargets(test4Width) +
5132
5133 "%phi = OpLabel\n"
5134 "%result = OpPhi %f32"
5135
5136 + generateOpPhiParams(test4Width) +
5137
5138 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5139 " OpStore %outloc %result\n"
5140 " OpReturn\n"
5141
5142 " OpFunctionEnd\n";
5143 spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5144 spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
5145 spec4.numWorkGroups = IVec3(numElements, 1, 1);
5146
5147 group->addChild(new SpvAsmComputeShaderCase(testCtx, "wide", spec4));
5148
5149 spec5.assembly = "OpCapability Shader\n"
5150 "%ext = OpExtInstImport \"GLSL.std.450\"\n"
5151 "OpMemoryModel Logical GLSL450\n"
5152 "OpEntryPoint GLCompute %main \"main\" %id\n"
5153 "OpExecutionMode %main LocalSize 1 1 1\n"
5154 "%code = OpString \"" +
5155 codestring +
5156 "\"\n"
5157
5158 "OpSource GLSL 430\n"
5159 "OpName %main \"main\"\n"
5160 "OpName %id \"gl_GlobalInvocationID\"\n"
5161
5162 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5163
5164 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5165 string(getComputeAsmInputOutputBuffer()) +
5166
5167 "%id = OpVariable %uvec3ptr Input\n"
5168 "%zero = OpConstant %i32 0\n"
5169 "%f32_0 = OpConstant %f32 0.0\n"
5170 "%f32_0_5 = OpConstant %f32 0.5\n"
5171 "%f32_1 = OpConstant %f32 1.0\n"
5172 "%f32_1_5 = OpConstant %f32 1.5\n"
5173 "%f32_2 = OpConstant %f32 2.0\n"
5174 "%f32_3_5 = OpConstant %f32 3.5\n"
5175 "%f32_4 = OpConstant %f32 4.0\n"
5176 "%f32_7_5 = OpConstant %f32 7.5\n"
5177 "%f32_8 = OpConstant %f32 8.0\n"
5178 "%f32_15_5 = OpConstant %f32 15.5\n"
5179 "%f32_16 = OpConstant %f32 16.0\n"
5180 "%f32_31_5 = OpConstant %f32 31.5\n"
5181 "%f32_32 = OpConstant %f32 32.0\n"
5182 "%f32_63_5 = OpConstant %f32 63.5\n"
5183 "%f32_64 = OpConstant %f32 64.0\n"
5184 "%f32_127_5 = OpConstant %f32 127.5\n"
5185 "%f32_128 = OpConstant %f32 128.0\n"
5186 "%f32_256 = OpConstant %f32 256.0\n"
5187
5188 "%main = OpFunction %void None %voidf\n"
5189 "%entry = OpLabel\n"
5190 "%idval = OpLoad %uvec3 %id\n"
5191 "%x = OpCompositeExtract %u32 %idval 0\n"
5192 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5193 "%inval = OpLoad %f32 %inloc\n"
5194
5195 "%xabs = OpExtInst %f32 %ext FAbs %inval\n"
5196 "%x8 = OpFMod %f32 %xabs %f32_256\n"
5197 "%x7 = OpFMod %f32 %xabs %f32_128\n"
5198 "%x6 = OpFMod %f32 %xabs %f32_64\n"
5199 "%x5 = OpFMod %f32 %xabs %f32_32\n"
5200 "%x4 = OpFMod %f32 %xabs %f32_16\n"
5201 "%x3 = OpFMod %f32 %xabs %f32_8\n"
5202 "%x2 = OpFMod %f32 %xabs %f32_4\n"
5203 "%x1 = OpFMod %f32 %xabs %f32_2\n"
5204
5205 "%b7 = OpFOrdGreaterThanEqual %bool %x8 %f32_127_5\n"
5206 "%b6 = OpFOrdGreaterThanEqual %bool %x7 %f32_63_5\n"
5207 "%b5 = OpFOrdGreaterThanEqual %bool %x6 %f32_31_5\n"
5208 "%b4 = OpFOrdGreaterThanEqual %bool %x5 %f32_15_5\n"
5209 "%b3 = OpFOrdGreaterThanEqual %bool %x4 %f32_7_5\n"
5210 "%b2 = OpFOrdGreaterThanEqual %bool %x3 %f32_3_5\n"
5211 "%b1 = OpFOrdGreaterThanEqual %bool %x2 %f32_1_5\n"
5212 "%b0 = OpFOrdGreaterThanEqual %bool %x1 %f32_0_5\n"
5213
5214 + generateOpPhiCase5(codestring) +
5215
5216 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5217 " OpStore %outloc %res\n"
5218 " OpReturn\n"
5219
5220 " OpFunctionEnd\n";
5221 spec5.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5222 spec5.outputs.push_back(BufferSp(new Float32Buffer(outputFloats5)));
5223 spec5.numWorkGroups = IVec3(numElements, 1, 1);
5224
5225 group->addChild(new SpvAsmComputeShaderCase(testCtx, "nested", spec5));
5226
5227 createOpPhiVartypeTests(group, testCtx);
5228
5229 return group.release();
5230 }
5231
5232 // Assembly code used for testing block order is based on GLSL source code:
5233 //
5234 // #version 430
5235 //
5236 // layout(std140, set = 0, binding = 0) readonly buffer Input {
5237 // float elements[];
5238 // } input_data;
5239 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
5240 // float elements[];
5241 // } output_data;
5242 //
5243 // void main() {
5244 // uint x = gl_GlobalInvocationID.x;
5245 // output_data.elements[x] = input_data.elements[x];
5246 // if (x > uint(50)) {
5247 // switch (x % uint(3)) {
5248 // case 0: output_data.elements[x] += 1.5f; break;
5249 // case 1: output_data.elements[x] += 42.f; break;
5250 // case 2: output_data.elements[x] -= 27.f; break;
5251 // default: break;
5252 // }
5253 // } else {
5254 // output_data.elements[x] = -input_data.elements[x];
5255 // }
5256 // }
createBlockOrderGroup(tcu::TestContext & testCtx)5257 tcu::TestCaseGroup *createBlockOrderGroup(tcu::TestContext &testCtx)
5258 {
5259 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "block_order"));
5260 ComputeShaderSpec spec;
5261 de::Random rnd(deStringHash(group->getName()));
5262 const int numElements = 100;
5263 vector<float> inputFloats(numElements, 0);
5264 vector<float> outputFloats(numElements, 0);
5265
5266 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
5267
5268 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
5269 floorAll(inputFloats);
5270
5271 for (size_t ndx = 0; ndx <= 50; ++ndx)
5272 outputFloats[ndx] = -inputFloats[ndx];
5273
5274 for (size_t ndx = 51; ndx < numElements; ++ndx)
5275 {
5276 switch (ndx % 3)
5277 {
5278 case 0:
5279 outputFloats[ndx] = inputFloats[ndx] + 1.5f;
5280 break;
5281 case 1:
5282 outputFloats[ndx] = inputFloats[ndx] + 42.f;
5283 break;
5284 case 2:
5285 outputFloats[ndx] = inputFloats[ndx] - 27.f;
5286 break;
5287 default:
5288 break;
5289 }
5290 }
5291
5292 spec.assembly = string(getComputeAsmShaderPreamble()) +
5293
5294 "OpSource GLSL 430\n"
5295 "OpName %main \"main\"\n"
5296 "OpName %id \"gl_GlobalInvocationID\"\n"
5297
5298 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5299
5300 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5301
5302 "%u32ptr = OpTypePointer Function %u32\n"
5303 "%u32ptr_input = OpTypePointer Input %u32\n"
5304
5305 + string(getComputeAsmInputOutputBuffer()) +
5306
5307 "%id = OpVariable %uvec3ptr Input\n"
5308 "%zero = OpConstant %i32 0\n"
5309 "%const3 = OpConstant %u32 3\n"
5310 "%const50 = OpConstant %u32 50\n"
5311 "%constf1p5 = OpConstant %f32 1.5\n"
5312 "%constf27 = OpConstant %f32 27.0\n"
5313 "%constf42 = OpConstant %f32 42.0\n"
5314
5315 "%main = OpFunction %void None %voidf\n"
5316
5317 // entry block.
5318 "%entry = OpLabel\n"
5319
5320 // Create a temporary variable to hold the value of gl_GlobalInvocationID.x.
5321 "%xvar = OpVariable %u32ptr Function\n"
5322 "%xptr = OpAccessChain %u32ptr_input %id %zero\n"
5323 "%x = OpLoad %u32 %xptr\n"
5324 " OpStore %xvar %x\n"
5325
5326 "%cmp = OpUGreaterThan %bool %x %const50\n"
5327 " OpSelectionMerge %if_merge None\n"
5328 " OpBranchConditional %cmp %if_true %if_false\n"
5329
5330 // False branch for if-statement: placed in the middle of switch cases and before true branch.
5331 "%if_false = OpLabel\n"
5332 "%x_f = OpLoad %u32 %xvar\n"
5333 "%inloc_f = OpAccessChain %f32ptr %indata %zero %x_f\n"
5334 "%inval_f = OpLoad %f32 %inloc_f\n"
5335 "%negate = OpFNegate %f32 %inval_f\n"
5336 "%outloc_f = OpAccessChain %f32ptr %outdata %zero %x_f\n"
5337 " OpStore %outloc_f %negate\n"
5338 " OpBranch %if_merge\n"
5339
5340 // Merge block for if-statement: placed in the middle of true and false branch.
5341 "%if_merge = OpLabel\n"
5342 " OpReturn\n"
5343
5344 // True branch for if-statement: placed in the middle of swtich cases and after the false branch.
5345 "%if_true = OpLabel\n"
5346 "%xval_t = OpLoad %u32 %xvar\n"
5347 "%mod = OpUMod %u32 %xval_t %const3\n"
5348 " OpSelectionMerge %switch_merge None\n"
5349 " OpSwitch %mod %default 0 %case0 1 %case1 2 %case2\n"
5350
5351 // Merge block for switch-statement: placed before the case
5352 // bodies. But it must follow OpSwitch which dominates it.
5353 "%switch_merge = OpLabel\n"
5354 " OpBranch %if_merge\n"
5355
5356 // Case 1 for switch-statement: placed before case 0.
5357 // It must follow the OpSwitch that dominates it.
5358 "%case1 = OpLabel\n"
5359 "%x_1 = OpLoad %u32 %xvar\n"
5360 "%inloc_1 = OpAccessChain %f32ptr %indata %zero %x_1\n"
5361 "%inval_1 = OpLoad %f32 %inloc_1\n"
5362 "%addf42 = OpFAdd %f32 %inval_1 %constf42\n"
5363 "%outloc_1 = OpAccessChain %f32ptr %outdata %zero %x_1\n"
5364 " OpStore %outloc_1 %addf42\n"
5365 " OpBranch %switch_merge\n"
5366
5367 // Case 2 for switch-statement.
5368 "%case2 = OpLabel\n"
5369 "%x_2 = OpLoad %u32 %xvar\n"
5370 "%inloc_2 = OpAccessChain %f32ptr %indata %zero %x_2\n"
5371 "%inval_2 = OpLoad %f32 %inloc_2\n"
5372 "%subf27 = OpFSub %f32 %inval_2 %constf27\n"
5373 "%outloc_2 = OpAccessChain %f32ptr %outdata %zero %x_2\n"
5374 " OpStore %outloc_2 %subf27\n"
5375 " OpBranch %switch_merge\n"
5376
5377 // Default case for switch-statement: placed in the middle of normal cases.
5378 "%default = OpLabel\n"
5379 " OpBranch %switch_merge\n"
5380
5381 // Case 0 for switch-statement: out of order.
5382 "%case0 = OpLabel\n"
5383 "%x_0 = OpLoad %u32 %xvar\n"
5384 "%inloc_0 = OpAccessChain %f32ptr %indata %zero %x_0\n"
5385 "%inval_0 = OpLoad %f32 %inloc_0\n"
5386 "%addf1p5 = OpFAdd %f32 %inval_0 %constf1p5\n"
5387 "%outloc_0 = OpAccessChain %f32ptr %outdata %zero %x_0\n"
5388 " OpStore %outloc_0 %addf1p5\n"
5389 " OpBranch %switch_merge\n"
5390
5391 " OpFunctionEnd\n";
5392 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5393 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5394 spec.numWorkGroups = IVec3(numElements, 1, 1);
5395
5396 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
5397
5398 return group.release();
5399 }
5400
createMultipleShaderGroup(tcu::TestContext & testCtx)5401 tcu::TestCaseGroup *createMultipleShaderGroup(tcu::TestContext &testCtx)
5402 {
5403 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "multiple_shaders"));
5404 ComputeShaderSpec spec1;
5405 ComputeShaderSpec spec2;
5406 de::Random rnd(deStringHash(group->getName()));
5407 const int numElements = 100;
5408 vector<float> inputFloats(numElements, 0);
5409 vector<float> outputFloats1(numElements, 0);
5410 vector<float> outputFloats2(numElements, 0);
5411 fillRandomScalars(rnd, -500.f, 500.f, &inputFloats[0], numElements);
5412
5413 for (size_t ndx = 0; ndx < numElements; ++ndx)
5414 {
5415 outputFloats1[ndx] = inputFloats[ndx] + inputFloats[ndx];
5416 outputFloats2[ndx] = -inputFloats[ndx];
5417 }
5418
5419 const string assembly(
5420 "OpCapability Shader\n"
5421 "OpMemoryModel Logical GLSL450\n"
5422 "OpEntryPoint GLCompute %comp_main1 \"entrypoint1\" %id\n"
5423 "OpEntryPoint GLCompute %comp_main2 \"entrypoint2\" %id\n"
5424 // A module cannot have two OpEntryPoint instructions with the same Execution Model and the same Name string.
5425 "OpEntryPoint Vertex %vert_main \"entrypoint2\" %vert_builtins %vertexIndex %instanceIndex\n"
5426 "OpExecutionMode %comp_main1 LocalSize 1 1 1\n"
5427 "OpExecutionMode %comp_main2 LocalSize 1 1 1\n"
5428
5429 "OpName %comp_main1 \"entrypoint1\"\n"
5430 "OpName %comp_main2 \"entrypoint2\"\n"
5431 "OpName %vert_main \"entrypoint2\"\n"
5432 "OpName %id \"gl_GlobalInvocationID\"\n"
5433 "OpName %vert_builtin_st \"gl_PerVertex\"\n"
5434 "OpName %vertexIndex \"gl_VertexIndex\"\n"
5435 "OpName %instanceIndex \"gl_InstanceIndex\"\n"
5436 "OpMemberName %vert_builtin_st 0 \"gl_Position\"\n"
5437 "OpMemberName %vert_builtin_st 1 \"gl_PointSize\"\n"
5438 "OpMemberName %vert_builtin_st 2 \"gl_ClipDistance\"\n"
5439
5440 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5441 "OpDecorate %vertexIndex BuiltIn VertexIndex\n"
5442 "OpDecorate %instanceIndex BuiltIn InstanceIndex\n"
5443 "OpDecorate %vert_builtin_st Block\n"
5444 "OpMemberDecorate %vert_builtin_st 0 BuiltIn Position\n"
5445 "OpMemberDecorate %vert_builtin_st 1 BuiltIn PointSize\n"
5446 "OpMemberDecorate %vert_builtin_st 2 BuiltIn ClipDistance\n"
5447
5448 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5449 string(getComputeAsmInputOutputBuffer()) +
5450
5451 "%zero = OpConstant %i32 0\n"
5452 "%one = OpConstant %u32 1\n"
5453 "%c_f32_1 = OpConstant %f32 1\n"
5454
5455 "%i32inputptr = OpTypePointer Input %i32\n"
5456 "%vec4 = OpTypeVector %f32 4\n"
5457 "%vec4ptr = OpTypePointer Output %vec4\n"
5458 "%f32arr1 = OpTypeArray %f32 %one\n"
5459 "%vert_builtin_st = OpTypeStruct %vec4 %f32 %f32arr1\n"
5460 "%vert_builtin_st_ptr = OpTypePointer Output %vert_builtin_st\n"
5461 "%vert_builtins = OpVariable %vert_builtin_st_ptr Output\n"
5462
5463 "%id = OpVariable %uvec3ptr Input\n"
5464 "%vertexIndex = OpVariable %i32inputptr Input\n"
5465 "%instanceIndex = OpVariable %i32inputptr Input\n"
5466 "%c_vec4_1 = OpConstantComposite %vec4 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
5467
5468 // gl_Position = vec4(1.);
5469 "%vert_main = OpFunction %void None %voidf\n"
5470 "%vert_entry = OpLabel\n"
5471 "%position = OpAccessChain %vec4ptr %vert_builtins %zero\n"
5472 " OpStore %position %c_vec4_1\n"
5473 " OpReturn\n"
5474 " OpFunctionEnd\n"
5475
5476 // Double inputs.
5477 "%comp_main1 = OpFunction %void None %voidf\n"
5478 "%comp1_entry = OpLabel\n"
5479 "%idval1 = OpLoad %uvec3 %id\n"
5480 "%x1 = OpCompositeExtract %u32 %idval1 0\n"
5481 "%inloc1 = OpAccessChain %f32ptr %indata %zero %x1\n"
5482 "%inval1 = OpLoad %f32 %inloc1\n"
5483 "%add = OpFAdd %f32 %inval1 %inval1\n"
5484 "%outloc1 = OpAccessChain %f32ptr %outdata %zero %x1\n"
5485 " OpStore %outloc1 %add\n"
5486 " OpReturn\n"
5487 " OpFunctionEnd\n"
5488
5489 // Negate inputs.
5490 "%comp_main2 = OpFunction %void None %voidf\n"
5491 "%comp2_entry = OpLabel\n"
5492 "%idval2 = OpLoad %uvec3 %id\n"
5493 "%x2 = OpCompositeExtract %u32 %idval2 0\n"
5494 "%inloc2 = OpAccessChain %f32ptr %indata %zero %x2\n"
5495 "%inval2 = OpLoad %f32 %inloc2\n"
5496 "%neg = OpFNegate %f32 %inval2\n"
5497 "%outloc2 = OpAccessChain %f32ptr %outdata %zero %x2\n"
5498 " OpStore %outloc2 %neg\n"
5499 " OpReturn\n"
5500 " OpFunctionEnd\n");
5501
5502 spec1.assembly = assembly;
5503 spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5504 spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
5505 spec1.numWorkGroups = IVec3(numElements, 1, 1);
5506 spec1.entryPoint = "entrypoint1";
5507
5508 spec2.assembly = assembly;
5509 spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5510 spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
5511 spec2.numWorkGroups = IVec3(numElements, 1, 1);
5512 spec2.entryPoint = "entrypoint2";
5513
5514 group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader1", spec1));
5515 group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader2", spec2));
5516
5517 return group.release();
5518 }
5519
// Returns a string made of `num4ByteChars` back-to-back copies of one
// four-byte UTF-8 sequence (F0 9F 8C 8D, i.e. U+1F30D). The resulting byte
// length is therefore exactly 4 * num4ByteChars.
inline std::string makeLongUTF8String(size_t num4ByteChars)
{
    // The bytes are written as explicit narrow-char escapes rather than a
    // C++11 u8 literal: older Microsoft compilers may otherwise treat the
    // literal as wide (16-bit) characters.
    const char fourByteChar[] = "\xF0\x9F\x8C\x8D";
    const size_t fourByteCharLen = sizeof(fourByteChar) - 1; // drop the terminating NUL

    std::string repeated;
    repeated.reserve(num4ByteChars * fourByteCharLen);

    for (size_t remaining = num4ByteChars; remaining > 0; --remaining)
        repeated.append(fourByteChar, fourByteCharLen);

    return repeated;
}
5536
createOpSourceGroup(tcu::TestContext & testCtx)5537 tcu::TestCaseGroup *createOpSourceGroup(tcu::TestContext &testCtx)
5538 {
5539 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsource"));
5540 vector<CaseParameter> cases;
5541 de::Random rnd(deStringHash(group->getName()));
5542 const int numElements = 100;
5543 vector<float> positiveFloats(numElements, 0);
5544 vector<float> negativeFloats(numElements, 0);
5545 const StringTemplate shaderTemplate("OpCapability Shader\n"
5546 "OpMemoryModel Logical GLSL450\n"
5547
5548 "OpEntryPoint GLCompute %main \"main\" %id\n"
5549 "OpExecutionMode %main LocalSize 1 1 1\n"
5550
5551 "${SOURCE}\n"
5552
5553 "OpName %main \"main\"\n"
5554 "OpName %id \"gl_GlobalInvocationID\"\n"
5555
5556 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5557
5558 + string(getComputeAsmInputOutputBufferTraits()) +
5559 string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5560
5561 "%id = OpVariable %uvec3ptr Input\n"
5562 "%zero = OpConstant %i32 0\n"
5563
5564 "%main = OpFunction %void None %voidf\n"
5565 "%label = OpLabel\n"
5566 "%idval = OpLoad %uvec3 %id\n"
5567 "%x = OpCompositeExtract %u32 %idval 0\n"
5568 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5569 "%inval = OpLoad %f32 %inloc\n"
5570 "%neg = OpFNegate %f32 %inval\n"
5571 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5572 " OpStore %outloc %neg\n"
5573 " OpReturn\n"
5574 " OpFunctionEnd\n");
5575
5576 cases.push_back(CaseParameter("unknown_source", "OpSource Unknown 0"));
5577 cases.push_back(CaseParameter("wrong_source", "OpSource OpenCL_C 210"));
5578 cases.push_back(CaseParameter("normal_filename", "%fname = OpString \"filename\"\n"
5579 "OpSource GLSL 430 %fname"));
5580 cases.push_back(CaseParameter("empty_filename", "%fname = OpString \"\"\n"
5581 "OpSource GLSL 430 %fname"));
5582 cases.push_back(CaseParameter("normal_source_code", "%fname = OpString \"filename\"\n"
5583 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\""));
5584 cases.push_back(CaseParameter("empty_source_code", "%fname = OpString \"filename\"\n"
5585 "OpSource GLSL 430 %fname \"\""));
5586 cases.push_back(CaseParameter("long_source_code", "%fname = OpString \"filename\"\n"
5587 "OpSource GLSL 430 %fname \"" +
5588 makeLongUTF8String(65530) + "ccc\"")); // word count: 65535
5589 cases.push_back(CaseParameter(
5590 "utf8_source_code", "%fname = OpString \"filename\"\n"
5591 "OpSource GLSL 430 %fname \"\xE2\x98\x82\xE2\x98\x85\"")); // umbrella & black star symbol
5592 cases.push_back(CaseParameter("normal_sourcecontinued", "%fname = OpString \"filename\"\n"
5593 "OpSource GLSL 430 %fname \"#version 430\nvo\"\n"
5594 "OpSourceContinued \"id main() {}\""));
5595 cases.push_back(CaseParameter("empty_sourcecontinued", "%fname = OpString \"filename\"\n"
5596 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5597 "OpSourceContinued \"\""));
5598 cases.push_back(CaseParameter("long_sourcecontinued", "%fname = OpString \"filename\"\n"
5599 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5600 "OpSourceContinued \"" +
5601 makeLongUTF8String(65533) +
5602 "ccc\"")); // word count: 65535
5603 cases.push_back(
5604 CaseParameter("utf8_sourcecontinued",
5605 "%fname = OpString \"filename\"\n"
5606 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5607 "OpSourceContinued \"\xE2\x98\x8E\xE2\x9A\x91\"")); // white telephone & black flag symbol
5608 cases.push_back(CaseParameter("multi_sourcecontinued", "%fname = OpString \"filename\"\n"
5609 "OpSource GLSL 430 %fname \"#version 430\n\"\n"
5610 "OpSourceContinued \"void\"\n"
5611 "OpSourceContinued \"main()\"\n"
5612 "OpSourceContinued \"{}\""));
5613 cases.push_back(CaseParameter("empty_source_before_sourcecontinued",
5614 "%fname = OpString \"filename\"\n"
5615 "OpSource GLSL 430 %fname \"\"\n"
5616 "OpSourceContinued \"#version 430\nvoid main() {}\""));
5617
5618 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5619
5620 for (size_t ndx = 0; ndx < numElements; ++ndx)
5621 negativeFloats[ndx] = -positiveFloats[ndx];
5622
5623 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5624 {
5625 map<string, string> specializations;
5626 ComputeShaderSpec spec;
5627
5628 specializations["SOURCE"] = cases[caseNdx].param;
5629 spec.assembly = shaderTemplate.specialize(specializations);
5630 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5631 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5632 spec.numWorkGroups = IVec3(numElements, 1, 1);
5633
5634 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
5635 }
5636
5637 return group.release();
5638 }
5639
createOpSourceExtensionGroup(tcu::TestContext & testCtx)5640 tcu::TestCaseGroup *createOpSourceExtensionGroup(tcu::TestContext &testCtx)
5641 {
5642 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsourceextension"));
5643 vector<CaseParameter> cases;
5644 de::Random rnd(deStringHash(group->getName()));
5645 const int numElements = 100;
5646 vector<float> inputFloats(numElements, 0);
5647 vector<float> outputFloats(numElements, 0);
5648 const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
5649
5650 "OpSourceExtension \"${EXTENSION}\"\n"
5651
5652 "OpName %main \"main\"\n"
5653 "OpName %id \"gl_GlobalInvocationID\"\n"
5654
5655 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5656
5657 + string(getComputeAsmInputOutputBufferTraits()) +
5658 string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5659
5660 "%id = OpVariable %uvec3ptr Input\n"
5661 "%zero = OpConstant %i32 0\n"
5662
5663 "%main = OpFunction %void None %voidf\n"
5664 "%label = OpLabel\n"
5665 "%idval = OpLoad %uvec3 %id\n"
5666 "%x = OpCompositeExtract %u32 %idval 0\n"
5667 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5668 "%inval = OpLoad %f32 %inloc\n"
5669 "%neg = OpFNegate %f32 %inval\n"
5670 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5671 " OpStore %outloc %neg\n"
5672 " OpReturn\n"
5673 " OpFunctionEnd\n");
5674
5675 cases.push_back(CaseParameter("empty_extension", ""));
5676 cases.push_back(CaseParameter("real_extension", "GL_ARB_texture_rectangle"));
5677 cases.push_back(CaseParameter("fake_extension", "GL_ARB_im_the_ultimate_extension"));
5678 cases.push_back(CaseParameter("utf8_extension", "GL_ARB_\xE2\x98\x82\xE2\x98\x85"));
5679 cases.push_back(CaseParameter("long_extension", makeLongUTF8String(65533) + "ccc")); // word count: 65535
5680
5681 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
5682
5683 for (size_t ndx = 0; ndx < numElements; ++ndx)
5684 outputFloats[ndx] = -inputFloats[ndx];
5685
5686 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5687 {
5688 map<string, string> specializations;
5689 ComputeShaderSpec spec;
5690
5691 specializations["EXTENSION"] = cases[caseNdx].param;
5692 spec.assembly = shaderTemplate.specialize(specializations);
5693 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5694 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5695 spec.numWorkGroups = IVec3(numElements, 1, 1);
5696
5697 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
5698 }
5699
5700 return group.release();
5701 }
5702
5703 // Checks that a compute shader can generate a constant null value of various types, without exercising a computation on it.
createOpConstantNullGroup(tcu::TestContext & testCtx)5704 tcu::TestCaseGroup *createOpConstantNullGroup(tcu::TestContext &testCtx)
5705 {
5706 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opconstantnull"));
5707 vector<CaseParameter> cases;
5708 de::Random rnd(deStringHash(group->getName()));
5709 const int numElements = 100;
5710 vector<float> positiveFloats(numElements, 0);
5711 vector<float> negativeFloats(numElements, 0);
5712 const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
5713
5714 "OpSource GLSL 430\n"
5715 "OpName %main \"main\"\n"
5716 "OpName %id \"gl_GlobalInvocationID\"\n"
5717
5718 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5719
5720 + string(getComputeAsmInputOutputBufferTraits()) +
5721 string(getComputeAsmCommonTypes()) +
5722 "%uvec2 = OpTypeVector %u32 2\n"
5723 "%bvec3 = OpTypeVector %bool 3\n"
5724 "%fvec4 = OpTypeVector %f32 4\n"
5725 "%fmat33 = OpTypeMatrix %fvec3 3\n"
5726 "%const100 = OpConstant %u32 100\n"
5727 "%uarr100 = OpTypeArray %i32 %const100\n"
5728 "%struct = OpTypeStruct %f32 %i32 %u32\n"
5729 "%pointer = OpTypePointer Function %i32\n" +
5730 string(getComputeAsmInputOutputBuffer()) +
5731
5732 "%null = OpConstantNull ${TYPE}\n"
5733
5734 "%id = OpVariable %uvec3ptr Input\n"
5735 "%zero = OpConstant %i32 0\n"
5736
5737 "%main = OpFunction %void None %voidf\n"
5738 "%label = OpLabel\n"
5739 "%idval = OpLoad %uvec3 %id\n"
5740 "%x = OpCompositeExtract %u32 %idval 0\n"
5741 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5742 "%inval = OpLoad %f32 %inloc\n"
5743 "%neg = OpFNegate %f32 %inval\n"
5744 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5745 " OpStore %outloc %neg\n"
5746 " OpReturn\n"
5747 " OpFunctionEnd\n");
5748
5749 cases.push_back(CaseParameter("bool", "%bool"));
5750 cases.push_back(CaseParameter("sint32", "%i32"));
5751 cases.push_back(CaseParameter("uint32", "%u32"));
5752 cases.push_back(CaseParameter("float32", "%f32"));
5753 cases.push_back(CaseParameter("vec4float32", "%fvec4"));
5754 cases.push_back(CaseParameter("vec3bool", "%bvec3"));
5755 cases.push_back(CaseParameter("vec2uint32", "%uvec2"));
5756 cases.push_back(CaseParameter("matrix", "%fmat33"));
5757 cases.push_back(CaseParameter("array", "%uarr100"));
5758 cases.push_back(CaseParameter("struct", "%struct"));
5759 cases.push_back(CaseParameter("pointer", "%pointer"));
5760
5761 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5762
5763 for (size_t ndx = 0; ndx < numElements; ++ndx)
5764 negativeFloats[ndx] = -positiveFloats[ndx];
5765
5766 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5767 {
5768 map<string, string> specializations;
5769 ComputeShaderSpec spec;
5770
5771 specializations["TYPE"] = cases[caseNdx].param;
5772 spec.assembly = shaderTemplate.specialize(specializations);
5773 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5774 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5775 spec.numWorkGroups = IVec3(numElements, 1, 1);
5776
5777 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
5778 }
5779
5780 return group.release();
5781 }
5782
5783 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createOpConstantCompositeGroup(tcu::TestContext & testCtx)5784 tcu::TestCaseGroup *createOpConstantCompositeGroup(tcu::TestContext &testCtx)
5785 {
5786 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opconstantcomposite"));
5787 vector<CaseParameter> cases;
5788 de::Random rnd(deStringHash(group->getName()));
5789 const int numElements = 100;
5790 vector<float> positiveFloats(numElements, 0);
5791 vector<float> negativeFloats(numElements, 0);
5792 const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
5793
5794 "OpSource GLSL 430\n"
5795 "OpName %main \"main\"\n"
5796 "OpName %id \"gl_GlobalInvocationID\"\n"
5797
5798 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5799
5800 + string(getComputeAsmInputOutputBufferTraits()) +
5801 string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5802
5803 "%id = OpVariable %uvec3ptr Input\n"
5804 "%zero = OpConstant %i32 0\n"
5805
5806 "${CONSTANT}\n"
5807
5808 "%main = OpFunction %void None %voidf\n"
5809 "%label = OpLabel\n"
5810 "%idval = OpLoad %uvec3 %id\n"
5811 "%x = OpCompositeExtract %u32 %idval 0\n"
5812 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5813 "%inval = OpLoad %f32 %inloc\n"
5814 "%neg = OpFNegate %f32 %inval\n"
5815 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5816 " OpStore %outloc %neg\n"
5817 " OpReturn\n"
5818 " OpFunctionEnd\n");
5819
5820 cases.push_back(CaseParameter("vector", "%five = OpConstant %i32 5\n"
5821 "%ivec3 = OpTypeVector %i32 3\n"
5822 "%const = OpConstantComposite %ivec3 %five %zero %five"));
5823 cases.push_back(CaseParameter("matrix", "%m3fvec3 = OpTypeMatrix %fvec3 3\n"
5824 "%ten = OpConstant %f32 10.\n"
5825 "%fzero = OpConstant %f32 0.\n"
5826 "%vec = OpConstantComposite %fvec3 %ten %fzero %ten\n"
5827 "%mat = OpConstantComposite %m3fvec3 %vec %vec %vec"));
5828 cases.push_back(CaseParameter("struct", "%m2vec3 = OpTypeMatrix %fvec3 2\n"
5829 "%struct = OpTypeStruct %i32 %f32 %fvec3 %m2vec3\n"
5830 "%fzero = OpConstant %f32 0.\n"
5831 "%one = OpConstant %f32 1.\n"
5832 "%point5 = OpConstant %f32 0.5\n"
5833 "%vec = OpConstantComposite %fvec3 %one %one %fzero\n"
5834 "%mat = OpConstantComposite %m2vec3 %vec %vec\n"
5835 "%const = OpConstantComposite %struct %zero %point5 %vec %mat"));
5836 cases.push_back(CaseParameter("nested_struct", "%st1 = OpTypeStruct %u32 %f32\n"
5837 "%st2 = OpTypeStruct %i32 %i32\n"
5838 "%struct = OpTypeStruct %st1 %st2\n"
5839 "%point5 = OpConstant %f32 0.5\n"
5840 "%one = OpConstant %u32 1\n"
5841 "%ten = OpConstant %i32 10\n"
5842 "%st1val = OpConstantComposite %st1 %one %point5\n"
5843 "%st2val = OpConstantComposite %st2 %ten %ten\n"
5844 "%const = OpConstantComposite %struct %st1val %st2val"));
5845
5846 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5847
5848 for (size_t ndx = 0; ndx < numElements; ++ndx)
5849 negativeFloats[ndx] = -positiveFloats[ndx];
5850
5851 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5852 {
5853 map<string, string> specializations;
5854 ComputeShaderSpec spec;
5855
5856 specializations["CONSTANT"] = cases[caseNdx].param;
5857 spec.assembly = shaderTemplate.specialize(specializations);
5858 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5859 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5860 spec.numWorkGroups = IVec3(numElements, 1, 1);
5861
5862 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
5863 }
5864
5865 return group.release();
5866 }
5867
// Builds the normalized float (1 + fraction) * 2^exponent.
//
// The fraction is taken from bits 23..1 of `significand`: bit 23 contributes
// 2^-1, bit 22 contributes 2^-2, and so on down to bit 1 contributing 2^-23.
// Bit 0 and every bit above bit 23 are ignored. This layout lets the argument
// be written like the fractional digits of a C99 hex-float, e.g. 0x1.7f34p-12
// is constructNormalizedFloat(-12, 0x7f3400).
float constructNormalizedFloat(int32_t exponent, uint32_t significand)
{
    float mantissa  = 1.0f; // implicit leading one of a normalized number
    float bitWeight = 0.5f; // value contributed by the bit currently under `mask`

    // Walk from bit 23 down to bit 1 (23 bits in total).
    for (uint32_t mask = 0x800000u; mask > 1u; mask >>= 1)
    {
        if ((significand & mask) != 0)
            mantissa += bitWeight;
        bitWeight *= 0.5f;
    }

    return std::ldexp(mantissa, exponent);
}
5887
5888 // Compare instruction for the OpQuantizeF16 compute exact case.
5889 // Returns true if the output is what is expected from the test case.
compareOpQuantizeF16ComputeExactCase(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)5890 bool compareOpQuantizeF16ComputeExactCase(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
5891 const std::vector<Resource> &expectedOutputs, TestLog &)
5892 {
5893 assert(outputAllocs.size() == 1);
5894
5895 // Only size is needed because we cannot compare Nans.
5896 size_t byteSize = expectedOutputs[0].getByteSize();
5897
5898 const float *outputAsFloat = static_cast<const float *>(outputAllocs[0]->getHostPtr());
5899
5900 if (byteSize != 4 * sizeof(float))
5901 {
5902 return false;
5903 }
5904
5905 if (*outputAsFloat != constructNormalizedFloat(8, 0x304000) &&
5906 *outputAsFloat != constructNormalizedFloat(8, 0x300000))
5907 {
5908 return false;
5909 }
5910 outputAsFloat++;
5911
5912 if (*outputAsFloat != -constructNormalizedFloat(-7, 0x600000) &&
5913 *outputAsFloat != -constructNormalizedFloat(-7, 0x604000))
5914 {
5915 return false;
5916 }
5917 outputAsFloat++;
5918
5919 if (*outputAsFloat != constructNormalizedFloat(2, 0x01C000) &&
5920 *outputAsFloat != constructNormalizedFloat(2, 0x020000))
5921 {
5922 return false;
5923 }
5924 outputAsFloat++;
5925
5926 if (*outputAsFloat != constructNormalizedFloat(1, 0xFFC000) &&
5927 *outputAsFloat != constructNormalizedFloat(2, 0x000000))
5928 {
5929 return false;
5930 }
5931
5932 return true;
5933 }
5934
5935 // Checks that every output from a test-case is a float NaN.
compareNan(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)5936 bool compareNan(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
5937 const std::vector<Resource> &expectedOutputs, TestLog &)
5938 {
5939 assert(outputAllocs.size() == 1);
5940
5941 // Only size is needed because we cannot compare Nans.
5942 size_t byteSize = expectedOutputs[0].getByteSize();
5943
5944 const float *const output_as_float = static_cast<const float *>(outputAllocs[0]->getHostPtr());
5945
5946 for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
5947 {
5948 if (!deFloatIsNaN(output_as_float[idx]))
5949 {
5950 return false;
5951 }
5952 }
5953
5954 return true;
5955 }
5956
5957 // Checks that every output from a test-case is either +0.0f or -0.0f
compareZeros(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)5958 bool compareZeros(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
5959 const std::vector<Resource> &expectedOutputs, TestLog &)
5960 {
5961 assert(outputAllocs.size() == 1);
5962
5963 // Only size is needed because all the results are supposed to be zero.
5964 size_t byteSize = expectedOutputs[0].getByteSize();
5965
5966 const float *const output_as_float = static_cast<const float *>(outputAllocs[0]->getHostPtr());
5967
5968 for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
5969 {
5970 if (output_as_float[idx] != 0)
5971 return false;
5972 }
5973
5974 return true;
5975 }
5976
5977 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createOpQuantizeToF16Group(tcu::TestContext & testCtx)5978 tcu::TestCaseGroup *createOpQuantizeToF16Group(tcu::TestContext &testCtx)
5979 {
5980 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opquantize"));
5981
5982 const std::string shader(string(getComputeAsmShaderPreamble()) +
5983
5984 "OpSource GLSL 430\n"
5985 "OpName %main \"main\"\n"
5986 "OpName %id \"gl_GlobalInvocationID\"\n"
5987
5988 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5989
5990 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5991 string(getComputeAsmInputOutputBuffer()) +
5992
5993 "%id = OpVariable %uvec3ptr Input\n"
5994 "%zero = OpConstant %i32 0\n"
5995
5996 "%main = OpFunction %void None %voidf\n"
5997 "%label = OpLabel\n"
5998 "%idval = OpLoad %uvec3 %id\n"
5999 "%x = OpCompositeExtract %u32 %idval 0\n"
6000 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6001 "%inval = OpLoad %f32 %inloc\n"
6002 "%quant = OpQuantizeToF16 %f32 %inval\n"
6003 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6004 " OpStore %outloc %quant\n"
6005 " OpReturn\n"
6006 " OpFunctionEnd\n");
6007
6008 {
6009 ComputeShaderSpec spec;
6010 const uint32_t numElements = 100;
6011 vector<float> infinities;
6012 vector<float> results;
6013
6014 infinities.reserve(numElements);
6015 results.reserve(numElements);
6016
6017 for (size_t idx = 0; idx < numElements; ++idx)
6018 {
6019 switch (idx % 4)
6020 {
6021 case 0:
6022 infinities.push_back(std::numeric_limits<float>::infinity());
6023 results.push_back(std::numeric_limits<float>::infinity());
6024 break;
6025 case 1:
6026 infinities.push_back(-std::numeric_limits<float>::infinity());
6027 results.push_back(-std::numeric_limits<float>::infinity());
6028 break;
6029 case 2:
6030 infinities.push_back(std::ldexp(1.0f, 16));
6031 results.push_back(std::numeric_limits<float>::infinity());
6032 break;
6033 case 3:
6034 infinities.push_back(std::ldexp(-1.0f, 32));
6035 results.push_back(-std::numeric_limits<float>::infinity());
6036 break;
6037 }
6038 }
6039
6040 spec.assembly = shader;
6041 spec.inputs.push_back(BufferSp(new Float32Buffer(infinities)));
6042 spec.outputs.push_back(BufferSp(new Float32Buffer(results)));
6043 spec.numWorkGroups = IVec3(numElements, 1, 1);
6044
6045 group->addChild(new SpvAsmComputeShaderCase(testCtx, "infinities", spec));
6046 }
6047
6048 {
6049 ComputeShaderSpec spec;
6050 vector<float> nans;
6051 const uint32_t numElements = 100;
6052
6053 nans.reserve(numElements);
6054
6055 for (size_t idx = 0; idx < numElements; ++idx)
6056 {
6057 if (idx % 2 == 0)
6058 {
6059 nans.push_back(std::numeric_limits<float>::quiet_NaN());
6060 }
6061 else
6062 {
6063 nans.push_back(-std::numeric_limits<float>::quiet_NaN());
6064 }
6065 }
6066
6067 spec.assembly = shader;
6068 spec.inputs.push_back(BufferSp(new Float32Buffer(nans)));
6069 spec.outputs.push_back(BufferSp(new Float32Buffer(nans)));
6070 spec.numWorkGroups = IVec3(numElements, 1, 1);
6071 spec.verifyIO = &compareNan;
6072
6073 group->addChild(new SpvAsmComputeShaderCase(testCtx, "propagated_nans", spec));
6074 }
6075
6076 {
6077 ComputeShaderSpec spec;
6078 vector<float> small;
6079 vector<float> zeros;
6080 const uint32_t numElements = 100;
6081
6082 small.reserve(numElements);
6083 zeros.reserve(numElements);
6084
6085 for (size_t idx = 0; idx < numElements; ++idx)
6086 {
6087 switch (idx % 6)
6088 {
6089 case 0:
6090 small.push_back(0.f);
6091 break;
6092 case 1:
6093 small.push_back(-0.f);
6094 break;
6095 case 2:
6096 small.push_back(std::ldexp(1.0f, -16));
6097 break;
6098 case 3:
6099 small.push_back(std::ldexp(-1.0f, -32));
6100 break;
6101 case 4:
6102 small.push_back(std::ldexp(1.0f, -127));
6103 break;
6104 case 5:
6105 small.push_back(-std::ldexp(1.0f, -128));
6106 break;
6107 }
6108 }
6109
6110 spec.assembly = shader;
6111 spec.inputs.push_back(BufferSp(new Float32Buffer(small)));
6112 // Only the size of outputs[0] will be used, actual expected values aren't needed.
6113 spec.outputs.push_back(BufferSp(new Float32Buffer(small)));
6114 spec.numWorkGroups = IVec3(numElements, 1, 1);
6115 spec.verifyIO = &compareZeros;
6116
6117 group->addChild(new SpvAsmComputeShaderCase(testCtx, "flush_to_zero", spec));
6118 }
6119
6120 {
6121 ComputeShaderSpec spec;
6122 vector<float> exact;
6123 const uint32_t numElements = 200;
6124
6125 exact.reserve(numElements);
6126
6127 for (size_t idx = 0; idx < numElements; ++idx)
6128 exact.push_back(static_cast<float>(static_cast<int>(idx) - 100));
6129
6130 spec.assembly = shader;
6131 spec.inputs.push_back(BufferSp(new Float32Buffer(exact)));
6132 spec.outputs.push_back(BufferSp(new Float32Buffer(exact)));
6133 spec.numWorkGroups = IVec3(numElements, 1, 1);
6134
6135 group->addChild(new SpvAsmComputeShaderCase(testCtx, "exact", spec));
6136 }
6137
6138 {
6139 ComputeShaderSpec spec;
6140 vector<float> inputs;
6141 const uint32_t numElements = 4;
6142
6143 inputs.push_back(constructNormalizedFloat(8, 0x300300));
6144 inputs.push_back(-constructNormalizedFloat(-7, 0x600800));
6145 inputs.push_back(constructNormalizedFloat(2, 0x01E000));
6146 inputs.push_back(constructNormalizedFloat(1, 0xFFE000));
6147
6148 spec.assembly = shader;
6149 spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
6150 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6151 spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
6152 spec.numWorkGroups = IVec3(numElements, 1, 1);
6153
6154 group->addChild(new SpvAsmComputeShaderCase(testCtx, "rounded", spec));
6155 }
6156
6157 return group.release();
6158 }
6159
createSpecConstantOpQuantizeToF16Group(tcu::TestContext & testCtx)6160 tcu::TestCaseGroup *createSpecConstantOpQuantizeToF16Group(tcu::TestContext &testCtx)
6161 {
6162 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opspecconstantop_opquantize"));
6163
6164 const std::string shader(
6165 string(getComputeAsmShaderPreamble()) +
6166
6167 "OpName %main \"main\"\n"
6168 "OpName %id \"gl_GlobalInvocationID\"\n"
6169
6170 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6171
6172 "OpDecorate %sc_0 SpecId 0\n"
6173 "OpDecorate %sc_1 SpecId 1\n"
6174 "OpDecorate %sc_2 SpecId 2\n"
6175 "OpDecorate %sc_3 SpecId 3\n"
6176 "OpDecorate %sc_4 SpecId 4\n"
6177 "OpDecorate %sc_5 SpecId 5\n"
6178
6179 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6180 string(getComputeAsmInputOutputBuffer()) +
6181
6182 "%id = OpVariable %uvec3ptr Input\n"
6183 "%zero = OpConstant %i32 0\n"
6184 "%c_u32_6 = OpConstant %u32 6\n"
6185
6186 "%sc_0 = OpSpecConstant %f32 0.\n"
6187 "%sc_1 = OpSpecConstant %f32 0.\n"
6188 "%sc_2 = OpSpecConstant %f32 0.\n"
6189 "%sc_3 = OpSpecConstant %f32 0.\n"
6190 "%sc_4 = OpSpecConstant %f32 0.\n"
6191 "%sc_5 = OpSpecConstant %f32 0.\n"
6192
6193 "%sc_0_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_0\n"
6194 "%sc_1_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_1\n"
6195 "%sc_2_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_2\n"
6196 "%sc_3_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_3\n"
6197 "%sc_4_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_4\n"
6198 "%sc_5_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_5\n"
6199
6200 "%main = OpFunction %void None %voidf\n"
6201 "%label = OpLabel\n"
6202 "%idval = OpLoad %uvec3 %id\n"
6203 "%x = OpCompositeExtract %u32 %idval 0\n"
6204 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6205 "%selector = OpUMod %u32 %x %c_u32_6\n"
6206 " OpSelectionMerge %exit None\n"
6207 " OpSwitch %selector %exit 0 %case0 1 %case1 2 %case2 3 %case3 4 %case4 5 %case5\n"
6208
6209 "%case0 = OpLabel\n"
6210 " OpStore %outloc %sc_0_quant\n"
6211 " OpBranch %exit\n"
6212
6213 "%case1 = OpLabel\n"
6214 " OpStore %outloc %sc_1_quant\n"
6215 " OpBranch %exit\n"
6216
6217 "%case2 = OpLabel\n"
6218 " OpStore %outloc %sc_2_quant\n"
6219 " OpBranch %exit\n"
6220
6221 "%case3 = OpLabel\n"
6222 " OpStore %outloc %sc_3_quant\n"
6223 " OpBranch %exit\n"
6224
6225 "%case4 = OpLabel\n"
6226 " OpStore %outloc %sc_4_quant\n"
6227 " OpBranch %exit\n"
6228
6229 "%case5 = OpLabel\n"
6230 " OpStore %outloc %sc_5_quant\n"
6231 " OpBranch %exit\n"
6232
6233 "%exit = OpLabel\n"
6234 " OpReturn\n"
6235
6236 " OpFunctionEnd\n");
6237
6238 {
6239 ComputeShaderSpec spec;
6240 const uint8_t numCases = 4;
6241 vector<float> inputs(numCases, 0.f);
6242 vector<float> outputs;
6243
6244 spec.assembly = shader;
6245 spec.numWorkGroups = IVec3(numCases, 1, 1);
6246
6247 spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(std::numeric_limits<float>::infinity()));
6248 spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(-std::numeric_limits<float>::infinity()));
6249 spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(std::ldexp(1.0f, 16)));
6250 spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(std::ldexp(-1.0f, 32)));
6251
6252 outputs.push_back(std::numeric_limits<float>::infinity());
6253 outputs.push_back(-std::numeric_limits<float>::infinity());
6254 outputs.push_back(std::numeric_limits<float>::infinity());
6255 outputs.push_back(-std::numeric_limits<float>::infinity());
6256
6257 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6258 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6259
6260 group->addChild(new SpvAsmComputeShaderCase(testCtx, "infinities", spec));
6261 }
6262
6263 {
6264 ComputeShaderSpec spec;
6265 const uint8_t numCases = 2;
6266 vector<float> inputs(numCases, 0.f);
6267 vector<float> outputs;
6268
6269 spec.assembly = shader;
6270 spec.numWorkGroups = IVec3(numCases, 1, 1);
6271 spec.verifyIO = &compareNan;
6272
6273 outputs.push_back(std::numeric_limits<float>::quiet_NaN());
6274 outputs.push_back(-std::numeric_limits<float>::quiet_NaN());
6275
6276 for (uint8_t idx = 0; idx < numCases; ++idx)
6277 spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(outputs[idx]));
6278
6279 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6280 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6281
6282 group->addChild(new SpvAsmComputeShaderCase(testCtx, "propagated_nans", spec));
6283 }
6284
6285 {
6286 ComputeShaderSpec spec;
6287 const uint8_t numCases = 6;
6288 vector<float> inputs(numCases, 0.f);
6289 vector<float> outputs;
6290
6291 spec.assembly = shader;
6292 spec.numWorkGroups = IVec3(numCases, 1, 1);
6293
6294 spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(0.f));
6295 spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(-0.f));
6296 spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(std::ldexp(1.0f, -16)));
6297 spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(std::ldexp(-1.0f, -32)));
6298 spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(std::ldexp(1.0f, -127)));
6299 spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(-std::ldexp(1.0f, -128)));
6300
6301 spec.verifyIO = &compareZeros;
6302
6303 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6304 // Only the size of outputs[0] will be used, actual expected values aren't needed.
6305 spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
6306
6307 group->addChild(new SpvAsmComputeShaderCase(testCtx, "flush_to_zero", spec));
6308 }
6309
6310 {
6311 ComputeShaderSpec spec;
6312 const uint8_t numCases = 6;
6313 vector<float> inputs(numCases, 0.f);
6314 vector<float> outputs;
6315
6316 spec.assembly = shader;
6317 spec.numWorkGroups = IVec3(numCases, 1, 1);
6318
6319 for (uint8_t idx = 0; idx < 6; ++idx)
6320 {
6321 const float f = static_cast<float>(idx * 10 - 30) / 4.f;
6322 spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(f));
6323 outputs.push_back(f);
6324 }
6325
6326 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6327 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6328
6329 group->addChild(new SpvAsmComputeShaderCase(testCtx, "exact", spec));
6330 }
6331
6332 {
6333 ComputeShaderSpec spec;
6334 const uint8_t numCases = 4;
6335 vector<float> inputs(numCases, 0.f);
6336 vector<float> outputs;
6337
6338 spec.assembly = shader;
6339 spec.numWorkGroups = IVec3(numCases, 1, 1);
6340 spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
6341
6342 outputs.push_back(constructNormalizedFloat(8, 0x300300));
6343 outputs.push_back(-constructNormalizedFloat(-7, 0x600800));
6344 outputs.push_back(constructNormalizedFloat(2, 0x01E000));
6345 outputs.push_back(constructNormalizedFloat(1, 0xFFE000));
6346
6347 for (uint8_t idx = 0; idx < numCases; ++idx)
6348 spec.specConstants.append<int32_t>(bitwiseCast<uint32_t>(outputs[idx]));
6349
6350 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6351 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6352
6353 group->addChild(new SpvAsmComputeShaderCase(testCtx, "rounded", spec));
6354 }
6355
6356 return group.release();
6357 }
6358
6359 // Checks that constant null/composite values can be used in computation.
createOpConstantUsageGroup(tcu::TestContext & testCtx)6360 tcu::TestCaseGroup *createOpConstantUsageGroup(tcu::TestContext &testCtx)
6361 {
6362 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opconstantnullcomposite"));
6363 ComputeShaderSpec spec;
6364 de::Random rnd(deStringHash(group->getName()));
6365 const int numElements = 100;
6366 vector<float> positiveFloats(numElements, 0);
6367 vector<float> negativeFloats(numElements, 0);
6368
6369 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
6370
6371 for (size_t ndx = 0; ndx < numElements; ++ndx)
6372 negativeFloats[ndx] = -positiveFloats[ndx];
6373
6374 spec.assembly = "OpCapability Shader\n"
6375 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
6376 "OpMemoryModel Logical GLSL450\n"
6377 "OpEntryPoint GLCompute %main \"main\" %id\n"
6378 "OpExecutionMode %main LocalSize 1 1 1\n"
6379
6380 "OpSource GLSL 430\n"
6381 "OpName %main \"main\"\n"
6382 "OpName %id \"gl_GlobalInvocationID\"\n"
6383
6384 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6385
6386 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6387
6388 "%fmat = OpTypeMatrix %fvec3 3\n"
6389 "%ten = OpConstant %u32 10\n"
6390 "%f32arr10 = OpTypeArray %f32 %ten\n"
6391 "%fst = OpTypeStruct %f32 %f32\n"
6392
6393 + string(getComputeAsmInputOutputBuffer()) +
6394
6395 "%id = OpVariable %uvec3ptr Input\n"
6396 "%zero = OpConstant %i32 0\n"
6397
6398 // Create a bunch of null values
6399 "%unull = OpConstantNull %u32\n"
6400 "%fnull = OpConstantNull %f32\n"
6401 "%vnull = OpConstantNull %fvec3\n"
6402 "%mnull = OpConstantNull %fmat\n"
6403 "%anull = OpConstantNull %f32arr10\n"
6404 "%snull = OpConstantComposite %fst %fnull %fnull\n"
6405
6406 "%main = OpFunction %void None %voidf\n"
6407 "%label = OpLabel\n"
6408 "%idval = OpLoad %uvec3 %id\n"
6409 "%x = OpCompositeExtract %u32 %idval 0\n"
6410 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6411 "%inval = OpLoad %f32 %inloc\n"
6412 "%neg = OpFNegate %f32 %inval\n"
6413
6414 // Get the abs() of (a certain element of) those null values
6415 "%unull_cov = OpConvertUToF %f32 %unull\n"
6416 "%unull_abs = OpExtInst %f32 %std450 FAbs %unull_cov\n"
6417 "%fnull_abs = OpExtInst %f32 %std450 FAbs %fnull\n"
6418 "%vnull_0 = OpCompositeExtract %f32 %vnull 0\n"
6419 "%vnull_abs = OpExtInst %f32 %std450 FAbs %vnull_0\n"
6420 "%mnull_12 = OpCompositeExtract %f32 %mnull 1 2\n"
6421 "%mnull_abs = OpExtInst %f32 %std450 FAbs %mnull_12\n"
6422 "%anull_3 = OpCompositeExtract %f32 %anull 3\n"
6423 "%anull_abs = OpExtInst %f32 %std450 FAbs %anull_3\n"
6424 "%snull_1 = OpCompositeExtract %f32 %snull 1\n"
6425 "%snull_abs = OpExtInst %f32 %std450 FAbs %snull_1\n"
6426
6427 // Add them all
6428 "%add1 = OpFAdd %f32 %neg %unull_abs\n"
6429 "%add2 = OpFAdd %f32 %add1 %fnull_abs\n"
6430 "%add3 = OpFAdd %f32 %add2 %vnull_abs\n"
6431 "%add4 = OpFAdd %f32 %add3 %mnull_abs\n"
6432 "%add5 = OpFAdd %f32 %add4 %anull_abs\n"
6433 "%final = OpFAdd %f32 %add5 %snull_abs\n"
6434
6435 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6436 " OpStore %outloc %final\n" // write to output
6437 " OpReturn\n"
6438 " OpFunctionEnd\n";
6439 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
6440 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
6441 spec.numWorkGroups = IVec3(numElements, 1, 1);
6442
6443 group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", spec));
6444
6445 return group.release();
6446 }
6447
6448 // Assembly code used for testing loop control is based on GLSL source code:
6449 // #version 430
6450 //
6451 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6452 // float elements[];
6453 // } input_data;
6454 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6455 // float elements[];
6456 // } output_data;
6457 //
6458 // void main() {
6459 // uint x = gl_GlobalInvocationID.x;
6460 // output_data.elements[x] = input_data.elements[x];
6461 // for (uint i = 0; i < 4; ++i)
6462 // output_data.elements[x] += 1.f;
6463 // }
createLoopControlGroup(tcu::TestContext & testCtx)6464 tcu::TestCaseGroup *createLoopControlGroup(tcu::TestContext &testCtx)
6465 {
6466 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "loop_control"));
6467 vector<CaseParameter> cases;
6468 de::Random rnd(deStringHash(group->getName()));
6469 const int numElements = 100;
6470 vector<float> inputFloats(numElements, 0);
6471 vector<float> outputFloats(numElements, 0);
6472 const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
6473
6474 "OpSource GLSL 430\n"
6475 "OpName %main \"main\"\n"
6476 "OpName %id \"gl_GlobalInvocationID\"\n"
6477
6478 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6479
6480 + string(getComputeAsmInputOutputBufferTraits()) +
6481 string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6482
6483 "%u32ptr = OpTypePointer Function %u32\n"
6484
6485 "%id = OpVariable %uvec3ptr Input\n"
6486 "%zero = OpConstant %i32 0\n"
6487 "%uzero = OpConstant %u32 0\n"
6488 "%one = OpConstant %i32 1\n"
6489 "%constf1 = OpConstant %f32 1.0\n"
6490 "%four = OpConstant %u32 4\n"
6491
6492 "%main = OpFunction %void None %voidf\n"
6493 "%entry = OpLabel\n"
6494 "%i = OpVariable %u32ptr Function\n"
6495 " OpStore %i %uzero\n"
6496
6497 "%idval = OpLoad %uvec3 %id\n"
6498 "%x = OpCompositeExtract %u32 %idval 0\n"
6499 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6500 "%inval = OpLoad %f32 %inloc\n"
6501 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6502 " OpStore %outloc %inval\n"
6503 " OpBranch %loop_entry\n"
6504
6505 "%loop_entry = OpLabel\n"
6506 "%i_val = OpLoad %u32 %i\n"
6507 "%cmp_lt = OpULessThan %bool %i_val %four\n"
6508 " OpLoopMerge %loop_merge %loop_body ${CONTROL}\n"
6509 " OpBranchConditional %cmp_lt %loop_body %loop_merge\n"
6510 "%loop_body = OpLabel\n"
6511 "%outval = OpLoad %f32 %outloc\n"
6512 "%addf1 = OpFAdd %f32 %outval %constf1\n"
6513 " OpStore %outloc %addf1\n"
6514 "%new_i = OpIAdd %u32 %i_val %one\n"
6515 " OpStore %i %new_i\n"
6516 " OpBranch %loop_entry\n"
6517 "%loop_merge = OpLabel\n"
6518 " OpReturn\n"
6519 " OpFunctionEnd\n");
6520
6521 cases.push_back(CaseParameter("none", "None"));
6522 cases.push_back(CaseParameter("unroll", "Unroll"));
6523 cases.push_back(CaseParameter("dont_unroll", "DontUnroll"));
6524
6525 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6526
6527 for (size_t ndx = 0; ndx < numElements; ++ndx)
6528 outputFloats[ndx] = inputFloats[ndx] + 4.f;
6529
6530 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6531 {
6532 map<string, string> specializations;
6533 ComputeShaderSpec spec;
6534
6535 specializations["CONTROL"] = cases[caseNdx].param;
6536 spec.assembly = shaderTemplate.specialize(specializations);
6537 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6538 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6539 spec.numWorkGroups = IVec3(numElements, 1, 1);
6540
6541 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
6542 }
6543
6544 group->addChild(new SpvAsmLoopControlDependencyLengthCase(testCtx, "dependency_length"));
6545 group->addChild(new SpvAsmLoopControlDependencyInfiniteCase(testCtx, "dependency_infinite"));
6546
6547 return group.release();
6548 }
6549
6550 // Assembly code used for testing selection control is based on GLSL source code:
6551 // #version 430
6552 //
6553 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6554 // float elements[];
6555 // } input_data;
6556 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6557 // float elements[];
6558 // } output_data;
6559 //
6560 // void main() {
6561 // uint x = gl_GlobalInvocationID.x;
6562 // float val = input_data.elements[x];
6563 // if (val > 10.f)
6564 // output_data.elements[x] = val + 1.f;
6565 // else
6566 // output_data.elements[x] = val - 1.f;
6567 // }
createSelectionControlGroup(tcu::TestContext & testCtx)6568 tcu::TestCaseGroup *createSelectionControlGroup(tcu::TestContext &testCtx)
6569 {
6570 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "selection_control"));
6571 vector<CaseParameter> cases;
6572 de::Random rnd(deStringHash(group->getName()));
6573 const int numElements = 100;
6574 vector<float> inputFloats(numElements, 0);
6575 vector<float> outputFloats(numElements, 0);
6576 const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
6577
6578 "OpSource GLSL 430\n"
6579 "OpName %main \"main\"\n"
6580 "OpName %id \"gl_GlobalInvocationID\"\n"
6581
6582 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6583
6584 + string(getComputeAsmInputOutputBufferTraits()) +
6585 string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6586
6587 "%id = OpVariable %uvec3ptr Input\n"
6588 "%zero = OpConstant %i32 0\n"
6589 "%constf1 = OpConstant %f32 1.0\n"
6590 "%constf10 = OpConstant %f32 10.0\n"
6591
6592 "%main = OpFunction %void None %voidf\n"
6593 "%entry = OpLabel\n"
6594 "%idval = OpLoad %uvec3 %id\n"
6595 "%x = OpCompositeExtract %u32 %idval 0\n"
6596 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6597 "%inval = OpLoad %f32 %inloc\n"
6598 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6599 "%cmp_gt = OpFOrdGreaterThan %bool %inval %constf10\n"
6600
6601 " OpSelectionMerge %if_end ${CONTROL}\n"
6602 " OpBranchConditional %cmp_gt %if_true %if_false\n"
6603 "%if_true = OpLabel\n"
6604 "%addf1 = OpFAdd %f32 %inval %constf1\n"
6605 " OpStore %outloc %addf1\n"
6606 " OpBranch %if_end\n"
6607 "%if_false = OpLabel\n"
6608 "%subf1 = OpFSub %f32 %inval %constf1\n"
6609 " OpStore %outloc %subf1\n"
6610 " OpBranch %if_end\n"
6611 "%if_end = OpLabel\n"
6612 " OpReturn\n"
6613 " OpFunctionEnd\n");
6614
6615 cases.push_back(CaseParameter("none", "None"));
6616 cases.push_back(CaseParameter("flatten", "Flatten"));
6617 cases.push_back(CaseParameter("dont_flatten", "DontFlatten"));
6618 cases.push_back(CaseParameter("flatten_dont_flatten", "DontFlatten|Flatten"));
6619
6620 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6621
6622 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6623 floorAll(inputFloats);
6624
6625 for (size_t ndx = 0; ndx < numElements; ++ndx)
6626 outputFloats[ndx] = inputFloats[ndx] + (inputFloats[ndx] > 10.f ? 1.f : -1.f);
6627
6628 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6629 {
6630 map<string, string> specializations;
6631 ComputeShaderSpec spec;
6632
6633 specializations["CONTROL"] = cases[caseNdx].param;
6634 spec.assembly = shaderTemplate.specialize(specializations);
6635 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6636 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6637 spec.numWorkGroups = IVec3(numElements, 1, 1);
6638
6639 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
6640 }
6641
6642 return group.release();
6643 }
6644
getOpNameAbuseCases(vector<CaseParameter> & abuseCases)6645 void getOpNameAbuseCases(vector<CaseParameter> &abuseCases)
6646 {
6647 // Generate a long name.
6648 std::string longname;
6649 longname.resize(65535, 'k'); // max string literal, spir-v 2.17
6650
6651 // Some bad names, abusing utf-8 encoding. This may also cause problems
6652 // with the logs.
6653 // 1. Various illegal code points in utf-8
6654 std::string utf8illegal = "Illegal bytes in UTF-8: "
6655 "\xc0 \xc1 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff"
6656 "illegal surrogates: \xed\xad\xbf \xed\xbe\x80";
6657
6658 // 2. Zero encoded as overlong, not exactly legal but often supported to differentiate from terminating zero
6659 std::string utf8nul = "UTF-8 encoded nul \xC0\x80 (should not end name)";
6660
6661 // 3. Some overlong encodings
6662 std::string utf8overlong = "UTF-8 overlong \xF0\x82\x82\xAC \xfc\x83\xbf\xbf\xbf\xbf \xf8\x87\xbf\xbf\xbf "
6663 "\xf0\x8f\xbf\xbf";
6664
6665 // 4. Internet "zalgo" meme "bleeding text"
6666 std::string utf8zalgo = "\x56\xcc\xb5\xcc\x85\xcc\x94\xcc\x88\xcd\x8a\xcc\x91\xcc\x88\xcd\x91\xcc\x83\xcd\x82"
6667 "\xcc\x83\xcd\x90\xcc\x8a\xcc\x92\xcc\x92\xcd\x8b\xcc\x94\xcd\x9d\xcc\x98\xcc\xab\xcc"
6668 "\xae\xcc\xa9\xcc\xad\xcc\x97\xcc\xb0\x75\xcc\xb6\xcc\xbe\xcc\x80\xcc\x82\xcc\x84\xcd"
6669 "\x84\xcc\x90\xcd\x86\xcc\x9a\xcd\x84\xcc\x9b\xcd\x86\xcd\x92\xcc\x9a\xcd\x99\xcd\x99"
6670 "\xcc\xbb\xcc\x98\xcd\x8e\xcd\x88\xcd\x9a\xcc\xa6\xcc\x9c\xcc\xab\xcc\x99\xcd\x94\xcd"
6671 "\x99\xcd\x95\xcc\xa5\xcc\xab\xcd\x89\x6c\xcc\xb8\xcc\x8e\xcc\x8b\xcc\x8b\xcc\x9a\xcc"
6672 "\x8e\xcd\x9d\xcc\x80\xcc\xa1\xcc\xad\xcd\x9c\xcc\xba\xcc\x96\xcc\xb3\xcc\xa2\xcd\x8e"
6673 "\xcc\xa2\xcd\x96\x6b\xcc\xb8\xcc\x84\xcd\x81\xcc\xbf\xcc\x8d\xcc\x89\xcc\x85\xcc\x92"
6674 "\xcc\x84\xcc\x90\xcd\x81\xcc\x93\xcd\x90\xcd\x92\xcd\x9d\xcc\x84\xcd\x98\xcd\x9d\xcd"
6675 "\xa0\xcd\x91\xcc\x94\xcc\xb9\xcd\x93\xcc\xa5\xcd\x87\xcc\xad\xcc\xa7\xcd\x96\xcd\x99"
6676 "\xcc\x9d\xcc\xbc\xcd\x96\xcd\x93\xcc\x9d\xcc\x99\xcc\xa8\xcc\xb1\xcd\x85\xcc\xba\xcc"
6677 "\xa7\x61\xcc\xb8\xcc\x8e\xcc\x81\xcd\x90\xcd\x84\xcd\x8c\xcc\x8c\xcc\x85\xcd\x86\xcc"
6678 "\x84\xcd\x84\xcc\x90\xcc\x84\xcc\x8d\xcd\x99\xcd\x8d\xcc\xb0\xcc\xa3\xcc\xa6\xcd\x89"
6679 "\xcd\x8d\xcd\x87\xcc\x98\xcd\x8d\xcc\xa4\xcd\x9a\xcd\x8e\xcc\xab\xcc\xb9\xcc\xac\xcc"
6680 "\xa2\xcd\x87\xcc\xa0\xcc\xb3\xcd\x89\xcc\xb9\xcc\xa7\xcc\xa6\xcd\x89\xcd\x95\x6e\xcc"
6681 "\xb8\xcd\x8a\xcc\x8a\xcd\x82\xcc\x9b\xcd\x81\xcd\x90\xcc\x85\xcc\x9b\xcd\x80\xcd\x91"
6682 "\xcd\x9b\xcc\x81\xcd\x81\xcc\x9a\xcc\xb3\xcd\x9c\xcc\x9e\xcc\x9d\xcd\x99\xcc\xa2\xcd"
6683 "\x93\xcd\x96\xcc\x97\xff";
6684
6685 // General name abuses
6686 abuseCases.push_back(CaseParameter("_has_very_long_name", longname));
6687 abuseCases.push_back(CaseParameter("_utf8_illegal", utf8illegal));
6688 abuseCases.push_back(CaseParameter("_utf8_nul", utf8nul));
6689 abuseCases.push_back(CaseParameter("_utf8_overlong", utf8overlong));
6690 abuseCases.push_back(CaseParameter("_utf8_zalgo", utf8zalgo));
6691
6692 // GL keywords
6693 abuseCases.push_back(CaseParameter("_is_gl_Position", "gl_Position"));
6694 abuseCases.push_back(CaseParameter("_is_gl_InstanceID", "gl_InstanceID"));
6695 abuseCases.push_back(CaseParameter("_is_gl_PrimitiveID", "gl_PrimitiveID"));
6696 abuseCases.push_back(CaseParameter("_is_gl_TessCoord", "gl_TessCoord"));
6697 abuseCases.push_back(CaseParameter("_is_gl_PerVertex", "gl_PerVertex"));
6698 abuseCases.push_back(CaseParameter("_is_gl_InvocationID", "gl_InvocationID"));
6699 abuseCases.push_back(CaseParameter("_is_gl_PointSize", "gl_PointSize"));
6700 abuseCases.push_back(CaseParameter("_is_gl_PointCoord", "gl_PointCoord"));
6701 abuseCases.push_back(CaseParameter("_is_gl_Layer", "gl_Layer"));
6702 abuseCases.push_back(CaseParameter("_is_gl_FragDepth", "gl_FragDepth"));
6703 abuseCases.push_back(CaseParameter("_is_gl_NumWorkGroups", "gl_NumWorkGroups"));
6704 abuseCases.push_back(CaseParameter("_is_gl_WorkGroupID", "gl_WorkGroupID"));
6705 abuseCases.push_back(CaseParameter("_is_gl_LocalInvocationID", "gl_LocalInvocationID"));
6706 abuseCases.push_back(CaseParameter("_is_gl_GlobalInvocationID", "gl_GlobalInvocationID"));
6707 abuseCases.push_back(CaseParameter("_is_gl_MaxVertexAttribs", "gl_MaxVertexAttribs"));
6708 abuseCases.push_back(CaseParameter("_is_gl_MaxViewports", "gl_MaxViewports"));
6709 abuseCases.push_back(CaseParameter("_is_gl_MaxComputeWorkGroupCount", "gl_MaxComputeWorkGroupCount"));
6710 abuseCases.push_back(CaseParameter("_is_mat3", "mat3"));
6711 abuseCases.push_back(CaseParameter("_is_volatile", "volatile"));
6712 abuseCases.push_back(CaseParameter("_is_inout", "inout"));
6713 abuseCases.push_back(CaseParameter("_is_isampler3d", "isampler3d"));
6714 }
6715
createOpNameGroup(tcu::TestContext & testCtx)6716 tcu::TestCaseGroup *createOpNameGroup(tcu::TestContext &testCtx)
6717 {
6718 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opname"));
6719 de::MovePtr<tcu::TestCaseGroup> entryMainGroup(new tcu::TestCaseGroup(testCtx, "entry_main"));
6720 de::MovePtr<tcu::TestCaseGroup> entryNotGroup(new tcu::TestCaseGroup(testCtx, "entry_rdc"));
6721 de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "abuse"));
6722 vector<CaseParameter> cases;
6723 vector<CaseParameter> abuseCases;
6724 vector<string> testFunc;
6725 de::Random rnd(deStringHash(group->getName()));
6726 const int numElements = 128;
6727 vector<float> inputFloats(numElements, 0);
6728 vector<float> outputFloats(numElements, 0);
6729
6730 getOpNameAbuseCases(abuseCases);
6731
6732 fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6733
6734 for (size_t ndx = 0; ndx < numElements; ++ndx)
6735 outputFloats[ndx] = -inputFloats[ndx];
6736
6737 const string commonShaderHeader = "OpCapability Shader\n"
6738 "OpMemoryModel Logical GLSL450\n"
6739 "OpEntryPoint GLCompute %main \"main\" %id\n"
6740 "OpExecutionMode %main LocalSize 1 1 1\n";
6741
6742 const string commonShaderFooter = "OpDecorate %id BuiltIn GlobalInvocationId\n"
6743
6744 + string(getComputeAsmInputOutputBufferTraits()) +
6745 string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6746
6747 "%id = OpVariable %uvec3ptr Input\n"
6748 "%zero = OpConstant %i32 0\n"
6749
6750 "%func = OpFunction %void None %voidf\n"
6751 "%5 = OpLabel\n"
6752 " OpReturn\n"
6753 " OpFunctionEnd\n"
6754
6755 "%main = OpFunction %void None %voidf\n"
6756 "%entry = OpLabel\n"
6757 "%7 = OpFunctionCall %void %func\n"
6758
6759 "%idval = OpLoad %uvec3 %id\n"
6760 "%x = OpCompositeExtract %u32 %idval 0\n"
6761
6762 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6763 "%inval = OpLoad %f32 %inloc\n"
6764 "%neg = OpFNegate %f32 %inval\n"
6765 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6766 " OpStore %outloc %neg\n"
6767
6768 " OpReturn\n"
6769 " OpFunctionEnd\n";
6770
6771 const StringTemplate shaderTemplate("OpCapability Shader\n"
6772 "OpMemoryModel Logical GLSL450\n"
6773 "OpEntryPoint GLCompute %main \"${ENTRY}\" %id\n"
6774 "OpExecutionMode %main LocalSize 1 1 1\n"
6775 "OpName %${ID} \"${NAME}\"\n" +
6776 commonShaderFooter);
6777
6778 const std::string multipleNames = commonShaderHeader +
6779 "OpName %main \"to_be\"\n"
6780 "OpName %id \"or_not\"\n"
6781 "OpName %main \"to_be\"\n"
6782 "OpName %main \"makes_no\"\n"
6783 "OpName %func \"difference\"\n"
6784 "OpName %5 \"to_me\"\n" +
6785 commonShaderFooter;
6786
6787 {
6788 ComputeShaderSpec spec;
6789
6790 spec.assembly = multipleNames;
6791 spec.numWorkGroups = IVec3(numElements, 1, 1);
6792 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6793 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6794
6795 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "main_has_multiple_names", spec));
6796 }
6797
6798 const std::string everythingNamed = commonShaderHeader +
6799 "OpName %main \"name1\"\n"
6800 "OpName %id \"name2\"\n"
6801 "OpName %zero \"name3\"\n"
6802 "OpName %entry \"name4\"\n"
6803 "OpName %func \"name5\"\n"
6804 "OpName %5 \"name6\"\n"
6805 "OpName %7 \"name7\"\n"
6806 "OpName %idval \"name8\"\n"
6807 "OpName %inloc \"name9\"\n"
6808 "OpName %inval \"name10\"\n"
6809 "OpName %neg \"name11\"\n"
6810 "OpName %outloc \"name12\"\n" +
6811 commonShaderFooter;
6812 {
6813 ComputeShaderSpec spec;
6814
6815 spec.assembly = everythingNamed;
6816 spec.numWorkGroups = IVec3(numElements, 1, 1);
6817 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6818 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6819
6820 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named", spec));
6821 }
6822
6823 const std::string everythingNamedTheSame = commonShaderHeader +
6824 "OpName %main \"the_same\"\n"
6825 "OpName %id \"the_same\"\n"
6826 "OpName %zero \"the_same\"\n"
6827 "OpName %entry \"the_same\"\n"
6828 "OpName %func \"the_same\"\n"
6829 "OpName %5 \"the_same\"\n"
6830 "OpName %7 \"the_same\"\n"
6831 "OpName %idval \"the_same\"\n"
6832 "OpName %inloc \"the_same\"\n"
6833 "OpName %inval \"the_same\"\n"
6834 "OpName %neg \"the_same\"\n"
6835 "OpName %outloc \"the_same\"\n" +
6836 commonShaderFooter;
6837 {
6838 ComputeShaderSpec spec;
6839
6840 spec.assembly = everythingNamedTheSame;
6841 spec.numWorkGroups = IVec3(numElements, 1, 1);
6842 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6843 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6844
6845 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", spec));
6846 }
6847
6848 // main_is_...
6849 for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6850 {
6851 map<string, string> specializations;
6852 ComputeShaderSpec spec;
6853
6854 specializations["ENTRY"] = "main";
6855 specializations["ID"] = "main";
6856 specializations["NAME"] = abuseCases[ndx].param;
6857 spec.assembly = shaderTemplate.specialize(specializations);
6858 spec.numWorkGroups = IVec3(numElements, 1, 1);
6859 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6860 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6861
6862 abuseGroup->addChild(
6863 new SpvAsmComputeShaderCase(testCtx, (std::string("main") + abuseCases[ndx].name).c_str(), spec));
6864 }
6865
6866 // x_is_....
6867 for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6868 {
6869 map<string, string> specializations;
6870 ComputeShaderSpec spec;
6871
6872 specializations["ENTRY"] = "main";
6873 specializations["ID"] = "x";
6874 specializations["NAME"] = abuseCases[ndx].param;
6875 spec.assembly = shaderTemplate.specialize(specializations);
6876 spec.numWorkGroups = IVec3(numElements, 1, 1);
6877 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6878 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6879
6880 abuseGroup->addChild(
6881 new SpvAsmComputeShaderCase(testCtx, (std::string("x") + abuseCases[ndx].name).c_str(), spec));
6882 }
6883
6884 cases.push_back(CaseParameter("_is_main", "main"));
6885 cases.push_back(CaseParameter("_is_not_main", "not_main"));
6886 testFunc.push_back("main");
6887 testFunc.push_back("func");
6888
6889 for (size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6890 {
6891 for (size_t ndx = 0; ndx < cases.size(); ++ndx)
6892 {
6893 map<string, string> specializations;
6894 ComputeShaderSpec spec;
6895
6896 specializations["ENTRY"] = "main";
6897 specializations["ID"] = testFunc[fNdx];
6898 specializations["NAME"] = cases[ndx].param;
6899 spec.assembly = shaderTemplate.specialize(specializations);
6900 spec.numWorkGroups = IVec3(numElements, 1, 1);
6901 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6902 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6903
6904 entryMainGroup->addChild(
6905 new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), spec));
6906 }
6907 }
6908
6909 cases.push_back(CaseParameter("_is_entry", "rdc"));
6910
6911 for (size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6912 {
6913 for (size_t ndx = 0; ndx < cases.size(); ++ndx)
6914 {
6915 map<string, string> specializations;
6916 ComputeShaderSpec spec;
6917
6918 specializations["ENTRY"] = "rdc";
6919 specializations["ID"] = testFunc[fNdx];
6920 specializations["NAME"] = cases[ndx].param;
6921 spec.assembly = shaderTemplate.specialize(specializations);
6922 spec.numWorkGroups = IVec3(numElements, 1, 1);
6923 spec.entryPoint = "rdc";
6924 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6925 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6926
6927 entryNotGroup->addChild(
6928 new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), spec));
6929 }
6930 }
6931
6932 group->addChild(entryMainGroup.release());
6933 group->addChild(entryNotGroup.release());
6934 group->addChild(abuseGroup.release());
6935
6936 return group.release();
6937 }
6938
createOpMemberNameGroup(tcu::TestContext & testCtx)6939 tcu::TestCaseGroup *createOpMemberNameGroup(tcu::TestContext &testCtx)
6940 {
6941 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opmembername"));
6942 de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "abuse"));
6943 vector<CaseParameter> abuseCases;
6944 vector<string> testFunc;
6945 de::Random rnd(deStringHash(group->getName()));
6946 const int numElements = 128;
6947 vector<float> inputFloats(numElements, 0);
6948 vector<float> outputFloats(numElements, 0);
6949
6950 getOpNameAbuseCases(abuseCases);
6951
6952 fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6953
6954 for (size_t ndx = 0; ndx < numElements; ++ndx)
6955 outputFloats[ndx] = -inputFloats[ndx];
6956
6957 const string commonShaderHeader = "OpCapability Shader\n"
6958 "OpMemoryModel Logical GLSL450\n"
6959 "OpEntryPoint GLCompute %main \"main\" %id\n"
6960 "OpExecutionMode %main LocalSize 1 1 1\n";
6961
6962 const string commonShaderFooter = "OpDecorate %id BuiltIn GlobalInvocationId\n"
6963
6964 + string(getComputeAsmInputOutputBufferTraits()) +
6965 string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6966
6967 "%u3str = OpTypeStruct %u32 %u32 %u32\n"
6968
6969 "%id = OpVariable %uvec3ptr Input\n"
6970 "%zero = OpConstant %i32 0\n"
6971
6972 "%main = OpFunction %void None %voidf\n"
6973 "%entry = OpLabel\n"
6974
6975 "%idval = OpLoad %uvec3 %id\n"
6976 "%x0 = OpCompositeExtract %u32 %idval 0\n"
6977
6978 "%idstr = OpCompositeConstruct %u3str %x0 %x0 %x0\n"
6979 "%x = OpCompositeExtract %u32 %idstr 0\n"
6980
6981 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6982 "%inval = OpLoad %f32 %inloc\n"
6983 "%neg = OpFNegate %f32 %inval\n"
6984 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6985 " OpStore %outloc %neg\n"
6986
6987 " OpReturn\n"
6988 " OpFunctionEnd\n";
6989
6990 const StringTemplate shaderTemplate(commonShaderHeader + "OpMemberName %u3str 0 \"${NAME}\"\n" +
6991 commonShaderFooter);
6992
6993 const std::string multipleNames = commonShaderHeader +
6994 "OpMemberName %u3str 0 \"to_be\"\n"
6995 "OpMemberName %u3str 1 \"or_not\"\n"
6996 "OpMemberName %u3str 0 \"to_be\"\n"
6997 "OpMemberName %u3str 2 \"makes_no\"\n"
6998 "OpMemberName %u3str 0 \"difference\"\n"
6999 "OpMemberName %u3str 0 \"to_me\"\n" +
7000 commonShaderFooter;
7001 {
7002 ComputeShaderSpec spec;
7003
7004 spec.assembly = multipleNames;
7005 spec.numWorkGroups = IVec3(numElements, 1, 1);
7006 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
7007 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
7008
7009 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "u3str_x_has_multiple_names", spec));
7010 }
7011
7012 const std::string everythingNamedTheSame = commonShaderHeader +
7013 "OpMemberName %u3str 0 \"the_same\"\n"
7014 "OpMemberName %u3str 1 \"the_same\"\n"
7015 "OpMemberName %u3str 2 \"the_same\"\n" +
7016 commonShaderFooter;
7017
7018 {
7019 ComputeShaderSpec spec;
7020
7021 spec.assembly = everythingNamedTheSame;
7022 spec.numWorkGroups = IVec3(numElements, 1, 1);
7023 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
7024 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
7025
7026 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", spec));
7027 }
7028
7029 // u3str_x_is_....
7030 for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
7031 {
7032 map<string, string> specializations;
7033 ComputeShaderSpec spec;
7034
7035 specializations["NAME"] = abuseCases[ndx].param;
7036 spec.assembly = shaderTemplate.specialize(specializations);
7037 spec.numWorkGroups = IVec3(numElements, 1, 1);
7038 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
7039 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
7040
7041 abuseGroup->addChild(
7042 new SpvAsmComputeShaderCase(testCtx, (std::string("u3str_x") + abuseCases[ndx].name).c_str(), spec));
7043 }
7044
7045 group->addChild(abuseGroup.release());
7046
7047 return group.release();
7048 }
7049
7050 // Assembly code used for testing function control is based on GLSL source code:
7051 //
7052 // #version 430
7053 //
7054 // layout(std140, set = 0, binding = 0) readonly buffer Input {
7055 // float elements[];
7056 // } input_data;
7057 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
7058 // float elements[];
7059 // } output_data;
7060 //
7061 // float const10() { return 10.f; }
7062 //
7063 // void main() {
7064 // uint x = gl_GlobalInvocationID.x;
7065 // output_data.elements[x] = input_data.elements[x] + const10();
7066 // }
createFunctionControlGroup(tcu::TestContext & testCtx)7067 tcu::TestCaseGroup *createFunctionControlGroup(tcu::TestContext &testCtx)
7068 {
7069 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "function_control"));
7070 vector<CaseParameter> cases;
7071 de::Random rnd(deStringHash(group->getName()));
7072 const int numElements = 100;
7073 vector<float> inputFloats(numElements, 0);
7074 vector<float> outputFloats(numElements, 0);
7075 const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
7076
7077 "OpSource GLSL 430\n"
7078 "OpName %main \"main\"\n"
7079 "OpName %func_const10 \"const10(\"\n"
7080 "OpName %id \"gl_GlobalInvocationID\"\n"
7081
7082 "OpDecorate %id BuiltIn GlobalInvocationId\n"
7083
7084 + string(getComputeAsmInputOutputBufferTraits()) +
7085 string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
7086
7087 "%f32f = OpTypeFunction %f32\n"
7088 "%id = OpVariable %uvec3ptr Input\n"
7089 "%zero = OpConstant %i32 0\n"
7090 "%constf10 = OpConstant %f32 10.0\n"
7091
7092 "%main = OpFunction %void None %voidf\n"
7093 "%entry = OpLabel\n"
7094 "%idval = OpLoad %uvec3 %id\n"
7095 "%x = OpCompositeExtract %u32 %idval 0\n"
7096 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
7097 "%inval = OpLoad %f32 %inloc\n"
7098 "%ret_10 = OpFunctionCall %f32 %func_const10\n"
7099 "%fadd = OpFAdd %f32 %inval %ret_10\n"
7100 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
7101 " OpStore %outloc %fadd\n"
7102 " OpReturn\n"
7103 " OpFunctionEnd\n"
7104
7105 "%func_const10 = OpFunction %f32 ${CONTROL} %f32f\n"
7106 "%label = OpLabel\n"
7107 " OpReturnValue %constf10\n"
7108 " OpFunctionEnd\n");
7109
7110 cases.push_back(CaseParameter("none", "None"));
7111 cases.push_back(CaseParameter("inline", "Inline"));
7112 cases.push_back(CaseParameter("dont_inline", "DontInline"));
7113 cases.push_back(CaseParameter("pure", "Pure"));
7114 cases.push_back(CaseParameter("const", "Const"));
7115 cases.push_back(CaseParameter("inline_pure", "Inline|Pure"));
7116 cases.push_back(CaseParameter("const_dont_inline", "Const|DontInline"));
7117 cases.push_back(CaseParameter("inline_dont_inline", "Inline|DontInline"));
7118 cases.push_back(CaseParameter("pure_inline_dont_inline", "Pure|Inline|DontInline"));
7119
7120 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
7121
7122 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
7123 floorAll(inputFloats);
7124
7125 for (size_t ndx = 0; ndx < numElements; ++ndx)
7126 outputFloats[ndx] = inputFloats[ndx] + 10.f;
7127
7128 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7129 {
7130 map<string, string> specializations;
7131 ComputeShaderSpec spec;
7132
7133 specializations["CONTROL"] = cases[caseNdx].param;
7134 spec.assembly = shaderTemplate.specialize(specializations);
7135 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
7136 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
7137 spec.numWorkGroups = IVec3(numElements, 1, 1);
7138
7139 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
7140 }
7141
7142 return group.release();
7143 }
7144
createMemoryAccessGroup(tcu::TestContext & testCtx)7145 tcu::TestCaseGroup *createMemoryAccessGroup(tcu::TestContext &testCtx)
7146 {
7147 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "memory_access"));
7148 vector<CaseParameter> cases;
7149 de::Random rnd(deStringHash(group->getName()));
7150 const int numElements = 100;
7151 vector<float> inputFloats(numElements, 0);
7152 vector<float> outputFloats(numElements, 0);
7153 const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
7154
7155 "OpSource GLSL 430\n"
7156 "OpName %main \"main\"\n"
7157 "OpName %id \"gl_GlobalInvocationID\"\n"
7158
7159 "OpDecorate %id BuiltIn GlobalInvocationId\n"
7160
7161 + string(getComputeAsmInputOutputBufferTraits()) +
7162 string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
7163
7164 "%f32ptr_f = OpTypePointer Function %f32\n"
7165
7166 "%id = OpVariable %uvec3ptr Input\n"
7167 "%zero = OpConstant %i32 0\n"
7168 "%four = OpConstant %i32 4\n"
7169
7170 "%main = OpFunction %void None %voidf\n"
7171 "%label = OpLabel\n"
7172 "%copy = OpVariable %f32ptr_f Function\n"
7173 "%idval = OpLoad %uvec3 %id ${ACCESS}\n"
7174 "%x = OpCompositeExtract %u32 %idval 0\n"
7175 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
7176 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
7177 " OpCopyMemory %copy %inloc ${ACCESS}\n"
7178 "%val1 = OpLoad %f32 %copy\n"
7179 "%val2 = OpLoad %f32 %inloc\n"
7180 "%add = OpFAdd %f32 %val1 %val2\n"
7181 " OpStore %outloc %add ${ACCESS}\n"
7182 " OpReturn\n"
7183 " OpFunctionEnd\n");
7184
7185 cases.push_back(CaseParameter("null", ""));
7186 cases.push_back(CaseParameter("none", "None"));
7187 cases.push_back(CaseParameter("volatile", "Volatile"));
7188 cases.push_back(CaseParameter("aligned", "Aligned 4"));
7189 cases.push_back(CaseParameter("nontemporal", "Nontemporal"));
7190 cases.push_back(CaseParameter("aligned_nontemporal", "Aligned|Nontemporal 4"));
7191 cases.push_back(CaseParameter("aligned_volatile", "Volatile|Aligned 4"));
7192
7193 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
7194
7195 for (size_t ndx = 0; ndx < numElements; ++ndx)
7196 outputFloats[ndx] = inputFloats[ndx] + inputFloats[ndx];
7197
7198 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7199 {
7200 map<string, string> specializations;
7201 ComputeShaderSpec spec;
7202
7203 specializations["ACCESS"] = cases[caseNdx].param;
7204 spec.assembly = shaderTemplate.specialize(specializations);
7205 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
7206 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
7207 spec.numWorkGroups = IVec3(numElements, 1, 1);
7208
7209 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
7210 }
7211
7212 return group.release();
7213 }
7214
7215 // Checks that we can get undefined values for various types, without exercising a computation with it.
createOpUndefGroup(tcu::TestContext & testCtx)7216 tcu::TestCaseGroup *createOpUndefGroup(tcu::TestContext &testCtx)
7217 {
7218 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opundef"));
7219 vector<CaseParameter> cases;
7220 de::Random rnd(deStringHash(group->getName()));
7221 const int numElements = 100;
7222 vector<float> positiveFloats(numElements, 0);
7223 vector<float> negativeFloats(numElements, 0);
7224 const StringTemplate shaderTemplate(string(getComputeAsmShaderPreamble()) +
7225
7226 "OpSource GLSL 430\n"
7227 "OpName %main \"main\"\n"
7228 "OpName %id \"gl_GlobalInvocationID\"\n"
7229
7230 "OpDecorate %id BuiltIn GlobalInvocationId\n"
7231
7232 + string(getComputeAsmInputOutputBufferTraits()) +
7233 string(getComputeAsmCommonTypes()) +
7234 "%uvec2 = OpTypeVector %u32 2\n"
7235 "%fvec4 = OpTypeVector %f32 4\n"
7236 "%fmat33 = OpTypeMatrix %fvec3 3\n"
7237 "%image = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
7238 "%sampler = OpTypeSampler\n"
7239 "%simage = OpTypeSampledImage %image\n"
7240 "%const100 = OpConstant %u32 100\n"
7241 "%uarr100 = OpTypeArray %i32 %const100\n"
7242 "%struct = OpTypeStruct %f32 %i32 %u32\n"
7243 "%pointer = OpTypePointer Function %i32\n" +
7244 string(getComputeAsmInputOutputBuffer()) +
7245
7246 "%id = OpVariable %uvec3ptr Input\n"
7247 "%zero = OpConstant %i32 0\n"
7248
7249 "%main = OpFunction %void None %voidf\n"
7250 "%label = OpLabel\n"
7251
7252 "%undef = OpUndef ${TYPE}\n"
7253
7254 "%idval = OpLoad %uvec3 %id\n"
7255 "%x = OpCompositeExtract %u32 %idval 0\n"
7256
7257 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
7258 "%inval = OpLoad %f32 %inloc\n"
7259 "%neg = OpFNegate %f32 %inval\n"
7260 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
7261 " OpStore %outloc %neg\n"
7262 " OpReturn\n"
7263 " OpFunctionEnd\n");
7264
7265 cases.push_back(CaseParameter("bool", "%bool"));
7266 cases.push_back(CaseParameter("sint32", "%i32"));
7267 cases.push_back(CaseParameter("uint32", "%u32"));
7268 cases.push_back(CaseParameter("float32", "%f32"));
7269 cases.push_back(CaseParameter("vec4float32", "%fvec4"));
7270 cases.push_back(CaseParameter("vec2uint32", "%uvec2"));
7271 cases.push_back(CaseParameter("matrix", "%fmat33"));
7272 cases.push_back(CaseParameter("image", "%image"));
7273 cases.push_back(CaseParameter("sampler", "%sampler"));
7274 cases.push_back(CaseParameter("sampledimage", "%simage"));
7275 cases.push_back(CaseParameter("array", "%uarr100"));
7276 cases.push_back(CaseParameter("runtimearray", "%f32arr"));
7277 cases.push_back(CaseParameter("struct", "%struct"));
7278 cases.push_back(CaseParameter("pointer", "%pointer"));
7279
7280 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7281
7282 for (size_t ndx = 0; ndx < numElements; ++ndx)
7283 negativeFloats[ndx] = -positiveFloats[ndx];
7284
7285 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7286 {
7287 map<string, string> specializations;
7288 ComputeShaderSpec spec;
7289
7290 specializations["TYPE"] = cases[caseNdx].param;
7291 spec.assembly = shaderTemplate.specialize(specializations);
7292 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7293 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7294 spec.numWorkGroups = IVec3(numElements, 1, 1);
7295
7296 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
7297 }
7298
7299 // OpUndef with constants.
7300 #ifndef CTS_USES_VULKANSC
7301 {
7302 static const char data_dir[] = "spirv_assembly/instruction/compute/undef";
7303
7304 static const struct
7305 {
7306 const std::string name;
7307 const std::string desc;
7308 } amberCases[] = {
7309 {"undefined_constant_composite", "OpUndef value in OpConstantComposite"},
7310 {"undefined_spec_constant_composite", "OpUndef value in OpSpecConstantComposite"},
7311 };
7312
7313 for (int i = 0; i < DE_LENGTH_OF_ARRAY(amberCases); ++i)
7314 {
7315 cts_amber::AmberTestCase *testCase =
7316 cts_amber::createAmberTestCase(testCtx, amberCases[i].name.c_str(), amberCases[i].desc.c_str(),
7317 data_dir, amberCases[i].name + ".amber");
7318 group->addChild(testCase);
7319 }
7320 }
7321 #endif
7322
7323 return group.release();
7324 }
7325
7326 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createFloat16OpConstantCompositeGroup(tcu::TestContext & testCtx)7327 tcu::TestCaseGroup *createFloat16OpConstantCompositeGroup(tcu::TestContext &testCtx)
7328 {
7329 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opconstantcomposite"));
7330 vector<CaseParameter> cases;
7331 de::Random rnd(deStringHash(group->getName()));
7332 const int numElements = 100;
7333 vector<float> positiveFloats(numElements, 0);
7334 vector<float> negativeFloats(numElements, 0);
7335 const StringTemplate shaderTemplate("OpCapability Shader\n"
7336 "OpCapability Float16\n"
7337 "OpMemoryModel Logical GLSL450\n"
7338 "OpEntryPoint GLCompute %main \"main\" %id\n"
7339 "OpExecutionMode %main LocalSize 1 1 1\n"
7340 "OpSource GLSL 430\n"
7341 "OpName %main \"main\"\n"
7342 "OpName %id \"gl_GlobalInvocationID\"\n"
7343
7344 "OpDecorate %id BuiltIn GlobalInvocationId\n"
7345
7346 + string(getComputeAsmInputOutputBufferTraits()) +
7347 string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
7348
7349 "%id = OpVariable %uvec3ptr Input\n"
7350 "%zero = OpConstant %i32 0\n"
7351 "%f16 = OpTypeFloat 16\n"
7352 "%c_f16_0 = OpConstant %f16 0.0\n"
7353 "%c_f16_0_5 = OpConstant %f16 0.5\n"
7354 "%c_f16_1 = OpConstant %f16 1.0\n"
7355 "%v2f16 = OpTypeVector %f16 2\n"
7356 "%v3f16 = OpTypeVector %f16 3\n"
7357 "%v4f16 = OpTypeVector %f16 4\n"
7358
7359 "${CONSTANT}\n"
7360
7361 "%main = OpFunction %void None %voidf\n"
7362 "%label = OpLabel\n"
7363 "%idval = OpLoad %uvec3 %id\n"
7364 "%x = OpCompositeExtract %u32 %idval 0\n"
7365 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
7366 "%inval = OpLoad %f32 %inloc\n"
7367 "%neg = OpFNegate %f32 %inval\n"
7368 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
7369 " OpStore %outloc %neg\n"
7370 " OpReturn\n"
7371 " OpFunctionEnd\n");
7372
7373 cases.push_back(CaseParameter("vector", "%const = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"));
7374 cases.push_back(CaseParameter("matrix", "%m3v3f16 = OpTypeMatrix %v3f16 3\n"
7375 "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7376 "%mat = OpConstantComposite %m3v3f16 %vec %vec %vec"));
7377 cases.push_back(CaseParameter("struct", "%m2v3f16 = OpTypeMatrix %v3f16 2\n"
7378 "%struct = OpTypeStruct %i32 %f16 %v3f16 %m2v3f16\n"
7379 "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7380 "%mat = OpConstantComposite %m2v3f16 %vec %vec\n"
7381 "%const = OpConstantComposite %struct %zero %c_f16_0_5 %vec %mat\n"));
7382 cases.push_back(CaseParameter("nested_struct", "%st1 = OpTypeStruct %i32 %f16\n"
7383 "%st2 = OpTypeStruct %i32 %i32\n"
7384 "%struct = OpTypeStruct %st1 %st2\n"
7385 "%st1val = OpConstantComposite %st1 %zero %c_f16_0_5\n"
7386 "%st2val = OpConstantComposite %st2 %zero %zero\n"
7387 "%const = OpConstantComposite %struct %st1val %st2val"));
7388
7389 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7390
7391 for (size_t ndx = 0; ndx < numElements; ++ndx)
7392 negativeFloats[ndx] = -positiveFloats[ndx];
7393
7394 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7395 {
7396 map<string, string> specializations;
7397 ComputeShaderSpec spec;
7398
7399 specializations["CONSTANT"] = cases[caseNdx].param;
7400 spec.assembly = shaderTemplate.specialize(specializations);
7401 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7402 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7403 spec.numWorkGroups = IVec3(numElements, 1, 1);
7404
7405 spec.extensions.push_back("VK_KHR_shader_float16_int8");
7406
7407 spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
7408
7409 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
7410 }
7411
7412 return group.release();
7413 }
7414
// Expands inData (N values) into an N*N sequence so that pairing the argNo == 0
// result with the argNo == 1 result element-wise yields every ordered pair:
//   argNo == 0: the whole input repeated N times      (a b c a b c a b c)
//   argNo == 1: each input value repeated N times     (a a a b b b c c c)
// Any other argNo yields an empty vector.
const vector<deFloat16> squarize(const vector<deFloat16> &inData, const uint32_t argNo)
{
    const size_t valueCount = inData.size();
    vector<deFloat16> expanded;

    expanded.reserve(valueCount * valueCount);

    switch (argNo)
    {
    case 0:
        // One full copy of the input per input element.
        for (size_t repeat = 0; repeat < valueCount; ++repeat)
            expanded.insert(expanded.end(), inData.begin(), inData.end());
        break;

    case 1:
        // A run of valueCount copies of each input element, in input order.
        for (const deFloat16 value : inData)
            expanded.insert(expanded.end(), valueCount, value);
        break;

    default:
        break;
    }

    return expanded;
}
7440
// Two-component vector variant of squarize().
//
// First every ordered pair (x, y) of input scalars is formed, giving M = N*N
// two-component vectors stored flat. The result then holds M*M such vectors:
//   argNo == 0: the whole list of M vectors repeated M times
//   argNo == 1: each of the M vectors repeated M times
// Any other argNo yields an empty vector. The input is limited to 64 scalars
// (checked with DE_ASSERT).
const vector<deFloat16> squarizeVector(const vector<deFloat16> &inData, const uint32_t argNo)
{
    const size_t coordsPerVector = 2;
    const size_t scalarCount     = inData.size();

    DE_ASSERT(scalarCount <= 64);

    // Create vectors. pairs will contain each possible pair from inData
    vector<deFloat16> pairs;
    pairs.reserve(coordsPerVector * scalarCount * scalarCount);

    for (const deFloat16 first : inData)
        for (const deFloat16 second : inData)
        {
            pairs.push_back(first);
            pairs.push_back(second);
        }

    // Create vector pairs. result will contain each possible pair from pairs
    const size_t vectorCount = pairs.size() / coordsPerVector;
    vector<deFloat16> result;
    result.reserve(coordsPerVector * vectorCount * vectorCount);

    if (argNo == 0)
    {
        // The inner index varies fastest: append the complete list once per vector.
        for (size_t repeat = 0; repeat < vectorCount; ++repeat)
            result.insert(result.end(), pairs.begin(), pairs.end());
    }
    else if (argNo == 1)
    {
        // The outer index selects the vector: emit it vectorCount times in a row.
        for (size_t vecNdx = 0; vecNdx < vectorCount; ++vecNdx)
        {
            const size_t base = coordsPerVector * vecNdx;

            for (size_t repeat = 0; repeat < vectorCount; ++repeat)
                for (size_t coord = 0; coord < coordsPerVector; ++coord)
                    result.push_back(pairs[base + coord]);
        }
    }

    return result;
}
7492
7493 struct fp16isNan
7494 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isNan7495 bool operator()(const tcu::Float16 in1, const tcu::Float16)
7496 {
7497 return in1.isNaN();
7498 }
7499 };
7500 struct fp16isInf
7501 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isInf7502 bool operator()(const tcu::Float16 in1, const tcu::Float16)
7503 {
7504 return in1.isInf();
7505 }
7506 };
7507 struct fp16isEqual
7508 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isEqual7509 bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)
7510 {
7511 return in1.asFloat() == in2.asFloat();
7512 }
7513 };
7514 struct fp16isUnequal
7515 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isUnequal7516 bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)
7517 {
7518 return in1.asFloat() != in2.asFloat();
7519 }
7520 };
7521 struct fp16isLess
7522 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isLess7523 bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)
7524 {
7525 return in1.asFloat() < in2.asFloat();
7526 }
7527 };
7528 struct fp16isGreater
7529 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isGreater7530 bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)
7531 {
7532 return in1.asFloat() > in2.asFloat();
7533 }
7534 };
7535 struct fp16isLessOrEqual
7536 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isLessOrEqual7537 bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)
7538 {
7539 return in1.asFloat() <= in2.asFloat();
7540 }
7541 };
7542 struct fp16isGreaterOrEqual
7543 {
operator ()vkt::SpirVAssembly::__anon8834af5b0111::fp16isGreaterOrEqual7544 bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)
7545 {
7546 return in1.asFloat() >= in2.asFloat();
7547 }
7548 };
7549
7550 template <class TestedLogicalFunction, bool onlyTestFunc, bool unationModeAnd, bool nanSupported>
compareFP16Logical(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)7551 bool compareFP16Logical(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
7552 const std::vector<Resource> &, TestLog &log)
7553 {
7554 if (inputs.size() != 2 || outputAllocs.size() != 1)
7555 return false;
7556
7557 vector<uint8_t> input1Bytes;
7558 vector<uint8_t> input2Bytes;
7559
7560 inputs[0].getBytes(input1Bytes);
7561 inputs[1].getBytes(input2Bytes);
7562
7563 const uint32_t denormModesCount = 2;
7564 const deFloat16 float16one = tcu::Float16(1.0f).bits();
7565 const deFloat16 float16zero = tcu::Float16(0.0f).bits();
7566 const tcu::Float16 zero = tcu::Float16::zero(1);
7567 const deFloat16 *const outputAsFP16 = static_cast<deFloat16 *>(outputAllocs[0]->getHostPtr());
7568 const deFloat16 *const input1AsFP16 = reinterpret_cast<deFloat16 *const>(&input1Bytes.front());
7569 const deFloat16 *const input2AsFP16 = reinterpret_cast<deFloat16 *const>(&input2Bytes.front());
7570 uint32_t successfulRuns = denormModesCount;
7571 std::string results[denormModesCount];
7572 TestedLogicalFunction testedLogicalFunction;
7573
7574 for (uint32_t denormMode = 0; denormMode < denormModesCount; denormMode++)
7575 {
7576 const bool flushToZero = (denormMode == 1);
7577
7578 for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deFloat16); ++idx)
7579 {
7580 const tcu::Float16 f1pre = tcu::Float16(input1AsFP16[idx]);
7581 const tcu::Float16 f2pre = tcu::Float16(input2AsFP16[idx]);
7582 const tcu::Float16 f1 = (flushToZero && f1pre.isDenorm()) ? zero : f1pre;
7583 const tcu::Float16 f2 = (flushToZero && f2pre.isDenorm()) ? zero : f2pre;
7584 deFloat16 expectedOutput = float16zero;
7585
7586 if (onlyTestFunc)
7587 {
7588 if (testedLogicalFunction(f1, f2))
7589 expectedOutput = float16one;
7590 }
7591 else
7592 {
7593 const bool f1nan = f1.isNaN();
7594 const bool f2nan = f2.isNaN();
7595
7596 // Skip NaN floats if not supported by implementation
7597 if (!nanSupported && (f1nan || f2nan))
7598 continue;
7599
7600 if (unationModeAnd)
7601 {
7602 const bool ordered = !f1nan && !f2nan;
7603
7604 if (ordered && testedLogicalFunction(f1, f2))
7605 expectedOutput = float16one;
7606 }
7607 else
7608 {
7609 const bool unordered = f1nan || f2nan;
7610
7611 if (unordered || testedLogicalFunction(f1, f2))
7612 expectedOutput = float16one;
7613 }
7614 }
7615
7616 if (outputAsFP16[idx] != expectedOutput)
7617 {
7618 std::ostringstream str;
7619
7620 str << "ERROR: Sub-case #" << idx << " flushToZero:" << flushToZero << std::hex << " failed, inputs: 0x"
7621 << f1.bits() << ";0x" << f2.bits() << " output: 0x" << outputAsFP16[idx] << " expected output: 0x"
7622 << expectedOutput;
7623
7624 results[denormMode] = str.str();
7625
7626 successfulRuns--;
7627
7628 break;
7629 }
7630 }
7631 }
7632
7633 if (successfulRuns == 0)
7634 for (uint32_t denormMode = 0; denormMode < denormModesCount; denormMode++)
7635 log << TestLog::Message << results[denormMode] << TestLog::EndMessage;
7636
7637 return successfulRuns > 0;
7638 }
7639
7640 } // namespace
7641
createOpSourceTests(tcu::TestContext & testCtx)7642 tcu::TestCaseGroup *createOpSourceTests(tcu::TestContext &testCtx)
7643 {
7644 struct NameCodePair
7645 {
7646 string name, code;
7647 };
7648 RGBA defaultColors[4];
7649 de::MovePtr<tcu::TestCaseGroup> opSourceTests(new tcu::TestCaseGroup(testCtx, "opsource"));
7650 const std::string opsourceGLSLWithFile = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile ";
7651 map<string, string> fragments = passthruFragments();
7652 const NameCodePair tests[] = {{"unknown", "OpSource Unknown 321"},
7653 {"essl", "OpSource ESSL 310"},
7654 {"glsl", "OpSource GLSL 450"},
7655 {"opencl_cpp", "OpSource OpenCL_CPP 120"},
7656 {"opencl_c", "OpSource OpenCL_C 120"},
7657 {"multiple", "OpSource GLSL 450\nOpSource GLSL 450"},
7658 {"file", opsourceGLSLWithFile},
7659 {"source", opsourceGLSLWithFile + "\"void main(){}\""},
7660 // Longest possible source string: SPIR-V limits instructions to 65535
7661 // words, of which the first 4 are opsourceGLSLWithFile; the rest will
7662 // contain 65530 UTF8 characters (one word each) plus one last word
7663 // containing 3 ASCII characters and \0.
7664 {"longsource", opsourceGLSLWithFile + '"' + makeLongUTF8String(65530) + "ccc" + '"'}};
7665
7666 getDefaultColors(defaultColors);
7667 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7668 {
7669 fragments["debug"] = tests[testNdx].code;
7670 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7671 }
7672
7673 return opSourceTests.release();
7674 }
7675
createOpSourceContinuedTests(tcu::TestContext & testCtx)7676 tcu::TestCaseGroup *createOpSourceContinuedTests(tcu::TestContext &testCtx)
7677 {
7678 struct NameCodePair
7679 {
7680 string name, code;
7681 };
7682 RGBA defaultColors[4];
7683 de::MovePtr<tcu::TestCaseGroup> opSourceTests(new tcu::TestCaseGroup(testCtx, "opsourcecontinued"));
7684 map<string, string> fragments = passthruFragments();
7685 const std::string opsource = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile \"void main(){}\"\n";
7686 const NameCodePair tests[] = {{"empty", opsource + "OpSourceContinued \"\""},
7687 {"short", opsource + "OpSourceContinued \"abcde\""},
7688 {"multiple", opsource + "OpSourceContinued \"abcde\"\nOpSourceContinued \"fghij\""},
7689 // Longest possible source string: SPIR-V limits instructions to 65535
7690 // words, of which the first one is OpSourceContinued/length; the rest
7691 // will contain 65533 UTF8 characters (one word each) plus one last word
7692 // containing 3 ASCII characters and \0.
7693 {"long", opsource + "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\""}};
7694
7695 getDefaultColors(defaultColors);
7696 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7697 {
7698 fragments["debug"] = tests[testNdx].code;
7699 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7700 }
7701
7702 return opSourceTests.release();
7703 }
createOpNoLineTests(tcu::TestContext & testCtx)7704 tcu::TestCaseGroup *createOpNoLineTests(tcu::TestContext &testCtx)
7705 {
7706 RGBA defaultColors[4];
7707 de::MovePtr<tcu::TestCaseGroup> opLineTests(new tcu::TestCaseGroup(testCtx, "opnoline"));
7708 map<string, string> fragments;
7709 getDefaultColors(defaultColors);
7710 fragments["debug"] = "%name = OpString \"name\"\n";
7711
7712 fragments["pre_main"] = "OpNoLine\n"
7713 "OpNoLine\n"
7714 "OpLine %name 1 1\n"
7715 "OpNoLine\n"
7716 "OpLine %name 1 1\n"
7717 "OpLine %name 1 1\n"
7718 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7719 "OpNoLine\n"
7720 "OpLine %name 1 1\n"
7721 "OpNoLine\n"
7722 "OpLine %name 1 1\n"
7723 "OpLine %name 1 1\n"
7724 "%second_param1 = OpFunctionParameter %v4f32\n"
7725 "OpNoLine\n"
7726 "OpNoLine\n"
7727 "%label_secondfunction = OpLabel\n"
7728 "OpNoLine\n"
7729 "OpReturnValue %second_param1\n"
7730 "OpFunctionEnd\n"
7731 "OpNoLine\n"
7732 "OpNoLine\n";
7733
7734 fragments["testfun"] =
7735 // A %test_code function that returns its argument unchanged.
7736 "OpNoLine\n"
7737 "OpNoLine\n"
7738 "OpLine %name 1 1\n"
7739 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7740 "OpNoLine\n"
7741 "%param1 = OpFunctionParameter %v4f32\n"
7742 "OpNoLine\n"
7743 "OpNoLine\n"
7744 "%label_testfun = OpLabel\n"
7745 "OpNoLine\n"
7746 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7747 "OpReturnValue %val1\n"
7748 "OpFunctionEnd\n"
7749 "OpLine %name 1 1\n"
7750 "OpNoLine\n";
7751
7752 createTestsForAllStages("opnoline", defaultColors, defaultColors, fragments, opLineTests.get());
7753
7754 return opLineTests.release();
7755 }
7756
createOpModuleProcessedTests(tcu::TestContext & testCtx)7757 tcu::TestCaseGroup *createOpModuleProcessedTests(tcu::TestContext &testCtx)
7758 {
7759 RGBA defaultColors[4];
7760 de::MovePtr<tcu::TestCaseGroup> opModuleProcessedTests(new tcu::TestCaseGroup(testCtx, "opmoduleprocessed"));
7761 map<string, string> fragments;
7762 std::vector<std::string> noExtensions;
7763 GraphicsResources resources;
7764
7765 getDefaultColors(defaultColors);
7766 resources.verifyBinary = veryfiBinaryShader;
7767 resources.spirvVersion = SPIRV_VERSION_1_3;
7768
7769 fragments["moduleprocessed"] = "OpModuleProcessed \"VULKAN CTS\"\n"
7770 "OpModuleProcessed \"Negative values\"\n"
7771 "OpModuleProcessed \"Date: 2017/09/21\"\n";
7772
7773 fragments["pre_main"] = "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7774 "%second_param1 = OpFunctionParameter %v4f32\n"
7775 "%label_secondfunction = OpLabel\n"
7776 "OpReturnValue %second_param1\n"
7777 "OpFunctionEnd\n";
7778
7779 fragments["testfun"] =
7780 // A %test_code function that returns its argument unchanged.
7781 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7782 "%param1 = OpFunctionParameter %v4f32\n"
7783 "%label_testfun = OpLabel\n"
7784 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7785 "OpReturnValue %val1\n"
7786 "OpFunctionEnd\n";
7787
7788 createTestsForAllStages("opmoduleprocessed", defaultColors, defaultColors, fragments, resources, noExtensions,
7789 opModuleProcessedTests.get());
7790
7791 return opModuleProcessedTests.release();
7792 }
7793
createOpLineTests(tcu::TestContext & testCtx)7794 tcu::TestCaseGroup *createOpLineTests(tcu::TestContext &testCtx)
7795 {
7796 RGBA defaultColors[4];
7797 de::MovePtr<tcu::TestCaseGroup> opLineTests(new tcu::TestCaseGroup(testCtx, "opline"));
7798 map<string, string> fragments;
7799 std::vector<std::pair<std::string, std::string>> problemStrings;
7800
7801 problemStrings.push_back(std::make_pair<std::string, std::string>("empty_name", ""));
7802 problemStrings.push_back(std::make_pair<std::string, std::string>("short_name", "short_name"));
7803 problemStrings.push_back(std::make_pair<std::string, std::string>("long_name", makeLongUTF8String(65530) + "ccc"));
7804 getDefaultColors(defaultColors);
7805
7806 fragments["debug"] = "%other_name = OpString \"other_name\"\n";
7807
7808 fragments["pre_main"] = "OpLine %file_name 32 0\n"
7809 "OpLine %file_name 32 32\n"
7810 "OpLine %file_name 32 40\n"
7811 "OpLine %other_name 32 40\n"
7812 "OpLine %other_name 0 100\n"
7813 "OpLine %other_name 0 4294967295\n"
7814 "OpLine %other_name 4294967295 0\n"
7815 "OpLine %other_name 32 40\n"
7816 "OpLine %file_name 0 0\n"
7817 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7818 "OpLine %file_name 1 0\n"
7819 "%second_param1 = OpFunctionParameter %v4f32\n"
7820 "OpLine %file_name 1 3\n"
7821 "OpLine %file_name 1 2\n"
7822 "%label_secondfunction = OpLabel\n"
7823 "OpLine %file_name 0 2\n"
7824 "OpReturnValue %second_param1\n"
7825 "OpFunctionEnd\n"
7826 "OpLine %file_name 0 2\n"
7827 "OpLine %file_name 0 2\n";
7828
7829 fragments["testfun"] =
7830 // A %test_code function that returns its argument unchanged.
7831 "OpLine %file_name 1 0\n"
7832 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7833 "OpLine %file_name 16 330\n"
7834 "%param1 = OpFunctionParameter %v4f32\n"
7835 "OpLine %file_name 14 442\n"
7836 "%label_testfun = OpLabel\n"
7837 "OpLine %file_name 11 1024\n"
7838 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7839 "OpLine %file_name 2 97\n"
7840 "OpReturnValue %val1\n"
7841 "OpFunctionEnd\n"
7842 "OpLine %file_name 5 32\n";
7843
7844 for (size_t i = 0; i < problemStrings.size(); ++i)
7845 {
7846 map<string, string> testFragments = fragments;
7847 testFragments["debug"] += "%file_name = OpString \"" + problemStrings[i].second + "\"\n";
7848 createTestsForAllStages(string("opline") + "_" + problemStrings[i].first, defaultColors, defaultColors,
7849 testFragments, opLineTests.get());
7850 }
7851
7852 return opLineTests.release();
7853 }
7854
createOpConstantNullTests(tcu::TestContext & testCtx)7855 tcu::TestCaseGroup *createOpConstantNullTests(tcu::TestContext &testCtx)
7856 {
7857 de::MovePtr<tcu::TestCaseGroup> opConstantNullTests(new tcu::TestCaseGroup(testCtx, "opconstantnull"));
7858 RGBA colors[4];
7859
7860 const char functionStart[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7861 "%param1 = OpFunctionParameter %v4f32\n"
7862 "%lbl = OpLabel\n";
7863
7864 const char functionEnd[] = "OpReturnValue %transformed_param\n"
7865 "OpFunctionEnd\n";
7866
7867 struct NameConstantsCode
7868 {
7869 string name;
7870 string constants;
7871 string code;
7872 };
7873
7874 NameConstantsCode tests[] = {
7875 {"vec4", "%cnull = OpConstantNull %v4f32\n", "%transformed_param = OpFAdd %v4f32 %param1 %cnull\n"},
7876 {"float", "%cnull = OpConstantNull %f32\n",
7877 "%vp = OpVariable %fp_v4f32 Function\n"
7878 "%v = OpLoad %v4f32 %vp\n"
7879 "%v0 = OpVectorInsertDynamic %v4f32 %v %cnull %c_i32_0\n"
7880 "%v1 = OpVectorInsertDynamic %v4f32 %v0 %cnull %c_i32_1\n"
7881 "%v2 = OpVectorInsertDynamic %v4f32 %v1 %cnull %c_i32_2\n"
7882 "%v3 = OpVectorInsertDynamic %v4f32 %v2 %cnull %c_i32_3\n"
7883 "%transformed_param = OpFAdd %v4f32 %param1 %v3\n"},
7884 {"bool", "%cnull = OpConstantNull %bool\n",
7885 "%v = OpVariable %fp_v4f32 Function\n"
7886 " OpStore %v %param1\n"
7887 " OpSelectionMerge %false_label None\n"
7888 " OpBranchConditional %cnull %true_label %false_label\n"
7889 "%true_label = OpLabel\n"
7890 " OpStore %v %c_v4f32_0_5_0_5_0_5_0_5\n"
7891 " OpBranch %false_label\n"
7892 "%false_label = OpLabel\n"
7893 "%transformed_param = OpLoad %v4f32 %v\n"},
7894 {"i32", "%cnull = OpConstantNull %i32\n",
7895 "%v = OpVariable %fp_v4f32 Function %c_v4f32_0_5_0_5_0_5_0_5\n"
7896 "%b = OpIEqual %bool %cnull %c_i32_0\n"
7897 " OpSelectionMerge %false_label None\n"
7898 " OpBranchConditional %b %true_label %false_label\n"
7899 "%true_label = OpLabel\n"
7900 " OpStore %v %param1\n"
7901 " OpBranch %false_label\n"
7902 "%false_label = OpLabel\n"
7903 "%transformed_param = OpLoad %v4f32 %v\n"},
7904 {"struct",
7905 "%stype = OpTypeStruct %f32 %v4f32\n"
7906 "%fp_stype = OpTypePointer Function %stype\n"
7907 "%cnull = OpConstantNull %stype\n",
7908 "%v = OpVariable %fp_stype Function %cnull\n"
7909 "%f = OpAccessChain %fp_v4f32 %v %c_i32_1\n"
7910 "%f_val = OpLoad %v4f32 %f\n"
7911 "%transformed_param = OpFAdd %v4f32 %param1 %f_val\n"},
7912 {"array",
7913 "%a4_v4f32 = OpTypeArray %v4f32 %c_u32_4\n"
7914 "%fp_a4_v4f32 = OpTypePointer Function %a4_v4f32\n"
7915 "%cnull = OpConstantNull %a4_v4f32\n",
7916 "%v = OpVariable %fp_a4_v4f32 Function %cnull\n"
7917 "%f = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7918 "%f1 = OpAccessChain %fp_v4f32 %v %c_u32_1\n"
7919 "%f2 = OpAccessChain %fp_v4f32 %v %c_u32_2\n"
7920 "%f3 = OpAccessChain %fp_v4f32 %v %c_u32_3\n"
7921 "%f_val = OpLoad %v4f32 %f\n"
7922 "%f1_val = OpLoad %v4f32 %f1\n"
7923 "%f2_val = OpLoad %v4f32 %f2\n"
7924 "%f3_val = OpLoad %v4f32 %f3\n"
7925 "%t0 = OpFAdd %v4f32 %param1 %f_val\n"
7926 "%t1 = OpFAdd %v4f32 %t0 %f1_val\n"
7927 "%t2 = OpFAdd %v4f32 %t1 %f2_val\n"
7928 "%transformed_param = OpFAdd %v4f32 %t2 %f3_val\n"},
7929 {"matrix",
7930 "%mat4x4_f32 = OpTypeMatrix %v4f32 4\n"
7931 "%cnull = OpConstantNull %mat4x4_f32\n",
7932 // Our null matrix * any vector should result in a zero vector.
7933 "%v = OpVectorTimesMatrix %v4f32 %param1 %cnull\n"
7934 "%transformed_param = OpFAdd %v4f32 %param1 %v\n"}};
7935
7936 getHalfColorsFullAlpha(colors);
7937
7938 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7939 {
7940 map<string, string> fragments;
7941 fragments["pre_main"] = tests[testNdx].constants;
7942 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7943 createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, opConstantNullTests.get());
7944 }
7945 return opConstantNullTests.release();
7946 }
createOpConstantCompositeTests(tcu::TestContext & testCtx)7947 tcu::TestCaseGroup *createOpConstantCompositeTests(tcu::TestContext &testCtx)
7948 {
7949 de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests(new tcu::TestCaseGroup(testCtx, "opconstantcomposite"));
7950 RGBA inputColors[4];
7951 RGBA outputColors[4];
7952
7953 const char functionStart[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7954 "%param1 = OpFunctionParameter %v4f32\n"
7955 "%lbl = OpLabel\n";
7956
7957 const char functionEnd[] = "OpReturnValue %transformed_param\n"
7958 "OpFunctionEnd\n";
7959
7960 struct NameConstantsCode
7961 {
7962 string name;
7963 string constants;
7964 string code;
7965 };
7966
7967 NameConstantsCode tests[] = {
7968 {"vec4",
7969
7970 "%cval = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0\n",
7971 "%transformed_param = OpFAdd %v4f32 %param1 %cval\n"},
7972 {
7973 "struct",
7974
7975 "%stype = OpTypeStruct %v4f32 %f32\n"
7976 "%fp_stype = OpTypePointer Function %stype\n"
7977 "%f32_n_1 = OpConstant %f32 -1.0\n"
7978 "%f32_1_5 = OpConstant %f32 !0x3fc00000\n" // +1.5
7979 "%cvec = OpConstantComposite %v4f32 %f32_1_5 %f32_1_5 %f32_1_5 %c_f32_1\n"
7980 "%cval = OpConstantComposite %stype %cvec %f32_n_1\n",
7981
7982 "%v = OpVariable %fp_stype Function %cval\n"
7983 "%vec_ptr = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7984 "%f32_ptr = OpAccessChain %fp_f32 %v %c_u32_1\n"
7985 "%vec_val = OpLoad %v4f32 %vec_ptr\n"
7986 "%f32_val = OpLoad %f32 %f32_ptr\n"
7987 "%tmp1 = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_1 %f32_val\n" // vec4(-1)
7988 "%tmp2 = OpFAdd %v4f32 %tmp1 %param1\n" // param1 + vec4(-1)
7989 "%transformed_param = OpFAdd %v4f32 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
7990 },
7991 {// [1|0|0|0.5] [x] = x + 0.5
7992 // [0|1|0|0.5] [y] = y + 0.5
7993 // [0|0|1|0.5] [z] = z + 0.5
7994 // [0|0|0|1 ] [1] = 1
7995 "matrix",
7996
7997 "%mat4x4_f32 = OpTypeMatrix %v4f32 4\n"
7998 "%v4f32_1_0_0_0 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_0 %c_f32_0 %c_f32_0\n"
7999 "%v4f32_0_1_0_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_1 %c_f32_0 %c_f32_0\n"
8000 "%v4f32_0_0_1_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_1 %c_f32_0\n"
8001 "%v4f32_0_5_0_5_0_5_1 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_1\n"
8002 "%cval = OpConstantComposite %mat4x4_f32 %v4f32_1_0_0_0 %v4f32_0_1_0_0 %v4f32_0_0_1_0 "
8003 "%v4f32_0_5_0_5_0_5_1\n",
8004
8005 "%transformed_param = OpMatrixTimesVector %v4f32 %cval %param1\n"},
8006 {"array",
8007
8008 "%c_v4f32_1_1_1_0 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
8009 "%fp_a4f32 = OpTypePointer Function %a4f32\n"
8010 "%f32_n_1 = OpConstant %f32 -1.0\n"
8011 "%f32_1_5 = OpConstant %f32 !0x3fc00000\n" // +1.5
8012 "%carr = OpConstantComposite %a4f32 %c_f32_0 %f32_n_1 %f32_1_5 %c_f32_0\n",
8013
8014 "%v = OpVariable %fp_a4f32 Function %carr\n"
8015 "%f = OpAccessChain %fp_f32 %v %c_u32_0\n"
8016 "%f1 = OpAccessChain %fp_f32 %v %c_u32_1\n"
8017 "%f2 = OpAccessChain %fp_f32 %v %c_u32_2\n"
8018 "%f3 = OpAccessChain %fp_f32 %v %c_u32_3\n"
8019 "%f_val = OpLoad %f32 %f\n"
8020 "%f1_val = OpLoad %f32 %f1\n"
8021 "%f2_val = OpLoad %f32 %f2\n"
8022 "%f3_val = OpLoad %f32 %f3\n"
8023 "%ftot1 = OpFAdd %f32 %f_val %f1_val\n"
8024 "%ftot2 = OpFAdd %f32 %ftot1 %f2_val\n"
8025 "%ftot3 = OpFAdd %f32 %ftot2 %f3_val\n" // 0 - 1 + 1.5 + 0
8026 "%add_vec = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %ftot3\n"
8027 "%transformed_param = OpFAdd %v4f32 %param1 %add_vec\n"},
8028 {//
8029 // [
8030 // {
8031 // 0.0,
8032 // [ 1.0, 1.0, 1.0, 1.0]
8033 // },
8034 // {
8035 // 1.0,
8036 // [ 0.0, 0.5, 0.0, 0.0]
8037 // }, // ^^^
8038 // {
8039 // 0.0,
8040 // [ 1.0, 1.0, 1.0, 1.0]
8041 // }
8042 // ]
8043 "array_of_struct_of_array",
8044
8045 "%c_v4f32_1_1_1_0 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
8046 "%fp_a4f32 = OpTypePointer Function %a4f32\n"
8047 "%stype = OpTypeStruct %f32 %a4f32\n"
8048 "%a3stype = OpTypeArray %stype %c_u32_3\n"
8049 "%fp_a3stype = OpTypePointer Function %a3stype\n"
8050 "%ca4f32_0 = OpConstantComposite %a4f32 %c_f32_0 %c_f32_0_5 %c_f32_0 %c_f32_0\n"
8051 "%ca4f32_1 = OpConstantComposite %a4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
8052 "%cstype1 = OpConstantComposite %stype %c_f32_0 %ca4f32_1\n"
8053 "%cstype2 = OpConstantComposite %stype %c_f32_1 %ca4f32_0\n"
8054 "%carr = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
8055
8056 "%v = OpVariable %fp_a3stype Function %carr\n"
8057 "%f = OpAccessChain %fp_f32 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
8058 "%f_l = OpLoad %f32 %f\n"
8059 "%add_vec = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %f_l\n"
8060 "%transformed_param = OpFAdd %v4f32 %param1 %add_vec\n"}};
8061
8062 getHalfColorsFullAlpha(inputColors);
8063 outputColors[0] = RGBA(255, 255, 255, 255);
8064 outputColors[1] = RGBA(255, 127, 127, 255);
8065 outputColors[2] = RGBA(127, 255, 127, 255);
8066 outputColors[3] = RGBA(127, 127, 255, 255);
8067
8068 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
8069 {
8070 map<string, string> fragments;
8071 fragments["pre_main"] = tests[testNdx].constants;
8072 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
8073 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments,
8074 opConstantCompositeTests.get());
8075 }
8076 return opConstantCompositeTests.release();
8077 }
8078
createSelectionBlockOrderTests(tcu::TestContext & testCtx)8079 tcu::TestCaseGroup *createSelectionBlockOrderTests(tcu::TestContext &testCtx)
8080 {
8081 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "selection_block_order"));
8082 RGBA inputColors[4];
8083 RGBA outputColors[4];
8084 map<string, string> fragments;
8085
8086 // vec4 test_code(vec4 param) {
8087 // vec4 result = param;
8088 // for (int i = 0; i < 4; ++i) {
8089 // if (i == 0) result[i] = 0.;
8090 // else result[i] = 1. - result[i];
8091 // }
8092 // return result;
8093 // }
8094 const char function[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8095 "%param1 = OpFunctionParameter %v4f32\n"
8096 "%lbl = OpLabel\n"
8097 "%iptr = OpVariable %fp_i32 Function\n"
8098 "%result = OpVariable %fp_v4f32 Function\n"
8099 " OpStore %iptr %c_i32_0\n"
8100 " OpStore %result %param1\n"
8101 " OpBranch %loop\n"
8102
8103 // Loop entry block.
8104 "%loop = OpLabel\n"
8105 "%ival = OpLoad %i32 %iptr\n"
8106 "%lt_4 = OpSLessThan %bool %ival %c_i32_4\n"
8107 " OpLoopMerge %exit %if_entry None\n"
8108 " OpBranchConditional %lt_4 %if_entry %exit\n"
8109
8110 // Merge block for loop.
8111 "%exit = OpLabel\n"
8112 "%ret = OpLoad %v4f32 %result\n"
8113 " OpReturnValue %ret\n"
8114
8115 // If-statement entry block.
8116 "%if_entry = OpLabel\n"
8117 "%loc = OpAccessChain %fp_f32 %result %ival\n"
8118 "%eq_0 = OpIEqual %bool %ival %c_i32_0\n"
8119 " OpSelectionMerge %if_exit None\n"
8120 " OpBranchConditional %eq_0 %if_true %if_false\n"
8121
8122 // False branch for if-statement.
8123 "%if_false = OpLabel\n"
8124 "%val = OpLoad %f32 %loc\n"
8125 "%sub = OpFSub %f32 %c_f32_1 %val\n"
8126 " OpStore %loc %sub\n"
8127 " OpBranch %if_exit\n"
8128
8129 // Merge block for if-statement.
8130 "%if_exit = OpLabel\n"
8131 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
8132 " OpStore %iptr %ival_next\n"
8133 " OpBranch %loop\n"
8134
8135 // True branch for if-statement.
8136 "%if_true = OpLabel\n"
8137 " OpStore %loc %c_f32_0\n"
8138 " OpBranch %if_exit\n"
8139
8140 " OpFunctionEnd\n";
8141
8142 fragments["testfun"] = function;
8143
8144 inputColors[0] = RGBA(127, 127, 127, 0);
8145 inputColors[1] = RGBA(127, 0, 0, 0);
8146 inputColors[2] = RGBA(0, 127, 0, 0);
8147 inputColors[3] = RGBA(0, 0, 127, 0);
8148
8149 outputColors[0] = RGBA(0, 128, 128, 255);
8150 outputColors[1] = RGBA(0, 255, 255, 255);
8151 outputColors[2] = RGBA(0, 128, 255, 255);
8152 outputColors[3] = RGBA(0, 255, 128, 255);
8153
8154 createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
8155
8156 return group.release();
8157 }
8158
createSwitchBlockOrderTests(tcu::TestContext & testCtx)8159 tcu::TestCaseGroup *createSwitchBlockOrderTests(tcu::TestContext &testCtx)
8160 {
8161 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "switch_block_order"));
8162 RGBA inputColors[4];
8163 RGBA outputColors[4];
8164 map<string, string> fragments;
8165
8166 const char typesAndConstants[] = "%c_f32_p2 = OpConstant %f32 0.2\n"
8167 "%c_f32_p4 = OpConstant %f32 0.4\n"
8168 "%c_f32_p6 = OpConstant %f32 0.6\n"
8169 "%c_f32_p8 = OpConstant %f32 0.8\n";
8170
8171 // vec4 test_code(vec4 param) {
8172 // vec4 result = param;
8173 // for (int i = 0; i < 4; ++i) {
8174 // switch (i) {
8175 // case 0: result[i] += .2; break;
8176 // case 1: result[i] += .6; break;
8177 // case 2: result[i] += .4; break;
8178 // case 3: result[i] += .8; break;
8179 // default: break; // unreachable
8180 // }
8181 // }
8182 // return result;
8183 // }
8184 const char function[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8185 "%param1 = OpFunctionParameter %v4f32\n"
8186 "%lbl = OpLabel\n"
8187 "%iptr = OpVariable %fp_i32 Function\n"
8188 "%result = OpVariable %fp_v4f32 Function\n"
8189 " OpStore %iptr %c_i32_0\n"
8190 " OpStore %result %param1\n"
8191 " OpBranch %loop\n"
8192
8193 // Loop entry block.
8194 "%loop = OpLabel\n"
8195 "%ival = OpLoad %i32 %iptr\n"
8196 "%lt_4 = OpSLessThan %bool %ival %c_i32_4\n"
8197 " OpLoopMerge %exit %cont None\n"
8198 " OpBranchConditional %lt_4 %switch_entry %exit\n"
8199
8200 // Merge block for loop.
8201 "%exit = OpLabel\n"
8202 "%ret = OpLoad %v4f32 %result\n"
8203 " OpReturnValue %ret\n"
8204
8205 // Switch-statement entry block.
8206 "%switch_entry = OpLabel\n"
8207 "%loc = OpAccessChain %fp_f32 %result %ival\n"
8208 "%val = OpLoad %f32 %loc\n"
8209 " OpSelectionMerge %switch_exit None\n"
8210 " OpSwitch %ival %switch_default 0 %case0 1 %case1 2 %case2 3 %case3\n"
8211
8212 "%case2 = OpLabel\n"
8213 "%addp4 = OpFAdd %f32 %val %c_f32_p4\n"
8214 " OpStore %loc %addp4\n"
8215 " OpBranch %switch_exit\n"
8216
8217 "%switch_default = OpLabel\n"
8218 " OpUnreachable\n"
8219
8220 "%case3 = OpLabel\n"
8221 "%addp8 = OpFAdd %f32 %val %c_f32_p8\n"
8222 " OpStore %loc %addp8\n"
8223 " OpBranch %switch_exit\n"
8224
8225 "%case0 = OpLabel\n"
8226 "%addp2 = OpFAdd %f32 %val %c_f32_p2\n"
8227 " OpStore %loc %addp2\n"
8228 " OpBranch %switch_exit\n"
8229
8230 // Merge block for switch-statement.
8231 "%switch_exit = OpLabel\n"
8232 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
8233 " OpStore %iptr %ival_next\n"
8234 " OpBranch %cont\n"
8235 "%cont = OpLabel\n"
8236 " OpBranch %loop\n"
8237
8238 "%case1 = OpLabel\n"
8239 "%addp6 = OpFAdd %f32 %val %c_f32_p6\n"
8240 " OpStore %loc %addp6\n"
8241 " OpBranch %switch_exit\n"
8242
8243 " OpFunctionEnd\n";
8244
8245 fragments["pre_main"] = typesAndConstants;
8246 fragments["testfun"] = function;
8247
8248 inputColors[0] = RGBA(127, 27, 127, 51);
8249 inputColors[1] = RGBA(127, 0, 0, 51);
8250 inputColors[2] = RGBA(0, 27, 0, 51);
8251 inputColors[3] = RGBA(0, 0, 127, 51);
8252
8253 outputColors[0] = RGBA(178, 180, 229, 255);
8254 outputColors[1] = RGBA(178, 153, 102, 255);
8255 outputColors[2] = RGBA(51, 180, 102, 255);
8256 outputColors[3] = RGBA(51, 153, 229, 255);
8257
8258 createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
8259
8260 addOpSwitchAmberTests(*group, testCtx);
8261
8262 return group.release();
8263 }
8264
createDecorationGroupTests(tcu::TestContext & testCtx)8265 tcu::TestCaseGroup *createDecorationGroupTests(tcu::TestContext &testCtx)
8266 {
8267 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "decoration_group"));
8268 RGBA inputColors[4];
8269 RGBA outputColors[4];
8270 map<string, string> fragments;
8271
8272 const char decorations[] = "OpDecorate %array_group ArrayStride 4\n"
8273 "OpDecorate %struct_member_group Offset 0\n"
8274 "%array_group = OpDecorationGroup\n"
8275 "%struct_member_group = OpDecorationGroup\n"
8276
8277 "OpDecorate %group1 RelaxedPrecision\n"
8278 "OpDecorate %group3 RelaxedPrecision\n"
8279 "OpDecorate %group3 Flat\n"
8280 "OpDecorate %group3 Restrict\n"
8281 "%group0 = OpDecorationGroup\n"
8282 "%group1 = OpDecorationGroup\n"
8283 "%group3 = OpDecorationGroup\n";
8284
8285 const char typesAndConstants[] = "%a3f32 = OpTypeArray %f32 %c_u32_3\n"
8286 "%struct1 = OpTypeStruct %a3f32\n"
8287 "%struct2 = OpTypeStruct %a3f32\n"
8288 "%fp_struct1 = OpTypePointer Function %struct1\n"
8289 "%fp_struct2 = OpTypePointer Function %struct2\n"
8290 "%c_f32_2 = OpConstant %f32 2.\n"
8291 "%c_f32_n2 = OpConstant %f32 -2.\n"
8292
8293 "%c_a3f32_1 = OpConstantComposite %a3f32 %c_f32_1 %c_f32_2 %c_f32_1\n"
8294 "%c_a3f32_2 = OpConstantComposite %a3f32 %c_f32_n1 %c_f32_n2 %c_f32_n1\n"
8295 "%c_struct1 = OpConstantComposite %struct1 %c_a3f32_1\n"
8296 "%c_struct2 = OpConstantComposite %struct2 %c_a3f32_2\n";
8297
8298 const char function[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8299 "%param = OpFunctionParameter %v4f32\n"
8300 "%entry = OpLabel\n"
8301 "%result = OpVariable %fp_v4f32 Function\n"
8302 "%v_struct1 = OpVariable %fp_struct1 Function\n"
8303 "%v_struct2 = OpVariable %fp_struct2 Function\n"
8304 " OpStore %result %param\n"
8305 " OpStore %v_struct1 %c_struct1\n"
8306 " OpStore %v_struct2 %c_struct2\n"
8307 "%ptr1 = OpAccessChain %fp_f32 %v_struct1 %c_i32_0 %c_i32_2\n"
8308 "%val1 = OpLoad %f32 %ptr1\n"
8309 "%ptr2 = OpAccessChain %fp_f32 %v_struct2 %c_i32_0 %c_i32_2\n"
8310 "%val2 = OpLoad %f32 %ptr2\n"
8311 "%addvalues = OpFAdd %f32 %val1 %val2\n"
8312 "%ptr = OpAccessChain %fp_f32 %result %c_i32_1\n"
8313 "%val = OpLoad %f32 %ptr\n"
8314 "%addresult = OpFAdd %f32 %addvalues %val\n"
8315 " OpStore %ptr %addresult\n"
8316 "%ret = OpLoad %v4f32 %result\n"
8317 " OpReturnValue %ret\n"
8318 " OpFunctionEnd\n";
8319
8320 struct CaseNameDecoration
8321 {
8322 string name;
8323 string decoration;
8324 };
8325
8326 CaseNameDecoration tests[] = {
8327 {"same_decoration_group_on_multiple_types",
8328 "OpGroupMemberDecorate %struct_member_group %struct1 0 %struct2 0\n"},
8329 {"empty_decoration_group", "OpGroupDecorate %group0 %a3f32\n"
8330 "OpGroupDecorate %group0 %result\n"},
8331 {"one_element_decoration_group", "OpGroupDecorate %array_group %a3f32\n"},
8332 {"multiple_elements_decoration_group", "OpGroupDecorate %group3 %v_struct1\n"},
8333 {"multiple_decoration_groups_on_same_variable", "OpGroupDecorate %group0 %v_struct2\n"
8334 "OpGroupDecorate %group1 %v_struct2\n"
8335 "OpGroupDecorate %group3 %v_struct2\n"},
8336 {"same_decoration_group_multiple_times", "OpGroupDecorate %group1 %addvalues\n"
8337 "OpGroupDecorate %group1 %addvalues\n"
8338 "OpGroupDecorate %group1 %addvalues\n"},
8339
8340 };
8341
8342 getHalfColorsFullAlpha(inputColors);
8343 getHalfColorsFullAlpha(outputColors);
8344
8345 for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
8346 {
8347 fragments["decoration"] = decorations + tests[idx].decoration;
8348 fragments["pre_main"] = typesAndConstants;
8349 fragments["testfun"] = function;
8350
8351 createTestsForAllStages(tests[idx].name, inputColors, outputColors, fragments, group.get());
8352 }
8353
8354 return group.release();
8355 }
8356
8357 struct SpecConstantTwoValGraphicsCase
8358 {
8359 const std::string caseName;
8360 const std::string scDefinition0;
8361 const std::string scDefinition1;
8362 const std::string scResultType;
8363 const std::string scOperation;
8364 SpecConstantValue scActualValue0;
8365 SpecConstantValue scActualValue1;
8366 const std::string resultOperation;
8367 RGBA expectedColors[4];
8368 CaseFlags caseFlags;
8369
SpecConstantTwoValGraphicsCasevkt::SpirVAssembly::SpecConstantTwoValGraphicsCase8370 SpecConstantTwoValGraphicsCase(const std::string &name, const std::string &definition0,
8371 const std::string &definition1, const std::string &resultType,
8372 const std::string &operation, const SpecConstantValue &value0,
8373 const SpecConstantValue &value1, const std::string &resultOp,
8374 const RGBA (&output)[4], CaseFlags flags = FLAG_NONE)
8375 : caseName(name)
8376 , scDefinition0(definition0)
8377 , scDefinition1(definition1)
8378 , scResultType(resultType)
8379 , scOperation(operation)
8380 , scActualValue0(value0)
8381 , scActualValue1(value1)
8382 , resultOperation(resultOp)
8383 , caseFlags(flags)
8384 {
8385 expectedColors[0] = output[0];
8386 expectedColors[1] = output[1];
8387 expectedColors[2] = output[2];
8388 expectedColors[3] = output[3];
8389 }
8390 };
8391
createSpecConstantTests(tcu::TestContext & testCtx)8392 tcu::TestCaseGroup *createSpecConstantTests(tcu::TestContext &testCtx)
8393 {
8394 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opspecconstantop"));
8395 vector<SpecConstantTwoValGraphicsCase> cases;
8396 RGBA inputColors[4];
8397 RGBA outputColors0[4];
8398 RGBA outputColors1[4];
8399 RGBA outputColors2[4];
8400
8401 const char decorations1[] = "OpDecorate %sc_0 SpecId 0\n"
8402 "OpDecorate %sc_1 SpecId 1\n";
8403
8404 const char typesAndConstants1[] = "${OPTYPE_DEFINITIONS:opt}"
8405 "%sc_0 = OpSpecConstant${SC_DEF0}\n"
8406 "%sc_1 = OpSpecConstant${SC_DEF1}\n"
8407 "%sc_op = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
8408
8409 const char function1[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8410 "%param = OpFunctionParameter %v4f32\n"
8411 "%label = OpLabel\n"
8412 "%result = OpVariable %fp_v4f32 Function\n"
8413 "${TYPE_CONVERT:opt}"
8414 " OpStore %result %param\n"
8415 "%gen = ${GEN_RESULT}\n"
8416 "%index = OpIAdd %i32 %gen %c_i32_1\n"
8417 "%loc = OpAccessChain %fp_f32 %result %index\n"
8418 "%val = OpLoad %f32 %loc\n"
8419 "%add = OpFAdd %f32 %val %c_f32_0_5\n"
8420 " OpStore %loc %add\n"
8421 "%ret = OpLoad %v4f32 %result\n"
8422 " OpReturnValue %ret\n"
8423 " OpFunctionEnd\n";
8424
8425 inputColors[0] = RGBA(127, 127, 127, 255);
8426 inputColors[1] = RGBA(127, 0, 0, 255);
8427 inputColors[2] = RGBA(0, 127, 0, 255);
8428 inputColors[3] = RGBA(0, 0, 127, 255);
8429
8430 // Derived from inputColors[x] by adding 128 to inputColors[x][0].
8431 outputColors0[0] = RGBA(255, 127, 127, 255);
8432 outputColors0[1] = RGBA(255, 0, 0, 255);
8433 outputColors0[2] = RGBA(128, 127, 0, 255);
8434 outputColors0[3] = RGBA(128, 0, 127, 255);
8435
8436 // Derived from inputColors[x] by adding 128 to inputColors[x][1].
8437 outputColors1[0] = RGBA(127, 255, 127, 255);
8438 outputColors1[1] = RGBA(127, 128, 0, 255);
8439 outputColors1[2] = RGBA(0, 255, 0, 255);
8440 outputColors1[3] = RGBA(0, 128, 127, 255);
8441
8442 // Derived from inputColors[x] by adding 128 to inputColors[x][2].
8443 outputColors2[0] = RGBA(127, 127, 255, 255);
8444 outputColors2[1] = RGBA(127, 0, 128, 255);
8445 outputColors2[2] = RGBA(0, 127, 128, 255);
8446 outputColors2[3] = RGBA(0, 0, 255, 255);
8447
8448 const char addZeroToSc[] = "OpIAdd %i32 %c_i32_0 %sc_op";
8449 const char addZeroToSc32[] = "OpIAdd %i32 %c_i32_0 %sc_op32";
8450 const char selectTrueUsingSc[] = "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
8451 const char selectFalseUsingSc[] = "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
8452
8453 cases.push_back(SpecConstantTwoValGraphicsCase(
8454 "iadd", " %i32 0", " %i32 0", "%i32", "IAdd %sc_0 %sc_1", 19, -20, addZeroToSc, outputColors0));
8455 cases.push_back(SpecConstantTwoValGraphicsCase(
8456 "isub", " %i32 0", " %i32 0", "%i32", "ISub %sc_0 %sc_1", 19, 20, addZeroToSc, outputColors0));
8457 cases.push_back(SpecConstantTwoValGraphicsCase(
8458 "imul", " %i32 0", " %i32 0", "%i32", "IMul %sc_0 %sc_1", -1, -1, addZeroToSc, outputColors2));
8459 cases.push_back(SpecConstantTwoValGraphicsCase("sdiv", " %i32 0", " %i32 0", "%i32",
8460 "SDiv %sc_0 %sc_1", -126, 126, addZeroToSc,
8461 outputColors0));
8462 cases.push_back(SpecConstantTwoValGraphicsCase("udiv", " %i32 0", " %i32 0", "%i32",
8463 "UDiv %sc_0 %sc_1", 126, 126, addZeroToSc,
8464 outputColors2));
8465 cases.push_back(SpecConstantTwoValGraphicsCase(
8466 "srem", " %i32 0", " %i32 0", "%i32", "SRem %sc_0 %sc_1", 3, 2, addZeroToSc, outputColors2));
8467 cases.push_back(SpecConstantTwoValGraphicsCase(
8468 "smod", " %i32 0", " %i32 0", "%i32", "SMod %sc_0 %sc_1", 3, 2, addZeroToSc, outputColors2));
8469 cases.push_back(SpecConstantTwoValGraphicsCase("umod", " %i32 0", " %i32 0", "%i32",
8470 "UMod %sc_0 %sc_1", 1001, 500, addZeroToSc,
8471 outputColors2));
8472 cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseand", " %i32 0", " %i32 0", "%i32",
8473 "BitwiseAnd %sc_0 %sc_1", 0x33, 0x0d, addZeroToSc,
8474 outputColors2));
8475 cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseor", " %i32 0", " %i32 0", "%i32",
8476 "BitwiseOr %sc_0 %sc_1", 0, 1, addZeroToSc,
8477 outputColors2));
8478 cases.push_back(SpecConstantTwoValGraphicsCase("bitwisexor", " %i32 0", " %i32 0", "%i32",
8479 "BitwiseXor %sc_0 %sc_1", 0x2e, 0x2f, addZeroToSc,
8480 outputColors2));
8481 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical", " %i32 0", " %i32 0", "%i32",
8482 "ShiftRightLogical %sc_0 %sc_1", 2, 1, addZeroToSc,
8483 outputColors2));
8484 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic", " %i32 0", " %i32 0", "%i32",
8485 "ShiftRightArithmetic %sc_0 %sc_1", -4, 2, addZeroToSc,
8486 outputColors0));
8487 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical", " %i32 0", " %i32 0", "%i32",
8488 "ShiftLeftLogical %sc_0 %sc_1", 1, 0, addZeroToSc,
8489 outputColors2));
8490
8491 // Shifts for other integer sizes.
8492 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i64", " %i64 0", " %i64 0", "%i64",
8493 "ShiftRightLogical %sc_0 %sc_1", int64_t{2}, int64_t{1},
8494 addZeroToSc32, outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8495 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i64", " %i64 0", " %i64 0", "%i64",
8496 "ShiftRightArithmetic %sc_0 %sc_1", int64_t{-4}, int64_t{2},
8497 addZeroToSc32, outputColors0, (FLAG_I64 | FLAG_CONVERT)));
8498 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i64", " %i64 0", " %i64 0", "%i64",
8499 "ShiftLeftLogical %sc_0 %sc_1", int64_t{1}, int64_t{0},
8500 addZeroToSc32, outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8501 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i16", " %i16 0", " %i16 0", "%i16",
8502 "ShiftRightLogical %sc_0 %sc_1", int16_t{2}, int16_t{1},
8503 addZeroToSc32, outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8504 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i16", " %i16 0", " %i16 0", "%i16",
8505 "ShiftRightArithmetic %sc_0 %sc_1", int16_t{-4}, int16_t{2},
8506 addZeroToSc32, outputColors0, (FLAG_I16 | FLAG_CONVERT)));
8507 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i16", " %i16 0", " %i16 0", "%i16",
8508 "ShiftLeftLogical %sc_0 %sc_1", int16_t{1}, int16_t{0},
8509 addZeroToSc32, outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8510 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i8", " %i8 0", " %i8 0", "%i8",
8511 "ShiftRightLogical %sc_0 %sc_1", int8_t{2}, int8_t{1},
8512 addZeroToSc32, outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8513 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i8", " %i8 0", " %i8 0", "%i8",
8514 "ShiftRightArithmetic %sc_0 %sc_1", int8_t{-4}, int8_t{2},
8515 addZeroToSc32, outputColors0, (FLAG_I8 | FLAG_CONVERT)));
8516 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i8", " %i8 0", " %i8 0", "%i8",
8517 "ShiftLeftLogical %sc_0 %sc_1", int8_t{1}, int8_t{0},
8518 addZeroToSc32, outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8519
8520 // Shifts for other integer sizes but only in the shift amount.
8521 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i64", " %i32 0", " %i64 0", "%i32",
8522 "ShiftRightLogical %sc_0 %sc_1", 2, int64_t{1}, addZeroToSc,
8523 outputColors2, (FLAG_I64)));
8524 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i64", " %i32 0", " %i64 0", "%i32",
8525 "ShiftRightArithmetic %sc_0 %sc_1", -4, int64_t{2}, addZeroToSc,
8526 outputColors0, (FLAG_I64)));
8527 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i64", " %i32 0", " %i64 0", "%i32",
8528 "ShiftLeftLogical %sc_0 %sc_1", 1, int64_t{0}, addZeroToSc,
8529 outputColors2, (FLAG_I64)));
8530 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i16", " %i32 0", " %i16 0", "%i32",
8531 "ShiftRightLogical %sc_0 %sc_1", 2, int16_t{1}, addZeroToSc,
8532 outputColors2, (FLAG_I16)));
8533 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i16", " %i32 0", " %i16 0", "%i32",
8534 "ShiftRightArithmetic %sc_0 %sc_1", -4, int16_t{2}, addZeroToSc,
8535 outputColors0, (FLAG_I16)));
8536 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i16", " %i32 0", " %i16 0", "%i32",
8537 "ShiftLeftLogical %sc_0 %sc_1", 1, int16_t{0}, addZeroToSc,
8538 outputColors2, (FLAG_I16)));
8539 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i8", " %i32 0", " %i8 0", "%i32",
8540 "ShiftRightLogical %sc_0 %sc_1", 2, int8_t{1}, addZeroToSc,
8541 outputColors2, (FLAG_I8)));
8542 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i8", " %i32 0", " %i8 0", "%i32",
8543 "ShiftRightArithmetic %sc_0 %sc_1", -4, int8_t{2}, addZeroToSc,
8544 outputColors0, (FLAG_I8)));
8545 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i8", " %i32 0", " %i8 0", "%i32",
8546 "ShiftLeftLogical %sc_0 %sc_1", 1, int8_t{0}, addZeroToSc,
8547 outputColors2, (FLAG_I8)));
8548
8549 cases.push_back(SpecConstantTwoValGraphicsCase("slessthan", " %i32 0", " %i32 0", "%bool",
8550 "SLessThan %sc_0 %sc_1", -20, -10, selectTrueUsingSc,
8551 outputColors2));
8552 cases.push_back(SpecConstantTwoValGraphicsCase("ulessthan", " %i32 0", " %i32 0", "%bool",
8553 "ULessThan %sc_0 %sc_1", 10, 20, selectTrueUsingSc,
8554 outputColors2));
8555 cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthan", " %i32 0", " %i32 0", "%bool",
8556 "SGreaterThan %sc_0 %sc_1", -1000, 50, selectFalseUsingSc,
8557 outputColors2));
8558 cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthan", " %i32 0", " %i32 0", "%bool",
8559 "UGreaterThan %sc_0 %sc_1", 10, 5, selectTrueUsingSc,
8560 outputColors2));
8561 cases.push_back(SpecConstantTwoValGraphicsCase("slessthanequal", " %i32 0", " %i32 0", "%bool",
8562 "SLessThanEqual %sc_0 %sc_1", -10, -10, selectTrueUsingSc,
8563 outputColors2));
8564 cases.push_back(SpecConstantTwoValGraphicsCase("ulessthanequal", " %i32 0", " %i32 0", "%bool",
8565 "ULessThanEqual %sc_0 %sc_1", 50, 100, selectTrueUsingSc,
8566 outputColors2));
8567 cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthanequal", " %i32 0", " %i32 0", "%bool",
8568 "SGreaterThanEqual %sc_0 %sc_1", -1000, 50, selectFalseUsingSc,
8569 outputColors2));
8570 cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthanequal", " %i32 0", " %i32 0", "%bool",
8571 "UGreaterThanEqual %sc_0 %sc_1", 10, 10, selectTrueUsingSc,
8572 outputColors2));
8573 cases.push_back(SpecConstantTwoValGraphicsCase("iequal", " %i32 0", " %i32 0", "%bool",
8574 "IEqual %sc_0 %sc_1", 42, 24, selectFalseUsingSc,
8575 outputColors2));
8576 cases.push_back(SpecConstantTwoValGraphicsCase("inotequal", " %i32 0", " %i32 0", "%bool",
8577 "INotEqual %sc_0 %sc_1", 42, 24, selectTrueUsingSc,
8578 outputColors2));
8579 cases.push_back(SpecConstantTwoValGraphicsCase("logicaland", "True %bool", "True %bool", "%bool",
8580 "LogicalAnd %sc_0 %sc_1", 0, 1, selectFalseUsingSc,
8581 outputColors2));
8582 cases.push_back(SpecConstantTwoValGraphicsCase("logicalor", "False %bool", "False %bool", "%bool",
8583 "LogicalOr %sc_0 %sc_1", 1, 0, selectTrueUsingSc,
8584 outputColors2));
8585 cases.push_back(SpecConstantTwoValGraphicsCase("logicalequal", "True %bool", "True %bool", "%bool",
8586 "LogicalEqual %sc_0 %sc_1", 0, 1, selectFalseUsingSc,
8587 outputColors2));
8588 cases.push_back(SpecConstantTwoValGraphicsCase("logicalnotequal", "False %bool", "False %bool", "%bool",
8589 "LogicalNotEqual %sc_0 %sc_1", 1, 0, selectTrueUsingSc,
8590 outputColors2));
8591 cases.push_back(SpecConstantTwoValGraphicsCase("snegate", " %i32 0", " %i32 0", "%i32",
8592 "SNegate %sc_0", -1, 0, addZeroToSc, outputColors2));
8593 cases.push_back(SpecConstantTwoValGraphicsCase("not", " %i32 0", " %i32 0", "%i32", "Not %sc_0",
8594 -2, 0, addZeroToSc, outputColors2));
8595 cases.push_back(SpecConstantTwoValGraphicsCase("logicalnot", "False %bool", "False %bool", "%bool",
8596 "LogicalNot %sc_0", 1, 0, selectFalseUsingSc,
8597 outputColors2));
8598 cases.push_back(SpecConstantTwoValGraphicsCase("select", "False %bool", " %i32 0", "%i32",
8599 "Select %sc_0 %sc_1 %c_i32_0", 1, 1, addZeroToSc,
8600 outputColors2));
8601 cases.push_back(SpecConstantTwoValGraphicsCase("sconvert", " %i32 0", " %i32 0", "%i16",
8602 "SConvert %sc_0", -1, 0, addZeroToSc32, outputColors0,
8603 (FLAG_I16 | FLAG_CONVERT)));
8604 cases.push_back(SpecConstantTwoValGraphicsCase("fconvert", " %f32 0", " %f32 0", "%f64",
8605 "FConvert %sc_0", tcu::Float32(-1.0), tcu::Float32(0.0),
8606 addZeroToSc32, outputColors0, (FLAG_F64 | FLAG_CONVERT)));
8607 cases.push_back(SpecConstantTwoValGraphicsCase("fconvert16", " %f16 0", " %f16 0", "%f32",
8608 "FConvert %sc_0", tcu::Float16(-1.0), tcu::Float16(0.0),
8609 addZeroToSc32, outputColors0, (FLAG_F16 | FLAG_CONVERT)));
8610 // \todo[2015-12-1 antiagainst] OpQuantizeToF16
8611
8612 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
8613 {
8614 map<string, string> specializations;
8615 map<string, string> fragments;
8616 SpecConstants specConstants;
8617 PushConstants noPushConstants;
8618 GraphicsResources noResources;
8619 GraphicsInterfaces noInterfaces;
8620 vector<string> extensions;
8621 VulkanFeatures requiredFeatures;
8622
8623 // Special SPIR-V code when using 16-bit integers.
8624 if (cases[caseNdx].caseFlags & FLAG_I16)
8625 {
8626 requiredFeatures.coreFeatures.shaderInt16 = VK_TRUE;
8627 fragments["capability"] += "OpCapability Int16\n"; // Adds 16-bit integer capability
8628 specializations["OPTYPE_DEFINITIONS"] += "%i16 = OpTypeInt 16 1\n"; // Adds 16-bit integer type
8629 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8630 specializations["TYPE_CONVERT"] +=
8631 "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 16-bit integer to 32-bit integer
8632 }
8633
8634 // Special SPIR-V code when using 64-bit integers.
8635 if (cases[caseNdx].caseFlags & FLAG_I64)
8636 {
8637 requiredFeatures.coreFeatures.shaderInt64 = VK_TRUE;
8638 fragments["capability"] += "OpCapability Int64\n"; // Adds 64-bit integer capability
8639 specializations["OPTYPE_DEFINITIONS"] += "%i64 = OpTypeInt 64 1\n"; // Adds 64-bit integer type
8640 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8641 specializations["TYPE_CONVERT"] +=
8642 "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 64-bit integer to 32-bit integer
8643 }
8644
8645 // Special SPIR-V code when using 64-bit floats.
8646 if (cases[caseNdx].caseFlags & FLAG_F64)
8647 {
8648 requiredFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
8649 fragments["capability"] += "OpCapability Float64\n"; // Adds 64-bit float capability
8650 specializations["OPTYPE_DEFINITIONS"] += "%f64 = OpTypeFloat 64\n"; // Adds 64-bit float type
8651 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8652 specializations["TYPE_CONVERT"] +=
8653 "%sc_op32 = OpConvertFToS %i32 %sc_op\n"; // Converts 64-bit float to 32-bit integer
8654 }
8655
8656 // Extension needed for float16 and int8.
8657 if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
8658 extensions.push_back("VK_KHR_shader_float16_int8");
8659
8660 // Special SPIR-V code when using 16-bit floats.
8661 if (cases[caseNdx].caseFlags & FLAG_F16)
8662 {
8663 requiredFeatures.extFloat16Int8.shaderFloat16 = true;
8664 fragments["capability"] += "OpCapability Float16\n"; // Adds 16-bit float capability
8665 specializations["OPTYPE_DEFINITIONS"] += "%f16 = OpTypeFloat 16\n"; // Adds 16-bit float type
8666 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8667 specializations["TYPE_CONVERT"] +=
8668 "%sc_op32 = OpConvertFToS %i32 %sc_op\n"; // Converts 16-bit float to 32-bit integer
8669 }
8670
8671 // Special SPIR-V code when using 8-bit integers.
8672 if (cases[caseNdx].caseFlags & FLAG_I8)
8673 {
8674 requiredFeatures.extFloat16Int8.shaderInt8 = true;
8675 fragments["capability"] += "OpCapability Int8\n"; // Adds 8-bit integer capability
8676 specializations["OPTYPE_DEFINITIONS"] += "%i8 = OpTypeInt 8 1\n"; // Adds 8-bit integer type
8677 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8678 specializations["TYPE_CONVERT"] +=
8679 "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 8-bit integer to 32-bit integer
8680 }
8681
8682 specializations["SC_DEF0"] = cases[caseNdx].scDefinition0;
8683 specializations["SC_DEF1"] = cases[caseNdx].scDefinition1;
8684 specializations["SC_RESULT_TYPE"] = cases[caseNdx].scResultType;
8685 specializations["SC_OP"] = cases[caseNdx].scOperation;
8686 specializations["GEN_RESULT"] = cases[caseNdx].resultOperation;
8687
8688 fragments["decoration"] = tcu::StringTemplate(decorations1).specialize(specializations);
8689 fragments["pre_main"] = tcu::StringTemplate(typesAndConstants1).specialize(specializations);
8690 fragments["testfun"] = tcu::StringTemplate(function1).specialize(specializations);
8691
8692 cases[caseNdx].scActualValue0.appendTo(specConstants);
8693 cases[caseNdx].scActualValue1.appendTo(specConstants);
8694
8695 createTestsForAllStages(cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments,
8696 specConstants, noPushConstants, noResources, noInterfaces, extensions, requiredFeatures,
8697 group.get());
8698 }
8699
8700 const char decorations2[] = "OpDecorate %sc_0 SpecId 0\n"
8701 "OpDecorate %sc_1 SpecId 1\n"
8702 "OpDecorate %sc_2 SpecId 2\n";
8703
8704 const std::string typesAndConstants2 = "%vec3_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
8705 "%vec3_undef = OpUndef %v3i32\n"
8706
8707 + getSpecConstantOpStructConstantsAndTypes() +
8708 getSpecConstantOpStructComposites() +
8709
8710 "%sc_0 = OpSpecConstant %i32 0\n"
8711 "%sc_1 = OpSpecConstant %i32 0\n"
8712 "%sc_2 = OpSpecConstant %i32 0\n"
8713
8714 + getSpecConstantOpStructConstBlock() +
8715
8716 "%sc_vec3_0 = OpSpecConstantOp %v3i32 CompositeInsert %sc_0 "
8717 "%vec3_0 0\n" // (sc_0, 0, 0)
8718 "%sc_vec3_1 = OpSpecConstantOp %v3i32 CompositeInsert %sc_1 "
8719 "%vec3_0 1\n" // (0, sc_1, 0)
8720 "%sc_vec3_2 = OpSpecConstantOp %v3i32 CompositeInsert %sc_2 "
8721 "%vec3_0 2\n" // (0, 0, sc_2)
8722 "%sc_vec3_0_s = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_0 "
8723 "%vec3_undef 0 0xFFFFFFFF 2\n" // (sc_0, ???, 0)
8724 "%sc_vec3_1_s = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_1 "
8725 "%vec3_undef 0xFFFFFFFF 1 0\n" // (???, sc_1, 0)
8726 "%sc_vec3_2_s = OpSpecConstantOp %v3i32 VectorShuffle %vec3_undef "
8727 "%sc_vec3_2 5 0xFFFFFFFF 5\n" // (sc_2, ???, sc_2)
8728 "%sc_vec3_01 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_0_s "
8729 "%sc_vec3_1_s 1 0 4\n" // (0, sc_0, sc_1)
8730 "%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_01 "
8731 "%sc_vec3_2_s 5 1 2\n" // (sc_2, sc_0, sc_1)
8732 "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 "
8733 " 0\n" // sc_2
8734 "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 "
8735 " 1\n" // sc_0
8736 "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 "
8737 " 2\n" // sc_1
8738 "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 "
8739 "%sc_ext_1\n" // (sc_2 - sc_0)
8740 "%sc_factor = OpSpecConstantOp %i32 IMul %sc_sub "
8741 "%sc_ext_2\n"; // (sc_2 - sc_0) * sc_1
8742
8743 const std::string function2 = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8744 "%param = OpFunctionParameter %v4f32\n"
8745 "%label = OpLabel\n"
8746 "%result = OpVariable %fp_v4f32 Function\n"
8747
8748 + getSpecConstantOpStructInstructions() +
8749
8750 " OpStore %result %param\n"
8751 "%loc = OpAccessChain %fp_f32 %result %sc_final\n"
8752 "%val = OpLoad %f32 %loc\n"
8753 "%add = OpFAdd %f32 %val %c_f32_0_5\n"
8754 " OpStore %loc %add\n"
8755 "%ret = OpLoad %v4f32 %result\n"
8756 " OpReturnValue %ret\n"
8757 " OpFunctionEnd\n";
8758
8759 map<string, string> fragments;
8760 SpecConstants specConstants;
8761
8762 fragments["decoration"] = decorations2;
8763 fragments["pre_main"] = typesAndConstants2;
8764 fragments["testfun"] = function2;
8765
8766 specConstants.append<int32_t>(56789);
8767 specConstants.append<int32_t>(-2);
8768 specConstants.append<int32_t>(56788);
8769
8770 createTestsForAllStages("vector_related", inputColors, outputColors2, fragments, specConstants, group.get());
8771
8772 return group.release();
8773 }
8774
createOpPhiTests(tcu::TestContext & testCtx)8775 tcu::TestCaseGroup *createOpPhiTests(tcu::TestContext &testCtx)
8776 {
8777 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opphi"));
8778 RGBA inputColors[4];
8779 RGBA outputColors1[4];
8780 RGBA outputColors2[4];
8781 RGBA outputColors3[4];
8782 RGBA outputColors4[4];
8783 map<string, string> fragments1;
8784 map<string, string> fragments2;
8785 map<string, string> fragments3;
8786 map<string, string> fragments4;
8787 std::vector<std::string> extensions4;
8788 GraphicsResources resources4;
8789 VulkanFeatures vulkanFeatures4;
8790
8791 const char typesAndConstants1[] = "%c_f32_p2 = OpConstant %f32 0.2\n"
8792 "%c_f32_p4 = OpConstant %f32 0.4\n"
8793 "%c_f32_p5 = OpConstant %f32 0.5\n"
8794 "%c_f32_p8 = OpConstant %f32 0.8\n";
8795
8796 // vec4 test_code(vec4 param) {
8797 // vec4 result = param;
8798 // for (int i = 0; i < 4; ++i) {
8799 // float operand;
8800 // switch (i) {
8801 // case 0: operand = .2; break;
8802 // case 1: operand = .5; break;
8803 // case 2: operand = .4; break;
8804 // case 3: operand = .0; break;
8805 // default: break; // unreachable
8806 // }
8807 // result[i] += operand;
8808 // }
8809 // return result;
8810 // }
8811 const char function1[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8812 "%param1 = OpFunctionParameter %v4f32\n"
8813 "%lbl = OpLabel\n"
8814 "%iptr = OpVariable %fp_i32 Function\n"
8815 "%result = OpVariable %fp_v4f32 Function\n"
8816 " OpStore %iptr %c_i32_0\n"
8817 " OpStore %result %param1\n"
8818 " OpBranch %loop\n"
8819
8820 "%loop = OpLabel\n"
8821 "%ival = OpLoad %i32 %iptr\n"
8822 "%lt_4 = OpSLessThan %bool %ival %c_i32_4\n"
8823 " OpLoopMerge %exit %cont None\n"
8824 " OpBranchConditional %lt_4 %entry %exit\n"
8825
8826 "%entry = OpLabel\n"
8827 "%loc = OpAccessChain %fp_f32 %result %ival\n"
8828 "%val = OpLoad %f32 %loc\n"
8829 " OpSelectionMerge %phi None\n"
8830 " OpSwitch %ival %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
8831
8832 "%case0 = OpLabel\n"
8833 " OpBranch %phi\n"
8834 "%case1 = OpLabel\n"
8835 " OpBranch %phi\n"
8836 "%case2 = OpLabel\n"
8837 " OpBranch %phi\n"
8838 "%case3 = OpLabel\n"
8839 " OpBranch %phi\n"
8840
8841 "%default = OpLabel\n"
8842 " OpUnreachable\n"
8843
8844 "%phi = OpLabel\n"
8845 "%operand = OpPhi %f32 %c_f32_p4 %case2 %c_f32_p5 %case1 %c_f32_p2 %case0 %c_f32_0 "
8846 "%case3\n" // not in the order of blocks
8847 " OpBranch %cont\n"
8848 "%cont = OpLabel\n"
8849 "%add = OpFAdd %f32 %val %operand\n"
8850 " OpStore %loc %add\n"
8851 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
8852 " OpStore %iptr %ival_next\n"
8853 " OpBranch %loop\n"
8854
8855 "%exit = OpLabel\n"
8856 "%ret = OpLoad %v4f32 %result\n"
8857 " OpReturnValue %ret\n"
8858
8859 " OpFunctionEnd\n";
8860
8861 fragments1["pre_main"] = typesAndConstants1;
8862 fragments1["testfun"] = function1;
8863
8864 getHalfColorsFullAlpha(inputColors);
8865
8866 outputColors1[0] = RGBA(178, 255, 229, 255);
8867 outputColors1[1] = RGBA(178, 127, 102, 255);
8868 outputColors1[2] = RGBA(51, 255, 102, 255);
8869 outputColors1[3] = RGBA(51, 127, 229, 255);
8870
8871 createTestsForAllStages("out_of_order", inputColors, outputColors1, fragments1, group.get());
8872
8873 const char typesAndConstants2[] = "%c_f32_p2 = OpConstant %f32 0.2\n";
8874
8875 // Add .4 to the second element of the given parameter.
8876 const char function2[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8877 "%param = OpFunctionParameter %v4f32\n"
8878 "%entry = OpLabel\n"
8879 "%result = OpVariable %fp_v4f32 Function\n"
8880 " OpStore %result %param\n"
8881 "%loc = OpAccessChain %fp_f32 %result %c_i32_1\n"
8882 "%val = OpLoad %f32 %loc\n"
8883 " OpBranch %phi\n"
8884
8885 "%phi = OpLabel\n"
8886 "%step = OpPhi %i32 %c_i32_0 %entry %step_next %phi\n"
8887 "%accum = OpPhi %f32 %val %entry %accum_next %phi\n"
8888 "%step_next = OpIAdd %i32 %step %c_i32_1\n"
8889 "%accum_next = OpFAdd %f32 %accum %c_f32_p2\n"
8890 "%still_loop = OpSLessThan %bool %step %c_i32_2\n"
8891 " OpLoopMerge %exit %phi None\n"
8892 " OpBranchConditional %still_loop %phi %exit\n"
8893
8894 "%exit = OpLabel\n"
8895 " OpStore %loc %accum\n"
8896 "%ret = OpLoad %v4f32 %result\n"
8897 " OpReturnValue %ret\n"
8898
8899 " OpFunctionEnd\n";
8900
8901 fragments2["pre_main"] = typesAndConstants2;
8902 fragments2["testfun"] = function2;
8903
8904 outputColors2[0] = RGBA(127, 229, 127, 255);
8905 outputColors2[1] = RGBA(127, 102, 0, 255);
8906 outputColors2[2] = RGBA(0, 229, 0, 255);
8907 outputColors2[3] = RGBA(0, 102, 127, 255);
8908
8909 createTestsForAllStages("induction", inputColors, outputColors2, fragments2, group.get());
8910
8911 const char typesAndConstants3[] = "%true = OpConstantTrue %bool\n"
8912 "%false = OpConstantFalse %bool\n"
8913 "%c_f32_p2 = OpConstant %f32 0.2\n";
8914
8915 // Swap the second and the third element of the given parameter.
8916 const char function3[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8917 "%param = OpFunctionParameter %v4f32\n"
8918 "%entry = OpLabel\n"
8919 "%result = OpVariable %fp_v4f32 Function\n"
8920 " OpStore %result %param\n"
8921 "%a_loc = OpAccessChain %fp_f32 %result %c_i32_1\n"
8922 "%a_init = OpLoad %f32 %a_loc\n"
8923 "%b_loc = OpAccessChain %fp_f32 %result %c_i32_2\n"
8924 "%b_init = OpLoad %f32 %b_loc\n"
8925 " OpBranch %phi\n"
8926
8927 "%phi = OpLabel\n"
8928 "%still_loop = OpPhi %bool %true %entry %false %phi\n"
8929 "%a_next = OpPhi %f32 %a_init %entry %b_next %phi\n"
8930 "%b_next = OpPhi %f32 %b_init %entry %a_next %phi\n"
8931 " OpLoopMerge %exit %phi None\n"
8932 " OpBranchConditional %still_loop %phi %exit\n"
8933
8934 "%exit = OpLabel\n"
8935 " OpStore %a_loc %a_next\n"
8936 " OpStore %b_loc %b_next\n"
8937 "%ret = OpLoad %v4f32 %result\n"
8938 " OpReturnValue %ret\n"
8939
8940 " OpFunctionEnd\n";
8941
8942 fragments3["pre_main"] = typesAndConstants3;
8943 fragments3["testfun"] = function3;
8944
8945 outputColors3[0] = RGBA(127, 127, 127, 255);
8946 outputColors3[1] = RGBA(127, 0, 0, 255);
8947 outputColors3[2] = RGBA(0, 0, 127, 255);
8948 outputColors3[3] = RGBA(0, 127, 0, 255);
8949
8950 createTestsForAllStages("swap", inputColors, outputColors3, fragments3, group.get());
8951
8952 const char typesAndConstants4[] = "%f16 = OpTypeFloat 16\n"
8953 "%v4f16 = OpTypeVector %f16 4\n"
8954 "%fp_f16 = OpTypePointer Function %f16\n"
8955 "%fp_v4f16 = OpTypePointer Function %v4f16\n"
8956 "%true = OpConstantTrue %bool\n"
8957 "%false = OpConstantFalse %bool\n"
8958 "%c_f32_p2 = OpConstant %f32 0.2\n";
8959
8960 // Swap the second and the third element of the given parameter.
8961 const char function4[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8962 "%param = OpFunctionParameter %v4f32\n"
8963 "%entry = OpLabel\n"
8964 "%result = OpVariable %fp_v4f16 Function\n"
8965 "%param16 = OpFConvert %v4f16 %param\n"
8966 " OpStore %result %param16\n"
8967 "%a_loc = OpAccessChain %fp_f16 %result %c_i32_1\n"
8968 "%a_init = OpLoad %f16 %a_loc\n"
8969 "%b_loc = OpAccessChain %fp_f16 %result %c_i32_2\n"
8970 "%b_init = OpLoad %f16 %b_loc\n"
8971 " OpBranch %phi\n"
8972
8973 "%phi = OpLabel\n"
8974 "%still_loop = OpPhi %bool %true %entry %false %phi\n"
8975 "%a_next = OpPhi %f16 %a_init %entry %b_next %phi\n"
8976 "%b_next = OpPhi %f16 %b_init %entry %a_next %phi\n"
8977 " OpLoopMerge %exit %phi None\n"
8978 " OpBranchConditional %still_loop %phi %exit\n"
8979
8980 "%exit = OpLabel\n"
8981 " OpStore %a_loc %a_next\n"
8982 " OpStore %b_loc %b_next\n"
8983 "%ret16 = OpLoad %v4f16 %result\n"
8984 "%ret = OpFConvert %v4f32 %ret16\n"
8985 " OpReturnValue %ret\n"
8986
8987 " OpFunctionEnd\n";
8988
8989 fragments4["pre_main"] = typesAndConstants4;
8990 fragments4["testfun"] = function4;
8991 fragments4["capability"] = "OpCapability Float16\n";
8992
8993 extensions4.push_back("VK_KHR_shader_float16_int8");
8994
8995 vulkanFeatures4.extFloat16Int8.shaderFloat16 = true;
8996
8997 outputColors4[0] = RGBA(127, 127, 127, 255);
8998 outputColors4[1] = RGBA(127, 0, 0, 255);
8999 outputColors4[2] = RGBA(0, 0, 127, 255);
9000 outputColors4[3] = RGBA(0, 127, 0, 255);
9001
9002 createTestsForAllStages("swap16", inputColors, outputColors4, fragments4, resources4, extensions4, group.get(),
9003 vulkanFeatures4);
9004
9005 return group.release();
9006 }
9007
createNoContractionTests(tcu::TestContext & testCtx)9008 tcu::TestCaseGroup *createNoContractionTests(tcu::TestContext &testCtx)
9009 {
9010 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "nocontraction"));
9011 RGBA inputColors[4];
9012 RGBA outputColors[4];
9013
9014 // With NoContraction, (1 + 2^-23) * (1 - 2^-23) - 1 should be conducted as a multiplication and an addition separately.
9015 // For the multiplication, the result is 1 - 2^-46, which is out of the precision range for 32-bit float. (32-bit float
9016 // only have 23-bit fraction.) So it will be rounded to 1. Or 0x1.fffffc. Then the final result is 0 or -0x1p-24.
9017 // On the contrary, the result will be 2^-46, which is a normalized number perfectly representable as 32-bit float.
9018 const char constantsAndTypes[] =
9019 "%c_vec4_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_1\n"
9020 "%c_vec4_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
9021 "%c_f32_1pl2_23 = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
9022 "%c_f32_1mi2_23 = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
9023 "%c_f32_n1pn24 = OpConstant %f32 -0x1p-24\n";
9024
9025 const char function[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9026 "%param = OpFunctionParameter %v4f32\n"
9027 "%label = OpLabel\n"
9028 "%var1 = OpVariable %fp_f32 Function %c_f32_1pl2_23\n"
9029 "%var2 = OpVariable %fp_f32 Function\n"
9030 "%red = OpCompositeExtract %f32 %param 0\n"
9031 "%plus_red = OpFAdd %f32 %c_f32_1mi2_23 %red\n"
9032 " OpStore %var2 %plus_red\n"
9033 "%val1 = OpLoad %f32 %var1\n"
9034 "%val2 = OpLoad %f32 %var2\n"
9035 "%mul = OpFMul %f32 %val1 %val2\n"
9036 "%add = OpFAdd %f32 %mul %c_f32_n1\n"
9037 "%is0 = OpFOrdEqual %bool %add %c_f32_0\n"
9038 "%isn1n24 = OpFOrdEqual %bool %add %c_f32_n1pn24\n"
9039 "%success = OpLogicalOr %bool %is0 %isn1n24\n"
9040 "%v4success = OpCompositeConstruct %v4bool %success %success %success %success\n"
9041 "%ret = OpSelect %v4f32 %v4success %c_vec4_0 %c_vec4_1\n"
9042 " OpReturnValue %ret\n"
9043 " OpFunctionEnd\n";
9044
9045 struct CaseNameDecoration
9046 {
9047 string name;
9048 string decoration;
9049 };
9050
9051 CaseNameDecoration tests[] = {
9052 {"multiplication", "OpDecorate %mul NoContraction"},
9053 {"addition", "OpDecorate %add NoContraction"},
9054 {"both", "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"},
9055 };
9056
9057 getHalfColorsFullAlpha(inputColors);
9058
9059 for (uint8_t idx = 0; idx < 4; ++idx)
9060 {
9061 inputColors[idx].setRed(0);
9062 outputColors[idx] = RGBA(0, 0, 0, 255);
9063 }
9064
9065 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(CaseNameDecoration); ++testNdx)
9066 {
9067 map<string, string> fragments;
9068
9069 fragments["decoration"] = tests[testNdx].decoration;
9070 fragments["pre_main"] = constantsAndTypes;
9071 fragments["testfun"] = function;
9072
9073 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, group.get());
9074 }
9075
9076 return group.release();
9077 }
9078
createMemoryAccessTests(tcu::TestContext & testCtx)9079 tcu::TestCaseGroup *createMemoryAccessTests(tcu::TestContext &testCtx)
9080 {
9081 de::MovePtr<tcu::TestCaseGroup> memoryAccessTests(new tcu::TestCaseGroup(testCtx, "opmemoryaccess"));
9082 RGBA colors[4];
9083
9084 const char constantsAndTypes[] = "%c_a2f32_1 = OpConstantComposite %a2f32 %c_f32_1 %c_f32_1\n"
9085 "%fp_a2f32 = OpTypePointer Function %a2f32\n"
9086 "%stype = OpTypeStruct %v4f32 %a2f32 %f32\n"
9087 "%fp_stype = OpTypePointer Function %stype\n";
9088
9089 const char function[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9090 "%param1 = OpFunctionParameter %v4f32\n"
9091 "%lbl = OpLabel\n"
9092 "%v1 = OpVariable %fp_v4f32 Function\n"
9093 "%v2 = OpVariable %fp_a2f32 Function\n"
9094 "%v3 = OpVariable %fp_f32 Function\n"
9095 "%v = OpVariable %fp_stype Function\n"
9096 "%vv = OpVariable %fp_stype Function\n"
9097 "%vvv = OpVariable %fp_f32 Function\n"
9098
9099 " OpStore %v1 %c_v4f32_1_1_1_1\n"
9100 " OpStore %v2 %c_a2f32_1\n"
9101 " OpStore %v3 %c_f32_1\n"
9102
9103 "%p_v4f32 = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
9104 "%p_a2f32 = OpAccessChain %fp_a2f32 %v %c_u32_1\n"
9105 "%p_f32 = OpAccessChain %fp_f32 %v %c_u32_2\n"
9106 "%v1_v = OpLoad %v4f32 %v1 ${access_type}\n"
9107 "%v2_v = OpLoad %a2f32 %v2 ${access_type}\n"
9108 "%v3_v = OpLoad %f32 %v3 ${access_type}\n"
9109
9110 " OpStore %p_v4f32 %v1_v ${access_type}\n"
9111 " OpStore %p_a2f32 %v2_v ${access_type}\n"
9112 " OpStore %p_f32 %v3_v ${access_type}\n"
9113
9114 " OpCopyMemory %vv %v ${access_type}\n"
9115 " OpCopyMemory %vvv %p_f32 ${access_type}\n"
9116
9117 "%p_f32_2 = OpAccessChain %fp_f32 %vv %c_u32_2\n"
9118 "%v_f32_2 = OpLoad %f32 %p_f32_2\n"
9119 "%v_f32_3 = OpLoad %f32 %vvv\n"
9120
9121 "%ret1 = OpVectorTimesScalar %v4f32 %param1 %v_f32_2\n"
9122 "%ret2 = OpVectorTimesScalar %v4f32 %ret1 %v_f32_3\n"
9123 " OpReturnValue %ret2\n"
9124 " OpFunctionEnd\n";
9125
9126 struct NameMemoryAccess
9127 {
9128 string name;
9129 string accessType;
9130 };
9131
9132 NameMemoryAccess tests[] = {
9133 {"none", ""},
9134 {"volatile", "Volatile"},
9135 {"aligned", "Aligned 1"},
9136 {"volatile_aligned", "Volatile|Aligned 1"},
9137 {"nontemporal_aligned", "Nontemporal|Aligned 1"},
9138 {"volatile_nontemporal", "Volatile|Nontemporal"},
9139 {"volatile_nontermporal_aligned", "Volatile|Nontemporal|Aligned 1"},
9140 };
9141
9142 getHalfColorsFullAlpha(colors);
9143
9144 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameMemoryAccess); ++testNdx)
9145 {
9146 map<string, string> fragments;
9147 map<string, string> memoryAccess;
9148 memoryAccess["access_type"] = tests[testNdx].accessType;
9149
9150 fragments["pre_main"] = constantsAndTypes;
9151 fragments["testfun"] = tcu::StringTemplate(function).specialize(memoryAccess);
9152 createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, memoryAccessTests.get());
9153 }
9154 return memoryAccessTests.release();
9155 }
createOpUndefTests(tcu::TestContext & testCtx)9156 tcu::TestCaseGroup *createOpUndefTests(tcu::TestContext &testCtx)
9157 {
9158 de::MovePtr<tcu::TestCaseGroup> opUndefTests(new tcu::TestCaseGroup(testCtx, "opundef"));
9159 RGBA defaultColors[4];
9160 map<string, string> fragments;
9161 getDefaultColors(defaultColors);
9162
9163 // First, simple cases that don't do anything with the OpUndef result.
9164 struct NameCodePair
9165 {
9166 string name, decl, type;
9167 };
9168 const NameCodePair tests[] = {{"bool", "", "%bool"},
9169 {"vec2uint32", "", "%v2u32"},
9170 {"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown", "%type"},
9171 {"sampler", "%type = OpTypeSampler", "%type"},
9172 {"sampledimage",
9173 "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
9174 "%type = OpTypeSampledImage %img",
9175 "%type"},
9176 {"pointer", "", "%fp_i32"},
9177 {"runtimearray", "%type = OpTypeRuntimeArray %f32", "%type"},
9178 {"array",
9179 "%c_u32_100 = OpConstant %u32 100\n"
9180 "%type = OpTypeArray %i32 %c_u32_100",
9181 "%type"},
9182 {"struct", "%type = OpTypeStruct %f32 %i32 %u32", "%type"}};
9183 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
9184 {
9185 fragments["undef_type"] = tests[testNdx].type;
9186 fragments["testfun"] = StringTemplate("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9187 "%param1 = OpFunctionParameter %v4f32\n"
9188 "%label_testfun = OpLabel\n"
9189 "%undef = OpUndef ${undef_type}\n"
9190 "OpReturnValue %param1\n"
9191 "OpFunctionEnd\n")
9192 .specialize(fragments);
9193 fragments["pre_main"] = tests[testNdx].decl;
9194 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opUndefTests.get());
9195 }
9196 fragments.clear();
9197
9198 fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9199 "%param1 = OpFunctionParameter %v4f32\n"
9200 "%label_testfun = OpLabel\n"
9201 "%undef = OpUndef %f32\n"
9202 "%zero = OpFMul %f32 %undef %c_f32_0\n"
9203 "%is_nan = OpIsNan %bool %zero\n" //OpUndef may result in NaN which may turn %zero into Nan.
9204 "%actually_zero = OpSelect %f32 %is_nan %c_f32_0 %zero\n"
9205 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9206 "%b = OpFAdd %f32 %a %actually_zero\n"
9207 "%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
9208 "OpReturnValue %ret\n"
9209 "OpFunctionEnd\n";
9210
9211 createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
9212
9213 fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9214 "%param1 = OpFunctionParameter %v4f32\n"
9215 "%label_testfun = OpLabel\n"
9216 "%undef = OpUndef %i32\n"
9217 "%zero = OpIMul %i32 %undef %c_i32_0\n"
9218 "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
9219 "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
9220 "OpReturnValue %ret\n"
9221 "OpFunctionEnd\n";
9222
9223 createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
9224
9225 fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9226 "%param1 = OpFunctionParameter %v4f32\n"
9227 "%label_testfun = OpLabel\n"
9228 "%undef = OpUndef %u32\n"
9229 "%zero = OpIMul %u32 %undef %c_i32_0\n"
9230 "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
9231 "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
9232 "OpReturnValue %ret\n"
9233 "OpFunctionEnd\n";
9234
9235 createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
9236
9237 fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9238 "%param1 = OpFunctionParameter %v4f32\n"
9239 "%label_testfun = OpLabel\n"
9240 "%undef = OpUndef %v4f32\n"
9241 "%vzero = OpVectorTimesScalar %v4f32 %undef %c_f32_0\n"
9242 "%zero_0 = OpVectorExtractDynamic %f32 %vzero %c_i32_0\n"
9243 "%zero_1 = OpVectorExtractDynamic %f32 %vzero %c_i32_1\n"
9244 "%zero_2 = OpVectorExtractDynamic %f32 %vzero %c_i32_2\n"
9245 "%zero_3 = OpVectorExtractDynamic %f32 %vzero %c_i32_3\n"
9246 "%is_nan_0 = OpIsNan %bool %zero_0\n"
9247 "%is_nan_1 = OpIsNan %bool %zero_1\n"
9248 "%is_nan_2 = OpIsNan %bool %zero_2\n"
9249 "%is_nan_3 = OpIsNan %bool %zero_3\n"
9250 "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
9251 "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
9252 "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
9253 "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
9254 "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9255 "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
9256 "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
9257 "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
9258 "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
9259 "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
9260 "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
9261 "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
9262 "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
9263 "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
9264 "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
9265 "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
9266 "OpReturnValue %ret\n"
9267 "OpFunctionEnd\n";
9268
9269 createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
9270
9271 fragments["pre_main"] = "%m2x2f32 = OpTypeMatrix %v2f32 2\n";
9272 fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9273 "%param1 = OpFunctionParameter %v4f32\n"
9274 "%label_testfun = OpLabel\n"
9275 "%undef = OpUndef %m2x2f32\n"
9276 "%mzero = OpMatrixTimesScalar %m2x2f32 %undef %c_f32_0\n"
9277 "%zero_0 = OpCompositeExtract %f32 %mzero 0 0\n"
9278 "%zero_1 = OpCompositeExtract %f32 %mzero 0 1\n"
9279 "%zero_2 = OpCompositeExtract %f32 %mzero 1 0\n"
9280 "%zero_3 = OpCompositeExtract %f32 %mzero 1 1\n"
9281 "%is_nan_0 = OpIsNan %bool %zero_0\n"
9282 "%is_nan_1 = OpIsNan %bool %zero_1\n"
9283 "%is_nan_2 = OpIsNan %bool %zero_2\n"
9284 "%is_nan_3 = OpIsNan %bool %zero_3\n"
9285 "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
9286 "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
9287 "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
9288 "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
9289 "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9290 "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
9291 "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
9292 "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
9293 "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
9294 "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
9295 "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
9296 "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
9297 "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
9298 "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
9299 "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
9300 "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
9301 "OpReturnValue %ret\n"
9302 "OpFunctionEnd\n";
9303
9304 createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
9305
9306 return opUndefTests.release();
9307 }
9308
createOpQuantizeSingleOptionTests(tcu::TestCaseGroup * testCtx)9309 void createOpQuantizeSingleOptionTests(tcu::TestCaseGroup *testCtx)
9310 {
9311 const RGBA inputColors[4] = {RGBA(0, 0, 0, 255), RGBA(0, 0, 255, 255), RGBA(0, 255, 0, 255),
9312 RGBA(0, 255, 255, 255)};
9313
9314 const RGBA expectedColors[4] = {RGBA(255, 0, 0, 255), RGBA(255, 0, 0, 255), RGBA(255, 0, 0, 255),
9315 RGBA(255, 0, 0, 255)};
9316
9317 const struct SingleFP16Possibility
9318 {
9319 const char *name;
9320 const char *constant; // Value to assign to %test_constant.
9321 float valueAsFloat;
9322 const char *
9323 condition; // Must assign to %cond an expression that evaluates to true after %c = OpQuantizeToF16(%test_constant + 0).
9324 bool preserveNanInf;
9325 } tests[] = {
9326 {"negative", "-0x1.3p1\n", -constructNormalizedFloat(1, 0x300000),
9327 "%cond = OpFOrdEqual %bool %c %test_constant\n", false}, // -19
9328 {"positive", "0x1.0p7\n", constructNormalizedFloat(7, 0x000000),
9329 "%cond = OpFOrdEqual %bool %c %test_constant\n", false}, // +128
9330 // SPIR-V requires that OpQuantizeToF16 flushes
9331 // any numbers that would end up denormalized in F16 to zero.
9332 {"denorm", "0x0.0006p-126\n", std::ldexp(1.5f, -140), "%cond = OpFOrdEqual %bool %c %c_f32_0\n",
9333 false}, // denorm
9334 {"negative_denorm", "-0x0.0006p-126\n", -std::ldexp(1.5f, -140), "%cond = OpFOrdEqual %bool %c %c_f32_0\n",
9335 false}, // -denorm
9336 {"too_small", "0x1.0p-16\n", std::ldexp(1.0f, -16), "%cond = OpFOrdEqual %bool %c %c_f32_0\n",
9337 false}, // too small positive
9338 {"negative_too_small", "-0x1.0p-32\n", -std::ldexp(1.0f, -32), "%cond = OpFOrdEqual %bool %c %c_f32_0\n",
9339 false}, // too small negative
9340 {"negative_inf", "-0x1.0p128\n", -std::ldexp(1.0f, 128),
9341
9342 "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9343 "%inf = OpIsInf %bool %c\n"
9344 "%cond = OpLogicalAnd %bool %gz %inf\n",
9345 true}, // -inf to -inf
9346 {"inf", "0x1.0p128\n", std::ldexp(1.0f, 128),
9347
9348 "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9349 "%inf = OpIsInf %bool %c\n"
9350 "%cond = OpLogicalAnd %bool %gz %inf\n",
9351 true}, // +inf to +inf
9352 {"round_to_negative_inf", "-0x1.0p32\n", -std::ldexp(1.0f, 32),
9353
9354 "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9355 "%inf = OpIsInf %bool %c\n"
9356 "%cond = OpLogicalAnd %bool %gz %inf\n",
9357 true}, // round to -inf
9358 {"round_to_inf", "0x1.0p16\n", std::ldexp(1.0f, 16),
9359
9360 "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9361 "%inf = OpIsInf %bool %c\n"
9362 "%cond = OpLogicalAnd %bool %gz %inf\n",
9363 true}, // round to +inf
9364 {"nan", "0x1.1p128\n", std::numeric_limits<float>::quiet_NaN(),
9365
9366 // Test for any NaN value, as NaNs are not preserved
9367 "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9368 "%cond = OpIsNan %bool %direct_quant\n",
9369 true}, // nan
9370 {"negative_nan", "-0x1.0001p128\n", std::numeric_limits<float>::quiet_NaN(),
9371
9372 // Test for any NaN value, as NaNs are not preserved
9373 "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9374 "%cond = OpIsNan %bool %direct_quant\n",
9375 true} // -nan
9376 };
9377 const char *constants = "%test_constant = OpConstant %f32 "; // The value will be test.constant.
9378
9379 StringTemplate function("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9380 "%param1 = OpFunctionParameter %v4f32\n"
9381 "%label_testfun = OpLabel\n"
9382 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9383 "%b = OpFAdd %f32 %test_constant %a\n"
9384 "%c = OpQuantizeToF16 %f32 %b\n"
9385 "${condition}\n"
9386 "%v4cond = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9387 "%retval = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9388 " OpReturnValue %retval\n"
9389 "OpFunctionEnd\n");
9390
9391 const char *specDecorations = "OpDecorate %test_constant SpecId 0\n";
9392 const char *specConstants = "%test_constant = OpSpecConstant %f32 0.\n"
9393 "%c = OpSpecConstantOp %f32 QuantizeToF16 %test_constant\n";
9394
9395 StringTemplate specConstantFunction("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9396 "%param1 = OpFunctionParameter %v4f32\n"
9397 "%label_testfun = OpLabel\n"
9398 "${condition}\n"
9399 "%v4cond = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9400 "%retval = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9401 " OpReturnValue %retval\n"
9402 "OpFunctionEnd\n");
9403
9404 for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
9405 {
9406 std::vector<std::string> extensions;
9407 VulkanFeatures features;
9408 map<string, string> codeSpecialization;
9409 map<string, string> fragments;
9410 codeSpecialization["condition"] = tests[idx].condition;
9411 fragments["testfun"] = function.specialize(codeSpecialization);
9412 fragments["pre_main"] = string(constants) + tests[idx].constant + "\n";
9413
9414 if (tests[idx].preserveNanInf)
9415 {
9416 fragments["capability"] = "OpCapability SignedZeroInfNanPreserve\n";
9417 fragments["extension"] = "OpExtension \"SPV_KHR_float_controls\"\n";
9418 extensions.push_back("VK_KHR_shader_float_controls");
9419 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = true;
9420 }
9421
9422 createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, SpecConstants(),
9423 PushConstants(), GraphicsResources(), GraphicsInterfaces(), extensions, features,
9424 testCtx);
9425 }
9426
9427 for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
9428 {
9429 std::vector<std::string> extensions;
9430 VulkanFeatures features;
9431 map<string, string> codeSpecialization;
9432 map<string, string> fragments;
9433 SpecConstants passConstants;
9434
9435 codeSpecialization["condition"] = tests[idx].condition;
9436 fragments["testfun"] = specConstantFunction.specialize(codeSpecialization);
9437 fragments["decoration"] = specDecorations;
9438 fragments["pre_main"] = specConstants;
9439
9440 passConstants.append<float>(tests[idx].valueAsFloat);
9441
9442 if (tests[idx].preserveNanInf)
9443 {
9444 fragments["capability"] = "OpCapability SignedZeroInfNanPreserve\n";
9445 fragments["extension"] = "OpExtension \"SPV_KHR_float_controls\"\n";
9446 extensions.push_back("VK_KHR_shader_float_controls");
9447 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = true;
9448 }
9449
9450 createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments,
9451 passConstants, PushConstants(), GraphicsResources(), GraphicsInterfaces(), extensions,
9452 features, testCtx);
9453 }
9454 }
9455
createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup * testCtx)9456 void createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup *testCtx)
9457 {
9458 RGBA inputColors[4] = {RGBA(0, 0, 0, 255), RGBA(0, 0, 255, 255), RGBA(0, 255, 0, 255), RGBA(0, 255, 255, 255)};
9459
9460 RGBA expectedColors[4] = {RGBA(255, 0, 0, 255), RGBA(255, 0, 0, 255), RGBA(255, 0, 0, 255), RGBA(255, 0, 0, 255)};
9461
9462 struct DualFP16Possibility
9463 {
9464 const char *name;
9465 const char *input;
9466 float inputAsFloat;
9467 const char *possibleOutput1;
9468 const char *possibleOutput2;
9469 } tests[] = {
9470 {"positive_round_up_or_round_down", "0x1.3003p8", constructNormalizedFloat(8, 0x300300), "0x1.304p8",
9471 "0x1.3p8"},
9472 {"negative_round_up_or_round_down", "-0x1.6008p-7", -constructNormalizedFloat(-7, 0x600800), "-0x1.6p-7",
9473 "-0x1.604p-7"},
9474 {"carry_bit", "0x1.01ep2", constructNormalizedFloat(2, 0x01e000), "0x1.01cp2", "0x1.02p2"},
9475 {"carry_to_exponent", "0x1.ffep1", constructNormalizedFloat(1, 0xffe000), "0x1.ffcp1", "0x1.0p2"},
9476 };
9477 StringTemplate constants("%input_const = OpConstant %f32 ${input}\n"
9478 "%possible_solution1 = OpConstant %f32 ${output1}\n"
9479 "%possible_solution2 = OpConstant %f32 ${output2}\n");
9480
9481 StringTemplate specConstants("%input_const = OpSpecConstant %f32 0.\n"
9482 "%possible_solution1 = OpConstant %f32 ${output1}\n"
9483 "%possible_solution2 = OpConstant %f32 ${output2}\n");
9484
9485 const char *specDecorations = "OpDecorate %input_const SpecId 0\n";
9486
9487 const char *function = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9488 "%param1 = OpFunctionParameter %v4f32\n"
9489 "%label_testfun = OpLabel\n"
9490 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9491 // For the purposes of this test we assume that 0.f will always get
9492 // faithfully passed through the pipeline stages.
9493 "%b = OpFAdd %f32 %input_const %a\n"
9494 "%c = OpQuantizeToF16 %f32 %b\n"
9495 "%eq_1 = OpFOrdEqual %bool %c %possible_solution1\n"
9496 "%eq_2 = OpFOrdEqual %bool %c %possible_solution2\n"
9497 "%cond = OpLogicalOr %bool %eq_1 %eq_2\n"
9498 "%v4cond = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9499 "%retval = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1"
9500 " OpReturnValue %retval\n"
9501 "OpFunctionEnd\n";
9502
9503 for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
9504 {
9505 map<string, string> fragments;
9506 map<string, string> constantSpecialization;
9507
9508 constantSpecialization["input"] = tests[idx].input;
9509 constantSpecialization["output1"] = tests[idx].possibleOutput1;
9510 constantSpecialization["output2"] = tests[idx].possibleOutput2;
9511 fragments["testfun"] = function;
9512 fragments["pre_main"] = constants.specialize(constantSpecialization);
9513 createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
9514 }
9515
9516 for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
9517 {
9518 map<string, string> fragments;
9519 map<string, string> constantSpecialization;
9520 SpecConstants passConstants;
9521
9522 constantSpecialization["output1"] = tests[idx].possibleOutput1;
9523 constantSpecialization["output2"] = tests[idx].possibleOutput2;
9524 fragments["testfun"] = function;
9525 fragments["decoration"] = specDecorations;
9526 fragments["pre_main"] = specConstants.specialize(constantSpecialization);
9527
9528 passConstants.append<float>(tests[idx].inputAsFloat);
9529
9530 createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments,
9531 passConstants, testCtx);
9532 }
9533 }
9534
createOpQuantizeTests(tcu::TestContext & testCtx)9535 tcu::TestCaseGroup *createOpQuantizeTests(tcu::TestContext &testCtx)
9536 {
9537 de::MovePtr<tcu::TestCaseGroup> opQuantizeTests(new tcu::TestCaseGroup(testCtx, "opquantize"));
9538 createOpQuantizeSingleOptionTests(opQuantizeTests.get());
9539 createOpQuantizeTwoPossibilityTests(opQuantizeTests.get());
9540 return opQuantizeTests.release();
9541 }
9542
// One selector per graphics shader stage. getShaderPermutation() fills each
// field with 0 or 1, and createModuleTests() uses the value as an index to
// choose between the stage's "1" and "2" entry points.
// Member order matters: getShaderPermutation() brace-initializes this struct.
struct ShaderPermutation
{
    uint8_t vertexPermutation;   // Selector for the vertex stage.
    uint8_t geometryPermutation; // Selector for the geometry stage.
    uint8_t tesscPermutation;    // Selector for the tessellation control stage.
    uint8_t tessePermutation;    // Selector for the tessellation evaluation stage.
    uint8_t fragmentPermutation; // Selector for the fragment stage.
};
9551
getShaderPermutation(uint8_t inputValue)9552 ShaderPermutation getShaderPermutation(uint8_t inputValue)
9553 {
9554 ShaderPermutation permutation = {
9555 static_cast<uint8_t>(inputValue & 0x10 ? 1u : 0u), static_cast<uint8_t>(inputValue & 0x08 ? 1u : 0u),
9556 static_cast<uint8_t>(inputValue & 0x04 ? 1u : 0u), static_cast<uint8_t>(inputValue & 0x02 ? 1u : 0u),
9557 static_cast<uint8_t>(inputValue & 0x01 ? 1u : 0u)};
9558 return permutation;
9559 }
9560
createModuleTests(tcu::TestContext & testCtx)9561 tcu::TestCaseGroup *createModuleTests(tcu::TestContext &testCtx)
9562 {
9563 RGBA defaultColors[4];
9564 RGBA invertedColors[4];
9565 de::MovePtr<tcu::TestCaseGroup> moduleTests(new tcu::TestCaseGroup(testCtx, "module"));
9566
9567 getDefaultColors(defaultColors);
9568 getInvertedDefaultColors(invertedColors);
9569
9570 // Combined module tests
9571 {
9572 // Shader stages: vertex and fragment
9573 {
9574 const ShaderElement combinedPipeline[] = {ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9575 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)};
9576
9577 addFunctionCaseWithPrograms<InstanceContext>(
9578 moduleTests.get(), "same_module", createCombinedModule, runAndVerifyDefaultPipeline,
9579 createInstanceContext(combinedPipeline, map<string, string>()));
9580 }
9581
9582 // Shader stages: vertex, geometry and fragment
9583 {
9584 const ShaderElement combinedPipeline[] = {ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9585 ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9586 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)};
9587
9588 addFunctionCaseWithPrograms<InstanceContext>(
9589 moduleTests.get(), "same_module_geom", createCombinedModule, runAndVerifyDefaultPipeline,
9590 createInstanceContext(combinedPipeline, map<string, string>()));
9591 }
9592
9593 // Shader stages: vertex, tessellation control, tessellation evaluation and fragment
9594 {
9595 const ShaderElement combinedPipeline[] = {
9596 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9597 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9598 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9599 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)};
9600
9601 addFunctionCaseWithPrograms<InstanceContext>(
9602 moduleTests.get(), "same_module_tessc_tesse", createCombinedModule, runAndVerifyDefaultPipeline,
9603 createInstanceContext(combinedPipeline, map<string, string>()));
9604 }
9605
9606 // Shader stages: vertex, tessellation control, tessellation evaluation, geometry and fragment
9607 {
9608 const ShaderElement combinedPipeline[] = {
9609 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9610 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9611 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9612 ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9613 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)};
9614
9615 addFunctionCaseWithPrograms<InstanceContext>(
9616 moduleTests.get(), "same_module_tessc_tesse_geom", createCombinedModule, runAndVerifyDefaultPipeline,
9617 createInstanceContext(combinedPipeline, map<string, string>()));
9618 }
9619 }
9620
9621 const char *numbers[] = {"1", "2"};
9622
9623 for (int8_t idx = 0; idx < 32; ++idx)
9624 {
9625 ShaderPermutation permutation = getShaderPermutation(idx);
9626 string name = string("vert") + numbers[permutation.vertexPermutation] + "_geom" +
9627 numbers[permutation.geometryPermutation] + "_tessc" + numbers[permutation.tesscPermutation] +
9628 "_tesse" + numbers[permutation.tessePermutation] + "_frag" +
9629 numbers[permutation.fragmentPermutation];
9630 const ShaderElement pipeline[] = {
9631 ShaderElement("vert", string("vert") + numbers[permutation.vertexPermutation], VK_SHADER_STAGE_VERTEX_BIT),
9632 ShaderElement("geom", string("geom") + numbers[permutation.geometryPermutation],
9633 VK_SHADER_STAGE_GEOMETRY_BIT),
9634 ShaderElement("tessc", string("tessc") + numbers[permutation.tesscPermutation],
9635 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9636 ShaderElement("tesse", string("tesse") + numbers[permutation.tessePermutation],
9637 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9638 ShaderElement("frag", string("frag") + numbers[permutation.fragmentPermutation],
9639 VK_SHADER_STAGE_FRAGMENT_BIT)};
9640
9641 // If there are an even number of swaps, then it should be no-op.
9642 // If there are an odd number, the color should be flipped.
9643 if ((permutation.vertexPermutation + permutation.geometryPermutation + permutation.tesscPermutation +
9644 permutation.tessePermutation + permutation.fragmentPermutation) %
9645 2 ==
9646 0)
9647 {
9648 addFunctionCaseWithPrograms<InstanceContext>(
9649 moduleTests.get(), name, createMultipleEntries, runAndVerifyDefaultPipeline,
9650 createInstanceContext(pipeline, defaultColors, defaultColors, map<string, string>()));
9651 }
9652 else
9653 {
9654 addFunctionCaseWithPrograms<InstanceContext>(
9655 moduleTests.get(), name, createMultipleEntries, runAndVerifyDefaultPipeline,
9656 createInstanceContext(pipeline, defaultColors, invertedColors, map<string, string>()));
9657 }
9658 }
9659 return moduleTests.release();
9660 }
9661
getUnusedVarTestNamePiece(const std::string & prefix,ShaderTask task)9662 std::string getUnusedVarTestNamePiece(const std::string &prefix, ShaderTask task)
9663 {
9664 switch (task)
9665 {
9666 case SHADER_TASK_NONE:
9667 return "";
9668 case SHADER_TASK_NORMAL:
9669 return prefix + "_normal";
9670 case SHADER_TASK_UNUSED_VAR:
9671 return prefix + "_unused_var";
9672 case SHADER_TASK_UNUSED_FUNC:
9673 return prefix + "_unused_func";
9674 default:
9675 DE_ASSERT(false);
9676 }
9677 // unreachable
9678 return "";
9679 }
9680
getShaderTaskIndexName(ShaderTaskIndex index)9681 std::string getShaderTaskIndexName(ShaderTaskIndex index)
9682 {
9683 switch (index)
9684 {
9685 case SHADER_TASK_INDEX_VERTEX:
9686 return "vertex";
9687 case SHADER_TASK_INDEX_GEOMETRY:
9688 return "geom";
9689 case SHADER_TASK_INDEX_TESS_CONTROL:
9690 return "tessc";
9691 case SHADER_TASK_INDEX_TESS_EVAL:
9692 return "tesse";
9693 case SHADER_TASK_INDEX_FRAGMENT:
9694 return "frag";
9695 default:
9696 DE_ASSERT(false);
9697 }
9698 // unreachable
9699 return "";
9700 }
9701
getUnusedVarTestName(const ShaderTaskArray & shaderTasks,const VariableLocation & location)9702 std::string getUnusedVarTestName(const ShaderTaskArray &shaderTasks, const VariableLocation &location)
9703 {
9704 std::string testName = location.toString();
9705
9706 for (size_t i = 0; i < DE_LENGTH_OF_ARRAY(shaderTasks); ++i)
9707 {
9708 if (shaderTasks[i] != SHADER_TASK_NONE)
9709 {
9710 testName += "_" + getUnusedVarTestNamePiece(getShaderTaskIndexName((ShaderTaskIndex)i), shaderTasks[i]);
9711 }
9712 }
9713
9714 return testName;
9715 }
9716
createUnusedVariableTests(tcu::TestContext & testCtx)9717 tcu::TestCaseGroup *createUnusedVariableTests(tcu::TestContext &testCtx)
9718 {
9719 de::MovePtr<tcu::TestCaseGroup> moduleTests(new tcu::TestCaseGroup(testCtx, "unused_variables"));
9720
9721 ShaderTaskArray shaderCombinations[] = {
9722 // Vertex Geometry Tess. Control Tess. Evaluation Fragment
9723 {SHADER_TASK_UNUSED_VAR, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL},
9724 {SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL},
9725 {SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_UNUSED_VAR},
9726 {SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_UNUSED_FUNC},
9727 {SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_VAR, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL},
9728 {SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL},
9729 {SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_UNUSED_VAR, SHADER_TASK_NORMAL, SHADER_TASK_NORMAL},
9730 {SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NORMAL, SHADER_TASK_NORMAL},
9731 {SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_VAR, SHADER_TASK_NORMAL},
9732 {SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NORMAL}};
9733
9734 const VariableLocation testLocations[] = {
9735 // Set Binding
9736 {0, 5},
9737 {5, 5},
9738 };
9739
9740 for (size_t combNdx = 0; combNdx < DE_LENGTH_OF_ARRAY(shaderCombinations); ++combNdx)
9741 {
9742 for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
9743 {
9744 const ShaderTaskArray &shaderTasks = shaderCombinations[combNdx];
9745 const VariableLocation &location = testLocations[locationNdx];
9746 std::string testName = getUnusedVarTestName(shaderTasks, location);
9747
9748 addFunctionCaseWithPrograms<UnusedVariableContext>(moduleTests.get(), testName, createUnusedVariableModules,
9749 runAndVerifyUnusedVariablePipeline,
9750 createUnusedVariableContext(shaderTasks, location));
9751 }
9752 }
9753
9754 return moduleTests.release();
9755 }
9756
// Builds the "loop" group of structured-loop tests.
//
// Each case installs a %test_code function into fragments["testfun"] and
// registers it through createTestsForAllStages() with defaultColors as both
// input and expected output. The same `fragments` map is reused for every
// case, so assignments to it carry over to later cases until overwritten
// (the "pre_main" entry in particular).
// Ownership of the group is released to the caller.
tcu::TestCaseGroup *createLoopTests(tcu::TestContext &testCtx)
{
    de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "loop"));
    RGBA defaultColors[4];
    getDefaultColors(defaultColors);
    map<string, string> fragments;
    // %c_f32_5 is referenced by the "break" and "return" cases below.
    fragments["pre_main"] = "%c_f32_5 = OpConstant %f32 5.\n";

    // A loop with a single block. The Continue Target is the loop block
    // itself. In SPIR-V terms, the "loop construct" contains no blocks at all
    // -- the "continue construct" forms the entire loop.
    fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
                           "%param1 = OpFunctionParameter %v4f32\n"

                           "%entry = OpLabel\n"
                           "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
                           "OpBranch %loop\n"

                           ";adds and subtracts 1.0 to %val in alternate iterations\n"
                           "%loop = OpLabel\n"
                           "%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
                           "%delta = OpPhi %f32 %c_f32_1 %entry %minus_delta %loop\n"
                           "%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
                           "%val = OpFAdd %f32 %val1 %delta\n"
                           "%minus_delta = OpFSub %f32 %c_f32_0 %delta\n"
                           "%count__ = OpISub %i32 %count %c_i32_1\n"
                           "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
                           "OpLoopMerge %exit %loop None\n"
                           "OpBranchConditional %again %loop %exit\n"

                           "%exit = OpLabel\n"
                           "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
                           "OpReturnValue %result\n"

                           "OpFunctionEnd\n";

    createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());

    // Body comprised of multiple basic blocks.
    const StringTemplate multiBlock("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
                                    "%param1 = OpFunctionParameter %v4f32\n"

                                    "%entry = OpLabel\n"
                                    "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
                                    "OpBranch %loop\n"

                                    ";adds and subtracts 1.0 to %val in alternate iterations\n"
                                    "%loop = OpLabel\n"
                                    "%count = OpPhi %i32 %c_i32_4 %entry %count__ %cont\n"
                                    "%delta = OpPhi %f32 %c_f32_1 %entry %delta_next %cont\n"
                                    "%val1 = OpPhi %f32 %val0 %entry %val %cont\n"
                                    // There are several possibilities for the Continue Target below. Each
                                    // will be specialized into a separate test case.
                                    "OpLoopMerge %exit ${continue_target} None\n"
                                    "OpBranch %if\n"

                                    "%if = OpLabel\n"
                                    ";delta_next = (delta > 0) ? -1 : 1;\n"
                                    "%gt0 = OpFOrdGreaterThan %bool %delta %c_f32_0\n"
                                    "OpSelectionMerge %gather DontFlatten\n"
                                    "OpBranchConditional %gt0 %even %odd ;tells us if %count is even or odd\n"

                                    "%odd = OpLabel\n"
                                    "OpBranch %gather\n"

                                    "%even = OpLabel\n"
                                    "OpBranch %gather\n"

                                    "%gather = OpLabel\n"
                                    "%delta_next = OpPhi %f32 %c_f32_n1 %even %c_f32_1 %odd\n"
                                    "%val = OpFAdd %f32 %val1 %delta\n"
                                    "%count__ = OpISub %i32 %count %c_i32_1\n"
                                    "OpBranch %cont\n"

                                    "%cont = OpLabel\n"
                                    "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
                                    "OpBranchConditional %again %loop %exit\n"

                                    "%exit = OpLabel\n"
                                    "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
                                    "OpReturnValue %result\n"

                                    "OpFunctionEnd\n");

    // Holds the single ${continue_target} substitution for multiBlock.
    map<string, string> continue_target;

    // The Continue Target is %if, the block the loop header branches to.
    continue_target["continue_target"] = "%if";
    fragments["testfun"]               = multiBlock.specialize(continue_target);
    createTestsForAllStages("multi_block_continue_construct", defaultColors, defaultColors, fragments, testGroup.get());

    // The Continue Target is at the end of the loop.
    continue_target["continue_target"] = "%cont";
    fragments["testfun"]               = multiBlock.specialize(continue_target);
    createTestsForAllStages("multi_block_loop_construct", defaultColors, defaultColors, fragments, testGroup.get());

    // A loop with continue statement.
    fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
                           "%param1 = OpFunctionParameter %v4f32\n"

                           "%entry = OpLabel\n"
                           "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
                           "OpBranch %loop\n"

                           ";adds 4, 3, and 1 to %val0 (skips 2)\n"
                           "%loop = OpLabel\n"
                           "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
                           "%val1 = OpPhi %f32 %val0 %entry %val %continue\n"
                           "OpLoopMerge %exit %continue None\n"
                           "OpBranch %if\n"

                           "%if = OpLabel\n"
                           ";skip if %count==2\n"
                           "%eq2 = OpIEqual %bool %count %c_i32_2\n"
                           "OpBranchConditional %eq2 %continue %body\n"

                           "%body = OpLabel\n"
                           "%fcount = OpConvertSToF %f32 %count\n"
                           "%val2 = OpFAdd %f32 %val1 %fcount\n"
                           "OpBranch %continue\n"

                           "%continue = OpLabel\n"
                           "%val = OpPhi %f32 %val2 %body %val1 %if\n"
                           "%count__ = OpISub %i32 %count %c_i32_1\n"
                           "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
                           "OpBranchConditional %again %loop %exit\n"

                           "%exit = OpLabel\n"
                           "%same = OpFSub %f32 %val %c_f32_8\n"
                           "%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
                           "OpReturnValue %result\n"
                           "OpFunctionEnd\n";
    createTestsForAllStages("continue", defaultColors, defaultColors, fragments, testGroup.get());

    // A loop with break.
    fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
                           "%param1 = OpFunctionParameter %v4f32\n"

                           "%entry = OpLabel\n"
                           ";param1 components are between 0 and 1, so dot product is 4 or less\n"
                           "%dot = OpDot %f32 %param1 %param1\n"
                           "%div = OpFDiv %f32 %dot %c_f32_5\n"
                           "%zero = OpConvertFToU %u32 %div\n"
                           "%two = OpIAdd %i32 %zero %c_i32_2\n"
                           "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
                           "OpBranch %loop\n"

                           ";adds 4 and 3 to %val0 (exits early)\n"
                           "%loop = OpLabel\n"
                           "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
                           "%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
                           "OpLoopMerge %exit %continue None\n"
                           "OpBranch %if\n"

                           "%if = OpLabel\n"
                           ";end loop if %count==%two\n"
                           "%above2 = OpSGreaterThan %bool %count %two\n"
                           "OpBranchConditional %above2 %body %exit\n"

                           "%body = OpLabel\n"
                           "%fcount = OpConvertSToF %f32 %count\n"
                           "%val2 = OpFAdd %f32 %val1 %fcount\n"
                           "OpBranch %continue\n"

                           "%continue = OpLabel\n"
                           "%count__ = OpISub %i32 %count %c_i32_1\n"
                           "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
                           "OpBranchConditional %again %loop %exit\n"

                           "%exit = OpLabel\n"
                           "%val_post = OpPhi %f32 %val2 %continue %val1 %if\n"
                           "%same = OpFSub %f32 %val_post %c_f32_7\n"
                           "%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
                           "OpReturnValue %result\n"
                           "OpFunctionEnd\n";
    createTestsForAllStages("break", defaultColors, defaultColors, fragments, testGroup.get());

    // A loop with return.
    fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
                           "%param1 = OpFunctionParameter %v4f32\n"

                           "%entry = OpLabel\n"
                           ";param1 components are between 0 and 1, so dot product is 4 or less\n"
                           "%dot = OpDot %f32 %param1 %param1\n"
                           "%div = OpFDiv %f32 %dot %c_f32_5\n"
                           "%zero = OpConvertFToU %u32 %div\n"
                           "%two = OpIAdd %i32 %zero %c_i32_2\n"
                           "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
                           "OpBranch %loop\n"

                           ";returns early without modifying %param1\n"
                           "%loop = OpLabel\n"
                           "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
                           "%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
                           "OpLoopMerge %exit %continue None\n"
                           "OpBranch %if\n"

                           "%if = OpLabel\n"
                           ";return if %count==%two\n"
                           "%above2 = OpSGreaterThan %bool %count %two\n"
                           "OpSelectionMerge %body DontFlatten\n"
                           "OpBranchConditional %above2 %body %early_exit\n"

                           "%early_exit = OpLabel\n"
                           "OpReturnValue %param1\n"

                           "%body = OpLabel\n"
                           "%fcount = OpConvertSToF %f32 %count\n"
                           "%val2 = OpFAdd %f32 %val1 %fcount\n"
                           "OpBranch %continue\n"

                           "%continue = OpLabel\n"
                           "%count__ = OpISub %i32 %count %c_i32_1\n"
                           "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
                           "OpBranchConditional %again %loop %exit\n"

                           "%exit = OpLabel\n"
                           ";should never get here, so return an incorrect result\n"
                           "%result = OpVectorInsertDynamic %v4f32 %param1 %val2 %c_i32_0\n"
                           "OpReturnValue %result\n"
                           "OpFunctionEnd\n";
    createTestsForAllStages("return", defaultColors, defaultColors, fragments, testGroup.get());

    // Continue inside a switch block: every switch case branches straight to
    // the enclosing loop's continue target, bypassing the switch merge block.
    // Matches roughly the following GLSL code:
    // for (; keep_going; keep_going = false)
    // {
    //     switch (int(param1.x))
    //     {
    //         case 0: continue;
    //         case 1: continue;
    //         default: continue;
    //     }
    //     dead code: modify return value to invalid result.
    // }
    // This replaces the pre_main fragment used by the cases above.
    fragments["pre_main"] = "%fp_bool = OpTypePointer Function %bool\n"
                            "%true = OpConstantTrue %bool\n"
                            "%false = OpConstantFalse %bool\n";

    fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
                           "%param1 = OpFunctionParameter %v4f32\n"

                           "%entry = OpLabel\n"
                           "%keep_going = OpVariable %fp_bool Function\n"
                           "%val_ptr = OpVariable %fp_f32 Function\n"
                           "%param1_x = OpCompositeExtract %f32 %param1 0\n"
                           "OpStore %keep_going %true\n"
                           "OpBranch %forloop_begin\n"

                           "%forloop_begin = OpLabel\n"
                           "OpLoopMerge %forloop_merge %forloop_continue None\n"
                           "OpBranch %forloop\n"

                           "%forloop = OpLabel\n"
                           "%for_condition = OpLoad %bool %keep_going\n"
                           "OpBranchConditional %for_condition %forloop_body %forloop_merge\n"

                           "%forloop_body = OpLabel\n"
                           "OpStore %val_ptr %param1_x\n"
                           "%param1_x_int = OpConvertFToS %i32 %param1_x\n"

                           "OpSelectionMerge %switch_merge None\n"
                           "OpSwitch %param1_x_int %default 0 %case_0 1 %case_1\n"
                           "%case_0 = OpLabel\n"
                           "OpBranch %forloop_continue\n"
                           "%case_1 = OpLabel\n"
                           "OpBranch %forloop_continue\n"
                           "%default = OpLabel\n"
                           "OpBranch %forloop_continue\n"
                           "%switch_merge = OpLabel\n"
                           ";should never get here, so change the return value to invalid result\n"
                           "OpStore %val_ptr %c_f32_1\n"
                           "OpBranch %forloop_continue\n"

                           "%forloop_continue = OpLabel\n"
                           "OpStore %keep_going %false\n"
                           "OpBranch %forloop_begin\n"
                           "%forloop_merge = OpLabel\n"

                           "%val = OpLoad %f32 %val_ptr\n"
                           "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
                           "OpReturnValue %result\n"
                           "OpFunctionEnd\n";
    createTestsForAllStages("switch_continue", defaultColors, defaultColors, fragments, testGroup.get());

    return testGroup.release();
}
10044
10045 // A collection of tests putting OpControlBarrier in places GLSL forbids but SPIR-V allows.
createBarrierTests(tcu::TestContext & testCtx)10046 tcu::TestCaseGroup *createBarrierTests(tcu::TestContext &testCtx)
10047 {
10048 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "barrier"));
10049 map<string, string> fragments;
10050
10051 // A barrier inside a function body.
10052 fragments["pre_main"] = "%Workgroup = OpConstant %i32 2\n"
10053 "%Invocation = OpConstant %i32 4\n"
10054 "%MemorySemanticsNone = OpConstant %i32 0\n";
10055 fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10056 "%param1 = OpFunctionParameter %v4f32\n"
10057 "%label_testfun = OpLabel\n"
10058 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10059 "OpReturnValue %param1\n"
10060 "OpFunctionEnd\n";
10061 addTessCtrlTest(testGroup.get(), "in_function", fragments);
10062
10063 // Common setup code for the following tests.
10064 fragments["pre_main"] = "%Workgroup = OpConstant %i32 2\n"
10065 "%Invocation = OpConstant %i32 4\n"
10066 "%MemorySemanticsNone = OpConstant %i32 0\n"
10067 "%c_f32_5 = OpConstant %f32 5.\n";
10068 const string
10069 setupPercentZero = // Begins %test_code function with code that sets %zero to 0u but cannot be optimized away.
10070 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10071 "%param1 = OpFunctionParameter %v4f32\n"
10072 "%entry = OpLabel\n"
10073 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
10074 "%dot = OpDot %f32 %param1 %param1\n"
10075 "%div = OpFDiv %f32 %dot %c_f32_5\n"
10076 "%zero = OpConvertFToU %u32 %div\n";
10077
10078 // Barriers inside OpSwitch branches.
10079 fragments["testfun"] =
10080 setupPercentZero +
10081 "OpSelectionMerge %switch_exit None\n"
10082 "OpSwitch %zero %switch_default 0 %case0 1 %case1 ;should always go to %case0\n"
10083
10084 "%case1 = OpLabel\n"
10085 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
10086 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10087 "%wrong_branch_alert1 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
10088 "OpBranch %switch_exit\n"
10089
10090 "%switch_default = OpLabel\n"
10091 "%wrong_branch_alert2 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
10092 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
10093 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10094 "OpBranch %switch_exit\n"
10095
10096 "%case0 = OpLabel\n"
10097 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10098 "OpBranch %switch_exit\n"
10099
10100 "%switch_exit = OpLabel\n"
10101 "%ret = OpPhi %v4f32 %param1 %case0 %wrong_branch_alert1 %case1 %wrong_branch_alert2 %switch_default\n"
10102 "OpReturnValue %ret\n"
10103 "OpFunctionEnd\n";
10104 addTessCtrlTest(testGroup.get(), "in_switch", fragments);
10105
10106 // Barriers inside if-then-else.
10107 fragments["testfun"] =
10108 setupPercentZero +
10109 "%eq0 = OpIEqual %bool %zero %c_u32_0\n"
10110 "OpSelectionMerge %exit DontFlatten\n"
10111 "OpBranchConditional %eq0 %then %else\n"
10112
10113 "%else = OpLabel\n"
10114 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
10115 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10116 "%wrong_branch_alert = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
10117 "OpBranch %exit\n"
10118
10119 "%then = OpLabel\n"
10120 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10121 "OpBranch %exit\n"
10122 "%exit = OpLabel\n"
10123 "%ret = OpPhi %v4f32 %param1 %then %wrong_branch_alert %else\n"
10124 "OpReturnValue %ret\n"
10125 "OpFunctionEnd\n";
10126 addTessCtrlTest(testGroup.get(), "in_if", fragments);
10127
10128 // A barrier after control-flow reconvergence, tempting the compiler to attempt something like this:
10129 // http://lists.llvm.org/pipermail/llvm-dev/2009-October/026317.html.
10130 fragments["testfun"] = setupPercentZero + "%thread_id = OpLoad %i32 %BP_gl_InvocationID\n"
10131 "%thread0 = OpIEqual %bool %thread_id %c_i32_0\n"
10132 "OpSelectionMerge %exit DontFlatten\n"
10133 "OpBranchConditional %thread0 %then %else\n"
10134
10135 "%else = OpLabel\n"
10136 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
10137 "OpBranch %exit\n"
10138
10139 "%then = OpLabel\n"
10140 "%val1 = OpVectorExtractDynamic %f32 %param1 %zero\n"
10141 "OpBranch %exit\n"
10142
10143 "%exit = OpLabel\n"
10144 "%val = OpPhi %f32 %val0 %else %val1 %then\n"
10145 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10146 "%ret = OpVectorInsertDynamic %v4f32 %param1 %val %zero\n"
10147 "OpReturnValue %ret\n"
10148 "OpFunctionEnd\n";
10149 addTessCtrlTest(testGroup.get(), "after_divergent_if", fragments);
10150
10151 // A barrier inside a loop.
10152 fragments["pre_main"] = "%Workgroup = OpConstant %i32 2\n"
10153 "%Invocation = OpConstant %i32 4\n"
10154 "%MemorySemanticsNone = OpConstant %i32 0\n"
10155 "%c_f32_10 = OpConstant %f32 10.\n";
10156 fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10157 "%param1 = OpFunctionParameter %v4f32\n"
10158 "%entry = OpLabel\n"
10159 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
10160 "OpBranch %loop\n"
10161
10162 ";adds 4, 3, 2, and 1 to %val0\n"
10163 "%loop = OpLabel\n"
10164 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
10165 "%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
10166 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
10167 "%fcount = OpConvertSToF %f32 %count\n"
10168 "%val = OpFAdd %f32 %val1 %fcount\n"
10169 "%count__ = OpISub %i32 %count %c_i32_1\n"
10170 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
10171 "OpLoopMerge %exit %loop None\n"
10172 "OpBranchConditional %again %loop %exit\n"
10173
10174 "%exit = OpLabel\n"
10175 "%same = OpFSub %f32 %val %c_f32_10\n"
10176 "%ret = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
10177 "OpReturnValue %ret\n"
10178 "OpFunctionEnd\n";
10179 addTessCtrlTest(testGroup.get(), "in_loop", fragments);
10180
10181 return testGroup.release();
10182 }
10183
10184 // Test for the OpFRem instruction.
createFRemTests(tcu::TestContext & testCtx)10185 tcu::TestCaseGroup *createFRemTests(tcu::TestContext &testCtx)
10186 {
10187 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "frem"));
10188 map<string, string> fragments;
10189 RGBA inputColors[4];
10190 RGBA outputColors[4];
10191
10192 fragments["pre_main"] =
10193 "%c_f32_3 = OpConstant %f32 3.0\n"
10194 "%c_f32_n3 = OpConstant %f32 -3.0\n"
10195 "%c_f32_4 = OpConstant %f32 4.0\n"
10196 "%c_f32_p75 = OpConstant %f32 0.75\n"
10197 "%c_v4f32_p75_p75_p75_p75 = OpConstantComposite %v4f32 %c_f32_p75 %c_f32_p75 %c_f32_p75 %c_f32_p75 \n"
10198 "%c_v4f32_4_4_4_4 = OpConstantComposite %v4f32 %c_f32_4 %c_f32_4 %c_f32_4 %c_f32_4\n"
10199 "%c_v4f32_3_n3_3_n3 = OpConstantComposite %v4f32 %c_f32_3 %c_f32_n3 %c_f32_3 %c_f32_n3\n";
10200
10201 // The test does the following.
10202 // vec4 result = (param1 * 8.0) - 4.0;
10203 // return (frem(result.x,3) + 0.75, frem(result.y, -3) + 0.75, 0, 1)
10204 fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10205 "%param1 = OpFunctionParameter %v4f32\n"
10206 "%label_testfun = OpLabel\n"
10207 "%v_times_8 = OpVectorTimesScalar %v4f32 %param1 %c_f32_8\n"
10208 "%minus_4 = OpFSub %v4f32 %v_times_8 %c_v4f32_4_4_4_4\n"
10209 "%frem = OpFRem %v4f32 %minus_4 %c_v4f32_3_n3_3_n3\n"
10210 "%added = OpFAdd %v4f32 %frem %c_v4f32_p75_p75_p75_p75\n"
10211 "%xyz_1 = OpVectorInsertDynamic %v4f32 %added %c_f32_1 %c_i32_3\n"
10212 "%xy_0_1 = OpVectorInsertDynamic %v4f32 %xyz_1 %c_f32_0 %c_i32_2\n"
10213 "OpReturnValue %xy_0_1\n"
10214 "OpFunctionEnd\n";
10215
10216 inputColors[0] = RGBA(16, 16, 0, 255);
10217 inputColors[1] = RGBA(232, 232, 0, 255);
10218 inputColors[2] = RGBA(232, 16, 0, 255);
10219 inputColors[3] = RGBA(16, 232, 0, 255);
10220
10221 outputColors[0] = RGBA(64, 64, 0, 255);
10222 outputColors[1] = RGBA(255, 255, 0, 255);
10223 outputColors[2] = RGBA(255, 64, 0, 255);
10224 outputColors[3] = RGBA(64, 255, 0, 255);
10225
10226 createTestsForAllStages("frem", inputColors, outputColors, fragments, testGroup.get());
10227 return testGroup.release();
10228 }
10229
10230 // Test for the OpSRem instruction.
createOpSRemGraphicsTests(tcu::TestContext & testCtx,qpTestResult negFailResult)10231 tcu::TestCaseGroup *createOpSRemGraphicsTests(tcu::TestContext &testCtx, qpTestResult negFailResult)
10232 {
10233 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "srem"));
10234 map<string, string> fragments;
10235
10236 fragments["pre_main"] = "%c_f32_255 = OpConstant %f32 255.0\n"
10237 "%c_i32_128 = OpConstant %i32 128\n"
10238 "%c_i32_255 = OpConstant %i32 255\n"
10239 "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
10240 "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10241 "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10242
10243 // The test does the following.
10244 // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10245 // ivec4 result = ivec4(srem(ints.x, ints.y), srem(ints.y, ints.z), srem(ints.z, ints.x), 255);
10246 // return float(result + 128) / 255.0;
10247 fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10248 "%param1 = OpFunctionParameter %v4f32\n"
10249 "%label_testfun = OpLabel\n"
10250 "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10251 "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10252 "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10253 "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10254 "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10255 "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10256 "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10257 "%x_out = OpSRem %i32 %x_in %y_in\n"
10258 "%y_out = OpSRem %i32 %y_in %z_in\n"
10259 "%z_out = OpSRem %i32 %z_in %x_in\n"
10260 "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10261 "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10262 "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10263 "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10264 "OpReturnValue %float_out\n"
10265 "OpFunctionEnd\n";
10266
10267 const struct CaseParams
10268 {
10269 const char *name;
10270 const char *failMessageTemplate; // customized status message
10271 qpTestResult failResult; // override status on failure
10272 int operands[4][3]; // four (x, y, z) vectors of operands
10273 int results[4][3]; // four (x, y, z) vectors of results
10274 } cases[] = {
10275 {
10276 "positive",
10277 "${reason}",
10278 QP_TEST_RESULT_FAIL,
10279 {{5, 12, 17}, {5, 5, 7}, {75, 8, 81}, {25, 60, 100}}, // operands
10280 {{5, 12, 2}, {0, 5, 2}, {3, 8, 6}, {25, 60, 0}}, // results
10281 },
10282 {
10283 "all",
10284 "Inconsistent results, but within specification: ${reason}",
10285 negFailResult, // negative operands, not required by the spec
10286 {{5, 12, -17}, {-5, -5, 7}, {75, 8, -81}, {25, -60, 100}}, // operands
10287 {{5, 12, -2}, {0, -5, 2}, {3, 8, -6}, {25, -60, 0}}, // results
10288 },
10289 };
10290 // If either operand is negative the result is undefined. Some implementations may still return correct values.
10291
10292 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10293 {
10294 const CaseParams ¶ms = cases[caseNdx];
10295 RGBA inputColors[4];
10296 RGBA outputColors[4];
10297
10298 for (int i = 0; i < 4; ++i)
10299 {
10300 inputColors[i] =
10301 RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10302 outputColors[i] =
10303 RGBA(params.results[i][0] + 128, params.results[i][1] + 128, params.results[i][2] + 128, 255);
10304 }
10305
10306 createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult,
10307 params.failMessageTemplate);
10308 }
10309
10310 return testGroup.release();
10311 }
10312
10313 // Test for the OpSMod instruction.
createOpSModGraphicsTests(tcu::TestContext & testCtx,qpTestResult negFailResult)10314 tcu::TestCaseGroup *createOpSModGraphicsTests(tcu::TestContext &testCtx, qpTestResult negFailResult)
10315 {
10316 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "smod"));
10317 map<string, string> fragments;
10318
10319 fragments["pre_main"] = "%c_f32_255 = OpConstant %f32 255.0\n"
10320 "%c_i32_128 = OpConstant %i32 128\n"
10321 "%c_i32_255 = OpConstant %i32 255\n"
10322 "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
10323 "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10324 "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10325
10326 // The test does the following.
10327 // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10328 // ivec4 result = ivec4(smod(ints.x, ints.y), smod(ints.y, ints.z), smod(ints.z, ints.x), 255);
10329 // return float(result + 128) / 255.0;
10330 fragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10331 "%param1 = OpFunctionParameter %v4f32\n"
10332 "%label_testfun = OpLabel\n"
10333 "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10334 "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10335 "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10336 "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10337 "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10338 "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10339 "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10340 "%x_out = OpSMod %i32 %x_in %y_in\n"
10341 "%y_out = OpSMod %i32 %y_in %z_in\n"
10342 "%z_out = OpSMod %i32 %z_in %x_in\n"
10343 "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10344 "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10345 "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10346 "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10347 "OpReturnValue %float_out\n"
10348 "OpFunctionEnd\n";
10349
10350 const struct CaseParams
10351 {
10352 const char *name;
10353 const char *failMessageTemplate; // customized status message
10354 qpTestResult failResult; // override status on failure
10355 int operands[4][3]; // four (x, y, z) vectors of operands
10356 int results[4][3]; // four (x, y, z) vectors of results
10357 } cases[] = {
10358 {
10359 "positive",
10360 "${reason}",
10361 QP_TEST_RESULT_FAIL,
10362 {{5, 12, 17}, {5, 5, 7}, {75, 8, 81}, {25, 60, 100}}, // operands
10363 {{5, 12, 2}, {0, 5, 2}, {3, 8, 6}, {25, 60, 0}}, // results
10364 },
10365 {
10366 "all",
10367 "Inconsistent results, but within specification: ${reason}",
10368 negFailResult, // negative operands, not required by the spec
10369 {{5, 12, -17}, {-5, -5, 7}, {75, 8, -81}, {25, -60, 100}}, // operands
10370 {{5, -5, 3}, {0, 2, -3}, {3, -73, 69}, {-35, 40, 0}}, // results
10371 },
10372 };
10373 // If either operand is negative the result is undefined. Some implementations may still return correct values.
10374
10375 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10376 {
10377 const CaseParams ¶ms = cases[caseNdx];
10378 RGBA inputColors[4];
10379 RGBA outputColors[4];
10380
10381 for (int i = 0; i < 4; ++i)
10382 {
10383 inputColors[i] =
10384 RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10385 outputColors[i] =
10386 RGBA(params.results[i][0] + 128, params.results[i][1] + 128, params.results[i][2] + 128, 255);
10387 }
10388
10389 createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult,
10390 params.failMessageTemplate);
10391 }
10392 return testGroup.release();
10393 }
10394
// Data types that can appear on either side of a conversion test.
enum ConversionDataType
{
    // Signed integers, by bit width.
    DATA_TYPE_SIGNED_8 = 0,
    DATA_TYPE_SIGNED_16,
    DATA_TYPE_SIGNED_32,
    DATA_TYPE_SIGNED_64,

    // Unsigned integers, by bit width.
    DATA_TYPE_UNSIGNED_8,
    DATA_TYPE_UNSIGNED_16,
    DATA_TYPE_UNSIGNED_32,
    DATA_TYPE_UNSIGNED_64,

    // Floating point, by bit width.
    DATA_TYPE_FLOAT_16,
    DATA_TYPE_FLOAT_32,
    DATA_TYPE_FLOAT_64,

    // Two-component signed integer vectors.
    DATA_TYPE_VEC2_SIGNED_16,
    DATA_TYPE_VEC2_SIGNED_32
};
10411
getBitWidthStr(ConversionDataType type)10412 const string getBitWidthStr(ConversionDataType type)
10413 {
10414 switch (type)
10415 {
10416 case DATA_TYPE_SIGNED_8:
10417 case DATA_TYPE_UNSIGNED_8:
10418 return "8";
10419
10420 case DATA_TYPE_SIGNED_16:
10421 case DATA_TYPE_UNSIGNED_16:
10422 case DATA_TYPE_FLOAT_16:
10423 return "16";
10424
10425 case DATA_TYPE_SIGNED_32:
10426 case DATA_TYPE_UNSIGNED_32:
10427 case DATA_TYPE_FLOAT_32:
10428 case DATA_TYPE_VEC2_SIGNED_16:
10429 return "32";
10430
10431 case DATA_TYPE_SIGNED_64:
10432 case DATA_TYPE_UNSIGNED_64:
10433 case DATA_TYPE_FLOAT_64:
10434 case DATA_TYPE_VEC2_SIGNED_32:
10435 return "64";
10436
10437 default:
10438 DE_ASSERT(false);
10439 }
10440 return "";
10441 }
10442
getByteWidthStr(ConversionDataType type)10443 const string getByteWidthStr(ConversionDataType type)
10444 {
10445 switch (type)
10446 {
10447 case DATA_TYPE_SIGNED_8:
10448 case DATA_TYPE_UNSIGNED_8:
10449 return "1";
10450
10451 case DATA_TYPE_SIGNED_16:
10452 case DATA_TYPE_UNSIGNED_16:
10453 case DATA_TYPE_FLOAT_16:
10454 return "2";
10455
10456 case DATA_TYPE_SIGNED_32:
10457 case DATA_TYPE_UNSIGNED_32:
10458 case DATA_TYPE_FLOAT_32:
10459 case DATA_TYPE_VEC2_SIGNED_16:
10460 return "4";
10461
10462 case DATA_TYPE_SIGNED_64:
10463 case DATA_TYPE_UNSIGNED_64:
10464 case DATA_TYPE_FLOAT_64:
10465 case DATA_TYPE_VEC2_SIGNED_32:
10466 return "8";
10467
10468 default:
10469 DE_ASSERT(false);
10470 }
10471 return "";
10472 }
10473
isSigned(ConversionDataType type)10474 bool isSigned(ConversionDataType type)
10475 {
10476 switch (type)
10477 {
10478 case DATA_TYPE_SIGNED_8:
10479 case DATA_TYPE_SIGNED_16:
10480 case DATA_TYPE_SIGNED_32:
10481 case DATA_TYPE_SIGNED_64:
10482 case DATA_TYPE_FLOAT_16:
10483 case DATA_TYPE_FLOAT_32:
10484 case DATA_TYPE_FLOAT_64:
10485 case DATA_TYPE_VEC2_SIGNED_16:
10486 case DATA_TYPE_VEC2_SIGNED_32:
10487 return true;
10488
10489 case DATA_TYPE_UNSIGNED_8:
10490 case DATA_TYPE_UNSIGNED_16:
10491 case DATA_TYPE_UNSIGNED_32:
10492 case DATA_TYPE_UNSIGNED_64:
10493 return false;
10494
10495 default:
10496 DE_ASSERT(false);
10497 }
10498 return false;
10499 }
10500
isInt(ConversionDataType type)10501 bool isInt(ConversionDataType type)
10502 {
10503 switch (type)
10504 {
10505 case DATA_TYPE_SIGNED_8:
10506 case DATA_TYPE_SIGNED_16:
10507 case DATA_TYPE_SIGNED_32:
10508 case DATA_TYPE_SIGNED_64:
10509 case DATA_TYPE_UNSIGNED_8:
10510 case DATA_TYPE_UNSIGNED_16:
10511 case DATA_TYPE_UNSIGNED_32:
10512 case DATA_TYPE_UNSIGNED_64:
10513 return true;
10514
10515 case DATA_TYPE_FLOAT_16:
10516 case DATA_TYPE_FLOAT_32:
10517 case DATA_TYPE_FLOAT_64:
10518 case DATA_TYPE_VEC2_SIGNED_16:
10519 case DATA_TYPE_VEC2_SIGNED_32:
10520 return false;
10521
10522 default:
10523 DE_ASSERT(false);
10524 }
10525 return false;
10526 }
10527
isFloat(ConversionDataType type)10528 bool isFloat(ConversionDataType type)
10529 {
10530 switch (type)
10531 {
10532 case DATA_TYPE_SIGNED_8:
10533 case DATA_TYPE_SIGNED_16:
10534 case DATA_TYPE_SIGNED_32:
10535 case DATA_TYPE_SIGNED_64:
10536 case DATA_TYPE_UNSIGNED_8:
10537 case DATA_TYPE_UNSIGNED_16:
10538 case DATA_TYPE_UNSIGNED_32:
10539 case DATA_TYPE_UNSIGNED_64:
10540 case DATA_TYPE_VEC2_SIGNED_16:
10541 case DATA_TYPE_VEC2_SIGNED_32:
10542 return false;
10543
10544 case DATA_TYPE_FLOAT_16:
10545 case DATA_TYPE_FLOAT_32:
10546 case DATA_TYPE_FLOAT_64:
10547 return true;
10548
10549 default:
10550 DE_ASSERT(false);
10551 }
10552 return false;
10553 }
10554
getTypeName(ConversionDataType type)10555 const string getTypeName(ConversionDataType type)
10556 {
10557 string prefix = isSigned(type) ? "" : "u";
10558
10559 if (isInt(type))
10560 return prefix + "int" + getBitWidthStr(type);
10561 else if (isFloat(type))
10562 return prefix + "float" + getBitWidthStr(type);
10563 else if (type == DATA_TYPE_VEC2_SIGNED_16)
10564 return "i16vec2";
10565 else if (type == DATA_TYPE_VEC2_SIGNED_32)
10566 return "i32vec2";
10567 else
10568 DE_ASSERT(false);
10569
10570 return "";
10571 }
10572
getTestName(ConversionDataType from,ConversionDataType to,const char * suffix)10573 const string getTestName(ConversionDataType from, ConversionDataType to, const char *suffix)
10574 {
10575 const string fullSuffix(suffix == DE_NULL ? "" : string("_") + string(suffix));
10576
10577 return getTypeName(from) + "_to_" + getTypeName(to) + fullSuffix;
10578 }
10579
getAsmTypeName(ConversionDataType type,uint32_t elements=1)10580 const string getAsmTypeName(ConversionDataType type, uint32_t elements = 1)
10581 {
10582 string prefix;
10583
10584 if (isInt(type))
10585 prefix = isSigned(type) ? "i" : "u";
10586 else if (isFloat(type))
10587 prefix = "f";
10588 else if (type == DATA_TYPE_VEC2_SIGNED_16)
10589 return "i16vec2";
10590 else if (type == DATA_TYPE_VEC2_SIGNED_32)
10591 return "v2i32";
10592 else
10593 DE_ASSERT(false);
10594 if ((isInt(type) || isFloat(type)) && elements == 2)
10595 {
10596 prefix = "v2" + prefix;
10597 }
10598
10599 return prefix + getBitWidthStr(type);
10600 }
10601
10602 template <typename T>
getSpecializedBuffer(int64_t number,uint32_t elements=1)10603 BufferSp getSpecializedBuffer(int64_t number, uint32_t elements = 1)
10604 {
10605 return BufferSp(new Buffer<T>(vector<T>(elements, (T)number)));
10606 }
10607
getBuffer(ConversionDataType type,int64_t number,uint32_t elements=1)10608 BufferSp getBuffer(ConversionDataType type, int64_t number, uint32_t elements = 1)
10609 {
10610 switch (type)
10611 {
10612 case DATA_TYPE_SIGNED_8:
10613 return getSpecializedBuffer<int8_t>(number, elements);
10614 case DATA_TYPE_SIGNED_16:
10615 return getSpecializedBuffer<int16_t>(number, elements);
10616 case DATA_TYPE_SIGNED_32:
10617 return getSpecializedBuffer<int32_t>(number, elements);
10618 case DATA_TYPE_SIGNED_64:
10619 return getSpecializedBuffer<int64_t>(number, elements);
10620 case DATA_TYPE_UNSIGNED_8:
10621 return getSpecializedBuffer<uint8_t>(number, elements);
10622 case DATA_TYPE_UNSIGNED_16:
10623 return getSpecializedBuffer<uint16_t>(number, elements);
10624 case DATA_TYPE_UNSIGNED_32:
10625 return getSpecializedBuffer<uint32_t>(number, elements);
10626 case DATA_TYPE_UNSIGNED_64:
10627 return getSpecializedBuffer<uint64_t>(number, elements);
10628 case DATA_TYPE_FLOAT_16:
10629 return getSpecializedBuffer<uint16_t>(number, elements);
10630 case DATA_TYPE_FLOAT_32:
10631 return getSpecializedBuffer<uint32_t>(number, elements);
10632 case DATA_TYPE_FLOAT_64:
10633 return getSpecializedBuffer<uint64_t>(number, elements);
10634 case DATA_TYPE_VEC2_SIGNED_16:
10635 return getSpecializedBuffer<uint32_t>(number, elements);
10636 case DATA_TYPE_VEC2_SIGNED_32:
10637 return getSpecializedBuffer<uint64_t>(number, elements);
10638
10639 default:
10640 TCU_THROW(InternalError, "Unimplemented type passed");
10641 }
10642 }
10643
usesInt8(ConversionDataType from,ConversionDataType to)10644 bool usesInt8(ConversionDataType from, ConversionDataType to)
10645 {
10646 return (from == DATA_TYPE_SIGNED_8 || to == DATA_TYPE_SIGNED_8 || from == DATA_TYPE_UNSIGNED_8 ||
10647 to == DATA_TYPE_UNSIGNED_8);
10648 }
10649
usesInt16(ConversionDataType from,ConversionDataType to)10650 bool usesInt16(ConversionDataType from, ConversionDataType to)
10651 {
10652 return (from == DATA_TYPE_SIGNED_16 || to == DATA_TYPE_SIGNED_16 || from == DATA_TYPE_UNSIGNED_16 ||
10653 to == DATA_TYPE_UNSIGNED_16 || from == DATA_TYPE_VEC2_SIGNED_16 || to == DATA_TYPE_VEC2_SIGNED_16);
10654 }
10655
usesInt32(ConversionDataType from,ConversionDataType to)10656 bool usesInt32(ConversionDataType from, ConversionDataType to)
10657 {
10658 return (from == DATA_TYPE_SIGNED_32 || to == DATA_TYPE_SIGNED_32 || from == DATA_TYPE_UNSIGNED_32 ||
10659 to == DATA_TYPE_UNSIGNED_32 || from == DATA_TYPE_VEC2_SIGNED_32 || to == DATA_TYPE_VEC2_SIGNED_32);
10660 }
10661
usesInt64(ConversionDataType from,ConversionDataType to)10662 bool usesInt64(ConversionDataType from, ConversionDataType to)
10663 {
10664 return (from == DATA_TYPE_SIGNED_64 || to == DATA_TYPE_SIGNED_64 || from == DATA_TYPE_UNSIGNED_64 ||
10665 to == DATA_TYPE_UNSIGNED_64);
10666 }
10667
usesFloat16(ConversionDataType from,ConversionDataType to)10668 bool usesFloat16(ConversionDataType from, ConversionDataType to)
10669 {
10670 return (from == DATA_TYPE_FLOAT_16 || to == DATA_TYPE_FLOAT_16);
10671 }
10672
usesFloat32(ConversionDataType from,ConversionDataType to)10673 bool usesFloat32(ConversionDataType from, ConversionDataType to)
10674 {
10675 return (from == DATA_TYPE_FLOAT_32 || to == DATA_TYPE_FLOAT_32);
10676 }
10677
usesFloat64(ConversionDataType from,ConversionDataType to)10678 bool usesFloat64(ConversionDataType from, ConversionDataType to)
10679 {
10680 return (from == DATA_TYPE_FLOAT_64 || to == DATA_TYPE_FLOAT_64);
10681 }
10682
getVulkanFeaturesAndExtensions(ConversionDataType from,ConversionDataType to,bool useStorageExt,VulkanFeatures & vulkanFeatures,vector<string> & extensions)10683 void getVulkanFeaturesAndExtensions(ConversionDataType from, ConversionDataType to, bool useStorageExt,
10684 VulkanFeatures &vulkanFeatures, vector<string> &extensions)
10685 {
10686 if (usesInt16(from, to) && !usesInt32(from, to))
10687 vulkanFeatures.coreFeatures.shaderInt16 = true;
10688
10689 if (usesInt64(from, to))
10690 vulkanFeatures.coreFeatures.shaderInt64 = true;
10691
10692 if (usesFloat64(from, to))
10693 vulkanFeatures.coreFeatures.shaderFloat64 = true;
10694
10695 if ((usesInt16(from, to) || usesFloat16(from, to)) && useStorageExt)
10696 {
10697 extensions.push_back("VK_KHR_16bit_storage");
10698 vulkanFeatures.ext16BitStorage.storageBuffer16BitAccess = true;
10699 }
10700
10701 if (usesFloat16(from, to) || usesInt8(from, to))
10702 {
10703 extensions.push_back("VK_KHR_shader_float16_int8");
10704
10705 if (usesFloat16(from, to))
10706 {
10707 vulkanFeatures.extFloat16Int8.shaderFloat16 = true;
10708 }
10709
10710 if (usesInt8(from, to))
10711 {
10712 vulkanFeatures.extFloat16Int8.shaderInt8 = true;
10713
10714 extensions.push_back("VK_KHR_8bit_storage");
10715 vulkanFeatures.ext8BitStorage.storageBuffer8BitAccess = true;
10716 }
10717 }
10718 }
10719
// Describes a single conversion test: the source and destination types, the
// test name, the input/output buffers and the SPIR-V template parameters
// (type names, capabilities, extensions, extra type declarations) needed to
// build the shader for it.
struct ConvertCase
{
    // \param instruction     SPIR-V conversion opcode name. Here it is only compared against
    //                        "OpUConvert", "OpSConvert" and "OpFConvert" to decide whether the
    //                        Int8 / Int16 capabilities are required.
    // \param from            Source data type.
    // \param to              Destination data type.
    // \param number          Value placed in the input buffer, and in the output buffer too
    //                        unless |separateOutput| is set.
    // \param separateOutput  When true the output buffer is filled with |outputNumber| instead.
    // \param outputNumber    Output buffer value used when |separateOutput| is true
    //                        (presumably the expected result; the consumer is outside this block).
    // \param suffix          Optional suffix appended to the test name; may be DE_NULL.
    // \param useStorageExt   When false, 16-bit values are stored as u32 and bitcast to/from
    //                        two-component vectors instead of using 16-bit storage types.
    ConvertCase(const string &instruction, ConversionDataType from, ConversionDataType to, int64_t number,
                bool separateOutput = false, int64_t outputNumber = 0, const char *suffix = DE_NULL,
                bool useStorageExt = true)
        : m_fromType(from)
        , m_toType(to)
        , m_elements(1)
        , m_useStorageExt(useStorageExt)
        , m_name(getTestName(from, to, suffix))
    {
        // Accumulated OpCapability lines, extra type declarations and OpExtension lines.
        string caps;
        string decl;
        string exts;

        // Defaults: buffers hold the converted types directly, so the "casts" are plain copies.
        m_asmTypes["inStorageType"]  = getAsmTypeName(from);
        m_asmTypes["outStorageType"] = getAsmTypeName(to);
        m_asmTypes["inCast"]         = "OpCopyObject";
        m_asmTypes["outCast"]        = "OpCopyObject";
        // If the storage extensions are being avoided, tests instead uses
        // vectors so that they are easily convertible to 32-bit integers.
        // |m_elements| indicates the size of the vector. It modifies how many
        // items added to the buffers and converted in the tests.
        //
        // Currently only supports 1 (default) or 2 elements.
        if (!m_useStorageExt)
        {
            bool in_change  = false;
            bool out_change = false;
            // A 16-bit source is stored as one u32 and bitcast to a two-component vector.
            if (usesFloat16(from, from) || usesInt16(from, from))
            {
                m_asmTypes["inStorageType"] = "u32";
                m_asmTypes["inCast"]        = "OpBitcast";
                m_elements                  = 2;
                in_change                   = true;
            }
            // Likewise for a 16-bit destination.
            if (usesFloat16(to, to) || usesInt16(to, to))
            {
                m_asmTypes["outStorageType"] = "u32";
                m_asmTypes["outCast"]        = "OpBitcast";
                m_elements                   = 2;
                out_change                   = true;
            }
            // When only one side was switched to u32 storage, the other side is stored
            // as a vector with the same element count.
            if (in_change && !out_change)
            {
                m_asmTypes["outStorageType"] = getAsmTypeName(to, m_elements);
            }
            if (!in_change && out_change)
            {
                m_asmTypes["inStorageType"] = getAsmTypeName(from, m_elements);
            }
        }

        // Safety check for implementation.
        if (m_elements < 1 || m_elements > 2)
            TCU_THROW(InternalError, "Unsupported number of elements");

        // Types the conversion instruction itself operates on.
        m_asmTypes["inputType"]  = getAsmTypeName(from, m_elements);
        m_asmTypes["outputType"] = getAsmTypeName(to, m_elements);

        m_inputBuffer = getBuffer(from, number, m_elements);
        if (separateOutput)
            m_outputBuffer = getBuffer(to, outputNumber, m_elements);
        else
            m_outputBuffer = getBuffer(to, number, m_elements);

        if (usesInt8(from, to))
        {
            bool requiresInt8Capability = true;
            if (instruction == "OpUConvert" || instruction == "OpSConvert")
            {
                // Conversions between 8 and 32 bit are provided by SPV_KHR_8bit_storage. The rest requires explicit Int8
                if (usesInt32(from, to))
                    requiresInt8Capability = false;
            }

            caps += "OpCapability StorageBuffer8BitAccess\n";
            if (requiresInt8Capability)
                caps += "OpCapability Int8\n";

            decl += "%i8 = OpTypeInt 8 1\n"
                    "%u8 = OpTypeInt 8 0\n";

            if (m_elements == 2)
            {
                decl += "%v2i8 = OpTypeVector %i8 2\n"
                        "%v2u8 = OpTypeVector %u8 2\n";
            }
            exts += "OpExtension \"SPV_KHR_8bit_storage\"\n";
        }

        if (usesInt16(from, to))
        {
            bool requiresInt16Capability = true;

            if (instruction == "OpUConvert" || instruction == "OpSConvert" || instruction == "OpFConvert")
            {
                // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
                if (usesInt32(from, to) || usesFloat32(from, to))
                    requiresInt16Capability = false;
            }

            decl += "%i16 = OpTypeInt 16 1\n"
                    "%u16 = OpTypeInt 16 0\n";
            if (m_elements == 2)
            {
                decl += "%v2i16 = OpTypeVector %i16 2\n"
                        "%v2u16 = OpTypeVector %u16 2\n";
            }
            else
            {
                // Id returned by getAsmTypeName() for DATA_TYPE_VEC2_SIGNED_16.
                decl += "%i16vec2 = OpTypeVector %i16 2\n";
            }

            // Conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
            if (requiresInt16Capability || !m_useStorageExt)
                caps += "OpCapability Int16\n";
        }

        if (usesFloat16(from, to))
        {
            decl += "%f16 = OpTypeFloat 16\n";
            if (m_elements == 2)
            {
                decl += "%v2f16 = OpTypeVector %f16 2\n";
            }

            // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Float16
            if (!usesFloat32(from, to) || !m_useStorageExt)
                caps += "OpCapability Float16\n";
        }

        // 16-bit storage capability and extension are only declared when the storage path is in use.
        if ((usesInt16(from, to) || usesFloat16(from, to)) && m_useStorageExt)
        {
            caps += "OpCapability StorageUniformBufferBlock16\n";
            exts += "OpExtension \"SPV_KHR_16bit_storage\"\n";
        }

        if (usesInt64(from, to))
        {
            caps += "OpCapability Int64\n";
            decl += "%i64 = OpTypeInt 64 1\n"
                    "%u64 = OpTypeInt 64 0\n";
            if (m_elements == 2)
            {
                decl += "%v2i64 = OpTypeVector %i64 2\n"
                        "%v2u64 = OpTypeVector %u64 2\n";
            }
        }

        if (usesFloat64(from, to))
        {
            caps += "OpCapability Float64\n";
            decl += "%f64 = OpTypeFloat 64\n";
            if (m_elements == 2)
            {
                decl += "%v2f64 = OpTypeVector %f64 2\n";
            }
        }

        // Publish the accumulated text as template parameters.
        m_asmTypes["datatype_capabilities"]    = caps;
        m_asmTypes["datatype_additional_decl"] = decl;
        m_asmTypes["datatype_extensions"]      = exts;
    }

    ConversionDataType m_fromType; // source type of the conversion
    ConversionDataType m_toType;   // destination type of the conversion
    uint32_t m_elements;           // values per buffer / vector components converted: 1 or 2
    bool m_useStorageExt;          // whether 16-bit storage types are used directly
    string m_name;                 // test name built by getTestName()
    map<string, string> m_asmTypes; // shader template parameters computed by the constructor
    BufferSp m_inputBuffer;        // input data for the shader
    BufferSp m_outputBuffer;       // output-side data (filled with |number| or |outputNumber|)
};
10894
// Builds the compute shader assembly for one conversion case by specializing
// a fixed template with the case's parameters.
//
// \param instruction  SPIR-V conversion opcode substituted for ${instruction}.
// \param convertCase  Case supplying the type names, capabilities, extensions and declarations.
// \param addVectors   When true, %v2u32 and %v2f32 are declared in addition to the base types.
// \return Specialized shader text.
const string getConvertCaseShaderStr(const string &instruction, const ConvertCase &convertCase, bool addVectors = false)
{
    // Capabilities, entry point, decorations and base types.
    const char *const preamble = "OpCapability Shader\n"
                                 "${datatype_capabilities}"
                                 "${datatype_extensions:opt}"
                                 "OpMemoryModel Logical GLSL450\n"
                                 "OpEntryPoint GLCompute %main \"main\"\n"
                                 "OpExecutionMode %main LocalSize 1 1 1\n"
                                 "OpSource GLSL 430\n"
                                 "OpName %main \"main\"\n"
                                 // Decorators
                                 "OpDecorate %indata DescriptorSet 0\n"
                                 "OpDecorate %indata Binding 0\n"
                                 "OpDecorate %outdata DescriptorSet 0\n"
                                 "OpDecorate %outdata Binding 1\n"
                                 "OpDecorate %in_buf BufferBlock\n"
                                 "OpDecorate %out_buf BufferBlock\n"
                                 "OpMemberDecorate %in_buf 0 Offset 0\n"
                                 "OpMemberDecorate %out_buf 0 Offset 0\n"
                                 // Base types
                                 "%void = OpTypeVoid\n"
                                 "%voidf = OpTypeFunction %void\n"
                                 "%u32 = OpTypeInt 32 0\n"
                                 "%i32 = OpTypeInt 32 1\n"
                                 "%f32 = OpTypeFloat 32\n"
                                 "%v2i32 = OpTypeVector %i32 2\n"
                                 "${datatype_additional_decl}";

    // Optional two-component 32-bit vector types.
    const char *const extraVectorTypes = "%v2u32 = OpTypeVector %u32 2\n"
                                         "%v2f32 = OpTypeVector %f32 2\n";

    // Buffer types, variables and the main function: load, cast in, convert, cast out, store.
    const char *const body = "%uvec3 = OpTypeVector %u32 3\n"
                             // Derived types
                             "%in_ptr = OpTypePointer Uniform %${inStorageType}\n"
                             "%out_ptr = OpTypePointer Uniform %${outStorageType}\n"
                             "%in_buf = OpTypeStruct %${inStorageType}\n"
                             "%out_buf = OpTypeStruct %${outStorageType}\n"
                             "%in_bufptr = OpTypePointer Uniform %in_buf\n"
                             "%out_bufptr = OpTypePointer Uniform %out_buf\n"
                             "%indata = OpVariable %in_bufptr Uniform\n"
                             "%outdata = OpVariable %out_bufptr Uniform\n"
                             // Constants
                             "%zero = OpConstant %i32 0\n"
                             // Main function
                             "%main = OpFunction %void None %voidf\n"
                             "%label = OpLabel\n"
                             "%inloc = OpAccessChain %in_ptr %indata %zero\n"
                             "%outloc = OpAccessChain %out_ptr %outdata %zero\n"
                             "%inval = OpLoad %${inStorageType} %inloc\n"
                             "%in_cast = ${inCast} %${inputType} %inval\n"
                             "%conv = ${instruction} %${outputType} %in_cast\n"
                             "%out_cast = ${outCast} %${outStorageType} %conv\n"
                             " OpStore %outloc %out_cast\n"
                             " OpReturn\n"
                             " OpFunctionEnd\n";

    std::string assembly(preamble);
    if (addVectors)
        assembly += extraVectorTypes;
    assembly += body;

    // Start from the case's own parameters and add the per-call ones.
    map<string, string> substitutions(convertCase.m_asmTypes);
    substitutions["instruction"]  = instruction;
    substitutions["inDecorator"]  = getByteWidthStr(convertCase.m_fromType);
    substitutions["outDecorator"] = getByteWidthStr(convertCase.m_toType);

    return StringTemplate(assembly).specialize(substitutions);
}
10960
createConvertCases(vector<ConvertCase> & testCases,const string & instruction)10961 void createConvertCases(vector<ConvertCase> &testCases, const string &instruction)
10962 {
10963 if (instruction == "OpUConvert")
10964 {
10965 // Convert unsigned int to unsigned int
10966 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_UNSIGNED_16, 42));
10967 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_UNSIGNED_32, 73));
10968 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_UNSIGNED_64, 121));
10969
10970 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_UNSIGNED_8, 33));
10971 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_UNSIGNED_32, 60653));
10972 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_UNSIGNED_64, 17991));
10973
10974 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_UNSIGNED_64, 904256275));
10975 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_UNSIGNED_16, 6275));
10976 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_UNSIGNED_8, 17));
10977
10978 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_UNSIGNED_32, 701256243));
10979 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_UNSIGNED_16, 4741));
10980 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_UNSIGNED_8, 65));
10981
10982 // Zero extension for int->uint
10983 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_16, 56));
10984 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_32, -47, true, 209));
10985 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_64, -5, true, 251));
10986 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_32, 14669));
10987 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_64, -3341, true, 62195));
10988 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_64, 973610259));
10989
10990 // Truncate for int->uint
10991 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_8, -25711, true, 145));
10992 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_8, 103));
10993 testCases.push_back(
10994 ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_8, -1067742499291926803ll, true, 237));
10995 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_16, 12382));
10996 testCases.push_back(
10997 ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_32, -972812359, true, 3322154937u));
10998 testCases.push_back(
10999 ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_16, -1067742499291926803ll, true, 61165));
11000 }
11001 else if (instruction == "OpSConvert")
11002 {
11003 // Sign extension int->int
11004 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_SIGNED_16, -30));
11005 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_SIGNED_32, 55));
11006 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_SIGNED_64, -3));
11007 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_SIGNED_32, 14669));
11008 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_SIGNED_64, -3341));
11009 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_SIGNED_64, 973610259));
11010
11011 // Truncate for int->int
11012 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_SIGNED_8, 81));
11013 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_SIGNED_8, -93));
11014 testCases.push_back(
11015 ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_SIGNED_8, 3182748172687672ll, true, 56));
11016 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_SIGNED_16, 12382));
11017 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_SIGNED_32, -972812359));
11018 testCases.push_back(
11019 ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_SIGNED_16, -1067742499291926803ll, true, -4371));
11020
11021 // Sign extension for int->uint
11022 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_16, 56));
11023 testCases.push_back(
11024 ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_32, -47, true, 4294967249u));
11025 testCases.push_back(
11026 ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_64, -5, true, 18446744073709551611ull));
11027 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_32, 14669));
11028 testCases.push_back(
11029 ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_64, -3341, true, 18446744073709548275ull));
11030 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_64, 973610259));
11031
11032 // Truncate for int->uint
11033 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_8, -25711, true, 145));
11034 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_8, 103));
11035 testCases.push_back(
11036 ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_8, -1067742499291926803ll, true, 237));
11037 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_16, 12382));
11038 testCases.push_back(
11039 ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_32, -972812359, true, 3322154937u));
11040 testCases.push_back(
11041 ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_16, -1067742499291926803ll, true, 61165));
11042
11043 // Sign extension for uint->int
11044 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_SIGNED_16, 71));
11045 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_SIGNED_32, 201, true, -55));
11046 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_SIGNED_64, 188, true, -68));
11047 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_SIGNED_32, 14669));
11048 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_SIGNED_64, 62195, true, -3341));
11049 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_SIGNED_64, 973610259));
11050
11051 // Truncate for uint->int
11052 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_SIGNED_8, 67));
11053 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_SIGNED_8, 133, true, -123));
11054 testCases.push_back(
11055 ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_SIGNED_8, 836927654193256494ull, true, 46));
11056 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_SIGNED_16, 12382));
11057 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_SIGNED_32,
11058 18446744072736739257ull, true, -972812359));
11059 testCases.push_back(
11060 ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_SIGNED_16, 17379001574417624813ull, true, -4371));
11061
11062 // Convert i16vec2 to i32vec2 and vice versa
11063 // Unsigned values are used here to represent negative signed values and to allow defined shifting behaviour.
11064 // The actual signed value -32123 is used here as uint16 value 33413 and uint32 value 4294935173
11065 testCases.push_back(ConvertCase(instruction, DATA_TYPE_VEC2_SIGNED_16, DATA_TYPE_VEC2_SIGNED_32,
11066 (33413u << 16) | 27593, true, (4294935173ull << 32) | 27593));
11067 testCases.push_back(ConvertCase(instruction, DATA_TYPE_VEC2_SIGNED_32, DATA_TYPE_VEC2_SIGNED_16,
11068 (4294935173ull << 32) | 27593, true, (33413u << 16) | 27593));
11069 }
11070 else if (instruction == "OpFConvert")
11071 {
11072 // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
11073 testCases.push_back(
11074 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_FLOAT_64, 0x449a4000, true, 0x4093480000000000));
11075 testCases.push_back(
11076 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_FLOAT_32, 0x4093480000000000, true, 0x449a4000));
11077
11078 // Conversion to/from 32-bit floats are supported by both 16-bit
11079 // storage and Float16. The tests are duplicated to exercise both
11080 // cases.
11081 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_FLOAT_16, 0x449a4000, true, 0x64D2));
11082 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_32, 0x64D2, true, 0x449a4000));
11083 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_FLOAT_16, 0x449a4000, true, 0x64D2,
11084 "no_storage", false));
11085 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_32, 0x64D2, true, 0x449a4000,
11086 "no_storage", false));
11087
11088 testCases.push_back(
11089 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_64, 0x64D2, true, 0x4093480000000000));
11090 testCases.push_back(
11091 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_FLOAT_16, 0x4093480000000000, true, 0x64D2));
11092 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_64, 0x64D2, true,
11093 0x4093480000000000, "no_storage", false));
11094 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_FLOAT_16, 0x4093480000000000, true,
11095 0x64D2, "no_storage", false));
11096 }
11097 else if (instruction == "OpConvertFToU")
11098 {
11099 // Normal numbers from uint8 range
11100 testCases.push_back(
11101 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x5020, true, 33, "33", false));
11102 testCases.push_back(
11103 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x503F, true, 33, "33rtz", false));
11104 testCases.push_back(
11105 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x42280000, true, 42, "42"));
11106 testCases.push_back(
11107 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x422BFFFF, true, 42, "42rtz"));
11108 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x4067800000000000ull,
11109 true, 188, "188"));
11110 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x40679FFFFFFFFFFFull,
11111 true, 188, "188rtz"));
11112
11113 // Maximum uint8 value
11114 testCases.push_back(
11115 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x5BF8, true, 255, "max", false));
11116 testCases.push_back(
11117 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x5BFF, true, 255, "maxrtz", false));
11118 testCases.push_back(
11119 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x437F0000, true, 255, "max"));
11120 testCases.push_back(
11121 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x437FFFFF, true, 255, "maxrtz"));
11122 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x406FE00000000000ull,
11123 true, 255, "max"));
11124 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x406FFFFFFFFFFFFFull,
11125 true, 255, "maxrtz"));
11126
11127 // +0
11128 testCases.push_back(
11129 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x0000, true, 0, "p0", false));
11130 testCases.push_back(
11131 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x00000000, true, 0, "p0"));
11132 testCases.push_back(
11133 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x0000000000000000ull, true, 0, "p0"));
11134
11135 // -0
11136 testCases.push_back(
11137 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x8000, true, 0, "m0", false));
11138 testCases.push_back(
11139 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x80000000, true, 0, "m0"));
11140 testCases.push_back(
11141 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x8000000000000000ull, true, 0, "m0"));
11142
11143 // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
11144 testCases.push_back(
11145 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x64D2, true, 1234, "1234", false));
11146 testCases.push_back(
11147 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x64D2, true, 1234, "1234", false));
11148 testCases.push_back(
11149 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x64D2, true, 1234, "1234", false));
11150
11151 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
11152 testCases.push_back(
11153 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x7BFF, true, 65504, "max", false));
11154 testCases.push_back(
11155 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x7BFF, true, 65504, "max", false));
11156 testCases.push_back(
11157 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x7BFF, true, 65504, "max", false));
11158
11159 // Show round to zero behaviour
11160 // Example: see https://float.exposed/0x58ff
11161 testCases.push_back(
11162 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x44FF, true, 4, "p4rtz", false));
11163 testCases.push_back(
11164 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x58FF, true, 159, "p159rtz", false));
11165 testCases.push_back(
11166 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x58FF, true, 159, "p159rtz", false));
11167
11168 // +0
11169 testCases.push_back(
11170 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x0000, true, 0, "p0", false));
11171 testCases.push_back(
11172 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x0000, true, 0, "p0", false));
11173 testCases.push_back(
11174 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x0000, true, 0, "p0", false));
11175
11176 // -0
11177 testCases.push_back(
11178 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x8000, true, 0, "m0", false));
11179 testCases.push_back(
11180 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x8000, true, 0, "m0", false));
11181 testCases.push_back(
11182 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x8000, true, 0, "m0", false));
11183
11184 testCases.push_back(
11185 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_16, 0x449a4000, true, 1234));
11186 testCases.push_back(
11187 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_16, 0x449a5fff, true, 1234, "rtz"));
11188 testCases.push_back(
11189 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_32, 0x449a4000, true, 1234));
11190 testCases.push_back(
11191 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_32, 0x449a5fff, true, 1234, "rtz"));
11192 testCases.push_back(
11193 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_64, 0x449a4000, true, 1234));
11194 testCases.push_back(
11195 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_64, 0x449a5fff, true, 1234, "rtz"));
11196 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_64, 0x51b9ad78, true,
11197 99684909056ll, "large"));
11198 testCases.push_back(
11199 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_16, 0x4093480000000000, true, 1234));
11200 testCases.push_back(
11201 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_16, 0x40934bffffffffff, true, 1234, "rtz"));
11202 testCases.push_back(
11203 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_32, 0x4093480000000000, true, 1234));
11204 testCases.push_back(
11205 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_32, 0x40934bffffffffff, true, 1234, "rtz"));
11206 testCases.push_back(
11207 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_64, 0x4093480000000000, true, 1234));
11208 testCases.push_back(
11209 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_64, 0x40934bffffffffff, true, 1234, "rtz"));
11210 }
11211 else if (instruction == "OpConvertUToF")
11212 {
11213 // Normal numbers from uint8 range
11214 testCases.push_back(
11215 ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_16, 116, true, 0x5740, "116", false));
11216 testCases.push_back(
11217 ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_32, 232, true, 0x43680000, "232"));
11218 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_64, 164, true,
11219 0x4064800000000000ull, "164"));
11220
11221 // Maximum uint8 value
11222 testCases.push_back(
11223 ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_16, 255, true, 0x5BF8, "max", false));
11224 testCases.push_back(
11225 ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_32, 255, true, 0x437F0000, "max"));
11226 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_64, 255, true,
11227 0x406FE00000000000ull, "max"));
11228
11229 // All hexadecimal values below represent 1234.0 as 32/64-bit IEEE 754 float
11230 testCases.push_back(
11231 ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_16, 1234, true, 0x64D2, "1234", false));
11232 testCases.push_back(
11233 ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_16, 1234, true, 0x64D2, "1234", false));
11234 testCases.push_back(
11235 ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_16, 1234, true, 0x64D2, "1234", false));
11236
11237 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
11238 testCases.push_back(
11239 ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
11240 testCases.push_back(
11241 ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
11242 testCases.push_back(
11243 ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
11244
11245 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 4294967296ll, true,
11246 0x4f800000, "4294967296", false));
11247 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_64, 4294967296ll, true,
11248 0x41f0000000000000, "4294967296", false));
11249
11250 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 0xffffff0000000000,
11251 true, 0x5f7fffff, "max", false));
11252
11253 testCases.push_back(
11254 ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_32, 1234, true, 0x449a4000));
11255 testCases.push_back(
11256 ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_64, 1234, true, 0x4093480000000000));
11257 testCases.push_back(
11258 ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_32, 1234, true, 0x449a4000));
11259 testCases.push_back(
11260 ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_64, 1234, true, 0x4093480000000000));
11261 testCases.push_back(
11262 ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 1234, true, 0x449a4000));
11263 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 99684909056ll, true,
11264 0x51b9ad78, "large"));
11265 testCases.push_back(
11266 ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_64, 1234, true, 0x4093480000000000));
11267 }
11268 else if (instruction == "OpConvertFToS")
11269 {
11270 // Normal numbers from int8 range
11271 testCases.push_back(
11272 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xC980, true, -11, "m11", false));
11273 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xC9e5,
11274 /*-11.7890625*/ true, -11, "m11rtz", false));
11275 testCases.push_back(
11276 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC2140000, true, -37, "m37"));
11277 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC2178000,
11278 /*-37.875*/ true, -37, "m37rtz"));
11279 testCases.push_back(
11280 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC050800000000000ull, true, -66, "m66"));
11281 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC050B80000000000ull,
11282 /*-66.875*/ true, -66, "m66rtz"));
11283
11284 // Minimum int8 value
11285 testCases.push_back(
11286 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xD800, true, -128, "min", false));
11287 testCases.push_back(
11288 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xD807, true, -128, "minrtz", false));
11289 testCases.push_back(
11290 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC3000000, true, -128, "min"));
11291 testCases.push_back(
11292 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC300e003, true, -128, "minrtz"));
11293 testCases.push_back(
11294 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC060000000000000ull, true, -128, "min"));
11295 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC0601E4FE0000001ull,
11296 true, -128, "minrtz"));
11297
11298 // Maximum int8 value
11299 testCases.push_back(
11300 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x57F0, true, 127, "max", false));
11301 testCases.push_back(
11302 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x57FF, true, 127, "maxrtz", false));
11303 testCases.push_back(
11304 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x42FE0000, true, 127, "max"));
11305 testCases.push_back(
11306 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x42FFFFFF, true, 127, "maxrtz"));
11307 testCases.push_back(
11308 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x405FC00000000000ull, true, 127, "max"));
11309 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x405FFFFFFFFFFFFFull,
11310 true, 127, "maxrtz"));
11311
11312 // +0
11313 testCases.push_back(
11314 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x0000, true, 0, "p0", false));
11315 testCases.push_back(
11316 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x00000000, true, 0, "p0"));
11317 testCases.push_back(
11318 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x0000000000000000ull, true, 0, "p0"));
11319
11320 // -0
11321 testCases.push_back(
11322 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x8000, true, 0, "m0", false));
11323 testCases.push_back(
11324 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x80000000, true, 0, "m0"));
11325 testCases.push_back(
11326 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x8000000000000000ull, true, 0, "m0"));
11327
11328 // All hexadecimal values below represent -1234.0 as 32/64-bit IEEE 754 float
11329 testCases.push_back(
11330 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0xE4D2, true, -1234, "m1234", false));
11331 testCases.push_back(
11332 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0xE4D2, true, -1234, "m1234", false));
11333 testCases.push_back(
11334 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0xE4D2, true, -1234, "m1234", false));
11335
11336 // 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
11337 // 0xFBFF = 1111 1011 1111 1111 = 1 11110 1111111111 = -65504
11338 testCases.push_back(
11339 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0xF800, true, -32768, "min", false));
11340 testCases.push_back(
11341 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0xFBFF, true, -65504, "min", false));
11342 testCases.push_back(
11343 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0xFBFF, true, -65504, "min", false));
11344
11345 // 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
11346 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
11347 testCases.push_back(
11348 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x77FF, true, 32752, "max", false));
11349 testCases.push_back(
11350 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x7BFF, true, 65504, "max", false));
11351 testCases.push_back(
11352 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x7BFF, true, 65504, "max", false));
11353
11354 // Show round to zero behaviour, from negative side.
11355 // Example: see https://float.exposed/0xd8ff
11356 testCases.push_back(
11357 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0xC4FF, true, -4, "m4rtz", false));
11358 testCases.push_back(
11359 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0xD8FF, true, -159, "m159rtz", false));
11360 testCases.push_back(
11361 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0xD8FF, true, -159, "m159rtz", false));
11362
11363 // Show round to zero behaviour, from positive side.
11364 // Example: see https://float.exposed/0x58ff
11365 testCases.push_back(
11366 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x44FF, true, 4, "p4rtz", false));
11367 testCases.push_back(
11368 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x58FF, true, 159, "p159rtz", false));
11369 testCases.push_back(
11370 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x58FF, true, 159, "p159rtz", false));
11371
11372 // +0
11373 testCases.push_back(
11374 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x0000, true, 0, "p0", false));
11375 testCases.push_back(
11376 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x0000, true, 0, "p0", false));
11377 testCases.push_back(
11378 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x0000, true, 0, "p0", false));
11379
11380 // -0
11381 testCases.push_back(
11382 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x8000, true, 0, "m0", false));
11383 testCases.push_back(
11384 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x8000, true, 0, "m0", false));
11385 testCases.push_back(
11386 ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x8000, true, 0, "m0", false));
11387
11388 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc49a4000, true, -1234));
11389 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_32, 0xc49a4000, true, -1234));
11390 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_64, 0xc49a4000, true, -1234));
11391 testCases.push_back(
11392 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc49a5f00, true, -1234, "rtz"));
11393 testCases.push_back(
11394 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_32, 0xc49a5f00, true, -1234, "rtz"));
11395 testCases.push_back(
11396 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_64, 0xc49a5f00, true, -1234, "rtz"));
11397 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_64, 0xd1b9ad78, true,
11398 -99684909056ll, "largepos"));
11399 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_64, 0x51b9ad78, true,
11400 99684909056ll, "largeneg"));
11401 testCases.push_back(
11402 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_16, 0xc093480000000000, true, -1234));
11403 testCases.push_back(
11404 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_32, 0xc093480000000000, true, -1234));
11405 testCases.push_back(
11406 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_64, 0xc093480000000000, true, -1234));
11407 testCases.push_back(
11408 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_16, 0xc0934bff000000ff, true, -1234, "rtz"));
11409 testCases.push_back(
11410 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_32, 0xc0934bff000000ff, true, -1234, "rtz"));
11411 testCases.push_back(
11412 ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_64, 0xc0934bff000000ff, true, -1234, "rtz"));
11413 testCases.push_back(
11414 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0x453b9000, true, 3001, "p3001"));
11415 testCases.push_back(
11416 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0x453b9fff, true, 3001, "p3001rtz"));
11417 testCases.push_back(
11418 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc53b9000, true, -3001, "m3001"));
11419 testCases.push_back(
11420 ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc53b9fff, true, -3001, "m3001rtz"));
11421 }
11422 else if (instruction == "OpConvertSToF")
11423 {
11424 // Normal numbers from int8 range
11425 testCases.push_back(
11426 ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_16, -12, true, 0xCA00, "m21", false));
11427 testCases.push_back(
11428 ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_32, -21, true, 0xC1A80000, "m21"));
11429 testCases.push_back(
11430 ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_64, -99, true, 0xC058C00000000000ull, "m99"));
11431
11432 // Minimum int8 value
11433 testCases.push_back(
11434 ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_16, -128, true, 0xD800, "min", false));
11435 testCases.push_back(
11436 ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_32, -128, true, 0xC3000000, "min"));
11437 testCases.push_back(
11438 ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_64, -128, true, 0xC060000000000000ull, "min"));
11439
11440 // Maximum int8 value
11441 testCases.push_back(
11442 ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_16, 127, true, 0x57F0, "max", false));
11443 testCases.push_back(
11444 ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_32, 127, true, 0x42FE0000, "max"));
11445 testCases.push_back(
11446 ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_64, 127, true, 0x405FC00000000000ull, "max"));
11447
11448 // All hexadecimal values below represent 1234.0 as 32/64-bit IEEE 754 float
11449 testCases.push_back(
11450 ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_16, -1234, true, 0xE4D2, "m1234", false));
11451 testCases.push_back(
11452 ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, -1234, true, 0xE4D2, "m1234", false));
11453 testCases.push_back(
11454 ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, -1234, true, 0xE4D2, "m1234", false));
11455
11456 // 0x7800 = 0111 1000 0000 0000 = 0 11110 0000000000 = 32768
11457 testCases.push_back(
11458 ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, 32768, true, 0x7800, "p32768", false));
11459 testCases.push_back(
11460 ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, 32768, true, 0x7800, "p32768", false));
11461
11462 // 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
11463 testCases.push_back(
11464 ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, -32768, true, 0xF800, "m32768", false));
11465 testCases.push_back(
11466 ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, -32768, true, 0xF800, "m32768", false));
11467
11468 // 0xFBFF = 1111 1000 0000 0000 = 1 11110 1111111111 = -65504
11469 testCases.push_back(
11470 ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_16, -32768, true, 0xF800, "min", false));
11471 testCases.push_back(
11472 ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, -65504, true, 0xFBFF, "min", false));
11473 testCases.push_back(
11474 ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, -65504, true, 0xFBFF, "min", false));
11475
11476 // 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
11477 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
11478 testCases.push_back(
11479 ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_16, 32752, true, 0x77FF, "max", false));
11480 testCases.push_back(
11481 ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
11482 testCases.push_back(
11483 ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
11484
11485 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, 4294967296ll, true,
11486 0x4f800000, "p4294967296", false));
11487 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_64, 4294967296ll, true,
11488 0x41f0000000000000, "p4294967296", false));
11489 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -4294967296ll, true,
11490 0xcf800000, "m4294967296", false));
11491 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_64, -4294967296ll, true,
11492 0xc1f0000000000000, "m4294967296", false));
11493
11494 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, 0x7fffff8000000000, true,
11495 0x5effffff, "max", false));
11496 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -0x7fffff8000000000, true,
11497 0xdeffffff, "min", false));
11498
11499 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_32, -1234, true, 0xc49a4000));
11500 testCases.push_back(
11501 ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_64, -1234, true, 0xc093480000000000));
11502 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_32, -1234, true, 0xc49a4000));
11503 testCases.push_back(
11504 ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_64, -1234, true, 0xc093480000000000));
11505 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -1234, true, 0xc49a4000));
11506 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -99684909056ll, true,
11507 0xd1b9ad78, "large"));
11508 testCases.push_back(
11509 ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_64, -1234, true, 0xc093480000000000));
11510 }
11511 else
11512 DE_FATAL("Unknown instruction");
11513 }
11514
getConvertCaseFragments(string instruction,const ConvertCase & convertCase)11515 const map<string, string> getConvertCaseFragments(string instruction, const ConvertCase &convertCase)
11516 {
11517 map<string, string> params = convertCase.m_asmTypes;
11518 map<string, string> fragments;
11519
11520 params["instruction"] = instruction;
11521 params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
11522
11523 const StringTemplate decoration(" OpDecorate %SSBOi DescriptorSet 0\n"
11524 " OpDecorate %SSBOo DescriptorSet 0\n"
11525 " OpDecorate %SSBOi Binding 0\n"
11526 " OpDecorate %SSBOo Binding 1\n"
11527 " OpDecorate %s_SSBOi Block\n"
11528 " OpDecorate %s_SSBOo Block\n"
11529 "OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11530 "OpMemberDecorate %s_SSBOo 0 Offset 0\n");
11531
11532 const StringTemplate pre_main("${datatype_additional_decl:opt}"
11533 " %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11534 " %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11535 " %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11536 " %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11537 " %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11538 " %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11539 " %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11540 " %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
11541
11542 const StringTemplate testfun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11543 "%param = OpFunctionParameter %v4f32\n"
11544 "%label = OpLabel\n"
11545 "%iLoc = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11546 "%oLoc = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11547 "%valIn = OpLoad %${inStorageType} %iLoc\n"
11548 "%valInCast = ${inCast} %${inputType} %valIn\n"
11549 "%conv = ${instruction} %${outputType} %valInCast\n"
11550 "%valOutCast = ${outCast} %${outStorageType} %conv\n"
11551 " OpStore %oLoc %valOutCast\n"
11552 " OpReturnValue %param\n"
11553 " OpFunctionEnd\n");
11554
11555 params["datatype_extensions"] =
11556 params["datatype_extensions"] + "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
11557
11558 fragments["capability"] = params["datatype_capabilities"];
11559 fragments["extension"] = params["datatype_extensions"];
11560 fragments["decoration"] = decoration.specialize(params);
11561 fragments["pre_main"] = pre_main.specialize(params);
11562 fragments["testfun"] = testfun.specialize(params);
11563
11564 return fragments;
11565 }
11566
getConvertCaseFragmentsNoStorage(string instruction,const ConvertCase & convertCase)11567 const map<string, string> getConvertCaseFragmentsNoStorage(string instruction, const ConvertCase &convertCase)
11568 {
11569 map<string, string> params = convertCase.m_asmTypes;
11570 map<string, string> fragments;
11571
11572 params["instruction"] = instruction;
11573 params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
11574
11575 const StringTemplate decoration(" OpDecorate %SSBOi DescriptorSet 0\n"
11576 " OpDecorate %SSBOo DescriptorSet 0\n"
11577 " OpDecorate %SSBOi Binding 0\n"
11578 " OpDecorate %SSBOo Binding 1\n"
11579 " OpDecorate %s_SSBOi Block\n"
11580 " OpDecorate %s_SSBOo Block\n"
11581 "OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11582 "OpMemberDecorate %s_SSBOo 0 Offset 0\n");
11583
11584 const StringTemplate pre_main("${datatype_additional_decl:opt}"
11585 " %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11586 " %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11587 " %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11588 " %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11589 " %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11590 " %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11591 " %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11592 " %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
11593
11594 const StringTemplate testfun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11595 "%param = OpFunctionParameter %v4f32\n"
11596 "%label = OpLabel\n"
11597 "%iLoc = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11598 "%oLoc = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11599 "%inval = OpLoad %${inStorageType} %iLoc\n"
11600 "%in_cast = ${inCast} %${inputType} %inval\n"
11601 "%conv = ${instruction} %${outputType} %in_cast\n"
11602 "%out_cast = ${outCast} %${outStorageType} %conv\n"
11603 " OpStore %oLoc %out_cast\n"
11604 " OpReturnValue %param\n"
11605 " OpFunctionEnd\n");
11606
11607 params["datatype_extensions"] =
11608 params["datatype_extensions"] + "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
11609
11610 fragments["capability"] = params["datatype_capabilities"];
11611 fragments["extension"] = params["datatype_extensions"];
11612 fragments["decoration"] = decoration.specialize(params);
11613 fragments["pre_main"] = pre_main.specialize(params);
11614 fragments["testfun"] = testfun.specialize(params);
11615 return fragments;
11616 }
11617
11618 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in compute shaders
createConvertComputeTests(tcu::TestContext & testCtx,const string & instruction,const string & name)11619 tcu::TestCaseGroup *createConvertComputeTests(tcu::TestContext &testCtx, const string &instruction, const string &name)
11620 {
11621 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, name.c_str()));
11622 vector<ConvertCase> testCases;
11623 createConvertCases(testCases, instruction);
11624
11625 for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
11626 {
11627 ComputeShaderSpec spec;
11628 spec.assembly = getConvertCaseShaderStr(instruction, *test, true);
11629 spec.numWorkGroups = IVec3(1, 1, 1);
11630 spec.inputs.push_back(test->m_inputBuffer);
11631 spec.outputs.push_back(test->m_outputBuffer);
11632
11633 getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt,
11634 spec.requestedVulkanFeatures, spec.extensions);
11635
11636 group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), spec));
11637 }
11638 return group.release();
11639 }
11640
11641 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in graphics shaders
createConvertGraphicsTests(tcu::TestContext & testCtx,const string & instruction,const string & name)11642 tcu::TestCaseGroup *createConvertGraphicsTests(tcu::TestContext &testCtx, const string &instruction, const string &name)
11643 {
11644 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, name.c_str()));
11645 vector<ConvertCase> testCases;
11646 createConvertCases(testCases, instruction);
11647
11648 for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
11649 {
11650 map<string, string> fragments = (test->m_useStorageExt) ? getConvertCaseFragments(instruction, *test) :
11651 getConvertCaseFragmentsNoStorage(instruction, *test);
11652 VulkanFeatures vulkanFeatures;
11653 GraphicsResources resources;
11654 vector<string> extensions;
11655 SpecConstants noSpecConstants;
11656 PushConstants noPushConstants;
11657 GraphicsInterfaces noInterfaces;
11658 tcu::RGBA defaultColors[4];
11659
11660 getDefaultColors(defaultColors);
11661 resources.inputs.push_back(Resource(test->m_inputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11662 resources.outputs.push_back(Resource(test->m_outputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11663 extensions.push_back("VK_KHR_storage_buffer_storage_class");
11664
11665 getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, vulkanFeatures,
11666 extensions);
11667
11668 vulkanFeatures.coreFeatures.vertexPipelineStoresAndAtomics = true;
11669 vulkanFeatures.coreFeatures.fragmentStoresAndAtomics = true;
11670
11671 createTestsForAllStages(test->m_name, defaultColors, defaultColors, fragments, noSpecConstants, noPushConstants,
11672 resources, noInterfaces, extensions, vulkanFeatures, group.get());
11673 }
11674 return group.release();
11675 }
11676
// Constant-Creation Instructions: OpConstant, OpConstantComposite
//
// Builds the "opconstant" group of 16-bit float constant tests. Each entry of the
// tests[] table below provides a name, a block of SPIR-V declarations (used as the
// "pre_main" fragment) and a function body that is inserted between functionStart
// and functionEnd (used as the "testfun" fragment). Every body must define
// %transformed_param as a %v4f16, which functionEnd converts back to %v4f32 and
// returns. The caller takes ownership of the returned group.
tcu::TestCaseGroup *createOpConstantFloat16Tests(tcu::TestContext &testCtx)
{
    de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests(new tcu::TestCaseGroup(testCtx, "opconstant"));
    RGBA inputColors[4];
    RGBA outputColors[4];
    vector<string> extensions;
    GraphicsResources resources;
    VulkanFeatures features;

    // Common prologue of the generated test function; the input colour is %param1.
    const char functionStart[] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
                                 "%param1 = OpFunctionParameter %v4f32\n"
                                 "%lbl = OpLabel\n";

    // Common epilogue: widen the 16-bit result back to 32-bit floats and return it.
    const char functionEnd[] = "%transformed_param_32 = OpFConvert %v4f32 %transformed_param\n"
                               " OpReturnValue %transformed_param_32\n"
                               " OpFunctionEnd\n";

    struct NameConstantsCode
    {
        string name;      // test case name
        string constants; // SPIR-V type/constant declarations (becomes the "pre_main" fragment)
        string code;      // function body placed between functionStart and functionEnd
    };

// 16-bit float types and constants shared by every entry of tests[].
// NOTE: this macro is defined inside the function and is not #undef'd within it.
#define FLOAT_16_COMMON_TYPES_AND_CONSTS \
    "%f16 = OpTypeFloat 16\n" \
    "%c_f16_0 = OpConstant %f16 0.0\n" \
    "%c_f16_0_5 = OpConstant %f16 0.5\n" \
    "%c_f16_1 = OpConstant %f16 1.0\n" \
    "%v4f16 = OpTypeVector %f16 4\n" \
    "%fp_f16 = OpTypePointer Function %f16\n" \
    "%fp_v4f16 = OpTypePointer Function %v4f16\n" \
    "%c_v4f16_1_1_1_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n" \
    "%a4f16 = OpTypeArray %f16 %c_u32_4\n"

    // Each variant adds (0.5, 0.5, 0.5, 0) to the input colour, obtaining that offset
    // from a differently shaped composite constant (the matrix variant relies on the
    // input's w component being 1, as its own comment notes).
    NameConstantsCode tests[] = {
        {"vec4",

         FLOAT_16_COMMON_TYPES_AND_CONSTS
         "%cval = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_0\n",
         "%param1_16 = OpFConvert %v4f16 %param1\n"
         "%transformed_param = OpFAdd %v4f16 %param1_16 %cval\n"},
        {
            "struct",

            FLOAT_16_COMMON_TYPES_AND_CONSTS
            "%stype = OpTypeStruct %v4f16 %f16\n"
            "%fp_stype = OpTypePointer Function %stype\n"
            "%f16_n_1 = OpConstant %f16 -1.0\n"
            "%f16_1_5 = OpConstant %f16 !0x3e00\n" // +1.5
            "%cvec = OpConstantComposite %v4f16 %f16_1_5 %f16_1_5 %f16_1_5 %c_f16_1\n"
            "%cval = OpConstantComposite %stype %cvec %f16_n_1\n",

            "%v = OpVariable %fp_stype Function %cval\n"
            "%vec_ptr = OpAccessChain %fp_v4f16 %v %c_u32_0\n"
            "%f16_ptr = OpAccessChain %fp_f16 %v %c_u32_1\n"
            "%vec_val = OpLoad %v4f16 %vec_ptr\n"
            "%f16_val = OpLoad %f16 %f16_ptr\n"
            "%tmp1 = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_1 %f16_val\n" // vec4(-1)
            "%param1_16 = OpFConvert %v4f16 %param1\n"
            "%tmp2 = OpFAdd %v4f16 %tmp1 %param1_16\n"             // param1 + vec4(-1)
            "%transformed_param = OpFAdd %v4f16 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
        },
        {// [1|0|0|0.5] [x] = x + 0.5
         // [0|1|0|0.5] [y] = y + 0.5
         // [0|0|1|0.5] [z] = z + 0.5
         // [0|0|0|1  ] [1] = 1
         "matrix",

         FLOAT_16_COMMON_TYPES_AND_CONSTS
         "%mat4x4_f16 = OpTypeMatrix %v4f16 4\n"
         "%v4f16_1_0_0_0 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_0 %c_f16_0 %c_f16_0\n"
         "%v4f16_0_1_0_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_1 %c_f16_0 %c_f16_0\n"
         "%v4f16_0_0_1_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_1 %c_f16_0\n"
         "%v4f16_0_5_0_5_0_5_1 = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_1\n"
         "%cval = OpConstantComposite %mat4x4_f16 %v4f16_1_0_0_0 %v4f16_0_1_0_0 %v4f16_0_0_1_0 "
         "%v4f16_0_5_0_5_0_5_1\n",

         "%param1_16 = OpFConvert %v4f16 %param1\n"
         "%transformed_param = OpMatrixTimesVector %v4f16 %cval %param1_16\n"},
        {"array",

         FLOAT_16_COMMON_TYPES_AND_CONSTS
         "%c_v4f16_1_1_1_0 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
         "%fp_a4f16 = OpTypePointer Function %a4f16\n"
         "%f16_n_1 = OpConstant %f16 -1.0\n"
         "%f16_1_5 = OpConstant %f16 !0x3e00\n" // +1.5
         "%carr = OpConstantComposite %a4f16 %c_f16_0 %f16_n_1 %f16_1_5 %c_f16_0\n",

         "%v = OpVariable %fp_a4f16 Function %carr\n"
         "%f = OpAccessChain %fp_f16 %v %c_u32_0\n"
         "%f1 = OpAccessChain %fp_f16 %v %c_u32_1\n"
         "%f2 = OpAccessChain %fp_f16 %v %c_u32_2\n"
         "%f3 = OpAccessChain %fp_f16 %v %c_u32_3\n"
         "%f_val = OpLoad %f16 %f\n"
         "%f1_val = OpLoad %f16 %f1\n"
         "%f2_val = OpLoad %f16 %f2\n"
         "%f3_val = OpLoad %f16 %f3\n"
         "%ftot1 = OpFAdd %f16 %f_val %f1_val\n"
         "%ftot2 = OpFAdd %f16 %ftot1 %f2_val\n"
         "%ftot3 = OpFAdd %f16 %ftot2 %f3_val\n" // 0 - 1 + 1.5 + 0
         "%add_vec = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %ftot3\n"
         "%param1_16 = OpFConvert %v4f16 %param1\n"
         "%transformed_param = OpFAdd %v4f16 %param1_16 %add_vec\n"},
        {//
         // [
         //   {
         //     0.0,
         //     [ 1.0, 1.0, 1.0, 1.0]
         //   },
         //   {
         //     1.0,
         //     [ 0.0, 0.5, 0.0, 0.0]
         //   }, //     ^^^
         //   {
         //     0.0,
         //     [ 1.0, 1.0, 1.0, 1.0]
         //   }
         // ]
         "array_of_struct_of_array",

         FLOAT_16_COMMON_TYPES_AND_CONSTS
         "%c_v4f16_1_1_1_0 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
         "%fp_a4f16 = OpTypePointer Function %a4f16\n"
         "%stype = OpTypeStruct %f16 %a4f16\n"
         "%a3stype = OpTypeArray %stype %c_u32_3\n"
         "%fp_a3stype = OpTypePointer Function %a3stype\n"
         "%ca4f16_0 = OpConstantComposite %a4f16 %c_f16_0 %c_f16_0_5 %c_f16_0 %c_f16_0\n"
         "%ca4f16_1 = OpConstantComposite %a4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
         "%cstype1 = OpConstantComposite %stype %c_f16_0 %ca4f16_1\n"
         "%cstype2 = OpConstantComposite %stype %c_f16_1 %ca4f16_0\n"
         // NOTE(review): unlike the other constant blocks, this one does not end in "\n";
         // presumably the shader template adds a line break after pre_main - confirm.
         "%carr = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",

         // Selects element [1] of the outer array, member 1 (the inner array), index 1: the 0.5 marked above.
         "%v = OpVariable %fp_a3stype Function %carr\n"
         "%f = OpAccessChain %fp_f16 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
         "%f_l = OpLoad %f16 %f\n"
         "%add_vec = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %f_l\n"
         "%param1_16 = OpFConvert %v4f16 %param1\n"
         "%transformed_param = OpFAdd %v4f16 %param1_16 %add_vec\n"}};

    // Input colours come from getHalfColorsFullAlpha(); the expected outputs are
    // spelled out explicitly below.
    getHalfColorsFullAlpha(inputColors);
    outputColors[0] = RGBA(255, 255, 255, 255);
    outputColors[1] = RGBA(255, 127, 127, 255);
    outputColors[2] = RGBA(127, 255, 127, 255);
    outputColors[3] = RGBA(127, 127, 255, 255);

    // All variants use 16-bit float arithmetic in the shader.
    extensions.push_back("VK_KHR_shader_float16_int8");
    features.extFloat16Int8.shaderFloat16 = true;

    for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
    {
        map<string, string> fragments;

        fragments["capability"] = "OpCapability Float16\n";
        fragments["pre_main"]   = tests[testNdx].constants;
        fragments["testfun"]    = string(functionStart) + tests[testNdx].code + functionEnd;

        createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, resources, extensions,
                                opConstantCompositeTests.get(), features);
    }
    return opConstantCompositeTests.release();
}
11840
// Registers a finished test with testGroup; declared here and specialised below
// for GraphicsResources and ComputeShaderSpec.
//
// specResource    - resource/spec object describing the test's buffers
// fragments       - named SPIR-V assembly fragments for the shader
// testCtx         - test context (unused by the GraphicsResources specialisation)
// testGroup       - group that receives the new test case(s)
// testName        - name of the test case
// vulkanFeatures  - device features the test requires
// extensions      - device extensions the test requires
// numWorkGroups   - compute dispatch size (unused by the GraphicsResources specialisation)
// splitRenderArea - forwarded to createTestsForAllStages by the GraphicsResources
//                   specialisation; ignored by the ComputeShaderSpec one
template <typename T>
void finalizeTestsCreation(T &specResource, const map<string, string> &fragments, tcu::TestContext &testCtx,
                           tcu::TestCaseGroup &testGroup, const std::string &testName,
                           const VulkanFeatures &vulkanFeatures, const vector<string> &extensions,
                           const IVec3 &numWorkGroups, const bool splitRenderArea = false);
11846
11847 template <>
finalizeTestsCreation(GraphicsResources & specResource,const map<string,string> & fragments,tcu::TestContext &,tcu::TestCaseGroup & testGroup,const std::string & testName,const VulkanFeatures & vulkanFeatures,const vector<string> & extensions,const IVec3 &,const bool splitRenderArea)11848 void finalizeTestsCreation(GraphicsResources &specResource, const map<string, string> &fragments, tcu::TestContext &,
11849 tcu::TestCaseGroup &testGroup, const std::string &testName,
11850 const VulkanFeatures &vulkanFeatures, const vector<string> &extensions, const IVec3 &,
11851 const bool splitRenderArea)
11852 {
11853 RGBA defaultColors[4];
11854 getDefaultColors(defaultColors);
11855
11856 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, specResource, extensions, &testGroup,
11857 vulkanFeatures, QP_TEST_RESULT_FAIL, std::string(), splitRenderArea);
11858 }
11859
11860 template <>
finalizeTestsCreation(ComputeShaderSpec & specResource,const map<string,string> & fragments,tcu::TestContext & testCtx,tcu::TestCaseGroup & testGroup,const std::string & testName,const VulkanFeatures & vulkanFeatures,const vector<string> & extensions,const IVec3 & numWorkGroups,bool)11861 void finalizeTestsCreation(ComputeShaderSpec &specResource, const map<string, string> &fragments,
11862 tcu::TestContext &testCtx, tcu::TestCaseGroup &testGroup, const std::string &testName,
11863 const VulkanFeatures &vulkanFeatures, const vector<string> &extensions,
11864 const IVec3 &numWorkGroups, bool)
11865 {
11866 specResource.numWorkGroups = numWorkGroups;
11867 specResource.requestedVulkanFeatures = vulkanFeatures;
11868 specResource.extensions = extensions;
11869
11870 specResource.assembly = makeComputeShaderAssembly(fragments);
11871
11872 testGroup.addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), specResource));
11873 }
11874
11875 template <class SpecResource>
createFloat16LogicalSet(tcu::TestContext & testCtx,const bool nanSupported)11876 tcu::TestCaseGroup *createFloat16LogicalSet(tcu::TestContext &testCtx, const bool nanSupported)
11877 {
11878 const string nan = nanSupported ? "_nan" : "";
11879 const string groupName = "logical" + nan;
11880 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, groupName.c_str()));
11881
11882 de::Random rnd(deStringHash(testGroup->getName()));
11883 const string spvCapabilities =
11884 string("OpCapability Float16\n") + (nanSupported ? "OpCapability SignedZeroInfNanPreserve\n" : "");
11885 const string spvExtensions = (nanSupported ? "OpExtension \"SPV_KHR_float_controls\"\n" : "");
11886 const string spvExecutionMode = nanSupported ? "OpExecutionMode %BP_main SignedZeroInfNanPreserve 16\n" : "";
11887 const uint32_t numDataPointsScalar = 16;
11888 const uint32_t numDataPointsVector = 14;
11889 const vector<deFloat16> float16DataScalar = getFloat16s(rnd, numDataPointsScalar);
11890 const vector<deFloat16> float16DataVector = getFloat16s(rnd, numDataPointsVector);
11891 const vector<deFloat16> float16Data1 =
11892 squarize(float16DataScalar, 0); // Total Size: square(sizeof(float16DataScalar))
11893 const vector<deFloat16> float16Data2 = squarize(float16DataScalar, 1);
11894 const vector<deFloat16> float16DataVec1 =
11895 squarizeVector(float16DataVector, 0); // Total Size: 2 * (square(square(sizeof(float16DataVector))))
11896 const vector<deFloat16> float16DataVec2 = squarizeVector(float16DataVector, 1);
11897 const vector<deFloat16> float16OutUnused(float16Data1.size(), 0);
11898 const vector<deFloat16> float16OutVecUnused(float16DataVec1.size(), 0);
11899
11900 struct TestOp
11901 {
11902 const char *opCode;
11903 VerifyIOFunc verifyFuncNan;
11904 VerifyIOFunc verifyFuncNonNan;
11905 const uint32_t argCount;
11906 };
11907
11908 const TestOp testOps[] = {
11909 {"OpIsNan", compareFP16Logical<fp16isNan, true, false, true>, compareFP16Logical<fp16isNan, true, false, false>,
11910 1},
11911 {"OpIsInf", compareFP16Logical<fp16isInf, true, false, true>, compareFP16Logical<fp16isInf, true, false, false>,
11912 1},
11913 {"OpFOrdEqual", compareFP16Logical<fp16isEqual, false, true, true>,
11914 compareFP16Logical<fp16isEqual, false, true, false>, 2},
11915 {"OpFUnordEqual", compareFP16Logical<fp16isEqual, false, false, true>,
11916 compareFP16Logical<fp16isEqual, false, false, false>, 2},
11917 {"OpFOrdNotEqual", compareFP16Logical<fp16isUnequal, false, true, true>,
11918 compareFP16Logical<fp16isUnequal, false, true, false>, 2},
11919 {"OpFUnordNotEqual", compareFP16Logical<fp16isUnequal, false, false, true>,
11920 compareFP16Logical<fp16isUnequal, false, false, false>, 2},
11921 {"OpFOrdLessThan", compareFP16Logical<fp16isLess, false, true, true>,
11922 compareFP16Logical<fp16isLess, false, true, false>, 2},
11923 {"OpFUnordLessThan", compareFP16Logical<fp16isLess, false, false, true>,
11924 compareFP16Logical<fp16isLess, false, false, false>, 2},
11925 {"OpFOrdGreaterThan", compareFP16Logical<fp16isGreater, false, true, true>,
11926 compareFP16Logical<fp16isGreater, false, true, false>, 2},
11927 {"OpFUnordGreaterThan", compareFP16Logical<fp16isGreater, false, false, true>,
11928 compareFP16Logical<fp16isGreater, false, false, false>, 2},
11929 {"OpFOrdLessThanEqual", compareFP16Logical<fp16isLessOrEqual, false, true, true>,
11930 compareFP16Logical<fp16isLessOrEqual, false, true, false>, 2},
11931 {"OpFUnordLessThanEqual", compareFP16Logical<fp16isLessOrEqual, false, false, true>,
11932 compareFP16Logical<fp16isLessOrEqual, false, false, false>, 2},
11933 {"OpFOrdGreaterThanEqual", compareFP16Logical<fp16isGreaterOrEqual, false, true, true>,
11934 compareFP16Logical<fp16isGreaterOrEqual, false, true, false>, 2},
11935 {"OpFUnordGreaterThanEqual", compareFP16Logical<fp16isGreaterOrEqual, false, false, true>,
11936 compareFP16Logical<fp16isGreaterOrEqual, false, false, false>, 2},
11937 };
11938
11939 { // scalar cases
11940 const StringTemplate preMain(" %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11941 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11942 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11943 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
11944 " %f16 = OpTypeFloat 16\n"
11945 " %v2f16 = OpTypeVector %f16 2\n"
11946 " %c_f16_0 = OpConstant %f16 0.0\n"
11947 " %c_f16_1 = OpConstant %f16 1.0\n"
11948 " %up_u32 = OpTypePointer Uniform %u32\n"
11949 " %ra_u32 = OpTypeArray %u32 %c_i32_hndp\n"
11950 " %SSBO16 = OpTypeStruct %ra_u32\n"
11951 " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11952 " %f16_i32_fn = OpTypeFunction %f16 %i32\n"
11953 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11954 " %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
11955 " %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
11956 " %ssbo_dst = OpVariable %up_SSBO16 Uniform\n");
11957
11958 const StringTemplate decoration("OpDecorate %ra_u32 ArrayStride 4\n"
11959 "OpMemberDecorate %SSBO16 0 Offset 0\n"
11960 "OpDecorate %SSBO16 BufferBlock\n"
11961 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
11962 "OpDecorate %ssbo_src0 Binding 0\n"
11963 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
11964 "OpDecorate %ssbo_src1 Binding 1\n"
11965 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11966 "OpDecorate %ssbo_dst Binding 2\n");
11967
11968 const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11969 " %param = OpFunctionParameter %v4f32\n"
11970
11971 " %entry = OpLabel\n"
11972 " %i = OpVariable %fp_i32 Function\n"
11973 " OpStore %i %c_i32_0\n"
11974 " OpBranch %loop\n"
11975
11976 " %loop = OpLabel\n"
11977 " %i_cmp = OpLoad %i32 %i\n"
11978 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11979 " OpLoopMerge %merge %next None\n"
11980 " OpBranchConditional %lt %write %merge\n"
11981
11982 " %write = OpLabel\n"
11983 " %ndx = OpLoad %i32 %i\n"
11984
11985 " %val_src0 = OpFunctionCall %f16 %ld_arg_ssbo_src0 %ndx\n"
11986
11987 "${op_arg1_calc}"
11988
11989 " %val_bdst = ${op_code} %bool %val_src0 ${op_arg1}\n"
11990 " %val_dst = OpSelect %f16 %val_bdst %c_f16_1 %c_f16_0\n"
11991 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11992 " OpBranch %next\n"
11993
11994 " %next = OpLabel\n"
11995 " %i_cur = OpLoad %i32 %i\n"
11996 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11997 " OpStore %i %i_new\n"
11998 " OpBranch %loop\n"
11999
12000 " %merge = OpLabel\n"
12001 " OpReturnValue %param\n"
12002
12003 " OpFunctionEnd\n");
12004
12005 const StringTemplate arg1Calc(" %val_src1 = OpFunctionCall %f16 %ld_arg_ssbo_src1 %ndx\n");
12006
12007 for (uint32_t testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
12008 {
12009 const size_t iterations = float16Data1.size();
12010 const TestOp &testOp = testOps[testOpsIdx];
12011 const string testName = de::toLower(string(testOp.opCode)) + "_scalar";
12012 SpecResource specResource;
12013 map<string, string> specs;
12014 VulkanFeatures features;
12015 map<string, string> fragments;
12016 vector<string> extensions;
12017
12018 specs["num_data_points"] = de::toString(iterations);
12019 specs["op_code"] = testOp.opCode;
12020 specs["op_arg1"] = (testOp.argCount == 1) ? "" : "%val_src1";
12021 specs["op_arg1_calc"] = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
12022
12023 fragments["extension"] = spvExtensions;
12024 fragments["capability"] = spvCapabilities;
12025 fragments["execution_mode"] = spvExecutionMode;
12026 fragments["decoration"] = decoration.specialize(specs);
12027 fragments["pre_main"] = preMain.specialize(specs);
12028 fragments["testfun"] = testFun.specialize(specs);
12029 fragments["testfun"] += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src0"}});
12030 if (testOp.argCount > 1)
12031 {
12032 fragments["testfun"] += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src1"}});
12033 }
12034 fragments["testfun"] += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
12035
12036 specResource.inputs.push_back(
12037 Resource(BufferSp(new Float16Buffer(float16Data1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12038 specResource.inputs.push_back(
12039 Resource(BufferSp(new Float16Buffer(float16Data2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12040 specResource.outputs.push_back(
12041 Resource(BufferSp(new Float16Buffer(float16OutUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12042 specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
12043
12044 extensions.push_back("VK_KHR_shader_float16_int8");
12045
12046 if (nanSupported)
12047 {
12048 extensions.push_back("VK_KHR_shader_float_controls");
12049
12050 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = true;
12051 }
12052
12053 features.extFloat16Int8.shaderFloat16 = true;
12054 if (specResource.graphicsFeaturesRequired)
12055 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
12056
12057 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
12058 IVec3(1, 1, 1));
12059 }
12060 }
12061 { // vector cases
12062 const StringTemplate preMain(" %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12063 " %v2bool = OpTypeVector %bool 2\n"
12064 " %f16 = OpTypeFloat 16\n"
12065 " %c_f16_0 = OpConstant %f16 0.0\n"
12066 " %c_f16_1 = OpConstant %f16 1.0\n"
12067 " %v2f16 = OpTypeVector %f16 2\n"
12068 " %c_v2f16_0_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
12069 " %c_v2f16_1_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
12070 " %up_u32 = OpTypePointer Uniform %u32\n"
12071 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12072 " %SSBO16 = OpTypeStruct %ra_u32\n"
12073 " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
12074 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12075 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
12076 " %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
12077 " %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
12078 " %ssbo_dst = OpVariable %up_SSBO16 Uniform\n");
12079
12080 const StringTemplate decoration("OpDecorate %ra_u32 ArrayStride 4\n"
12081 "OpMemberDecorate %SSBO16 0 Offset 0\n"
12082 "OpDecorate %SSBO16 BufferBlock\n"
12083 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
12084 "OpDecorate %ssbo_src0 Binding 0\n"
12085 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
12086 "OpDecorate %ssbo_src1 Binding 1\n"
12087 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12088 "OpDecorate %ssbo_dst Binding 2\n");
12089
12090 const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12091 " %param = OpFunctionParameter %v4f32\n"
12092
12093 " %entry = OpLabel\n"
12094 " %i = OpVariable %fp_i32 Function\n"
12095 " OpStore %i %c_i32_0\n"
12096 " OpBranch %loop\n"
12097
12098 " %loop = OpLabel\n"
12099 " %i_cmp = OpLoad %i32 %i\n"
12100 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12101 " OpLoopMerge %merge %next None\n"
12102 " OpBranchConditional %lt %write %merge\n"
12103
12104 " %write = OpLabel\n"
12105 " %ndx = OpLoad %i32 %i\n"
12106
12107 " %val_src0 = OpFunctionCall %v2f16 %ld_arg_ssbo_src0 %ndx\n"
12108
12109 "${op_arg1_calc}"
12110
12111 " %val_bdst = ${op_code} %v2bool %val_src0 ${op_arg1}\n"
12112 " %val_dst = OpSelect %v2f16 %val_bdst %c_v2f16_1_1 %c_v2f16_0_0\n"
12113 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12114 " OpBranch %next\n"
12115
12116 " %next = OpLabel\n"
12117 " %i_cur = OpLoad %i32 %i\n"
12118 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12119 " OpStore %i %i_new\n"
12120 " OpBranch %loop\n"
12121
12122 " %merge = OpLabel\n"
12123 " OpReturnValue %param\n"
12124
12125 " OpFunctionEnd\n");
12126
12127 const StringTemplate arg1Calc(" %val_src1 = OpFunctionCall %v2f16 %ld_arg_ssbo_src1 %ndx\n");
12128
12129 for (uint32_t testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
12130 {
12131 const uint32_t itemsPerVec = 2;
12132 const size_t iterations = float16DataVec1.size() / itemsPerVec;
12133 const TestOp &testOp = testOps[testOpsIdx];
12134 const string testName = de::toLower(string(testOp.opCode)) + "_vector";
12135 SpecResource specResource;
12136 map<string, string> specs;
12137 vector<string> extensions;
12138 VulkanFeatures features;
12139 map<string, string> fragments;
12140
12141 specs["num_data_points"] = de::toString(iterations);
12142 specs["op_code"] = testOp.opCode;
12143 specs["op_arg1"] = (testOp.argCount == 1) ? "" : "%val_src1";
12144 specs["op_arg1_calc"] = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
12145
12146 fragments["extension"] = spvExtensions;
12147 fragments["capability"] = spvCapabilities;
12148 fragments["execution_mode"] = spvExecutionMode;
12149 fragments["decoration"] = decoration.specialize(specs);
12150 fragments["pre_main"] = preMain.specialize(specs);
12151 fragments["testfun"] = testFun.specialize(specs);
12152 fragments["testfun"] += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src0"}});
12153 if (testOp.argCount > 1)
12154 {
12155 fragments["testfun"] += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src1"}});
12156 }
12157 fragments["testfun"] += StringTemplate(storeV2F16AsUint).specialize({{"var", "ssbo_dst"}});
12158
12159 specResource.inputs.push_back(
12160 Resource(BufferSp(new Float16Buffer(float16DataVec1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12161 specResource.inputs.push_back(
12162 Resource(BufferSp(new Float16Buffer(float16DataVec2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12163 specResource.outputs.push_back(
12164 Resource(BufferSp(new Float16Buffer(float16OutVecUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12165 specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
12166
12167 extensions.push_back("VK_KHR_shader_float16_int8");
12168
12169 if (nanSupported)
12170 {
12171 extensions.push_back("VK_KHR_shader_float_controls");
12172
12173 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = true;
12174 }
12175
12176 features.extFloat16Int8.shaderFloat16 = true;
12177 if (specResource.graphicsFeaturesRequired)
12178 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
12179
12180 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
12181 IVec3(1, 1, 1), true);
12182 }
12183 }
12184
12185 return testGroup.release();
12186 }
12187
compareFP16FunctionSetFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12188 bool compareFP16FunctionSetFunc(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
12189 const std::vector<Resource> &, TestLog &log)
12190 {
12191 if (inputs.size() != 1 || outputAllocs.size() != 1)
12192 return false;
12193
12194 vector<uint8_t> input1Bytes;
12195
12196 inputs[0].getBytes(input1Bytes);
12197
12198 const uint16_t *const input1AsFP16 = (const uint16_t *)&input1Bytes[0];
12199 const uint16_t *const outputAsFP16 = (const uint16_t *)outputAllocs[0]->getHostPtr();
12200 std::string error;
12201
12202 for (size_t idx = 0; idx < input1Bytes.size() / sizeof(uint16_t); ++idx)
12203 {
12204 if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
12205 {
12206 log << TestLog::Message << error << TestLog::EndMessage;
12207
12208 return false;
12209 }
12210 }
12211
12212 return true;
12213 }
12214
12215 template <class SpecResource>
createFloat16FuncSet(tcu::TestContext & testCtx)12216 tcu::TestCaseGroup *createFloat16FuncSet(tcu::TestContext &testCtx)
12217 {
12218 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "function"));
12219
12220 de::Random rnd(deStringHash(testGroup->getName()));
12221 const StringTemplate capabilities("OpCapability Float16\n");
12222 const uint32_t numDataPoints = 256;
12223 const vector<deFloat16> float16InputData = getFloat16s(rnd, numDataPoints);
12224 const vector<deFloat16> float16OutputUnused(float16InputData.size(), 0);
12225 map<string, string> fragments;
12226
12227 struct TestType
12228 {
12229 const uint32_t typeComponents;
12230 const char *typeName;
12231 const char *typeDecls;
12232 const char *typeStorage;
12233 const string loadFunc;
12234 const string storeFunc;
12235 };
12236
12237 const TestType testTypes[] = {
12238 {1, "f16",
12239 " %v2f16 = OpTypeVector %f16 2\n"
12240 "%f16_i32_fn = OpTypeFunction %f16 %i32\n"
12241 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12242 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12243 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12244 "u32_hndp", loadScalarF16FromUint, storeScalarF16AsUint},
12245 {2, "v2f16",
12246 " %v2f16 = OpTypeVector %f16 2\n"
12247 " %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
12248 "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12249 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
12250 "u32_ndp", loadV2F16FromUint, storeV2F16AsUint},
12251 {4, "v4f16",
12252 " %v2f16 = OpTypeVector %f16 2\n"
12253 " %v4f16 = OpTypeVector %f16 4\n"
12254 " %c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
12255 "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12256 "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
12257 "ra_u32_2", loadV4F16FromUints, storeV4F16AsUints},
12258 };
12259
12260 const StringTemplate preMain(" %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12261 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
12262 " %v2bool = OpTypeVector %bool 2\n"
12263 " %f16 = OpTypeFloat 16\n"
12264 " %c_f16_0 = OpConstant %f16 0.0\n"
12265
12266 "${type_decls}"
12267
12268 " %${tt}_fun = OpTypeFunction %${tt} %${tt}\n"
12269 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12270 "%ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
12271 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12272 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12273 " %up_u32 = OpTypePointer Uniform %u32\n"
12274 " %SSBO16 = OpTypeStruct %ra_${ts}\n"
12275 " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
12276 " %ssbo_src = OpVariable %up_SSBO16 Uniform\n"
12277 " %ssbo_dst = OpVariable %up_SSBO16 Uniform\n");
12278
12279 const StringTemplate decoration("OpDecorate %ra_u32_2 ArrayStride 4\n"
12280 "OpDecorate %ra_u32_hndp ArrayStride 4\n"
12281 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
12282 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12283 "OpMemberDecorate %SSBO16 0 Offset 0\n"
12284 "OpDecorate %SSBO16 BufferBlock\n"
12285 "OpDecorate %ssbo_src DescriptorSet 0\n"
12286 "OpDecorate %ssbo_src Binding 0\n"
12287 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12288 "OpDecorate %ssbo_dst Binding 1\n");
12289
12290 const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12291 " %param = OpFunctionParameter %v4f32\n"
12292 " %entry = OpLabel\n"
12293
12294 " %i = OpVariable %fp_i32 Function\n"
12295 " OpStore %i %c_i32_0\n"
12296 " OpBranch %loop\n"
12297
12298 " %loop = OpLabel\n"
12299 " %i_cmp = OpLoad %i32 %i\n"
12300 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12301 " OpLoopMerge %merge %next None\n"
12302 " OpBranchConditional %lt %write %merge\n"
12303
12304 " %write = OpLabel\n"
12305 " %ndx = OpLoad %i32 %i\n"
12306
12307 " %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12308 " %val_dst = OpFunctionCall %${tt} %pass_fun %val_src\n"
12309 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12310 " OpBranch %next\n"
12311
12312 " %next = OpLabel\n"
12313 " %i_cur = OpLoad %i32 %i\n"
12314 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12315 " OpStore %i %i_new\n"
12316 " OpBranch %loop\n"
12317
12318 " %merge = OpLabel\n"
12319 " OpReturnValue %param\n"
12320
12321 " OpFunctionEnd\n"
12322
12323 " %pass_fun = OpFunction %${tt} None %${tt}_fun\n"
12324 " %param0 = OpFunctionParameter %${tt}\n"
12325 " %entry_pf = OpLabel\n"
12326 " %res0 = OpFAdd %${tt} %param0 %c_${tt}_0\n"
12327 " OpReturnValue %res0\n"
12328 " OpFunctionEnd\n");
12329
12330 for (uint32_t testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12331 {
12332 const TestType &testType = testTypes[testTypeIdx];
12333 const string testName = testType.typeName;
12334 const uint32_t itemsPerType = testType.typeComponents;
12335 const size_t iterations = float16InputData.size() / itemsPerType;
12336 const size_t typeStride = itemsPerType * sizeof(deFloat16);
12337 SpecResource specResource;
12338 map<string, string> specs;
12339 VulkanFeatures features;
12340 vector<string> extensions;
12341
12342 specs["num_data_points"] = de::toString(iterations);
12343 specs["tt"] = testType.typeName;
12344 specs["ts"] = testType.typeStorage;
12345 specs["tt_stride"] = de::toString(typeStride);
12346 specs["type_decls"] = testType.typeDecls;
12347
12348 fragments["capability"] = capabilities.specialize(specs);
12349 fragments["decoration"] = decoration.specialize(specs);
12350 fragments["pre_main"] = preMain.specialize(specs);
12351 fragments["testfun"] = testFun.specialize(specs);
12352 fragments["testfun"] += StringTemplate(testType.loadFunc).specialize({{"var", "ssbo_src"}});
12353 fragments["testfun"] += StringTemplate(testType.storeFunc).specialize({{"var", "ssbo_dst"}});
12354
12355 specResource.inputs.push_back(
12356 Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12357 specResource.outputs.push_back(
12358 Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12359 specResource.verifyIO = compareFP16FunctionSetFunc;
12360
12361 extensions.push_back("VK_KHR_shader_float16_int8");
12362
12363 features.extFloat16Int8.shaderFloat16 = true;
12364 if (specResource.graphicsFeaturesRequired)
12365 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
12366
12367 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
12368 IVec3(1, 1, 1));
12369 }
12370
12371 return testGroup.release();
12372 }
12373
compareFP16VectorExtractFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12374 bool compareFP16VectorExtractFunc(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
12375 const std::vector<Resource> &, TestLog &log)
12376 {
12377 if (inputs.size() != 2 || outputAllocs.size() != 1)
12378 return false;
12379
12380 vector<uint8_t> input1Bytes;
12381 vector<uint8_t> input2Bytes;
12382
12383 inputs[0].getBytes(input1Bytes);
12384 inputs[1].getBytes(input2Bytes);
12385
12386 DE_ASSERT(input1Bytes.size() > 0);
12387 DE_ASSERT(input2Bytes.size() > 0);
12388 DE_ASSERT(input2Bytes.size() % sizeof(uint32_t) == 0);
12389
12390 const size_t iterations = input2Bytes.size() / sizeof(uint32_t);
12391 const size_t components = input1Bytes.size() / (sizeof(deFloat16) * iterations);
12392 const deFloat16 *const input1AsFP16 = (const deFloat16 *)&input1Bytes[0];
12393 const uint32_t *const inputIndices = (const uint32_t *)&input2Bytes[0];
12394 const deFloat16 *const outputAsFP16 = (const deFloat16 *)outputAllocs[0]->getHostPtr();
12395 std::string error;
12396
12397 DE_ASSERT(components == 2 || components == 4);
12398 DE_ASSERT(input1Bytes.size() == iterations * components * sizeof(deFloat16));
12399
12400 for (size_t idx = 0; idx < iterations; ++idx)
12401 {
12402 const uint32_t componentNdx = inputIndices[idx];
12403
12404 DE_ASSERT(componentNdx < components);
12405
12406 const deFloat16 expected = input1AsFP16[components * idx + componentNdx];
12407
12408 if (!compare16BitFloat(expected, outputAsFP16[idx], error))
12409 {
12410 log << TestLog::Message << "At " << idx << error << TestLog::EndMessage;
12411
12412 return false;
12413 }
12414 }
12415
12416 return true;
12417 }
12418
12419 template <class SpecResource>
createFloat16VectorExtractSet(tcu::TestContext & testCtx)12420 tcu::TestCaseGroup *createFloat16VectorExtractSet(tcu::TestContext &testCtx)
12421 {
12422 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "opvectorextractdynamic"));
12423
12424 de::Random rnd(deStringHash(testGroup->getName()));
12425 const uint32_t numDataPoints = 256;
12426 const vector<deFloat16> float16InputData = getFloat16s(rnd, numDataPoints);
12427 const vector<deFloat16> float16OutputUnused(float16InputData.size(), 0);
12428
12429 struct TestType
12430 {
12431 const uint32_t typeComponents;
12432 const size_t typeStride;
12433 const char *typeName;
12434 const char *typeDecls;
12435 const char *typeStorage;
12436 const string loadFunction;
12437 const string storeFunction;
12438 };
12439
12440 const TestType testTypes[] = {
12441 {2, 2 * sizeof(deFloat16), "v2f16",
12442 " %v2f16 = OpTypeVector %f16 2\n"
12443 "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12444 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12445 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12446 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12447 "u32", loadV2F16FromUint, storeScalarF16AsUint},
12448 {3, 4 * sizeof(deFloat16), "v3f16",
12449 " %v2f16 = OpTypeVector %f16 2\n"
12450 " %v3f16 = OpTypeVector %f16 3\n"
12451 "%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12452 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12453 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12454 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12455 "ra_u32_2", loadV3F16FromUints, storeScalarF16AsUint},
12456 {4, 4 * sizeof(deFloat16), "v4f16",
12457 " %v2f16 = OpTypeVector %f16 2\n"
12458 " %v4f16 = OpTypeVector %f16 4\n"
12459 "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12460 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12461 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12462 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12463 "ra_u32_2", loadV4F16FromUints, storeScalarF16AsUint},
12464 };
12465
12466 const StringTemplate preMain(" %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12467 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
12468 " %f16 = OpTypeFloat 16\n"
12469
12470 "${type_decl}"
12471
12472 " %up_u32 = OpTypePointer Uniform %u32\n"
12473 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12474 " %SSBO_IDX = OpTypeStruct %ra_u32\n"
12475 "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
12476
12477 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12478 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12479 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12480 " %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12481 "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
12482
12483 " %ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
12484 " %SSBO_DST = OpTypeStruct %ra_u32_hndp\n"
12485 "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12486
12487 " %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12488 " %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12489 " %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n");
12490
12491 const StringTemplate decoration("OpDecorate %ra_u32_2 ArrayStride 4\n"
12492 "OpDecorate %ra_u32_hndp ArrayStride 4\n"
12493 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12494 "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12495 "OpDecorate %SSBO_SRC BufferBlock\n"
12496 "OpDecorate %ssbo_src DescriptorSet 0\n"
12497 "OpDecorate %ssbo_src Binding 0\n"
12498
12499 "OpDecorate %ra_u32 ArrayStride 4\n"
12500 "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12501 "OpDecorate %SSBO_IDX BufferBlock\n"
12502 "OpDecorate %ssbo_idx DescriptorSet 0\n"
12503 "OpDecorate %ssbo_idx Binding 1\n"
12504
12505 "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12506 "OpDecorate %SSBO_DST BufferBlock\n"
12507 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12508 "OpDecorate %ssbo_dst Binding 2\n");
12509
12510 const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12511 " %param = OpFunctionParameter %v4f32\n"
12512 " %entry = OpLabel\n"
12513
12514 " %i = OpVariable %fp_i32 Function\n"
12515 " OpStore %i %c_i32_0\n"
12516
12517 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12518 " OpSelectionMerge %end_if None\n"
12519 " OpBranchConditional %will_run %run_test %end_if\n"
12520
12521 " %run_test = OpLabel\n"
12522 " OpBranch %loop\n"
12523
12524 " %loop = OpLabel\n"
12525 " %i_cmp = OpLoad %i32 %i\n"
12526 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12527 " OpLoopMerge %merge %next None\n"
12528 " OpBranchConditional %lt %write %merge\n"
12529
12530 " %write = OpLabel\n"
12531 " %ndx = OpLoad %i32 %i\n"
12532
12533 " %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12534
12535 " %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12536 " %val_idx = OpLoad %u32 %src_idx\n"
12537
12538 " %val_dst = OpVectorExtractDynamic %f16 %val_src %val_idx\n"
12539 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12540
12541 " OpBranch %next\n"
12542
12543 " %next = OpLabel\n"
12544 " %i_cur = OpLoad %i32 %i\n"
12545 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12546 " OpStore %i %i_new\n"
12547 " OpBranch %loop\n"
12548
12549 " %merge = OpLabel\n"
12550 " OpBranch %end_if\n"
12551 " %end_if = OpLabel\n"
12552 " OpReturnValue %param\n"
12553
12554 " OpFunctionEnd\n");
12555
12556 for (uint32_t testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12557 {
12558 const TestType &testType = testTypes[testTypeIdx];
12559 const string testName = testType.typeName;
12560 const size_t itemsPerType = testType.typeStride / sizeof(deFloat16);
12561 const size_t iterations = float16InputData.size() / itemsPerType;
12562 SpecResource specResource;
12563 map<string, string> specs;
12564 VulkanFeatures features;
12565 vector<uint32_t> inputDataNdx;
12566 map<string, string> fragments;
12567 vector<string> extensions;
12568
12569 for (uint32_t ndx = 0; ndx < iterations; ++ndx)
12570 inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
12571
12572 specs["num_data_points"] = de::toString(iterations);
12573 specs["tt"] = testType.typeName;
12574 specs["ts"] = testType.typeStorage;
12575 specs["tt_stride"] = de::toString(testType.typeStride);
12576 specs["type_decl"] = testType.typeDecls;
12577
12578 fragments["capability"] = "OpCapability Float16\n";
12579 fragments["decoration"] = decoration.specialize(specs);
12580 fragments["pre_main"] = preMain.specialize(specs);
12581 fragments["testfun"] = testFun.specialize(specs);
12582 fragments["testfun"] += StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12583 fragments["testfun"] += StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
12584
12585 specResource.inputs.push_back(
12586 Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12587 specResource.inputs.push_back(
12588 Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12589 specResource.outputs.push_back(
12590 Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12591 specResource.verifyIO = compareFP16VectorExtractFunc;
12592
12593 extensions.push_back("VK_KHR_shader_float16_int8");
12594
12595 features.extFloat16Int8.shaderFloat16 = true;
12596 if (specResource.graphicsFeaturesRequired)
12597 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
12598
12599 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
12600 IVec3(1, 1, 1));
12601 }
12602
12603 return testGroup.release();
12604 }
12605
12606 template <uint32_t COMPONENTS_COUNT, uint32_t REPLACEMENT>
compareFP16VectorInsertFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12607 bool compareFP16VectorInsertFunc(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
12608 const std::vector<Resource> &, TestLog &log)
12609 {
12610 if (inputs.size() != 2 || outputAllocs.size() != 1)
12611 return false;
12612
12613 vector<uint8_t> input1Bytes;
12614 vector<uint8_t> input2Bytes;
12615
12616 inputs[0].getBytes(input1Bytes);
12617 inputs[1].getBytes(input2Bytes);
12618
12619 DE_ASSERT(input1Bytes.size() > 0);
12620 DE_ASSERT(input2Bytes.size() > 0);
12621 DE_ASSERT(input2Bytes.size() % sizeof(uint32_t) == 0);
12622
12623 const size_t iterations = input2Bytes.size() / sizeof(uint32_t);
12624 const size_t componentsStride = input1Bytes.size() / (sizeof(deFloat16) * iterations);
12625 const deFloat16 *const input1AsFP16 = (const deFloat16 *)&input1Bytes[0];
12626 const uint32_t *const inputIndices = (const uint32_t *)&input2Bytes[0];
12627 const deFloat16 *const outputAsFP16 = (const deFloat16 *)outputAllocs[0]->getHostPtr();
12628 const deFloat16 magic = tcu::Float16(float(REPLACEMENT)).bits();
12629 std::string error;
12630
12631 DE_ASSERT(componentsStride == 2 || componentsStride == 4);
12632 DE_ASSERT(input1Bytes.size() == iterations * componentsStride * sizeof(deFloat16));
12633
12634 for (size_t idx = 0; idx < iterations; ++idx)
12635 {
12636 const deFloat16 *inputVec = &input1AsFP16[componentsStride * idx];
12637 const deFloat16 *outputVec = &outputAsFP16[componentsStride * idx];
12638 const uint32_t replacedCompNdx = inputIndices[idx];
12639
12640 DE_ASSERT(replacedCompNdx < COMPONENTS_COUNT);
12641
12642 for (size_t compNdx = 0; compNdx < COMPONENTS_COUNT; ++compNdx)
12643 {
12644 const deFloat16 expected = (compNdx == replacedCompNdx) ? magic : inputVec[compNdx];
12645
12646 if (!compare16BitFloat(expected, outputVec[compNdx], error))
12647 {
12648 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
12649
12650 return false;
12651 }
12652 }
12653 }
12654
12655 return true;
12656 }
12657
12658 template <class SpecResource>
createFloat16VectorInsertSet(tcu::TestContext & testCtx)12659 tcu::TestCaseGroup *createFloat16VectorInsertSet(tcu::TestContext &testCtx)
12660 {
12661 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "opvectorinsertdynamic"));
12662
12663 de::Random rnd(deStringHash(testGroup->getName()));
12664 const uint32_t replacement = 42;
12665 const uint32_t numDataPoints = 256;
12666 const vector<deFloat16> float16InputData = getFloat16s(rnd, numDataPoints);
12667 const vector<deFloat16> float16OutputUnused(float16InputData.size(), 0);
12668
12669 struct TestType
12670 {
12671 const uint32_t typeComponents;
12672 const size_t typeStride;
12673 const char *typeName;
12674 const char *typeDecls;
12675 VerifyIOFunc verifyIOFunc;
12676 const char *typeStorage;
12677 const string loadFunction;
12678 const string storeFunction;
12679 };
12680
12681 const TestType testTypes[] = {
12682 {2, 2 * sizeof(deFloat16), "v2f16",
12683 " %v2f16 = OpTypeVector %f16 2\n"
12684 "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12685 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
12686 compareFP16VectorInsertFunc<2, replacement>, "u32", loadV2F16FromUint, storeV2F16AsUint},
12687 {3, 4 * sizeof(deFloat16), "v3f16",
12688 " %v2f16 = OpTypeVector %f16 2\n"
12689 " %v3f16 = OpTypeVector %f16 3\n"
12690 "%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12691 "%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n",
12692 compareFP16VectorInsertFunc<3, replacement>, "ra_u32_2", loadV3F16FromUints, storeV3F16AsUints},
12693 {4, 4 * sizeof(deFloat16), "v4f16",
12694 " %v2f16 = OpTypeVector %f16 2\n"
12695 " %v4f16 = OpTypeVector %f16 4\n"
12696 "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12697 "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
12698 compareFP16VectorInsertFunc<4, replacement>, "ra_u32_2", loadV4F16FromUints, storeV4F16AsUints},
12699 };
12700
12701 const StringTemplate preMain(" %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12702 " %f16 = OpTypeFloat 16\n"
12703 " %c_f16_ins = OpConstant %f16 ${replacement}\n"
12704
12705 "${type_decl}"
12706
12707 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12708 " %up_u32 = OpTypePointer Uniform %u32\n"
12709 " %SSBO_IDX = OpTypeStruct %ra_u32\n"
12710 "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
12711
12712 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12713 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12714 " %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12715 "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
12716
12717 " %SSBO_DST = OpTypeStruct %ra_${ts}\n"
12718 "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12719
12720 " %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12721 " %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12722 " %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n");
12723
12724 const StringTemplate decoration("OpDecorate %ra_u32_2 ArrayStride 4\n"
12725 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12726 "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12727 "OpDecorate %SSBO_SRC BufferBlock\n"
12728 "OpDecorate %ssbo_src DescriptorSet 0\n"
12729 "OpDecorate %ssbo_src Binding 0\n"
12730
12731 "OpDecorate %ra_u32 ArrayStride 4\n"
12732 "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12733 "OpDecorate %SSBO_IDX BufferBlock\n"
12734 "OpDecorate %ssbo_idx DescriptorSet 0\n"
12735 "OpDecorate %ssbo_idx Binding 1\n"
12736
12737 "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12738 "OpDecorate %SSBO_DST BufferBlock\n"
12739 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12740 "OpDecorate %ssbo_dst Binding 2\n");
12741
12742 const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12743 " %param = OpFunctionParameter %v4f32\n"
12744 " %entry = OpLabel\n"
12745
12746 " %i = OpVariable %fp_i32 Function\n"
12747 " OpStore %i %c_i32_0\n"
12748
12749 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12750 " OpSelectionMerge %end_if None\n"
12751 " OpBranchConditional %will_run %run_test %end_if\n"
12752
12753 " %run_test = OpLabel\n"
12754 " OpBranch %loop\n"
12755
12756 " %loop = OpLabel\n"
12757 " %i_cmp = OpLoad %i32 %i\n"
12758 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12759 " OpLoopMerge %merge %next None\n"
12760 " OpBranchConditional %lt %write %merge\n"
12761
12762 " %write = OpLabel\n"
12763 " %ndx = OpLoad %i32 %i\n"
12764
12765 " %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12766
12767 " %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12768 " %val_idx = OpLoad %u32 %src_idx\n"
12769
12770 " %val_dst = OpVectorInsertDynamic %${tt} %val_src %c_f16_ins %val_idx\n"
12771 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12772
12773 " OpBranch %next\n"
12774
12775 " %next = OpLabel\n"
12776 " %i_cur = OpLoad %i32 %i\n"
12777 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12778 " OpStore %i %i_new\n"
12779 " OpBranch %loop\n"
12780
12781 " %merge = OpLabel\n"
12782 " OpBranch %end_if\n"
12783 " %end_if = OpLabel\n"
12784 " OpReturnValue %param\n"
12785
12786 " OpFunctionEnd\n");
12787
12788 for (uint32_t testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12789 {
12790 const TestType &testType = testTypes[testTypeIdx];
12791 const string testName = testType.typeName;
12792 const size_t itemsPerType = testType.typeStride / sizeof(deFloat16);
12793 const size_t iterations = float16InputData.size() / itemsPerType;
12794 SpecResource specResource;
12795 map<string, string> specs;
12796 VulkanFeatures features;
12797 vector<uint32_t> inputDataNdx;
12798 map<string, string> fragments;
12799 vector<string> extensions;
12800
12801 for (uint32_t ndx = 0; ndx < iterations; ++ndx)
12802 inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
12803
12804 specs["num_data_points"] = de::toString(iterations);
12805 specs["tt"] = testType.typeName;
12806 specs["ts"] = testType.typeStorage;
12807 specs["tt_stride"] = de::toString(testType.typeStride);
12808 specs["type_decl"] = testType.typeDecls;
12809 specs["replacement"] = de::toString(replacement);
12810
12811 fragments["capability"] = "OpCapability Float16\n";
12812 fragments["decoration"] = decoration.specialize(specs);
12813 fragments["pre_main"] = preMain.specialize(specs);
12814 fragments["testfun"] = testFun.specialize(specs);
12815 fragments["testfun"] += StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12816 fragments["testfun"] += StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
12817
12818 specResource.inputs.push_back(
12819 Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12820 specResource.inputs.push_back(
12821 Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12822 specResource.outputs.push_back(
12823 Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12824 specResource.verifyIO = testType.verifyIOFunc;
12825
12826 extensions.push_back("VK_KHR_shader_float16_int8");
12827
12828 features.extFloat16Int8.shaderFloat16 = true;
12829 if (specResource.graphicsFeaturesRequired)
12830 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
12831
12832 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
12833 IVec3(1, 1, 1));
12834 }
12835
12836 return testGroup.release();
12837 }
12838
// Computes the value expected in one component of a shuffle result.
//
// The two source vectors are treated as one concatenated sequence of vec1Len + vec2Len
// components. A selector into that sequence is derived as follows:
//   component 0 - (iteration mod N*N) / N, where N = vec1Len + vec2Len + 1
//   component 1 - (iteration mod N*N) % N
//   component 2 - always 0
//   component 3 - always 1
// Any other componentNdx throws InternalError.
//
// A selector equal to vec1Len + vec2Len (one past the concatenated sequence) sets
// \p validate to false and returns 0; the caller is then expected to skip the comparison
// (presumably this models an undefined shuffle component - confirm against the shader
// generator). Otherwise \p validate is set to true and the selected source value is returned.
inline deFloat16 getShuffledComponent(const size_t iteration, const size_t componentNdx, const deFloat16 *input1Vec,
                                      const deFloat16 *input2Vec, size_t vec1Len, size_t vec2Len, bool &validate)
{
    const size_t concatLen     = vec1Len + vec2Len;
    const size_t selectorCount = concatLen + 1;
    const size_t pairCode      = iteration % (selectorCount * selectorCount);
    size_t selector;

    if (componentNdx == 0)
    {
        selector = pairCode / selectorCount;
    }
    else if (componentNdx == 1)
    {
        selector = pairCode % selectorCount;
    }
    else if (componentNdx == 2)
    {
        selector = 0;
    }
    else if (componentNdx == 3)
    {
        selector = 1;
    }
    else
    {
        TCU_THROW(InternalError, "Impossible");
    }

    validate = (selector < concatLen);

    if (!validate)
        return 0;

    if (selector < vec1Len)
        return input1Vec[selector];

    return input2Vec[selector - vec1Len];
}
12875
12876 template <uint32_t DST_COMPONENTS_COUNT, uint32_t SRC0_COMPONENTS_COUNT, uint32_t SRC1_COMPONENTS_COUNT>
compareFP16VectorShuffleFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12877 bool compareFP16VectorShuffleFunc(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
12878 const std::vector<Resource> &, TestLog &log)
12879 {
12880 DE_STATIC_ASSERT(DST_COMPONENTS_COUNT == 2 || DST_COMPONENTS_COUNT == 3 || DST_COMPONENTS_COUNT == 4);
12881 DE_STATIC_ASSERT(SRC0_COMPONENTS_COUNT == 2 || SRC0_COMPONENTS_COUNT == 3 || SRC0_COMPONENTS_COUNT == 4);
12882 DE_STATIC_ASSERT(SRC1_COMPONENTS_COUNT == 2 || SRC1_COMPONENTS_COUNT == 3 || SRC1_COMPONENTS_COUNT == 4);
12883
12884 if (inputs.size() != 2 || outputAllocs.size() != 1)
12885 return false;
12886
12887 vector<uint8_t> input1Bytes;
12888 vector<uint8_t> input2Bytes;
12889
12890 inputs[0].getBytes(input1Bytes);
12891 inputs[1].getBytes(input2Bytes);
12892
12893 DE_ASSERT(input1Bytes.size() > 0);
12894 DE_ASSERT(input2Bytes.size() > 0);
12895 DE_ASSERT(input2Bytes.size() % sizeof(deFloat16) == 0);
12896
12897 const size_t componentsStrideDst = (DST_COMPONENTS_COUNT == 3) ? 4 : DST_COMPONENTS_COUNT;
12898 const size_t componentsStrideSrc0 = (SRC0_COMPONENTS_COUNT == 3) ? 4 : SRC0_COMPONENTS_COUNT;
12899 const size_t componentsStrideSrc1 = (SRC1_COMPONENTS_COUNT == 3) ? 4 : SRC1_COMPONENTS_COUNT;
12900 const size_t iterations = input1Bytes.size() / (componentsStrideSrc0 * sizeof(deFloat16));
12901 const deFloat16 *const input1AsFP16 = (const deFloat16 *)&input1Bytes[0];
12902 const deFloat16 *const input2AsFP16 = (const deFloat16 *)&input2Bytes[0];
12903 const deFloat16 *const outputAsFP16 = (const deFloat16 *)outputAllocs[0]->getHostPtr();
12904 std::string error;
12905
12906 DE_ASSERT(input1Bytes.size() == iterations * componentsStrideSrc0 * sizeof(deFloat16));
12907 DE_ASSERT(input2Bytes.size() == iterations * componentsStrideSrc1 * sizeof(deFloat16));
12908
12909 for (size_t idx = 0; idx < iterations; ++idx)
12910 {
12911 const deFloat16 *input1Vec = &input1AsFP16[componentsStrideSrc0 * idx];
12912 const deFloat16 *input2Vec = &input2AsFP16[componentsStrideSrc1 * idx];
12913 const deFloat16 *outputVec = &outputAsFP16[componentsStrideDst * idx];
12914
12915 for (size_t compNdx = 0; compNdx < DST_COMPONENTS_COUNT; ++compNdx)
12916 {
12917 bool validate = true;
12918 deFloat16 expected = getShuffledComponent(idx, compNdx, input1Vec, input2Vec, SRC0_COMPONENTS_COUNT,
12919 SRC1_COMPONENTS_COUNT, validate);
12920
12921 if (validate && !compare16BitFloat(expected, outputVec[compNdx], error))
12922 {
12923 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
12924
12925 return false;
12926 }
12927 }
12928 }
12929
12930 return true;
12931 }
12932
getFloat16VectorShuffleVerifyIOFunc(uint32_t dstComponentsCount,uint32_t src0ComponentsCount,uint32_t src1ComponentsCount)12933 VerifyIOFunc getFloat16VectorShuffleVerifyIOFunc(uint32_t dstComponentsCount, uint32_t src0ComponentsCount,
12934 uint32_t src1ComponentsCount)
12935 {
12936 DE_ASSERT(dstComponentsCount <= 4);
12937 DE_ASSERT(src0ComponentsCount <= 4);
12938 DE_ASSERT(src1ComponentsCount <= 4);
12939 uint32_t funcCode = 100 * dstComponentsCount + 10 * src0ComponentsCount + src1ComponentsCount;
12940
12941 switch (funcCode)
12942 {
12943 case 222:
12944 return compareFP16VectorShuffleFunc<2, 2, 2>;
12945 case 223:
12946 return compareFP16VectorShuffleFunc<2, 2, 3>;
12947 case 224:
12948 return compareFP16VectorShuffleFunc<2, 2, 4>;
12949 case 232:
12950 return compareFP16VectorShuffleFunc<2, 3, 2>;
12951 case 233:
12952 return compareFP16VectorShuffleFunc<2, 3, 3>;
12953 case 234:
12954 return compareFP16VectorShuffleFunc<2, 3, 4>;
12955 case 242:
12956 return compareFP16VectorShuffleFunc<2, 4, 2>;
12957 case 243:
12958 return compareFP16VectorShuffleFunc<2, 4, 3>;
12959 case 244:
12960 return compareFP16VectorShuffleFunc<2, 4, 4>;
12961 case 322:
12962 return compareFP16VectorShuffleFunc<3, 2, 2>;
12963 case 323:
12964 return compareFP16VectorShuffleFunc<3, 2, 3>;
12965 case 324:
12966 return compareFP16VectorShuffleFunc<3, 2, 4>;
12967 case 332:
12968 return compareFP16VectorShuffleFunc<3, 3, 2>;
12969 case 333:
12970 return compareFP16VectorShuffleFunc<3, 3, 3>;
12971 case 334:
12972 return compareFP16VectorShuffleFunc<3, 3, 4>;
12973 case 342:
12974 return compareFP16VectorShuffleFunc<3, 4, 2>;
12975 case 343:
12976 return compareFP16VectorShuffleFunc<3, 4, 3>;
12977 case 344:
12978 return compareFP16VectorShuffleFunc<3, 4, 4>;
12979 case 422:
12980 return compareFP16VectorShuffleFunc<4, 2, 2>;
12981 case 423:
12982 return compareFP16VectorShuffleFunc<4, 2, 3>;
12983 case 424:
12984 return compareFP16VectorShuffleFunc<4, 2, 4>;
12985 case 432:
12986 return compareFP16VectorShuffleFunc<4, 3, 2>;
12987 case 433:
12988 return compareFP16VectorShuffleFunc<4, 3, 3>;
12989 case 434:
12990 return compareFP16VectorShuffleFunc<4, 3, 4>;
12991 case 442:
12992 return compareFP16VectorShuffleFunc<4, 4, 2>;
12993 case 443:
12994 return compareFP16VectorShuffleFunc<4, 4, 3>;
12995 case 444:
12996 return compareFP16VectorShuffleFunc<4, 4, 4>;
12997 default:
12998 TCU_THROW(InternalError, "Invalid number of components specified.");
12999 }
13000 }
13001
13002 template <class SpecResource>
createFloat16VectorShuffleSet(tcu::TestContext & testCtx)13003 tcu::TestCaseGroup *createFloat16VectorShuffleSet(tcu::TestContext &testCtx)
13004 {
13005 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "opvectorshuffle"));
13006 const int testSpecificSeed = deStringHash(testGroup->getName());
13007 const int seed = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
13008 de::Random rnd(seed);
13009 const uint32_t numDataPoints = 128;
13010 map<string, string> fragments;
13011
13012 struct TestType
13013 {
13014 const uint32_t typeComponents;
13015 const char *typeName;
13016 const string loadFunction;
13017 const string storeFunction;
13018 };
13019
13020 const TestType testTypes[] = {
13021 {2, "v2f16", loadV2F16FromUint, storeV2F16AsUint},
13022 {3, "v3f16", loadV3F16FromUints, storeV3F16AsUints},
13023 {4, "v4f16", loadV4F16FromUints, storeV4F16AsUints},
13024 };
13025
13026 const StringTemplate preMain(" %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
13027 " %c_i32_cc = OpConstant %i32 ${case_count}\n"
13028 " %f16 = OpTypeFloat 16\n"
13029 " %v2f16 = OpTypeVector %f16 2\n"
13030 " %v3f16 = OpTypeVector %f16 3\n"
13031 " %v4f16 = OpTypeVector %f16 4\n"
13032
13033 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
13034 " %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
13035 " %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
13036 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
13037 "%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
13038 "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
13039
13040 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
13041 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
13042 " %ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
13043 " %up_u32 = OpTypePointer Uniform %u32\n"
13044 " %SSBO_v2f16 = OpTypeStruct %ra_u32_ndp\n"
13045 " %SSBO_v3f16 = OpTypeStruct %ra_ra_u32_2\n"
13046 " %SSBO_v4f16 = OpTypeStruct %ra_ra_u32_2\n"
13047
13048 "%up_SSBO_v2f16 = OpTypePointer Uniform %SSBO_v2f16\n"
13049 "%up_SSBO_v3f16 = OpTypePointer Uniform %SSBO_v3f16\n"
13050 "%up_SSBO_v4f16 = OpTypePointer Uniform %SSBO_v4f16\n"
13051
13052 " %fun_t = OpTypeFunction %${tt_dst} %${tt_src0} %${tt_src1} %i32\n"
13053
13054 " %ssbo_src0 = OpVariable %up_SSBO_${tt_src0} Uniform\n"
13055 " %ssbo_src1 = OpVariable %up_SSBO_${tt_src1} Uniform\n"
13056 " %ssbo_dst = OpVariable %up_SSBO_${tt_dst} Uniform\n");
13057
13058 const StringTemplate decoration("OpDecorate %ra_u32_2 ArrayStride 4\n"
13059 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
13060 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
13061
13062 "OpMemberDecorate %SSBO_v2f16 0 Offset 0\n"
13063 "OpDecorate %SSBO_v2f16 BufferBlock\n"
13064
13065 "OpMemberDecorate %SSBO_v3f16 0 Offset 0\n"
13066 "OpDecorate %SSBO_v3f16 BufferBlock\n"
13067
13068 "OpMemberDecorate %SSBO_v4f16 0 Offset 0\n"
13069 "OpDecorate %SSBO_v4f16 BufferBlock\n"
13070
13071 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
13072 "OpDecorate %ssbo_src0 Binding 0\n"
13073 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
13074 "OpDecorate %ssbo_src1 Binding 1\n"
13075 "OpDecorate %ssbo_dst DescriptorSet 0\n"
13076 "OpDecorate %ssbo_dst Binding 2\n");
13077
13078 const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
13079 " %param = OpFunctionParameter %v4f32\n"
13080 " %entry = OpLabel\n"
13081
13082 " %i = OpVariable %fp_i32 Function\n"
13083 " OpStore %i %c_i32_0\n"
13084
13085 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
13086 " OpSelectionMerge %end_if None\n"
13087 " OpBranchConditional %will_run %run_test %end_if\n"
13088
13089 " %run_test = OpLabel\n"
13090 " OpBranch %loop\n"
13091
13092 " %loop = OpLabel\n"
13093 " %i_cmp = OpLoad %i32 %i\n"
13094 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
13095 " OpLoopMerge %merge %next None\n"
13096 " OpBranchConditional %lt %write %merge\n"
13097
13098 " %write = OpLabel\n"
13099 " %ndx = OpLoad %i32 %i\n"
13100 " %val_src0 = OpFunctionCall %${tt_src0} %ld_arg_ssbo_src0 %ndx\n"
13101 " %val_src1 = OpFunctionCall %${tt_src1} %ld_arg_ssbo_src1 %ndx\n"
13102 " %val_dst = OpFunctionCall %${tt_dst} %sw_fun %val_src0 %val_src1 %ndx\n"
13103 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
13104 " OpBranch %next\n"
13105
13106 " %next = OpLabel\n"
13107 " %i_cur = OpLoad %i32 %i\n"
13108 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13109 " OpStore %i %i_new\n"
13110 " OpBranch %loop\n"
13111
13112 " %merge = OpLabel\n"
13113 " OpBranch %end_if\n"
13114 " %end_if = OpLabel\n"
13115 " OpReturnValue %param\n"
13116 " OpFunctionEnd\n"
13117 "\n"
13118
13119 " %sw_fun = OpFunction %${tt_dst} None %fun_t\n"
13120 "%sw_param0 = OpFunctionParameter %${tt_src0}\n"
13121 "%sw_param1 = OpFunctionParameter %${tt_src1}\n"
13122 "%sw_paramn = OpFunctionParameter %i32\n"
13123 " %sw_entry = OpLabel\n"
13124 " %modulo = OpSMod %i32 %sw_paramn %c_i32_cc\n"
13125 " OpSelectionMerge %switch_e None\n"
13126 " OpSwitch %modulo %default ${case_list}\n"
13127 "${case_bodies}"
13128 "%default = OpLabel\n"
13129 " OpUnreachable\n" // Unreachable default case for switch statement
13130 "%switch_e = OpLabel\n"
13131 " OpUnreachable\n" // Unreachable merge block for switch statement
13132 " OpFunctionEnd\n");
13133
13134 const StringTemplate testCaseBody(
13135 "%case_${case_ndx} = OpLabel\n"
13136 "%val_dst_${case_ndx} = OpVectorShuffle %${tt_dst} %sw_param0 %sw_param1 ${shuffle}\n"
13137 " OpReturnValue %val_dst_${case_ndx}\n");
13138
13139 for (uint32_t dstTypeIdx = 0; dstTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++dstTypeIdx)
13140 {
13141 const TestType &dstType = testTypes[dstTypeIdx];
13142
13143 for (uint32_t comp0Idx = 0; comp0Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp0Idx)
13144 {
13145 const TestType &src0Type = testTypes[comp0Idx];
13146
13147 for (uint32_t comp1Idx = 0; comp1Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp1Idx)
13148 {
13149 const TestType &src1Type = testTypes[comp1Idx];
13150 const uint32_t input0Stride = (src0Type.typeComponents == 3) ? 4 : src0Type.typeComponents;
13151 const uint32_t input1Stride = (src1Type.typeComponents == 3) ? 4 : src1Type.typeComponents;
13152 const uint32_t outputStride = (dstType.typeComponents == 3) ? 4 : dstType.typeComponents;
13153 const vector<deFloat16> float16Input0Data = getFloat16s(rnd, input0Stride * numDataPoints);
13154 const vector<deFloat16> float16Input1Data = getFloat16s(rnd, input1Stride * numDataPoints);
13155 const vector<deFloat16> float16OutputUnused(outputStride * numDataPoints, 0);
13156 const string testName = de::toString(dstType.typeComponents) + de::toString(src0Type.typeComponents) +
13157 de::toString(src1Type.typeComponents);
13158 uint32_t caseCount = 0;
13159 SpecResource specResource;
13160 map<string, string> specs;
13161 vector<string> extensions;
13162 VulkanFeatures features;
13163 string caseBodies;
13164 string caseList;
13165
13166 // Generate case
13167 {
13168 vector<string> componentList;
13169
13170 // Generate component possible indices for OpVectorShuffle for components 0 and 1 in output vector
13171 {
13172 uint32_t caseNo = 0;
13173
13174 for (uint32_t comp0IdxLocal = 0; comp0IdxLocal < src0Type.typeComponents; ++comp0IdxLocal)
13175 componentList.push_back(de::toString(caseNo++));
13176 for (uint32_t comp1IdxLocal = 0; comp1IdxLocal < src1Type.typeComponents; ++comp1IdxLocal)
13177 componentList.push_back(de::toString(caseNo++));
13178 componentList.push_back("0xFFFFFFFF");
13179 }
13180
13181 for (uint32_t comp0IdxLocal = 0; comp0IdxLocal < componentList.size(); ++comp0IdxLocal)
13182 {
13183 for (uint32_t comp1IdxLocal = 0; comp1IdxLocal < componentList.size(); ++comp1IdxLocal)
13184 {
13185 map<string, string> specCase;
13186 string shuffle = componentList[comp0IdxLocal] + " " + componentList[comp1IdxLocal];
13187
13188 for (uint32_t compIdx = 2; compIdx < dstType.typeComponents; ++compIdx)
13189 shuffle += " " + de::toString(compIdx - 2);
13190
13191 specCase["case_ndx"] = de::toString(caseCount);
13192 specCase["shuffle"] = shuffle;
13193 specCase["tt_dst"] = dstType.typeName;
13194
13195 caseBodies += testCaseBody.specialize(specCase);
13196 caseList += de::toString(caseCount) + " %case_" + de::toString(caseCount) + " ";
13197
13198 caseCount++;
13199 }
13200 }
13201 }
13202
13203 specs["num_data_points"] = de::toString(numDataPoints);
13204 specs["tt_dst"] = dstType.typeName;
13205 specs["tt_src0"] = src0Type.typeName;
13206 specs["tt_src1"] = src1Type.typeName;
13207 specs["case_bodies"] = caseBodies;
13208 specs["case_list"] = caseList;
13209 specs["case_count"] = de::toString(caseCount);
13210
13211 fragments["capability"] = "OpCapability Float16\n";
13212 fragments["decoration"] = decoration.specialize(specs);
13213 fragments["pre_main"] = preMain.specialize(specs);
13214 fragments["testfun"] = testFun.specialize(specs);
13215 fragments["testfun"] += StringTemplate(src0Type.loadFunction).specialize({{"var", "ssbo_src0"}});
13216 fragments["testfun"] += StringTemplate(src1Type.loadFunction).specialize({{"var", "ssbo_src1"}});
13217 fragments["testfun"] += StringTemplate(dstType.storeFunction).specialize({{"var", "ssbo_dst"}});
13218
13219 specResource.inputs.push_back(
13220 Resource(BufferSp(new Float16Buffer(float16Input0Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13221 specResource.inputs.push_back(
13222 Resource(BufferSp(new Float16Buffer(float16Input1Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13223 specResource.outputs.push_back(
13224 Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13225 specResource.verifyIO = getFloat16VectorShuffleVerifyIOFunc(
13226 dstType.typeComponents, src0Type.typeComponents, src1Type.typeComponents);
13227
13228 extensions.push_back("VK_KHR_shader_float16_int8");
13229
13230 features.extFloat16Int8.shaderFloat16 = true;
13231 if (specResource.graphicsFeaturesRequired)
13232 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
13233
13234 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features,
13235 extensions, IVec3(1, 1, 1));
13236 }
13237 }
13238 }
13239
13240 return testGroup.release();
13241 }
13242
compareFP16CompositeFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)13243 bool compareFP16CompositeFunc(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
13244 const std::vector<Resource> &, TestLog &log)
13245 {
13246 if (inputs.size() != 1 || outputAllocs.size() != 1)
13247 return false;
13248
13249 vector<uint8_t> input1Bytes;
13250
13251 inputs[0].getBytes(input1Bytes);
13252
13253 DE_ASSERT(input1Bytes.size() > 0);
13254 DE_ASSERT(input1Bytes.size() % sizeof(deFloat16) == 0);
13255
13256 const size_t iterations = input1Bytes.size() / sizeof(deFloat16);
13257 const deFloat16 *const input1AsFP16 = (const deFloat16 *)&input1Bytes[0];
13258 const deFloat16 *const outputAsFP16 = (const deFloat16 *)outputAllocs[0]->getHostPtr();
13259 const deFloat16 exceptionValue = tcu::Float16(-1.0).bits();
13260 std::string error;
13261
13262 for (size_t idx = 0; idx < iterations; ++idx)
13263 {
13264 if (input1AsFP16[idx] == exceptionValue)
13265 continue;
13266
13267 if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
13268 {
13269 log << TestLog::Message << "At " << idx << ":" << error << TestLog::EndMessage;
13270
13271 return false;
13272 }
13273 }
13274
13275 return true;
13276 }
13277
13278 template <class SpecResource>
createFloat16CompositeConstructSet(tcu::TestContext & testCtx)13279 tcu::TestCaseGroup *createFloat16CompositeConstructSet(tcu::TestContext &testCtx)
13280 {
13281 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "opcompositeconstruct"));
13282 const uint32_t numElements = 8;
13283 const string testName = "struct";
13284 const uint32_t structItemsCount = 88;
13285 const uint32_t exceptionIndices[] = {1, 7, 15, 17, 25, 33, 51, 55, 59, 63, 67, 71, 84, 85, 86, 87};
13286 const deFloat16 exceptionValue = tcu::Float16(-1.0).bits();
13287 const uint32_t fieldModifier = 2;
13288 const uint32_t fieldModifiedMulIndex = 60;
13289 const uint32_t fieldModifiedAddIndex = 66;
13290
13291 const StringTemplate preMain(" %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
13292 " %f16 = OpTypeFloat 16\n"
13293 " %v2f16 = OpTypeVector %f16 2\n"
13294 " %v3f16 = OpTypeVector %f16 3\n"
13295 " %v4f16 = OpTypeVector %f16 4\n"
13296 " %c_f16_mod = OpConstant %f16 ${field_modifier}\n"
13297
13298 "${consts}"
13299
13300 " %c_f16_n1 = OpConstant %f16 -1.0\n"
13301 " %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_n1 %c_f16_n1\n"
13302 " %c_u32_5 = OpConstant %u32 5\n"
13303 " %c_u32_6 = OpConstant %u32 6\n"
13304 " %c_u32_7 = OpConstant %u32 7\n"
13305 " %c_u32_8 = OpConstant %u32 8\n"
13306 " %c_u32_9 = OpConstant %u32 9\n"
13307 " %c_u32_10 = OpConstant %u32 10\n"
13308 " %c_u32_11 = OpConstant %u32 11\n"
13309 " %c_u32_12 = OpConstant %u32 12\n"
13310 " %c_u32_13 = OpConstant %u32 13\n"
13311 " %c_u32_14 = OpConstant %u32 14\n"
13312 " %c_u32_15 = OpConstant %u32 15\n"
13313 " %c_u32_16 = OpConstant %u32 16\n"
13314 " %c_u32_17 = OpConstant %u32 17\n"
13315 " %c_u32_18 = OpConstant %u32 18\n"
13316 " %c_u32_19 = OpConstant %u32 19\n"
13317 " %c_u32_20 = OpConstant %u32 20\n"
13318 " %c_u32_21 = OpConstant %u32 21\n"
13319 " %c_u32_22 = OpConstant %u32 22\n"
13320 " %c_u32_23 = OpConstant %u32 23\n"
13321 " %c_u32_24 = OpConstant %u32 24\n"
13322 " %c_u32_25 = OpConstant %u32 25\n"
13323 " %c_u32_26 = OpConstant %u32 26\n"
13324 " %c_u32_27 = OpConstant %u32 27\n"
13325 " %c_u32_28 = OpConstant %u32 28\n"
13326 " %c_u32_29 = OpConstant %u32 29\n"
13327 " %c_u32_30 = OpConstant %u32 30\n"
13328 " %c_u32_31 = OpConstant %u32 31\n"
13329 " %c_u32_33 = OpConstant %u32 33\n"
13330 " %c_u32_34 = OpConstant %u32 34\n"
13331 " %c_u32_35 = OpConstant %u32 35\n"
13332 " %c_u32_36 = OpConstant %u32 36\n"
13333 " %c_u32_37 = OpConstant %u32 37\n"
13334 " %c_u32_38 = OpConstant %u32 38\n"
13335 " %c_u32_39 = OpConstant %u32 39\n"
13336 " %c_u32_40 = OpConstant %u32 40\n"
13337 " %c_u32_41 = OpConstant %u32 41\n"
13338 " %c_u32_44 = OpConstant %u32 44\n"
13339
13340 " %f16arr3 = OpTypeArray %f16 %c_u32_3\n"
13341 " %v2f16arr3 = OpTypeArray %v2f16 %c_u32_3\n"
13342 " %v2f16arr5 = OpTypeArray %v2f16 %c_u32_5\n"
13343 " %v3f16arr5 = OpTypeArray %v3f16 %c_u32_5\n"
13344 " %v4f16arr3 = OpTypeArray %v4f16 %c_u32_3\n"
13345 " %struct16 = OpTypeStruct %f16 %v2f16arr3\n"
13346 " %struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
13347 " %st_test = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr3 "
13348 "%v2f16arr5 %f16 %v3f16arr5 %v4f16arr3\n"
13349
13350 " %up_u32 = OpTypePointer Uniform %u32\n"
13351 " %ra_u32_44 = OpTypeArray %u32 %c_u32_44\n"
13352 " %ra_ra_u32 = OpTypeArray %ra_u32_44 %c_i32_ndp\n"
13353 " %SSBO_st = OpTypeStruct %ra_ra_u32\n"
13354 " %up_SSBO_st = OpTypePointer Uniform %SSBO_st\n"
13355
13356 " %ssbo_dst = OpVariable %up_SSBO_st Uniform\n");
13357
13358 const StringTemplate decoration("OpDecorate %SSBO_st BufferBlock\n"
13359 "OpDecorate %ra_u32_44 ArrayStride 4\n"
13360 "OpDecorate %ra_ra_u32 ArrayStride ${struct_item_size}\n"
13361 "OpDecorate %ssbo_dst DescriptorSet 0\n"
13362 "OpDecorate %ssbo_dst Binding 1\n"
13363
13364 "OpMemberDecorate %SSBO_st 0 Offset 0\n"
13365
13366 "OpDecorate %v2f16arr3 ArrayStride 4\n"
13367 "OpMemberDecorate %struct16 0 Offset 0\n"
13368 "OpMemberDecorate %struct16 1 Offset 4\n"
13369 "OpDecorate %struct16arr3 ArrayStride 16\n"
13370 "OpDecorate %f16arr3 ArrayStride 2\n"
13371 "OpDecorate %v2f16arr5 ArrayStride 4\n"
13372 "OpDecorate %v3f16arr5 ArrayStride 8\n"
13373 "OpDecorate %v4f16arr3 ArrayStride 8\n"
13374
13375 "OpMemberDecorate %st_test 0 Offset 0\n"
13376 "OpMemberDecorate %st_test 1 Offset 4\n"
13377 "OpMemberDecorate %st_test 2 Offset 8\n"
13378 "OpMemberDecorate %st_test 3 Offset 16\n"
13379 "OpMemberDecorate %st_test 4 Offset 24\n"
13380 "OpMemberDecorate %st_test 5 Offset 32\n"
13381 "OpMemberDecorate %st_test 6 Offset 80\n"
13382 "OpMemberDecorate %st_test 7 Offset 100\n"
13383 "OpMemberDecorate %st_test 8 Offset 104\n"
13384 "OpMemberDecorate %st_test 9 Offset 144\n");
13385
13386 const StringTemplate testFun(
13387 " %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
13388 " %param = OpFunctionParameter %v4f32\n"
13389 " %entry = OpLabel\n"
13390
13391 " %i = OpVariable %fp_i32 Function\n"
13392 " OpStore %i %c_i32_0\n"
13393
13394 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
13395 " OpSelectionMerge %end_if None\n"
13396 " OpBranchConditional %will_run %run_test %end_if\n"
13397
13398 " %run_test = OpLabel\n"
13399 " OpBranch %loop\n"
13400
13401 " %loop = OpLabel\n"
13402 " %i_cmp = OpLoad %i32 %i\n"
13403 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
13404 " OpLoopMerge %merge %next None\n"
13405 " OpBranchConditional %lt %write %merge\n"
13406
13407 " %write = OpLabel\n"
13408 " %ndx = OpLoad %i32 %i\n"
13409
13410 " %fld1 = OpCompositeConstruct %v2f16 %c_f16_2 %c_f16_3\n"
13411 " %fld2 = OpCompositeConstruct %v3f16 %c_f16_4 %c_f16_5 %c_f16_6\n"
13412 " %fld3 = OpCompositeConstruct %v4f16 %c_f16_8 %c_f16_9 %c_f16_10 %c_f16_11\n"
13413
13414 " %fld4 = OpCompositeConstruct %f16arr3 %c_f16_12 %c_f16_13 %c_f16_14\n"
13415
13416 "%fld5_0_1_0 = OpCompositeConstruct %v2f16 %c_f16_18 %c_f16_19\n"
13417 "%fld5_0_1_1 = OpCompositeConstruct %v2f16 %c_f16_20 %c_f16_21\n"
13418 "%fld5_0_1_2 = OpCompositeConstruct %v2f16 %c_f16_22 %c_f16_23\n"
13419 " %fld5_0_1 = OpCompositeConstruct %v2f16arr3 %fld5_0_1_0 %fld5_0_1_1 %fld5_0_1_2\n"
13420 " %fld5_0 = OpCompositeConstruct %struct16 %c_f16_16 %fld5_0_1\n"
13421
13422 "%fld5_1_1_0 = OpCompositeConstruct %v2f16 %c_f16_26 %c_f16_27\n"
13423 "%fld5_1_1_1 = OpCompositeConstruct %v2f16 %c_f16_28 %c_f16_29\n"
13424 "%fld5_1_1_2 = OpCompositeConstruct %v2f16 %c_f16_30 %c_f16_31\n"
13425 " %fld5_1_1 = OpCompositeConstruct %v2f16arr3 %fld5_1_1_0 %fld5_1_1_1 %fld5_1_1_2\n"
13426 " %fld5_1 = OpCompositeConstruct %struct16 %c_f16_24 %fld5_1_1\n"
13427
13428 "%fld5_2_1_0 = OpCompositeConstruct %v2f16 %c_f16_34 %c_f16_35\n"
13429 "%fld5_2_1_1 = OpCompositeConstruct %v2f16 %c_f16_36 %c_f16_37\n"
13430 "%fld5_2_1_2 = OpCompositeConstruct %v2f16 %c_f16_38 %c_f16_39\n"
13431 " %fld5_2_1 = OpCompositeConstruct %v2f16arr3 %fld5_2_1_0 %fld5_2_1_1 %fld5_2_1_2\n"
13432 " %fld5_2 = OpCompositeConstruct %struct16 %c_f16_32 %fld5_2_1\n"
13433
13434 " %fld5 = OpCompositeConstruct %struct16arr3 %fld5_0 %fld5_1 %fld5_2\n"
13435
13436 " %fld6_0 = OpCompositeConstruct %v2f16 %c_f16_40 %c_f16_41\n"
13437 " %fld6_1 = OpCompositeConstruct %v2f16 %c_f16_42 %c_f16_43\n"
13438 " %fld6_2 = OpCompositeConstruct %v2f16 %c_f16_44 %c_f16_45\n"
13439 " %fld6_3 = OpCompositeConstruct %v2f16 %c_f16_46 %c_f16_47\n"
13440 " %fld6_4 = OpCompositeConstruct %v2f16 %c_f16_48 %c_f16_49\n"
13441 " %fld6 = OpCompositeConstruct %v2f16arr5 %fld6_0 %fld6_1 %fld6_2 %fld6_3 %fld6_4\n"
13442
13443 " %fndx = OpConvertSToF %f16 %ndx\n"
13444 " %fld8_2a0 = OpFMul %f16 %fndx %c_f16_mod\n"
13445 " %fld8_3b1 = OpFAdd %f16 %fndx %c_f16_mod\n"
13446
13447 " %fld8_2a = OpCompositeConstruct %v2f16 %fld8_2a0 %c_f16_61\n"
13448 " %fld8_3b = OpCompositeConstruct %v2f16 %c_f16_65 %fld8_3b1\n"
13449 " %fld8_0 = OpCompositeConstruct %v3f16 %c_f16_52 %c_f16_53 %c_f16_54\n"
13450 " %fld8_1 = OpCompositeConstruct %v3f16 %c_f16_56 %c_f16_57 %c_f16_58\n"
13451 " %fld8_2 = OpCompositeConstruct %v3f16 %fld8_2a %c_f16_62\n"
13452 " %fld8_3 = OpCompositeConstruct %v3f16 %c_f16_64 %fld8_3b\n"
13453 " %fld8_4 = OpCompositeConstruct %v3f16 %c_f16_68 %c_f16_69 %c_f16_70\n"
13454 " %fld8 = OpCompositeConstruct %v3f16arr5 %fld8_0 %fld8_1 %fld8_2 %fld8_3 %fld8_4\n"
13455
13456 " %fld9_0 = OpCompositeConstruct %v4f16 %c_f16_72 %c_f16_73 %c_f16_74 %c_f16_75\n"
13457 " %fld9_1 = OpCompositeConstruct %v4f16 %c_f16_76 %c_f16_77 %c_f16_78 %c_f16_79\n"
13458 " %fld9_2 = OpCompositeConstruct %v4f16 %c_f16_80 %c_f16_81 %c_f16_82 %c_f16_83\n"
13459 " %fld9 = OpCompositeConstruct %v4f16arr3 %fld9_0 %fld9_1 %fld9_2\n"
13460
13461 " %st_val = OpCompositeConstruct %st_test %c_f16_0 %fld1 %fld2 %fld3 %fld4 %fld5 %fld6 %c_f16_50 %fld8 "
13462 "%fld9\n"
13463
13464 // Storage section: all elements that are not directly accessed should
13465 // have the value of -1.0. This means for f16 and v3f16 stores the v2f16
13466 // is constructed with one element from a constant -1.0.
13467 // half offset 0
13468 " %ex_0 = OpCompositeExtract %f16 %st_val 0\n"
13469 " %vec_0 = OpCompositeConstruct %v2f16 %ex_0 %c_f16_n1\n"
13470 " %bc_0 = OpBitcast %u32 %vec_0\n"
13471 " %gep_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_0\n"
13472 " OpStore %gep_0 %bc_0\n"
13473
13474 // <2 x half> offset 4
13475 " %ex_1 = OpCompositeExtract %v2f16 %st_val 1\n"
13476 " %bc_1 = OpBitcast %u32 %ex_1\n"
13477 " %gep_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_1\n"
13478 " OpStore %gep_1 %bc_1\n"
13479
13480 // <3 x half> offset 8
13481 " %ex_2 = OpCompositeExtract %v3f16 %st_val 2\n"
13482 " %ex_2_0 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 0 1\n"
13483 " %ex_2_1 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 2 3\n"
13484 " %bc_2_0 = OpBitcast %u32 %ex_2_0\n"
13485 " %bc_2_1 = OpBitcast %u32 %ex_2_1\n"
13486 " %gep_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_2\n"
13487 " %gep_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_3\n"
13488 " OpStore %gep_2_0 %bc_2_0\n"
13489 " OpStore %gep_2_1 %bc_2_1\n"
13490
13491 // <4 x half> offset 16
13492 " %ex_3 = OpCompositeExtract %v4f16 %st_val 3\n"
13493 " %ex_3_0 = OpVectorShuffle %v2f16 %ex_3 %ex_3 0 1\n"
13494 " %ex_3_1 = OpVectorShuffle %v2f16 %ex_3 %ex_3 2 3\n"
13495 " %bc_3_0 = OpBitcast %u32 %ex_3_0\n"
13496 " %bc_3_1 = OpBitcast %u32 %ex_3_1\n"
13497 " %gep_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_4\n"
13498 " %gep_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_5\n"
13499 " OpStore %gep_3_0 %bc_3_0\n"
13500 " OpStore %gep_3_1 %bc_3_1\n"
13501
13502 // [3 x half] offset 24
13503 " %ex_4_0 = OpCompositeExtract %f16 %st_val 4 0\n"
13504 " %ex_4_1 = OpCompositeExtract %f16 %st_val 4 1\n"
13505 " %ex_4_2 = OpCompositeExtract %f16 %st_val 4 2\n"
13506 " %vec_4_0 = OpCompositeConstruct %v2f16 %ex_4_0 %ex_4_1\n"
13507 " %vec_4_1 = OpCompositeConstruct %v2f16 %ex_4_2 %c_f16_n1\n"
13508 " %bc_4_0 = OpBitcast %u32 %vec_4_0\n"
13509 " %bc_4_1 = OpBitcast %u32 %vec_4_1\n"
13510 " %gep_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_6\n"
13511 " %gep_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_7\n"
13512 " OpStore %gep_4_0 %bc_4_0\n"
13513 " OpStore %gep_4_1 %bc_4_1\n"
13514
13515 // [3 x {half, [3 x <2 x half>]}] offset 32
13516 " %ex_5_0 = OpCompositeExtract %struct16 %st_val 5 0\n"
13517 " %ex_5_1 = OpCompositeExtract %struct16 %st_val 5 1\n"
13518 " %ex_5_2 = OpCompositeExtract %struct16 %st_val 5 2\n"
13519 " %ex_5_0_0 = OpCompositeExtract %f16 %ex_5_0 0\n"
13520 " %ex_5_1_0 = OpCompositeExtract %f16 %ex_5_1 0\n"
13521 " %ex_5_2_0 = OpCompositeExtract %f16 %ex_5_2 0\n"
13522 "%ex_5_0_1_0 = OpCompositeExtract %v2f16 %ex_5_0 1 0\n"
13523 "%ex_5_0_1_1 = OpCompositeExtract %v2f16 %ex_5_0 1 1\n"
13524 "%ex_5_0_1_2 = OpCompositeExtract %v2f16 %ex_5_0 1 2\n"
13525 "%ex_5_1_1_0 = OpCompositeExtract %v2f16 %ex_5_1 1 0\n"
13526 "%ex_5_1_1_1 = OpCompositeExtract %v2f16 %ex_5_1 1 1\n"
13527 "%ex_5_1_1_2 = OpCompositeExtract %v2f16 %ex_5_1 1 2\n"
13528 "%ex_5_2_1_0 = OpCompositeExtract %v2f16 %ex_5_2 1 0\n"
13529 "%ex_5_2_1_1 = OpCompositeExtract %v2f16 %ex_5_2 1 1\n"
13530 "%ex_5_2_1_2 = OpCompositeExtract %v2f16 %ex_5_2 1 2\n"
13531 " %vec_5_0_0 = OpCompositeConstruct %v2f16 %ex_5_0_0 %c_f16_n1\n"
13532 " %vec_5_1_0 = OpCompositeConstruct %v2f16 %ex_5_1_0 %c_f16_n1\n"
13533 " %vec_5_2_0 = OpCompositeConstruct %v2f16 %ex_5_2_0 %c_f16_n1\n"
13534 " %bc_5_0_0 = OpBitcast %u32 %vec_5_0_0\n"
13535 " %bc_5_1_0 = OpBitcast %u32 %vec_5_1_0\n"
13536 " %bc_5_2_0 = OpBitcast %u32 %vec_5_2_0\n"
13537 "%bc_5_0_1_0 = OpBitcast %u32 %ex_5_0_1_0\n"
13538 "%bc_5_0_1_1 = OpBitcast %u32 %ex_5_0_1_1\n"
13539 "%bc_5_0_1_2 = OpBitcast %u32 %ex_5_0_1_2\n"
13540 "%bc_5_1_1_0 = OpBitcast %u32 %ex_5_1_1_0\n"
13541 "%bc_5_1_1_1 = OpBitcast %u32 %ex_5_1_1_1\n"
13542 "%bc_5_1_1_2 = OpBitcast %u32 %ex_5_1_1_2\n"
13543 "%bc_5_2_1_0 = OpBitcast %u32 %ex_5_2_1_0\n"
13544 "%bc_5_2_1_1 = OpBitcast %u32 %ex_5_2_1_1\n"
13545 "%bc_5_2_1_2 = OpBitcast %u32 %ex_5_2_1_2\n"
13546 " %gep_5_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_8\n"
13547 "%gep_5_0_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_9\n"
13548 "%gep_5_0_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_10\n"
13549 "%gep_5_0_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_11\n"
13550 " %gep_5_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_12\n"
13551 "%gep_5_1_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_13\n"
13552 "%gep_5_1_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_14\n"
13553 "%gep_5_1_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_15\n"
13554 " %gep_5_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_16\n"
13555 "%gep_5_2_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_17\n"
13556 "%gep_5_2_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_18\n"
13557 "%gep_5_2_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_19\n"
13558 " OpStore %gep_5_0_0 %bc_5_0_0\n"
13559 " OpStore %gep_5_0_1_0 %bc_5_0_1_0\n"
13560 " OpStore %gep_5_0_1_1 %bc_5_0_1_1\n"
13561 " OpStore %gep_5_0_1_2 %bc_5_0_1_2\n"
13562 " OpStore %gep_5_1_0 %bc_5_1_0\n"
13563 " OpStore %gep_5_1_1_0 %bc_5_1_1_0\n"
13564 " OpStore %gep_5_1_1_1 %bc_5_1_1_1\n"
13565 " OpStore %gep_5_1_1_2 %bc_5_1_1_2\n"
13566 " OpStore %gep_5_2_0 %bc_5_2_0\n"
13567 " OpStore %gep_5_2_1_0 %bc_5_2_1_0\n"
13568 " OpStore %gep_5_2_1_1 %bc_5_2_1_1\n"
13569 " OpStore %gep_5_2_1_2 %bc_5_2_1_2\n"
13570
13571 // [5 x <2 x half>] offset 80
13572 " %ex_6_0 = OpCompositeExtract %v2f16 %st_val 6 0\n"
13573 " %ex_6_1 = OpCompositeExtract %v2f16 %st_val 6 1\n"
13574 " %ex_6_2 = OpCompositeExtract %v2f16 %st_val 6 2\n"
13575 " %ex_6_3 = OpCompositeExtract %v2f16 %st_val 6 3\n"
13576 " %ex_6_4 = OpCompositeExtract %v2f16 %st_val 6 4\n"
13577 " %bc_6_0 = OpBitcast %u32 %ex_6_0\n"
13578 " %bc_6_1 = OpBitcast %u32 %ex_6_1\n"
13579 " %bc_6_2 = OpBitcast %u32 %ex_6_2\n"
13580 " %bc_6_3 = OpBitcast %u32 %ex_6_3\n"
13581 " %bc_6_4 = OpBitcast %u32 %ex_6_4\n"
13582 " %gep_6_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_20\n"
13583 " %gep_6_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_21\n"
13584 " %gep_6_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_22\n"
13585 " %gep_6_3 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_23\n"
13586 " %gep_6_4 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_24\n"
13587 " OpStore %gep_6_0 %bc_6_0\n"
13588 " OpStore %gep_6_1 %bc_6_1\n"
13589 " OpStore %gep_6_2 %bc_6_2\n"
13590 " OpStore %gep_6_3 %bc_6_3\n"
13591 " OpStore %gep_6_4 %bc_6_4\n"
13592
13593 // half offset 100
13594 " %ex_7 = OpCompositeExtract %f16 %st_val 7\n"
13595 " %vec_7 = OpCompositeConstruct %v2f16 %ex_7 %c_f16_n1\n"
13596 " %bc_7 = OpBitcast %u32 %vec_7\n"
13597 " %gep_7 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_25\n"
13598 " OpStore %gep_7 %bc_7\n"
13599
13600 // [5 x <3 x half>] offset 104
13601 " %ex_8_0 = OpCompositeExtract %v3f16 %st_val 8 0\n"
13602 " %ex_8_1 = OpCompositeExtract %v3f16 %st_val 8 1\n"
13603 " %ex_8_2 = OpCompositeExtract %v3f16 %st_val 8 2\n"
13604 " %ex_8_3 = OpCompositeExtract %v3f16 %st_val 8 3\n"
13605 " %ex_8_4 = OpCompositeExtract %v3f16 %st_val 8 4\n"
13606 " %vec_8_0_0 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 0 1\n"
13607 " %vec_8_0_1 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 2 3\n"
13608 " %vec_8_1_0 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 0 1\n"
13609 " %vec_8_1_1 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 2 3\n"
13610 " %vec_8_2_0 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 0 1\n"
13611 " %vec_8_2_1 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 2 3\n"
13612 " %vec_8_3_0 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 0 1\n"
13613 " %vec_8_3_1 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 2 3\n"
13614 " %vec_8_4_0 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 0 1\n"
13615 " %vec_8_4_1 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 2 3\n"
13616 " %bc_8_0_0 = OpBitcast %u32 %vec_8_0_0\n"
13617 " %bc_8_0_1 = OpBitcast %u32 %vec_8_0_1\n"
13618 " %bc_8_1_0 = OpBitcast %u32 %vec_8_1_0\n"
13619 " %bc_8_1_1 = OpBitcast %u32 %vec_8_1_1\n"
13620 " %bc_8_2_0 = OpBitcast %u32 %vec_8_2_0\n"
13621 " %bc_8_2_1 = OpBitcast %u32 %vec_8_2_1\n"
13622 " %bc_8_3_0 = OpBitcast %u32 %vec_8_3_0\n"
13623 " %bc_8_3_1 = OpBitcast %u32 %vec_8_3_1\n"
13624 " %bc_8_4_0 = OpBitcast %u32 %vec_8_4_0\n"
13625 " %bc_8_4_1 = OpBitcast %u32 %vec_8_4_1\n"
13626 " %gep_8_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_26\n"
13627 " %gep_8_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_27\n"
13628 " %gep_8_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_28\n"
13629 " %gep_8_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_29\n"
13630 " %gep_8_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_30\n"
13631 " %gep_8_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_31\n"
13632 " %gep_8_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_32\n"
13633 " %gep_8_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_33\n"
13634 " %gep_8_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_34\n"
13635 " %gep_8_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_35\n"
13636 " OpStore %gep_8_0_0 %bc_8_0_0\n"
13637 " OpStore %gep_8_0_1 %bc_8_0_1\n"
13638 " OpStore %gep_8_1_0 %bc_8_1_0\n"
13639 " OpStore %gep_8_1_1 %bc_8_1_1\n"
13640 " OpStore %gep_8_2_0 %bc_8_2_0\n"
13641 " OpStore %gep_8_2_1 %bc_8_2_1\n"
13642 " OpStore %gep_8_3_0 %bc_8_3_0\n"
13643 " OpStore %gep_8_3_1 %bc_8_3_1\n"
13644 " OpStore %gep_8_4_0 %bc_8_4_0\n"
13645 " OpStore %gep_8_4_1 %bc_8_4_1\n"
13646
13647 // [3 x <4 x half>] offset 144
13648 " %ex_9_0 = OpCompositeExtract %v4f16 %st_val 9 0\n"
13649 " %ex_9_1 = OpCompositeExtract %v4f16 %st_val 9 1\n"
13650 " %ex_9_2 = OpCompositeExtract %v4f16 %st_val 9 2\n"
13651 " %vec_9_0_0 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 0 1\n"
13652 " %vec_9_0_1 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 2 3\n"
13653 " %vec_9_1_0 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 0 1\n"
13654 " %vec_9_1_1 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 2 3\n"
13655 " %vec_9_2_0 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 0 1\n"
13656 " %vec_9_2_1 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 2 3\n"
13657 " %bc_9_0_0 = OpBitcast %u32 %vec_9_0_0\n"
13658 " %bc_9_0_1 = OpBitcast %u32 %vec_9_0_1\n"
13659 " %bc_9_1_0 = OpBitcast %u32 %vec_9_1_0\n"
13660 " %bc_9_1_1 = OpBitcast %u32 %vec_9_1_1\n"
13661 " %bc_9_2_0 = OpBitcast %u32 %vec_9_2_0\n"
13662 " %bc_9_2_1 = OpBitcast %u32 %vec_9_2_1\n"
13663 " %gep_9_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_36\n"
13664 " %gep_9_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_37\n"
13665 " %gep_9_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_38\n"
13666 " %gep_9_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_39\n"
13667 " %gep_9_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_40\n"
13668 " %gep_9_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_41\n"
13669 " OpStore %gep_9_0_0 %bc_9_0_0\n"
13670 " OpStore %gep_9_0_1 %bc_9_0_1\n"
13671 " OpStore %gep_9_1_0 %bc_9_1_0\n"
13672 " OpStore %gep_9_1_1 %bc_9_1_1\n"
13673 " OpStore %gep_9_2_0 %bc_9_2_0\n"
13674 " OpStore %gep_9_2_1 %bc_9_2_1\n"
13675
13676 " OpBranch %next\n"
13677
13678 " %next = OpLabel\n"
13679 " %i_cur = OpLoad %i32 %i\n"
13680 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13681 " OpStore %i %i_new\n"
13682 " OpBranch %loop\n"
13683
13684 " %merge = OpLabel\n"
13685 " OpBranch %end_if\n"
13686 " %end_if = OpLabel\n"
13687 " OpReturnValue %param\n"
13688 " OpFunctionEnd\n");
13689
13690 {
13691 SpecResource specResource;
13692 map<string, string> specs;
13693 VulkanFeatures features;
13694 map<string, string> fragments;
13695 vector<string> extensions;
13696 vector<deFloat16> expectedOutput;
13697 string consts;
13698
13699 for (uint32_t elementNdx = 0; elementNdx < numElements; ++elementNdx)
13700 {
13701 vector<deFloat16> expectedIterationOutput;
13702
13703 for (uint32_t structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
13704 expectedIterationOutput.push_back(tcu::Float16(float(structItemNdx)).bits());
13705
13706 for (uint32_t structItemNdx = 0; structItemNdx < DE_LENGTH_OF_ARRAY(exceptionIndices); ++structItemNdx)
13707 expectedIterationOutput[exceptionIndices[structItemNdx]] = exceptionValue;
13708
13709 expectedIterationOutput[fieldModifiedMulIndex] = tcu::Float16(float(elementNdx * fieldModifier)).bits();
13710 expectedIterationOutput[fieldModifiedAddIndex] = tcu::Float16(float(elementNdx + fieldModifier)).bits();
13711
13712 expectedOutput.insert(expectedOutput.end(), expectedIterationOutput.begin(), expectedIterationOutput.end());
13713 }
13714
13715 for (uint32_t i = 0; i < structItemsCount; ++i)
13716 consts += " %c_f16_" + de::toString(i) + " = OpConstant %f16 " + de::toString(i) + "\n";
13717
13718 specs["num_elements"] = de::toString(numElements);
13719 specs["struct_item_size"] = de::toString(structItemsCount * sizeof(deFloat16));
13720 specs["field_modifier"] = de::toString(fieldModifier);
13721 specs["consts"] = consts;
13722
13723 fragments["capability"] = "OpCapability Float16\n";
13724 fragments["decoration"] = decoration.specialize(specs);
13725 fragments["pre_main"] = preMain.specialize(specs);
13726 fragments["testfun"] = testFun.specialize(specs);
13727
13728 specResource.inputs.push_back(
13729 Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13730 specResource.outputs.push_back(
13731 Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13732 specResource.verifyIO = compareFP16CompositeFunc;
13733
13734 extensions.push_back("VK_KHR_shader_float16_int8");
13735
13736 features.extFloat16Int8.shaderFloat16 = true;
13737 if (specResource.graphicsFeaturesRequired)
13738 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
13739
13740 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
13741 IVec3(1, 1, 1));
13742 }
13743
13744 return testGroup.release();
13745 }
13746
13747 template <class SpecResource>
createFloat16CompositeInsertExtractSet(tcu::TestContext & testCtx,const char * op)13748 tcu::TestCaseGroup *createFloat16CompositeInsertExtractSet(tcu::TestContext &testCtx, const char *op)
13749 {
13750 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, de::toLower(op).c_str()));
13751 const deFloat16 exceptionValue = tcu::Float16(-1.0).bits();
13752 const string opName(op);
13753 const uint32_t opIndex = (opName == "OpCompositeInsert") ? 0 :
13754 (opName == "OpCompositeExtract") ? 1 :
13755 std::numeric_limits<uint32_t>::max();
13756
13757 const StringTemplate preMain(" %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
13758 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
13759 " %c_i32_size = OpConstant %i32 ${struct_u32s}\n"
13760 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
13761 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
13762 " %f16 = OpTypeFloat 16\n"
13763 " %v2f16 = OpTypeVector %f16 2\n"
13764 " %v3f16 = OpTypeVector %f16 3\n"
13765 " %v4f16 = OpTypeVector %f16 4\n"
13766 " %c_f16_na = OpConstant %f16 -1.0\n"
13767 " %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_na %c_f16_na\n"
13768 " %c_u32_5 = OpConstant %u32 5\n"
13769 " %c_i32_5 = OpConstant %i32 5\n"
13770 " %c_i32_6 = OpConstant %i32 6\n"
13771 " %c_i32_7 = OpConstant %i32 7\n"
13772 " %c_i32_8 = OpConstant %i32 8\n"
13773 " %c_i32_9 = OpConstant %i32 9\n"
13774 " %c_i32_10 = OpConstant %i32 10\n"
13775 " %c_i32_11 = OpConstant %i32 11\n"
13776
13777 "%f16arr3 = OpTypeArray %f16 %c_u32_3\n"
13778 "%v2f16arr3 = OpTypeArray %v2f16 %c_u32_3\n"
13779 "%v2f16arr5 = OpTypeArray %v2f16 %c_u32_5\n"
13780 "%v3f16arr5 = OpTypeArray %v3f16 %c_u32_5\n"
13781 "%v4f16arr3 = OpTypeArray %v4f16 %c_u32_3\n"
13782 "%struct16 = OpTypeStruct %f16 %v2f16arr3\n"
13783 "%struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
13784 "%st_test = OpTypeStruct %${field_type}\n"
13785
13786 " %ra_f16 = OpTypeArray %u32 %c_i32_hndp\n"
13787 " %ra_st = OpTypeArray %u32 %c_i32_size\n"
13788 " %up_u32 = OpTypePointer Uniform %u32\n"
13789 " %st_test_i32_fn = OpTypeFunction %st_test %i32\n"
13790 "%void_st_test_i32_fn = OpTypeFunction %void %st_test %i32\n"
13791 " %f16_i32_fn = OpTypeFunction %f16 %i32\n"
13792 " %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
13793 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
13794 " %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
13795
13796 "${op_premain_decls}"
13797
13798 " %up_SSBO_src = OpTypePointer Uniform %SSBO_src\n"
13799 " %up_SSBO_dst = OpTypePointer Uniform %SSBO_dst\n"
13800
13801 " %ssbo_src = OpVariable %up_SSBO_src Uniform\n"
13802 " %ssbo_dst = OpVariable %up_SSBO_dst Uniform\n");
13803
13804 const StringTemplate decoration("OpDecorate %SSBO_src BufferBlock\n"
13805 "OpDecorate %SSBO_dst BufferBlock\n"
13806 "OpDecorate %ra_f16 ArrayStride 4\n"
13807 "OpDecorate %ra_st ArrayStride 4\n"
13808 "OpDecorate %ssbo_src DescriptorSet 0\n"
13809 "OpDecorate %ssbo_src Binding 0\n"
13810 "OpDecorate %ssbo_dst DescriptorSet 0\n"
13811 "OpDecorate %ssbo_dst Binding 1\n"
13812
13813 "OpMemberDecorate %SSBO_src 0 Offset 0\n"
13814 "OpMemberDecorate %SSBO_dst 0 Offset 0\n"
13815
13816 "OpDecorate %v2f16arr3 ArrayStride 4\n"
13817 "OpMemberDecorate %struct16 0 Offset 0\n"
13818 "OpMemberDecorate %struct16 1 Offset 4\n"
13819 "OpDecorate %struct16arr3 ArrayStride 16\n"
13820 "OpDecorate %f16arr3 ArrayStride 2\n"
13821 "OpDecorate %v2f16arr5 ArrayStride 4\n"
13822 "OpDecorate %v3f16arr5 ArrayStride 8\n"
13823 "OpDecorate %v4f16arr3 ArrayStride 8\n"
13824
13825 "OpMemberDecorate %st_test 0 Offset 0\n");
13826
13827 const StringTemplate testFun(" %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
13828 " %param = OpFunctionParameter %v4f32\n"
13829 " %entry = OpLabel\n"
13830
13831 " %i = OpVariable %fp_i32 Function\n"
13832 " OpStore %i %c_i32_0\n"
13833
13834 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
13835 " OpSelectionMerge %end_if None\n"
13836 " OpBranchConditional %will_run %run_test %end_if\n"
13837
13838 " %run_test = OpLabel\n"
13839 " OpBranch %loop\n"
13840
13841 " %loop = OpLabel\n"
13842 " %i_cmp = OpLoad %i32 %i\n"
13843 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
13844 " OpLoopMerge %merge %next None\n"
13845 " OpBranchConditional %lt %write %merge\n"
13846
13847 " %write = OpLabel\n"
13848 " %ndx = OpLoad %i32 %i\n"
13849
13850 "${op_sw_fun_call}"
13851
13852 " %dst_st = OpFunctionCall %void %${st_call} %val_dst %${st_ndx}\n"
13853 " OpBranch %next\n"
13854
13855 " %next = OpLabel\n"
13856 " %i_cur = OpLoad %i32 %i\n"
13857 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13858 " OpStore %i %i_new\n"
13859 " OpBranch %loop\n"
13860
13861 " %merge = OpLabel\n"
13862 " OpBranch %end_if\n"
13863 " %end_if = OpLabel\n"
13864 " OpReturnValue %param\n"
13865 " OpFunctionEnd\n"
13866
13867 "${op_sw_fun_header}"
13868 " %sw_param = OpFunctionParameter %st_test\n"
13869 "%sw_paramn = OpFunctionParameter %i32\n"
13870 " %sw_entry = OpLabel\n"
13871 " OpSelectionMerge %switch_e None\n"
13872 " OpSwitch %sw_paramn %default ${case_list}\n"
13873
13874 "${case_bodies}"
13875
13876 "%default = OpLabel\n"
13877 " OpReturnValue ${op_case_default_value}\n"
13878 "%switch_e = OpLabel\n"
13879 " OpUnreachable\n" // Unreachable merge block for switch statement
13880 " OpFunctionEnd\n");
13881
13882 const StringTemplate testCaseBody("%case_${case_ndx} = OpLabel\n"
13883 "%val_ret_${case_ndx} = ${op_name} ${op_args_part} ${access_path}\n"
13884 " OpReturnValue %val_ret_${case_ndx}\n");
13885
13886 const string loadF16(" %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13887 " %ld_${var}_param = OpFunctionParameter %i32\n"
13888 " %ld_${var}_entry = OpLabel\n"
13889 " %ld_${var}_call = OpFunctionCall %f16 %ld_arg_${var} %ld_${var}_param\n"
13890 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13891 " OpReturnValue %ld_${var}_st_test\n"
13892 " OpFunctionEnd\n" +
13893 loadScalarF16FromUint);
13894
13895 const string loadV2F16(" %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13896 " %ld_${var}_param = OpFunctionParameter %i32\n"
13897 " %ld_${var}_entry = OpLabel\n"
13898 " %ld_${var}_call = OpFunctionCall %v2f16 %ld_arg_${var} %ld_${var}_param\n"
13899 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13900 " OpReturnValue %ld_${var}_st_test\n"
13901 " OpFunctionEnd\n" +
13902 loadV2F16FromUint);
13903
13904 const string loadV3F16(" %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13905 " %ld_${var}_param = OpFunctionParameter %i32\n"
13906 " %ld_${var}_entry = OpLabel\n"
13907 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13908 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13909 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13910 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13911 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13912 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13913 " %ld_${var}_vec = OpVectorShuffle %v3f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2\n"
13914 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13915 " OpReturnValue %ld_${var}_st_test\n"
13916 " OpFunctionEnd\n");
13917
13918 const string loadV4F16(" %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13919 " %ld_${var}_param = OpFunctionParameter %i32\n"
13920 " %ld_${var}_entry = OpLabel\n"
13921 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13922 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13923 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13924 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13925 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13926 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13927 " %ld_${var}_vec = OpVectorShuffle %v4f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2 3\n"
13928 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13929 " OpReturnValue %ld_${var}_st_test\n"
13930 " OpFunctionEnd\n");
13931
13932 const string loadF16Arr3(
13933 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13934 " %ld_${var}_param = OpFunctionParameter %i32\n"
13935 " %ld_${var}_entry = OpLabel\n"
13936 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_0\n"
13937 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_1\n"
13938 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13939 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13940 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13941 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13942 " %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0 0\n"
13943 " %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_0 1\n"
13944 " %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_1 0\n"
13945 " %ld_${var}_cons = OpCompositeConstruct %f16arr3 %ld_${var}_ex_0 %ld_${var}_ex_1 %ld_${var}_ex_2\n"
13946 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13947 " OpReturnValue %ld_${var}_st_test\n"
13948 " OpFunctionEnd\n");
13949
13950 const string loadV2F16Arr5(" %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13951 " %ld_${var}_param = OpFunctionParameter %i32\n"
13952 " %ld_${var}_label = OpLabel\n"
13953 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13954 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13955 " %ld_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13956 " %ld_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13957 " %ld_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13958 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13959 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13960 " %ld_${var}_ld_2 = OpLoad %u32 %ld_${var}_gep_2\n"
13961 " %ld_${var}_ld_3 = OpLoad %u32 %ld_${var}_gep_3\n"
13962 " %ld_${var}_ld_4 = OpLoad %u32 %ld_${var}_gep_4\n"
13963 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13964 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13965 " %ld_${var}_bc_2 = OpBitcast %v2f16 %ld_${var}_ld_2\n"
13966 " %ld_${var}_bc_3 = OpBitcast %v2f16 %ld_${var}_ld_3\n"
13967 " %ld_${var}_bc_4 = OpBitcast %v2f16 %ld_${var}_ld_4\n"
13968 " %ld_${var}_cons = OpCompositeConstruct %v2f16arr5 %ld_${var}_bc_0 %ld_${var}_bc_1 "
13969 "%ld_${var}_bc_2 %ld_${var}_bc_3 %ld_${var}_bc_4\n"
13970 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13971 " OpReturnValue %ld_${var}_st_test\n"
13972 " OpFunctionEnd\n");
13973
13974 const string loadV3F16Arr5(" %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13975 " %ld_${var}_param = OpFunctionParameter %i32\n"
13976 " %ld_${var}_entry = OpLabel\n"
13977 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13978 "%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13979 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13980 "%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13981 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13982 "%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13983 "%ld_${var}_gep_3_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13984 "%ld_${var}_gep_3_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13985 "%ld_${var}_gep_4_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13986 "%ld_${var}_gep_4_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13987 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13988 " %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13989 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13990 " %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13991 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13992 " %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13993 " %ld_${var}_ld_3_0 = OpLoad %u32 %ld_${var}_gep_3_0\n"
13994 " %ld_${var}_ld_3_1 = OpLoad %u32 %ld_${var}_gep_3_1\n"
13995 " %ld_${var}_ld_4_0 = OpLoad %u32 %ld_${var}_gep_4_0\n"
13996 " %ld_${var}_ld_4_1 = OpLoad %u32 %ld_${var}_gep_4_1\n"
13997 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13998 " %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13999 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
14000 " %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
14001 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
14002 " %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
14003 " %ld_${var}_bc_3_0 = OpBitcast %v2f16 %ld_${var}_ld_3_0\n"
14004 " %ld_${var}_bc_3_1 = OpBitcast %v2f16 %ld_${var}_ld_3_1\n"
14005 " %ld_${var}_bc_4_0 = OpBitcast %v2f16 %ld_${var}_ld_4_0\n"
14006 " %ld_${var}_bc_4_1 = OpBitcast %v2f16 %ld_${var}_ld_4_1\n"
14007 " %ld_${var}_vec_0 = OpVectorShuffle %v3f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2\n"
14008 " %ld_${var}_vec_1 = OpVectorShuffle %v3f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2\n"
14009 " %ld_${var}_vec_2 = OpVectorShuffle %v3f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2\n"
14010 " %ld_${var}_vec_3 = OpVectorShuffle %v3f16 %ld_${var}_bc_3_0 %ld_${var}_bc_3_1 0 1 2\n"
14011 " %ld_${var}_vec_4 = OpVectorShuffle %v3f16 %ld_${var}_bc_4_0 %ld_${var}_bc_4_1 0 1 2\n"
14012 " %ld_${var}_cons = OpCompositeConstruct %v3f16arr5 %ld_${var}_vec_0 %ld_${var}_vec_1 "
14013 "%ld_${var}_vec_2 %ld_${var}_vec_3 %ld_${var}_vec_4\n"
14014 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
14015 " OpReturnValue %ld_${var}_st_test\n"
14016 " OpFunctionEnd\n");
14017
14018 const string loadV4F16Arr3(
14019 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
14020 " %ld_${var}_param = OpFunctionParameter %i32\n"
14021 " %ld_${var}_entry = OpLabel\n"
14022 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14023 "%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14024 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
14025 "%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
14026 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
14027 "%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
14028 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
14029 " %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
14030 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
14031 " %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
14032 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
14033 " %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
14034 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
14035 " %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
14036 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
14037 " %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
14038 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
14039 " %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
14040 " %ld_${var}_vec_0 = OpVectorShuffle %v4f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2 3\n"
14041 " %ld_${var}_vec_1 = OpVectorShuffle %v4f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2 3\n"
14042 " %ld_${var}_vec_2 = OpVectorShuffle %v4f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2 3\n"
14043 " %ld_${var}_cons = OpCompositeConstruct %v4f16arr3 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2\n"
14044 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
14045 " OpReturnValue %ld_${var}_st_test\n"
14046 " OpFunctionEnd\n");
14047
14048 const string loadStruct16Arr3(
14049 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
14050 " %ld_${var}_param = OpFunctionParameter %i32\n"
14051 " %ld_${var}_entry = OpLabel\n"
14052 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14053 "%ld_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14054 "%ld_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
14055 "%ld_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
14056 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
14057 "%ld_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
14058 "%ld_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
14059 "%ld_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
14060 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
14061 "%ld_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
14062 "%ld_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
14063 "%ld_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
14064 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
14065 " %ld_${var}_ld_0_1_0 = OpLoad %u32 %ld_${var}_gep_0_1_0\n"
14066 " %ld_${var}_ld_0_1_1 = OpLoad %u32 %ld_${var}_gep_0_1_1\n"
14067 " %ld_${var}_ld_0_1_2 = OpLoad %u32 %ld_${var}_gep_0_1_2\n"
14068 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
14069 " %ld_${var}_ld_1_1_0 = OpLoad %u32 %ld_${var}_gep_1_1_0\n"
14070 " %ld_${var}_ld_1_1_1 = OpLoad %u32 %ld_${var}_gep_1_1_1\n"
14071 " %ld_${var}_ld_1_1_2 = OpLoad %u32 %ld_${var}_gep_1_1_2\n"
14072 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
14073 " %ld_${var}_ld_2_1_0 = OpLoad %u32 %ld_${var}_gep_2_1_0\n"
14074 " %ld_${var}_ld_2_1_1 = OpLoad %u32 %ld_${var}_gep_2_1_1\n"
14075 " %ld_${var}_ld_2_1_2 = OpLoad %u32 %ld_${var}_gep_2_1_2\n"
14076 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
14077 " %ld_${var}_bc_0_1_0 = OpBitcast %v2f16 %ld_${var}_ld_0_1_0\n"
14078 " %ld_${var}_bc_0_1_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1_1\n"
14079 " %ld_${var}_bc_0_1_2 = OpBitcast %v2f16 %ld_${var}_ld_0_1_2\n"
14080 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
14081 " %ld_${var}_bc_1_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_1_0\n"
14082 " %ld_${var}_bc_1_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1_1\n"
14083 " %ld_${var}_bc_1_1_2 = OpBitcast %v2f16 %ld_${var}_ld_1_1_2\n"
14084 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
14085 " %ld_${var}_bc_2_1_0 = OpBitcast %v2f16 %ld_${var}_ld_2_1_0\n"
14086 " %ld_${var}_bc_2_1_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1_1\n"
14087 " %ld_${var}_bc_2_1_2 = OpBitcast %v2f16 %ld_${var}_ld_2_1_2\n"
14088 " %ld_${var}_arr_0 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_0_1_0 %ld_${var}_bc_0_1_1 "
14089 "%ld_${var}_bc_0_1_2\n"
14090 " %ld_${var}_arr_1 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_1_1_0 %ld_${var}_bc_1_1_1 "
14091 "%ld_${var}_bc_1_1_2\n"
14092 " %ld_${var}_arr_2 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_2_1_0 %ld_${var}_bc_2_1_1 "
14093 "%ld_${var}_bc_2_1_2\n"
14094 " %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0_0 0\n"
14095 " %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_1_0 0\n"
14096 " %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_2_0 0\n"
14097 " %ld_${var}_st_0 = OpCompositeConstruct %struct16 %ld_${var}_ex_0 %ld_${var}_arr_0\n"
14098 " %ld_${var}_st_1 = OpCompositeConstruct %struct16 %ld_${var}_ex_1 %ld_${var}_arr_1\n"
14099 " %ld_${var}_st_2 = OpCompositeConstruct %struct16 %ld_${var}_ex_2 %ld_${var}_arr_2\n"
14100 " %ld_${var}_cons = OpCompositeConstruct %struct16arr3 %ld_${var}_st_0 %ld_${var}_st_1 %ld_${var}_st_2\n"
14101 " %ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
14102 " OpReturnValue %ld_${var}_st_test\n"
14103 " OpFunctionEnd\n");
14104
14105 const string storeF16(" %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14106 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
14107 "%st_${var}_param2 = OpFunctionParameter %i32\n"
14108 " %st_${var}_entry = OpLabel\n"
14109 " %st_${var}_ex = OpCompositeExtract %f16 %st_${var}_param1 0\n"
14110 " %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
14111 " OpReturn\n"
14112 " OpFunctionEnd\n" +
14113 storeScalarF16AsUint);
14114
14115 const string storeV2F16(" %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14116 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
14117 "%st_${var}_param2 = OpFunctionParameter %i32\n"
14118 " %st_${var}_entry = OpLabel\n"
14119 " %st_${var}_ex = OpCompositeExtract %v2f16 %st_${var}_param1 0\n"
14120 " %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
14121 " OpReturn\n"
14122 " OpFunctionEnd\n" +
14123 storeV2F16AsUint);
14124
14125 const string storeV3F16(" %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14126 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
14127 "%st_${var}_param2 = OpFunctionParameter %i32\n"
14128 " %st_${var}_entry = OpLabel\n"
14129 " %st_${var}_ex = OpCompositeExtract %v3f16 %st_${var}_param1 0\n"
14130 " %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
14131 " %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
14132 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
14133 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
14134 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14135 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14136 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
14137 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
14138 " OpReturn\n"
14139 " OpFunctionEnd\n");
14140
14141 const string storeV4F16(" %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14142 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
14143 "%st_${var}_param2 = OpFunctionParameter %i32\n"
14144 " %st_${var}_entry = OpLabel\n"
14145 " %st_${var}_ex = OpCompositeExtract %v4f16 %st_${var}_param1 0\n"
14146 " %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
14147 " %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
14148 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
14149 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
14150 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14151 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14152 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
14153 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
14154 " OpReturn\n"
14155 " OpFunctionEnd\n");
14156
14157 const string storeF16Arr3(" %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14158 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
14159 "%st_${var}_param2 = OpFunctionParameter %i32\n"
14160 " %st_${var}_entry = OpLabel\n"
14161 " %st_${var}_ex_0 = OpCompositeExtract %f16 %st_${var}_param1 0 0\n"
14162 " %st_${var}_ex_1 = OpCompositeExtract %f16 %st_${var}_param1 0 1\n"
14163 " %st_${var}_ex_2 = OpCompositeExtract %f16 %st_${var}_param1 0 2\n"
14164 " %st_${var}_vec_0 = OpCompositeConstruct %v2f16 %st_${var}_ex_0 %st_${var}_ex_1\n"
14165 " %st_${var}_vec_1 = OpCompositeConstruct %v2f16 %st_${var}_ex_2 %c_f16_na\n"
14166 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
14167 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
14168 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14169 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14170 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
14171 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
14172 " OpReturn\n"
14173 " OpFunctionEnd\n");
14174
14175 const string storeV2F16Arr5(" %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14176 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
14177 "%st_${var}_param2 = OpFunctionParameter %i32\n"
14178 " %st_${var}_entry = OpLabel\n"
14179 " %st_${var}_ex_0 = OpCompositeExtract %v2f16 %st_${var}_param1 0 0\n"
14180 " %st_${var}_ex_1 = OpCompositeExtract %v2f16 %st_${var}_param1 0 1\n"
14181 " %st_${var}_ex_2 = OpCompositeExtract %v2f16 %st_${var}_param1 0 2\n"
14182 " %st_${var}_ex_3 = OpCompositeExtract %v2f16 %st_${var}_param1 0 3\n"
14183 " %st_${var}_ex_4 = OpCompositeExtract %v2f16 %st_${var}_param1 0 4\n"
14184 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_ex_0\n"
14185 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_ex_1\n"
14186 " %st_${var}_bc_2 = OpBitcast %u32 %st_${var}_ex_2\n"
14187 " %st_${var}_bc_3 = OpBitcast %u32 %st_${var}_ex_3\n"
14188 " %st_${var}_bc_4 = OpBitcast %u32 %st_${var}_ex_4\n"
14189 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14190 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14191 " %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
14192 " %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
14193 " %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
14194 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
14195 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
14196 " OpStore %st_${var}_gep_2 %st_${var}_bc_2\n"
14197 " OpStore %st_${var}_gep_3 %st_${var}_bc_3\n"
14198 " OpStore %st_${var}_gep_4 %st_${var}_bc_4\n"
14199 " OpReturn\n"
14200 " OpFunctionEnd\n");
14201
14202 const string storeV3F16Arr5(" %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14203 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
14204 "%st_${var}_param2 = OpFunctionParameter %i32\n"
14205 " %st_${var}_entry = OpLabel\n"
14206 " %st_${var}_ex_0 = OpCompositeExtract %v3f16 %st_${var}_param1 0 0\n"
14207 " %st_${var}_ex_1 = OpCompositeExtract %v3f16 %st_${var}_param1 0 1\n"
14208 " %st_${var}_ex_2 = OpCompositeExtract %v3f16 %st_${var}_param1 0 2\n"
14209 " %st_${var}_ex_3 = OpCompositeExtract %v3f16 %st_${var}_param1 0 3\n"
14210 " %st_${var}_ex_4 = OpCompositeExtract %v3f16 %st_${var}_param1 0 4\n"
14211 "%st_${var}_v2_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 0 1\n"
14212 "%st_${var}_v2_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 2 3\n"
14213 "%st_${var}_v2_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 0 1\n"
14214 "%st_${var}_v2_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 2 3\n"
14215 "%st_${var}_v2_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 0 1\n"
14216 "%st_${var}_v2_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 2 3\n"
14217 "%st_${var}_v2_3_0 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 0 1\n"
14218 "%st_${var}_v2_3_1 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 2 3\n"
14219 "%st_${var}_v2_4_0 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 0 1\n"
14220 "%st_${var}_v2_4_1 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 2 3\n"
14221 "%st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
14222 "%st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
14223 "%st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
14224 "%st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
14225 "%st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
14226 "%st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
14227 "%st_${var}_bc_3_0 = OpBitcast %u32 %st_${var}_v2_3_0\n"
14228 "%st_${var}_bc_3_1 = OpBitcast %u32 %st_${var}_v2_3_1\n"
14229 "%st_${var}_bc_4_0 = OpBitcast %u32 %st_${var}_v2_4_0\n"
14230 "%st_${var}_bc_4_1 = OpBitcast %u32 %st_${var}_v2_4_1\n"
14231 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14232 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14233 " %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
14234 " %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
14235 " %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
14236 " %st_${var}_gep_5 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
14237 " %st_${var}_gep_6 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
14238 " %st_${var}_gep_7 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
14239 " %st_${var}_gep_8 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
14240 " %st_${var}_gep_9 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
14241 " OpStore %st_${var}_gep_0 %st_${var}_bc_0_0\n"
14242 " OpStore %st_${var}_gep_1 %st_${var}_bc_0_1\n"
14243 " OpStore %st_${var}_gep_2 %st_${var}_bc_1_0\n"
14244 " OpStore %st_${var}_gep_3 %st_${var}_bc_1_1\n"
14245 " OpStore %st_${var}_gep_4 %st_${var}_bc_2_0\n"
14246 " OpStore %st_${var}_gep_5 %st_${var}_bc_2_1\n"
14247 " OpStore %st_${var}_gep_6 %st_${var}_bc_3_0\n"
14248 " OpStore %st_${var}_gep_7 %st_${var}_bc_3_1\n"
14249 " OpStore %st_${var}_gep_8 %st_${var}_bc_4_0\n"
14250 " OpStore %st_${var}_gep_9 %st_${var}_bc_4_1\n"
14251 " OpReturn\n"
14252 " OpFunctionEnd\n");
14253
14254 const string storeV4F16Arr3(" %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14255 " %st_${var}_param1 = OpFunctionParameter %st_test\n"
14256 " %st_${var}_param2 = OpFunctionParameter %i32\n"
14257 " %st_${var}_entry = OpLabel\n"
14258 " %st_${var}_ex_0 = OpCompositeExtract %v4f16 %st_${var}_param1 0 0\n"
14259 " %st_${var}_ex_1 = OpCompositeExtract %v4f16 %st_${var}_param1 0 1\n"
14260 " %st_${var}_ex_2 = OpCompositeExtract %v4f16 %st_${var}_param1 0 2\n"
14261 "%st_${var}_vec_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 0 1\n"
14262 "%st_${var}_vec_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 2 3\n"
14263 "%st_${var}_vec_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 0 1\n"
14264 "%st_${var}_vec_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 2 3\n"
14265 "%st_${var}_vec_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 0 1\n"
14266 "%st_${var}_vec_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 2 3\n"
14267 " %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_vec_0_0\n"
14268 " %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_vec_0_1\n"
14269 " %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_vec_1_0\n"
14270 " %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_vec_1_1\n"
14271 " %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_vec_2_0\n"
14272 " %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_vec_2_1\n"
14273 "%st_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14274 "%st_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14275 "%st_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
14276 "%st_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
14277 "%st_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
14278 "%st_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
14279 " OpStore %st_${var}_gep_0_0 %st_${var}_bc_0_0\n"
14280 " OpStore %st_${var}_gep_0_1 %st_${var}_bc_0_1\n"
14281 " OpStore %st_${var}_gep_1_0 %st_${var}_bc_1_0\n"
14282 " OpStore %st_${var}_gep_1_1 %st_${var}_bc_1_1\n"
14283 " OpStore %st_${var}_gep_2_0 %st_${var}_bc_2_0\n"
14284 " OpStore %st_${var}_gep_2_1 %st_${var}_bc_2_1\n"
14285 " OpReturn\n"
14286 " OpFunctionEnd\n");
14287
14288 const string storeStruct16Arr3(" %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14289 " %st_${var}_param1 = OpFunctionParameter %st_test\n"
14290 " %st_${var}_param2 = OpFunctionParameter %i32\n"
14291 " %st_${var}_entry = OpLabel\n"
14292 " %st_${var}_st_0 = OpCompositeExtract %struct16 %st_${var}_param1 0 0\n"
14293 " %st_${var}_st_1 = OpCompositeExtract %struct16 %st_${var}_param1 0 1\n"
14294 " %st_${var}_st_2 = OpCompositeExtract %struct16 %st_${var}_param1 0 2\n"
14295 " %st_${var}_el_0 = OpCompositeExtract %f16 %st_${var}_st_0 0\n"
14296 " %st_${var}_v2_0_0 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 0\n"
14297 " %st_${var}_v2_0_1 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 1\n"
14298 " %st_${var}_v2_0_2 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 2\n"
14299 " %st_${var}_el_1 = OpCompositeExtract %f16 %st_${var}_st_1 0\n"
14300 " %st_${var}_v2_1_0 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 0\n"
14301 " %st_${var}_v2_1_1 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 1\n"
14302 " %st_${var}_v2_1_2 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 2\n"
14303 " %st_${var}_el_2 = OpCompositeExtract %f16 %st_${var}_st_2 0\n"
14304 " %st_${var}_v2_2_0 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 0\n"
14305 " %st_${var}_v2_2_1 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 1\n"
14306 " %st_${var}_v2_2_2 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 2\n"
14307 " %st_${var}_v2_0 = OpCompositeConstruct %v2f16 %st_${var}_el_0 %c_f16_na\n"
14308 " %st_${var}_v2_1 = OpCompositeConstruct %v2f16 %st_${var}_el_1 %c_f16_na\n"
14309 " %st_${var}_v2_2 = OpCompositeConstruct %v2f16 %st_${var}_el_2 %c_f16_na\n"
14310 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_v2_0\n"
14311 " %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
14312 " %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
14313 " %st_${var}_bc_0_2 = OpBitcast %u32 %st_${var}_v2_0_2\n"
14314 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_v2_1\n"
14315 " %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
14316 " %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
14317 " %st_${var}_bc_1_2 = OpBitcast %u32 %st_${var}_v2_1_2\n"
14318 " %st_${var}_bc_2 = OpBitcast %u32 %st_${var}_v2_2\n"
14319 " %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
14320 " %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
14321 " %st_${var}_bc_2_2 = OpBitcast %u32 %st_${var}_v2_2_2\n"
14322 "%st_${var}_gep_0_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14323 "%st_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14324 "%st_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
14325 "%st_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
14326 "%st_${var}_gep_1_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
14327 "%st_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
14328 "%st_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
14329 "%st_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
14330 "%st_${var}_gep_2_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
14331 "%st_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
14332 "%st_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
14333 "%st_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
14334 " OpStore %st_${var}_gep_0_0_0 %st_${var}_bc_0\n"
14335 " OpStore %st_${var}_gep_0_1_0 %st_${var}_bc_0_0\n"
14336 " OpStore %st_${var}_gep_0_1_1 %st_${var}_bc_0_1\n"
14337 " OpStore %st_${var}_gep_0_1_2 %st_${var}_bc_0_2\n"
14338 " OpStore %st_${var}_gep_1_0_0 %st_${var}_bc_1\n"
14339 " OpStore %st_${var}_gep_1_1_0 %st_${var}_bc_1_0\n"
14340 " OpStore %st_${var}_gep_1_1_1 %st_${var}_bc_1_1\n"
14341 " OpStore %st_${var}_gep_1_1_2 %st_${var}_bc_1_2\n"
14342 " OpStore %st_${var}_gep_2_0_0 %st_${var}_bc_2\n"
14343 " OpStore %st_${var}_gep_2_1_0 %st_${var}_bc_2_0\n"
14344 " OpStore %st_${var}_gep_2_1_1 %st_${var}_bc_2_1\n"
14345 " OpStore %st_${var}_gep_2_1_2 %st_${var}_bc_2_2\n"
14346 " OpReturn\n"
14347 " OpFunctionEnd\n");
14348
14349 struct OpParts
14350 {
14351 const char *premainDecls;
14352 const char *swFunCall;
14353 const char *swFunHeader;
14354 const char *caseDefaultValue;
14355 const char *argsPartial;
14356 };
14357
14358 OpParts opPartsArray[] = {
14359 // OpCompositeInsert
14360 {
14361 " %fun_t = OpTypeFunction %st_test %f16 %st_test %i32\n"
14362 " %SSBO_src = OpTypeStruct %ra_f16\n"
14363 " %SSBO_dst = OpTypeStruct %ra_st\n",
14364
14365 " %val_new = OpFunctionCall %f16 %ld_arg_ssbo_src %ndx\n"
14366 " %val_old = OpFunctionCall %st_test %ld_ssbo_dst %c_i32_0\n"
14367 " %val_dst = OpFunctionCall %st_test %sw_fun %val_new %val_old %ndx\n",
14368
14369 " %sw_fun = OpFunction %st_test None %fun_t\n"
14370 "%sw_paramv = OpFunctionParameter %f16\n",
14371
14372 "%sw_param",
14373
14374 "%st_test %sw_paramv %sw_param",
14375 },
14376 // OpCompositeExtract
14377 {
14378 " %fun_t = OpTypeFunction %f16 %st_test %i32\n"
14379 " %SSBO_src = OpTypeStruct %ra_st\n"
14380 " %SSBO_dst = OpTypeStruct %ra_f16\n",
14381
14382 " %val_src = OpFunctionCall %st_test %ld_ssbo_src %c_i32_0\n"
14383 " %val_dst = OpFunctionCall %f16 %sw_fun %val_src %ndx\n",
14384
14385 " %sw_fun = OpFunction %f16 None %fun_t\n",
14386
14387 "%c_f16_na",
14388
14389 "%f16 %sw_param",
14390 },
14391 };
14392
14393 DE_ASSERT(opIndex < DE_LENGTH_OF_ARRAY(opPartsArray));
14394
14395 const char *accessPathF16[] = {
14396 "0", // %f16
14397 DE_NULL,
14398 };
14399 const char *accessPathV2F16[] = {
14400 "0 0", // %v2f16
14401 "0 1",
14402 };
14403 const char *accessPathV3F16[] = {
14404 "0 0", // %v3f16
14405 "0 1",
14406 "0 2",
14407 DE_NULL,
14408 };
14409 const char *accessPathV4F16[] = {
14410 "0 0", // %v4f16"
14411 "0 1",
14412 "0 2",
14413 "0 3",
14414 };
14415 const char *accessPathF16Arr3[] = {
14416 "0 0", // %f16arr3
14417 "0 1",
14418 "0 2",
14419 DE_NULL,
14420 };
14421 const char *accessPathStruct16Arr3[] = {
14422 "0 0 0", // %struct16arr3
14423 DE_NULL, "0 0 1 0 0", "0 0 1 0 1", "0 0 1 1 0", "0 0 1 1 1", "0 0 1 2 0", "0 0 1 2 1", "0 1 0",
14424 DE_NULL, "0 1 1 0 0", "0 1 1 0 1", "0 1 1 1 0", "0 1 1 1 1", "0 1 1 2 0", "0 1 1 2 1", "0 2 0",
14425 DE_NULL, "0 2 1 0 0", "0 2 1 0 1", "0 2 1 1 0", "0 2 1 1 1", "0 2 1 2 0", "0 2 1 2 1",
14426 };
14427 const char *accessPathV2F16Arr5[] = {
14428 "0 0 0", // %v2f16arr5
14429 "0 0 1", "0 1 0", "0 1 1", "0 2 0", "0 2 1", "0 3 0", "0 3 1", "0 4 0", "0 4 1",
14430 };
14431 const char *accessPathV3F16Arr5[] = {
14432 "0 0 0", // %v3f16arr5
14433 "0 0 1", "0 0 2", DE_NULL, "0 1 0", "0 1 1", "0 1 2", DE_NULL, "0 2 0", "0 2 1", "0 2 2",
14434 DE_NULL, "0 3 0", "0 3 1", "0 3 2", DE_NULL, "0 4 0", "0 4 1", "0 4 2", DE_NULL,
14435 };
14436 const char *accessPathV4F16Arr3[] = {
14437 "0 0 0", // %v4f16arr3
14438 "0 0 1", "0 0 2", "0 0 3", "0 1 0", "0 1 1", "0 1 2", "0 1 3", "0 2 0",
14439 "0 2 1", "0 2 2", "0 2 3", DE_NULL, DE_NULL, DE_NULL, DE_NULL,
14440 };
14441
14442 struct TypeTestParameters
14443 {
14444 const char *name;
14445 size_t accessPathLength;
14446 const char **accessPath;
14447 const string loadFunction;
14448 const string storeFunction;
14449 };
14450
14451 const TypeTestParameters typeTestParameters[] = {
14452 {"f16", DE_LENGTH_OF_ARRAY(accessPathF16), accessPathF16, loadF16, storeF16},
14453 {"v2f16", DE_LENGTH_OF_ARRAY(accessPathV2F16), accessPathV2F16, loadV2F16, storeV2F16},
14454 {"v3f16", DE_LENGTH_OF_ARRAY(accessPathV3F16), accessPathV3F16, loadV3F16, storeV3F16},
14455 {"v4f16", DE_LENGTH_OF_ARRAY(accessPathV4F16), accessPathV4F16, loadV4F16, storeV4F16},
14456 {"f16arr3", DE_LENGTH_OF_ARRAY(accessPathF16Arr3), accessPathF16Arr3, loadF16Arr3, storeF16Arr3},
14457 {"v2f16arr5", DE_LENGTH_OF_ARRAY(accessPathV2F16Arr5), accessPathV2F16Arr5, loadV2F16Arr5, storeV2F16Arr5},
14458 {"v3f16arr5", DE_LENGTH_OF_ARRAY(accessPathV3F16Arr5), accessPathV3F16Arr5, loadV3F16Arr5, storeV3F16Arr5},
14459 {"v4f16arr3", DE_LENGTH_OF_ARRAY(accessPathV4F16Arr3), accessPathV4F16Arr3, loadV4F16Arr3, storeV4F16Arr3},
14460 {"struct16arr3", DE_LENGTH_OF_ARRAY(accessPathStruct16Arr3), accessPathStruct16Arr3, loadStruct16Arr3,
14461 storeStruct16Arr3},
14462 };
14463
14464 for (size_t typeTestNdx = 0; typeTestNdx < DE_LENGTH_OF_ARRAY(typeTestParameters); ++typeTestNdx)
14465 {
14466 const OpParts opParts = opPartsArray[opIndex];
14467 const string testName = typeTestParameters[typeTestNdx].name;
14468 const size_t structItemsCount = typeTestParameters[typeTestNdx].accessPathLength;
14469 const char **accessPath = typeTestParameters[typeTestNdx].accessPath;
14470 SpecResource specResource;
14471 map<string, string> specs;
14472 VulkanFeatures features;
14473 map<string, string> fragments;
14474 vector<string> extensions;
14475 vector<deFloat16> inputFP16;
14476 vector<deFloat16> unusedFP16Output;
14477
14478 // Generate values for input
14479 inputFP16.reserve(structItemsCount);
14480 for (uint32_t structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
14481 inputFP16.push_back((accessPath[structItemNdx] == DE_NULL) ? exceptionValue :
14482 tcu::Float16(float(structItemNdx)).bits());
14483
14484 unusedFP16Output.resize(structItemsCount);
14485
14486 // Generate cases for OpSwitch
14487 {
14488 string caseBodies;
14489 string caseList;
14490
14491 for (uint32_t caseNdx = 0; caseNdx < structItemsCount; ++caseNdx)
14492 if (accessPath[caseNdx] != DE_NULL)
14493 {
14494 map<string, string> specCase;
14495
14496 specCase["case_ndx"] = de::toString(caseNdx);
14497 specCase["access_path"] = accessPath[caseNdx];
14498 specCase["op_args_part"] = opParts.argsPartial;
14499 specCase["op_name"] = opName;
14500
14501 caseBodies += testCaseBody.specialize(specCase);
14502 caseList += de::toString(caseNdx) + " %case_" + de::toString(caseNdx) + " ";
14503 }
14504
14505 specs["case_bodies"] = caseBodies;
14506 specs["case_list"] = caseList;
14507 }
14508
14509 specs["num_elements"] = de::toString(structItemsCount);
14510 specs["field_type"] = typeTestParameters[typeTestNdx].name;
14511 specs["struct_item_size"] = de::toString(structItemsCount * sizeof(deFloat16));
14512 specs["struct_u32s"] = de::toString(structItemsCount / 2);
14513 specs["op_premain_decls"] = opParts.premainDecls;
14514 specs["op_sw_fun_call"] = opParts.swFunCall;
14515 specs["op_sw_fun_header"] = opParts.swFunHeader;
14516 specs["op_case_default_value"] = opParts.caseDefaultValue;
14517 if (opIndex == 0)
14518 {
14519 specs["st_call"] = "st_ssbo_dst";
14520 specs["st_ndx"] = "c_i32_0";
14521 }
14522 else
14523 {
14524 specs["st_call"] = "st_fn_ssbo_dst";
14525 specs["st_ndx"] = "ndx";
14526 }
14527
14528 fragments["capability"] = "OpCapability Float16\n";
14529 fragments["decoration"] = decoration.specialize(specs);
14530 fragments["pre_main"] = preMain.specialize(specs);
14531 fragments["testfun"] = testFun.specialize(specs);
14532 if (opIndex == 0)
14533 {
14534 fragments["testfun"] += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src"}});
14535 fragments["testfun"] +=
14536 StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_dst"}});
14537 fragments["testfun"] +=
14538 StringTemplate(typeTestParameters[typeTestNdx].storeFunction).specialize({{"var", "ssbo_dst"}});
14539 }
14540 else
14541 {
14542 fragments["testfun"] +=
14543 StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_src"}});
14544 fragments["testfun"] += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
14545 }
14546
14547 specResource.inputs.push_back(
14548 Resource(BufferSp(new Float16Buffer(inputFP16)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14549 specResource.outputs.push_back(
14550 Resource(BufferSp(new Float16Buffer(unusedFP16Output)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14551 specResource.verifyIO = compareFP16CompositeFunc;
14552
14553 extensions.push_back("VK_KHR_shader_float16_int8");
14554
14555 features.extFloat16Int8.shaderFloat16 = true;
14556 if (specResource.graphicsFeaturesRequired)
14557 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
14558
14559 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
14560 IVec3(1, 1, 1));
14561 }
14562
14563 return testGroup.release();
14564 }
14565
14566 struct fp16PerComponent
14567 {
fp16PerComponentvkt::SpirVAssembly::fp16PerComponent14568 fp16PerComponent() : flavor(0), floatFormat16(-14, 15, 10, true), outCompCount(0), argCompCount(3, 0)
14569 {
14570 }
14571
~fp16PerComponentvkt::SpirVAssembly::fp16PerComponent14572 virtual ~fp16PerComponent()
14573 {
14574 }
14575
callOncePerComponentvkt::SpirVAssembly::fp16PerComponent14576 bool callOncePerComponent()
14577 {
14578 return true;
14579 }
getComponentValidityvkt::SpirVAssembly::fp16PerComponent14580 uint32_t getComponentValidity()
14581 {
14582 return static_cast<uint32_t>(-1);
14583 }
14584
getULPsvkt::SpirVAssembly::fp16PerComponent14585 virtual double getULPs(vector<const deFloat16 *> &)
14586 {
14587 return 1.0;
14588 }
getMinvkt::SpirVAssembly::fp16PerComponent14589 virtual double getMin(double value, double ulps)
14590 {
14591 return value - floatFormat16.ulp(deAbs(value), ulps);
14592 }
getMaxvkt::SpirVAssembly::fp16PerComponent14593 virtual double getMax(double value, double ulps)
14594 {
14595 return value + floatFormat16.ulp(deAbs(value), ulps);
14596 }
14597
getFlavorCountvkt::SpirVAssembly::fp16PerComponent14598 virtual size_t getFlavorCount()
14599 {
14600 return flavorNames.empty() ? 1 : flavorNames.size();
14601 }
setFlavorvkt::SpirVAssembly::fp16PerComponent14602 virtual void setFlavor(size_t flavorNo)
14603 {
14604 DE_ASSERT(flavorNo < getFlavorCount());
14605 flavor = flavorNo;
14606 }
getFlavorvkt::SpirVAssembly::fp16PerComponent14607 virtual size_t getFlavor()
14608 {
14609 return flavor;
14610 }
getCurrentFlavorNamevkt::SpirVAssembly::fp16PerComponent14611 virtual string getCurrentFlavorName()
14612 {
14613 return flavorNames.empty() ? string("") : flavorNames[getFlavor()];
14614 }
14615
setOutCompCountvkt::SpirVAssembly::fp16PerComponent14616 virtual void setOutCompCount(size_t compCount)
14617 {
14618 outCompCount = compCount;
14619 }
getOutCompCountvkt::SpirVAssembly::fp16PerComponent14620 virtual size_t getOutCompCount()
14621 {
14622 return outCompCount;
14623 }
14624
setArgCompCountvkt::SpirVAssembly::fp16PerComponent14625 virtual void setArgCompCount(size_t argNo, size_t compCount)
14626 {
14627 argCompCount[argNo] = compCount;
14628 }
getArgCompCountvkt::SpirVAssembly::fp16PerComponent14629 virtual size_t getArgCompCount(size_t argNo)
14630 {
14631 return argCompCount[argNo];
14632 }
14633
14634 protected:
14635 size_t flavor;
14636 tcu::FloatFormat floatFormat16;
14637 size_t outCompCount;
14638 vector<size_t> argCompCount;
14639 vector<string> flavorNames;
14640 };
14641
14642 struct fp16OpFNegate : public fp16PerComponent
14643 {
14644 template <class fp16type>
calcvkt::SpirVAssembly::fp16OpFNegate14645 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14646 {
14647 const fp16type x(*in[0]);
14648 const double d(x.asDouble());
14649 const double result(0.0 - d);
14650
14651 out[0] = fp16type(result).bits();
14652 min[0] = getMin(result, getULPs(in));
14653 max[0] = getMax(result, getULPs(in));
14654
14655 return true;
14656 }
14657 };
14658
14659 struct fp16Round : public fp16PerComponent
14660 {
fp16Roundvkt::SpirVAssembly::fp16Round14661 fp16Round() : fp16PerComponent()
14662 {
14663 flavorNames.push_back("Floor(x+0.5)");
14664 flavorNames.push_back("Floor(x-0.5)");
14665 flavorNames.push_back("RoundEven");
14666 }
14667
14668 template <class fp16type>
calcvkt::SpirVAssembly::fp16Round14669 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14670 {
14671 const fp16type x(*in[0]);
14672 const double d(x.asDouble());
14673 double result(0.0);
14674
14675 switch (flavor)
14676 {
14677 case 0:
14678 result = deRound(d);
14679 break;
14680 case 1:
14681 result = deFloor(d - 0.5);
14682 break;
14683 case 2:
14684 result = deRoundEven(d);
14685 break;
14686 default:
14687 TCU_THROW(InternalError, "Invalid flavor specified");
14688 }
14689
14690 out[0] = fp16type(result).bits();
14691 min[0] = getMin(result, getULPs(in));
14692 max[0] = getMax(result, getULPs(in));
14693
14694 return true;
14695 }
14696 };
14697
14698 struct fp16RoundEven : public fp16PerComponent
14699 {
14700 template <class fp16type>
calcvkt::SpirVAssembly::fp16RoundEven14701 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14702 {
14703 const fp16type x(*in[0]);
14704 const double d(x.asDouble());
14705 const double result(deRoundEven(d));
14706
14707 out[0] = fp16type(result).bits();
14708 min[0] = getMin(result, getULPs(in));
14709 max[0] = getMax(result, getULPs(in));
14710
14711 return true;
14712 }
14713 };
14714
14715 struct fp16Trunc : public fp16PerComponent
14716 {
14717 template <class fp16type>
calcvkt::SpirVAssembly::fp16Trunc14718 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14719 {
14720 const fp16type x(*in[0]);
14721 const double d(x.asDouble());
14722 const double result(deTrunc(d));
14723
14724 out[0] = fp16type(result).bits();
14725 min[0] = getMin(result, getULPs(in));
14726 max[0] = getMax(result, getULPs(in));
14727
14728 return true;
14729 }
14730 };
14731
14732 struct fp16FAbs : public fp16PerComponent
14733 {
14734 template <class fp16type>
calcvkt::SpirVAssembly::fp16FAbs14735 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14736 {
14737 const fp16type x(*in[0]);
14738 const double d(x.asDouble());
14739 const double result(deAbs(d));
14740
14741 out[0] = fp16type(result).bits();
14742 min[0] = getMin(result, getULPs(in));
14743 max[0] = getMax(result, getULPs(in));
14744
14745 return true;
14746 }
14747 };
14748
14749 struct fp16FSign : public fp16PerComponent
14750 {
14751 template <class fp16type>
calcvkt::SpirVAssembly::fp16FSign14752 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14753 {
14754 const fp16type x(*in[0]);
14755 const double d(x.asDouble());
14756 const double result(deSign(d));
14757
14758 if (x.isNaN())
14759 return false;
14760
14761 out[0] = fp16type(result).bits();
14762 min[0] = getMin(result, getULPs(in));
14763 max[0] = getMax(result, getULPs(in));
14764
14765 return true;
14766 }
14767 };
14768
14769 struct fp16Floor : public fp16PerComponent
14770 {
14771 template <class fp16type>
calcvkt::SpirVAssembly::fp16Floor14772 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14773 {
14774 const fp16type x(*in[0]);
14775 const double d(x.asDouble());
14776 const double result(deFloor(d));
14777
14778 out[0] = fp16type(result).bits();
14779 min[0] = getMin(result, getULPs(in));
14780 max[0] = getMax(result, getULPs(in));
14781
14782 return true;
14783 }
14784 };
14785
14786 struct fp16Ceil : public fp16PerComponent
14787 {
14788 template <class fp16type>
calcvkt::SpirVAssembly::fp16Ceil14789 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14790 {
14791 const fp16type x(*in[0]);
14792 const double d(x.asDouble());
14793 const double result(deCeil(d));
14794
14795 out[0] = fp16type(result).bits();
14796 min[0] = getMin(result, getULPs(in));
14797 max[0] = getMax(result, getULPs(in));
14798
14799 return true;
14800 }
14801 };
14802
14803 struct fp16Fract : public fp16PerComponent
14804 {
14805 template <class fp16type>
calcvkt::SpirVAssembly::fp16Fract14806 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14807 {
14808 const fp16type x(*in[0]);
14809 const double d(x.asDouble());
14810 const double result(deFrac(d));
14811
14812 out[0] = fp16type(result).bits();
14813 min[0] = getMin(result, getULPs(in));
14814 max[0] = getMax(result, getULPs(in));
14815
14816 return true;
14817 }
14818 };
14819
14820 struct fp16Radians : public fp16PerComponent
14821 {
getULPsvkt::SpirVAssembly::fp16Radians14822 virtual double getULPs(vector<const deFloat16 *> &in)
14823 {
14824 DE_UNREF(in);
14825
14826 return 2.5;
14827 }
14828
14829 template <class fp16type>
calcvkt::SpirVAssembly::fp16Radians14830 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14831 {
14832 const fp16type x(*in[0]);
14833 const float d(x.asFloat());
14834 const float result(deFloatRadians(d));
14835
14836 out[0] = fp16type(result).bits();
14837 min[0] = getMin(result, getULPs(in));
14838 max[0] = getMax(result, getULPs(in));
14839
14840 return true;
14841 }
14842 };
14843
14844 struct fp16Degrees : public fp16PerComponent
14845 {
getULPsvkt::SpirVAssembly::fp16Degrees14846 virtual double getULPs(vector<const deFloat16 *> &in)
14847 {
14848 DE_UNREF(in);
14849
14850 return 2.5;
14851 }
14852
14853 template <class fp16type>
calcvkt::SpirVAssembly::fp16Degrees14854 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14855 {
14856 const fp16type x(*in[0]);
14857 const float d(x.asFloat());
14858 const float result(deFloatDegrees(d));
14859
14860 out[0] = fp16type(result).bits();
14861 min[0] = getMin(result, getULPs(in));
14862 max[0] = getMax(result, getULPs(in));
14863
14864 return true;
14865 }
14866 };
14867
14868 struct fp16Sin : public fp16PerComponent
14869 {
14870 template <class fp16type>
calcvkt::SpirVAssembly::fp16Sin14871 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14872 {
14873 const fp16type x(*in[0]);
14874 const double d(x.asDouble());
14875 const double result(deSin(d));
14876 const double unspecUlp(16.0);
14877 const double err(de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) :
14878 floatFormat16.ulp(deAbs(result), unspecUlp));
14879
14880 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14881 return false;
14882
14883 out[0] = fp16type(result).bits();
14884 min[0] = result - err;
14885 max[0] = result + err;
14886
14887 return true;
14888 }
14889 };
14890
14891 struct fp16Cos : public fp16PerComponent
14892 {
14893 template <class fp16type>
calcvkt::SpirVAssembly::fp16Cos14894 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14895 {
14896 const fp16type x(*in[0]);
14897 const double d(x.asDouble());
14898 const double result(deCos(d));
14899 const double unspecUlp(16.0);
14900 const double err(de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) :
14901 floatFormat16.ulp(deAbs(result), unspecUlp));
14902
14903 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14904 return false;
14905
14906 out[0] = fp16type(result).bits();
14907 min[0] = result - err;
14908 max[0] = result + err;
14909
14910 return true;
14911 }
14912 };
14913
14914 struct fp16Tan : public fp16PerComponent
14915 {
14916 template <class fp16type>
calcvkt::SpirVAssembly::fp16Tan14917 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14918 {
14919 const fp16type x(*in[0]);
14920 const double d(x.asDouble());
14921 const double result(deTan(d));
14922
14923 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14924 return false;
14925
14926 out[0] = fp16type(result).bits();
14927 {
14928 const double err = deLdExp(1.0, -7);
14929 const double s1 = deSin(d) + err;
14930 const double s2 = deSin(d) - err;
14931 const double c1 = deCos(d) + err;
14932 const double c2 = deCos(d) - err;
14933 const double edgeVals[] = {s1 / c1, s1 / c2, s2 / c1, s2 / c2};
14934 double edgeLeft = out[0];
14935 double edgeRight = out[0];
14936
14937 if (deSign(c1 * c2) < 0.0)
14938 {
14939 edgeLeft = -std::numeric_limits<double>::infinity();
14940 edgeRight = +std::numeric_limits<double>::infinity();
14941 }
14942 else
14943 {
14944 edgeLeft = *std::min_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14945 edgeRight = *std::max_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14946 }
14947
14948 min[0] = edgeLeft;
14949 max[0] = edgeRight;
14950 }
14951
14952 return true;
14953 }
14954 };
14955
14956 struct fp16Asin : public fp16PerComponent
14957 {
14958 template <class fp16type>
calcvkt::SpirVAssembly::fp16Asin14959 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14960 {
14961 const fp16type x(*in[0]);
14962 const double d(x.asDouble());
14963 const double result(deAsin(d));
14964 const double error(deAtan2(d, sqrt(1.0 - d * d)));
14965
14966 if (!x.isNaN() && deAbs(d) > 1.0)
14967 return false;
14968
14969 out[0] = fp16type(result).bits();
14970 min[0] =
14971 result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14972 max[0] =
14973 result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14974
14975 return true;
14976 }
14977 };
14978
14979 struct fp16Acos : public fp16PerComponent
14980 {
14981 template <class fp16type>
calcvkt::SpirVAssembly::fp16Acos14982 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
14983 {
14984 const fp16type x(*in[0]);
14985 const double d(x.asDouble());
14986 const double result(deAcos(d));
14987 const double error(deAtan2(sqrt(1.0 - d * d), d));
14988
14989 if (!x.isNaN() && deAbs(d) > 1.0)
14990 return false;
14991
14992 out[0] = fp16type(result).bits();
14993 min[0] =
14994 result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14995 max[0] =
14996 result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14997
14998 return true;
14999 }
15000 };
15001
15002 struct fp16Atan : public fp16PerComponent
15003 {
getULPsvkt::SpirVAssembly::fp16Atan15004 virtual double getULPs(vector<const deFloat16 *> &in)
15005 {
15006 DE_UNREF(in);
15007
15008 return 2 * 5.0; // This is not a precision test. Value is not from spec
15009 }
15010
15011 template <class fp16type>
calcvkt::SpirVAssembly::fp16Atan15012 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15013 {
15014 const fp16type x(*in[0]);
15015 const double d(x.asDouble());
15016 const double result(deAtanOver(d));
15017
15018 out[0] = fp16type(result).bits();
15019 min[0] = getMin(result, getULPs(in));
15020 max[0] = getMax(result, getULPs(in));
15021
15022 return true;
15023 }
15024 };
15025
15026 struct fp16Sinh : public fp16PerComponent
15027 {
fp16Sinhvkt::SpirVAssembly::fp16Sinh15028 fp16Sinh() : fp16PerComponent()
15029 {
15030 flavorNames.push_back("Double");
15031 flavorNames.push_back("ExpFP16");
15032 }
15033
15034 template <class fp16type>
calcvkt::SpirVAssembly::fp16Sinh15035 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15036 {
15037 const fp16type x(*in[0]);
15038 const double d(x.asDouble());
15039 const double ulps(64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
15040 double result(0.0);
15041 double error(0.0);
15042
15043 if (getFlavor() == 0)
15044 {
15045 result = deSinh(d);
15046 error = floatFormat16.ulp(deAbs(result), ulps);
15047 }
15048 else if (getFlavor() == 1)
15049 {
15050 const fp16type epx(deExp(d));
15051 const fp16type enx(deExp(-d));
15052 const fp16type esx(epx.asDouble() - enx.asDouble());
15053 const fp16type sx2(esx.asDouble() / 2.0);
15054
15055 result = sx2.asDouble();
15056 error = deAbs(floatFormat16.ulp(epx.asDouble(), ulps)) + deAbs(floatFormat16.ulp(enx.asDouble(), ulps));
15057 }
15058 else
15059 {
15060 TCU_THROW(InternalError, "Unknown flavor");
15061 }
15062
15063 out[0] = fp16type(result).bits();
15064 min[0] = result - error;
15065 max[0] = result + error;
15066
15067 return true;
15068 }
15069 };
15070
15071 struct fp16Cosh : public fp16PerComponent
15072 {
fp16Coshvkt::SpirVAssembly::fp16Cosh15073 fp16Cosh() : fp16PerComponent()
15074 {
15075 flavorNames.push_back("Double");
15076 flavorNames.push_back("ExpFP16");
15077 }
15078
15079 template <class fp16type>
calcvkt::SpirVAssembly::fp16Cosh15080 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15081 {
15082 const fp16type x(*in[0]);
15083 const double d(x.asDouble());
15084 const double ulps(64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
15085 double result(0.0);
15086
15087 if (getFlavor() == 0)
15088 {
15089 result = deCosh(d);
15090 }
15091 else if (getFlavor() == 1)
15092 {
15093 const fp16type epx(deExp(d));
15094 const fp16type enx(deExp(-d));
15095 const fp16type esx(epx.asDouble() + enx.asDouble());
15096 const fp16type sx2(esx.asDouble() / 2.0);
15097
15098 result = sx2.asDouble();
15099 }
15100 else
15101 {
15102 TCU_THROW(InternalError, "Unknown flavor");
15103 }
15104
15105 out[0] = fp16type(result).bits();
15106 min[0] = result - floatFormat16.ulp(deAbs(result), ulps);
15107 max[0] = result + floatFormat16.ulp(deAbs(result), ulps);
15108
15109 return true;
15110 }
15111 };
15112
15113 struct fp16Tanh : public fp16PerComponent
15114 {
fp16Tanhvkt::SpirVAssembly::fp16Tanh15115 fp16Tanh() : fp16PerComponent()
15116 {
15117 flavorNames.push_back("Tanh");
15118 flavorNames.push_back("SinhCosh");
15119 flavorNames.push_back("SinhCoshFP16");
15120 flavorNames.push_back("PolyFP16");
15121 }
15122
getULPsvkt::SpirVAssembly::fp16Tanh15123 virtual double getULPs(vector<const deFloat16 *> &in)
15124 {
15125 const tcu::Float16 x(*in[0]);
15126 const double d(x.asDouble());
15127
15128 return 2 * (1.0 + 2 * deAbs(d)); // This is not a precision test. Value is not from spec
15129 }
15130
15131 template <class fp16type>
calcPolyvkt::SpirVAssembly::fp16Tanh15132 inline double calcPoly(const fp16type &espx, const fp16type &esnx, const fp16type &ecpx, const fp16type &ecnx)
15133 {
15134 const fp16type esx(espx.asDouble() - esnx.asDouble());
15135 const fp16type sx2(esx.asDouble() / 2.0);
15136 const fp16type ecx(ecpx.asDouble() + ecnx.asDouble());
15137 const fp16type cx2(ecx.asDouble() / 2.0);
15138 const fp16type tg(sx2.asDouble() / cx2.asDouble());
15139 const double rez(tg.asDouble());
15140
15141 return rez;
15142 }
15143
15144 template <class fp16type>
calcvkt::SpirVAssembly::fp16Tanh15145 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15146 {
15147 const fp16type x(*in[0]);
15148 const double d(x.asDouble());
15149 double result(0.0);
15150
15151 if (getFlavor() == 0)
15152 {
15153 result = deTanh(d);
15154 min[0] = getMin(result, getULPs(in));
15155 max[0] = getMax(result, getULPs(in));
15156 }
15157 else if (getFlavor() == 1)
15158 {
15159 result = deSinh(d) / deCosh(d);
15160 min[0] = getMin(result, getULPs(in));
15161 max[0] = getMax(result, getULPs(in));
15162 }
15163 else if (getFlavor() == 2)
15164 {
15165 const fp16type s(deSinh(d));
15166 const fp16type c(deCosh(d));
15167
15168 result = s.asDouble() / c.asDouble();
15169 min[0] = getMin(result, getULPs(in));
15170 max[0] = getMax(result, getULPs(in));
15171 }
15172 else if (getFlavor() == 3)
15173 {
15174 const double ulps(getULPs(in));
15175 const double epxm(deExp(d));
15176 const double enxm(deExp(-d));
15177 const double epxmerr = floatFormat16.ulp(epxm, ulps);
15178 const double enxmerr = floatFormat16.ulp(enxm, ulps);
15179 const fp16type epx[] = {fp16type(epxm - epxmerr), fp16type(epxm + epxmerr)};
15180 const fp16type enx[] = {fp16type(enxm - enxmerr), fp16type(enxm + enxmerr)};
15181 const fp16type epxm16(epxm);
15182 const fp16type enxm16(enxm);
15183 vector<double> tgs;
15184
15185 for (size_t spNdx = 0; spNdx < DE_LENGTH_OF_ARRAY(epx); ++spNdx)
15186 for (size_t snNdx = 0; snNdx < DE_LENGTH_OF_ARRAY(enx); ++snNdx)
15187 for (size_t cpNdx = 0; cpNdx < DE_LENGTH_OF_ARRAY(epx); ++cpNdx)
15188 for (size_t cnNdx = 0; cnNdx < DE_LENGTH_OF_ARRAY(enx); ++cnNdx)
15189 {
15190 const double tgh = calcPoly(epx[spNdx], enx[snNdx], epx[cpNdx], enx[cnNdx]);
15191
15192 tgs.push_back(tgh);
15193 }
15194
15195 result = calcPoly(epxm16, enxm16, epxm16, enxm16);
15196 min[0] = *std::min_element(tgs.begin(), tgs.end());
15197 max[0] = *std::max_element(tgs.begin(), tgs.end());
15198 }
15199 else
15200 {
15201 TCU_THROW(InternalError, "Unknown flavor");
15202 }
15203
15204 out[0] = fp16type(result).bits();
15205
15206 return true;
15207 }
15208 };
15209
15210 struct fp16Asinh : public fp16PerComponent
15211 {
fp16Asinhvkt::SpirVAssembly::fp16Asinh15212 fp16Asinh() : fp16PerComponent()
15213 {
15214 flavorNames.push_back("Double");
15215 flavorNames.push_back("PolyFP16Wiki");
15216 flavorNames.push_back("PolyFP16Abs");
15217 }
15218
getULPsvkt::SpirVAssembly::fp16Asinh15219 virtual double getULPs(vector<const deFloat16 *> &in)
15220 {
15221 DE_UNREF(in);
15222
15223 return 256.0; // This is not a precision test. Value is not from spec
15224 }
15225
15226 template <class fp16type>
calcvkt::SpirVAssembly::fp16Asinh15227 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15228 {
15229 const fp16type x(*in[0]);
15230 const double d(x.asDouble());
15231 double result(0.0);
15232
15233 if (getFlavor() == 0)
15234 {
15235 result = deAsinh(d);
15236 }
15237 else if (getFlavor() == 1)
15238 {
15239 const fp16type x2(d * d);
15240 const fp16type x2p1(x2.asDouble() + 1.0);
15241 const fp16type sq(deSqrt(x2p1.asDouble()));
15242 const fp16type sxsq(d + sq.asDouble());
15243 const fp16type lsxsq(deLog(sxsq.asDouble()));
15244
15245 if (lsxsq.isInf())
15246 return false;
15247
15248 result = lsxsq.asDouble();
15249 }
15250 else if (getFlavor() == 2)
15251 {
15252 const fp16type x2(d * d);
15253 const fp16type x2p1(x2.asDouble() + 1.0);
15254 const fp16type sq(deSqrt(x2p1.asDouble()));
15255 const fp16type sxsq(deAbs(d) + sq.asDouble());
15256 const fp16type lsxsq(deLog(sxsq.asDouble()));
15257
15258 result = deSign(d) * lsxsq.asDouble();
15259 }
15260 else
15261 {
15262 TCU_THROW(InternalError, "Unknown flavor");
15263 }
15264
15265 out[0] = fp16type(result).bits();
15266 min[0] = getMin(result, getULPs(in));
15267 max[0] = getMax(result, getULPs(in));
15268
15269 return true;
15270 }
15271 };
15272
15273 struct fp16Acosh : public fp16PerComponent
15274 {
fp16Acoshvkt::SpirVAssembly::fp16Acosh15275 fp16Acosh() : fp16PerComponent()
15276 {
15277 flavorNames.push_back("Double");
15278 flavorNames.push_back("PolyFP16");
15279 }
15280
getULPsvkt::SpirVAssembly::fp16Acosh15281 virtual double getULPs(vector<const deFloat16 *> &in)
15282 {
15283 DE_UNREF(in);
15284
15285 return 16.0; // This is not a precision test. Value is not from spec
15286 }
15287
15288 template <class fp16type>
calcvkt::SpirVAssembly::fp16Acosh15289 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15290 {
15291 const fp16type x(*in[0]);
15292 const double d(x.asDouble());
15293 double result(0.0);
15294
15295 if (!x.isNaN() && d < 1.0)
15296 return false;
15297
15298 if (getFlavor() == 0)
15299 {
15300 result = deAcosh(d);
15301 }
15302 else if (getFlavor() == 1)
15303 {
15304 const fp16type x2(d * d);
15305 const fp16type x2m1(x2.asDouble() - 1.0);
15306 const fp16type sq(deSqrt(x2m1.asDouble()));
15307 const fp16type sxsq(d + sq.asDouble());
15308 const fp16type lsxsq(deLog(sxsq.asDouble()));
15309
15310 result = lsxsq.asDouble();
15311 }
15312 else
15313 {
15314 TCU_THROW(InternalError, "Unknown flavor");
15315 }
15316
15317 out[0] = fp16type(result).bits();
15318 min[0] = getMin(result, getULPs(in));
15319 max[0] = getMax(result, getULPs(in));
15320
15321 return true;
15322 }
15323 };
15324
15325 struct fp16Atanh : public fp16PerComponent
15326 {
fp16Atanhvkt::SpirVAssembly::fp16Atanh15327 fp16Atanh() : fp16PerComponent()
15328 {
15329 flavorNames.push_back("Double");
15330 flavorNames.push_back("PolyFP16");
15331 }
15332
15333 template <class fp16type>
calcvkt::SpirVAssembly::fp16Atanh15334 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15335 {
15336 const fp16type x(*in[0]);
15337 const double d(x.asDouble());
15338 double result(0.0);
15339
15340 if (deAbs(d) >= 1.0)
15341 return false;
15342
15343 if (getFlavor() == 0)
15344 {
15345 const double ulps(16.0); // This is not a precision test. Value is not from spec
15346
15347 result = deAtanh(d);
15348 min[0] = getMin(result, ulps);
15349 max[0] = getMax(result, ulps);
15350 }
15351 else if (getFlavor() == 1)
15352 {
15353 const fp16type x1a(1.0 + d);
15354 const fp16type x1b(1.0 - d);
15355 const fp16type x1d(x1a.asDouble() / x1b.asDouble());
15356 const fp16type lx1d(deLog(x1d.asDouble()));
15357 const fp16type lx1d2(0.5 * lx1d.asDouble());
15358 const double error(2 * (de::inRange(deAbs(x1d.asDouble()), 0.5, 2.0) ?
15359 deLdExp(2.0, -7) :
15360 floatFormat16.ulp(deAbs(x1d.asDouble()), 3.0)));
15361
15362 result = lx1d2.asDouble();
15363 min[0] = result - error;
15364 max[0] = result + error;
15365 }
15366 else
15367 {
15368 TCU_THROW(InternalError, "Unknown flavor");
15369 }
15370
15371 out[0] = fp16type(result).bits();
15372
15373 return true;
15374 }
15375 };
15376
15377 struct fp16Exp : public fp16PerComponent
15378 {
15379 template <class fp16type>
calcvkt::SpirVAssembly::fp16Exp15380 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15381 {
15382 const fp16type x(*in[0]);
15383 const double d(x.asDouble());
15384 const double ulps(10.0 * (1.0 + 2.0 * deAbs(d)));
15385 const double result(deExp(d));
15386
15387 out[0] = fp16type(result).bits();
15388 min[0] = getMin(result, ulps);
15389 max[0] = getMax(result, ulps);
15390
15391 return true;
15392 }
15393 };
15394
15395 struct fp16Log : public fp16PerComponent
15396 {
15397 template <class fp16type>
calcvkt::SpirVAssembly::fp16Log15398 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15399 {
15400 const fp16type x(*in[0]);
15401 const double d(x.asDouble());
15402 const double result(deLog(d));
15403 const double error(de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15404
15405 if (d <= 0.0)
15406 return false;
15407
15408 out[0] = fp16type(result).bits();
15409 min[0] = result - error;
15410 max[0] = result + error;
15411
15412 return true;
15413 }
15414 };
15415
15416 struct fp16Exp2 : public fp16PerComponent
15417 {
15418 template <class fp16type>
calcvkt::SpirVAssembly::fp16Exp215419 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15420 {
15421 const fp16type x(*in[0]);
15422 const double d(x.asDouble());
15423 const double result(deExp2(d));
15424 const double ulps(1.0 + 2.0 * deAbs(fp16type(in[0][0]).asDouble()));
15425
15426 out[0] = fp16type(result).bits();
15427 min[0] = getMin(result, ulps);
15428 max[0] = getMax(result, ulps);
15429
15430 return true;
15431 }
15432 };
15433
15434 struct fp16Log2 : public fp16PerComponent
15435 {
15436 template <class fp16type>
calcvkt::SpirVAssembly::fp16Log215437 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15438 {
15439 const fp16type x(*in[0]);
15440 const double d(x.asDouble());
15441 const double result(deLog2(d));
15442 const double error(de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15443
15444 if (d <= 0.0)
15445 return false;
15446
15447 out[0] = fp16type(result).bits();
15448 min[0] = result - error;
15449 max[0] = result + error;
15450
15451 return true;
15452 }
15453 };
15454
15455 struct fp16Sqrt : public fp16PerComponent
15456 {
getULPsvkt::SpirVAssembly::fp16Sqrt15457 virtual double getULPs(vector<const deFloat16 *> &in)
15458 {
15459 DE_UNREF(in);
15460
15461 return 6.0;
15462 }
15463
15464 template <class fp16type>
calcvkt::SpirVAssembly::fp16Sqrt15465 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15466 {
15467 const fp16type x(*in[0]);
15468 const double d(x.asDouble());
15469 const double result(deSqrt(d));
15470
15471 if (!x.isNaN() && d < 0.0)
15472 return false;
15473
15474 out[0] = fp16type(result).bits();
15475 min[0] = getMin(result, getULPs(in));
15476 max[0] = getMax(result, getULPs(in));
15477
15478 return true;
15479 }
15480 };
15481
15482 struct fp16InverseSqrt : public fp16PerComponent
15483 {
getULPsvkt::SpirVAssembly::fp16InverseSqrt15484 virtual double getULPs(vector<const deFloat16 *> &in)
15485 {
15486 DE_UNREF(in);
15487
15488 return 2.0;
15489 }
15490
15491 template <class fp16type>
calcvkt::SpirVAssembly::fp16InverseSqrt15492 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15493 {
15494 const fp16type x(*in[0]);
15495 const double d(x.asDouble());
15496 const double result(1.0 / deSqrt(d));
15497
15498 if (!x.isNaN() && d <= 0.0)
15499 return false;
15500
15501 out[0] = fp16type(result).bits();
15502 min[0] = getMin(result, getULPs(in));
15503 max[0] = getMax(result, getULPs(in));
15504
15505 return true;
15506 }
15507 };
15508
15509 struct fp16ModfFrac : public fp16PerComponent
15510 {
15511 template <class fp16type>
calcvkt::SpirVAssembly::fp16ModfFrac15512 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15513 {
15514 const fp16type x(*in[0]);
15515 const double d(x.asDouble());
15516 double i(0.0);
15517 const double result(deModf(d, &i));
15518
15519 if (x.isInf() || x.isNaN())
15520 return false;
15521
15522 out[0] = fp16type(result).bits();
15523 min[0] = getMin(result, getULPs(in));
15524 max[0] = getMax(result, getULPs(in));
15525
15526 return true;
15527 }
15528 };
15529
15530 struct fp16ModfInt : public fp16PerComponent
15531 {
15532 template <class fp16type>
calcvkt::SpirVAssembly::fp16ModfInt15533 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15534 {
15535 const fp16type x(*in[0]);
15536 const double d(x.asDouble());
15537 double i(0.0);
15538 const double unused(deModf(d, &i));
15539 const double result(i);
15540
15541 DE_UNREF(unused);
15542
15543 if (x.isInf() || x.isNaN())
15544 return false;
15545
15546 out[0] = fp16type(result).bits();
15547 min[0] = getMin(result, getULPs(in));
15548 max[0] = getMax(result, getULPs(in));
15549
15550 return true;
15551 }
15552 };
15553
15554 struct fp16FrexpS : public fp16PerComponent
15555 {
15556 template <class fp16type>
calcvkt::SpirVAssembly::fp16FrexpS15557 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15558 {
15559 const fp16type x(*in[0]);
15560 const double d(x.asDouble());
15561 int e(0);
15562 const double result(deFrExp(d, &e));
15563
15564 if (x.isNaN() || x.isInf())
15565 return false;
15566
15567 out[0] = fp16type(result).bits();
15568 min[0] = getMin(result, getULPs(in));
15569 max[0] = getMax(result, getULPs(in));
15570
15571 return true;
15572 }
15573 };
15574
15575 struct fp16FrexpE : public fp16PerComponent
15576 {
15577 template <class fp16type>
calcvkt::SpirVAssembly::fp16FrexpE15578 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15579 {
15580 const fp16type x(*in[0]);
15581 const double d(x.asDouble());
15582 int e(0);
15583 const double unused(deFrExp(d, &e));
15584 const double result(static_cast<double>(e));
15585
15586 DE_UNREF(unused);
15587
15588 if (x.isNaN() || x.isInf())
15589 return false;
15590
15591 out[0] = fp16type(result).bits();
15592 min[0] = getMin(result, getULPs(in));
15593 max[0] = getMax(result, getULPs(in));
15594
15595 return true;
15596 }
15597 };
15598
15599 struct fp16OpFAdd : public fp16PerComponent
15600 {
15601 template <class fp16type>
calcvkt::SpirVAssembly::fp16OpFAdd15602 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15603 {
15604 const fp16type x(*in[0]);
15605 const fp16type y(*in[1]);
15606 const double xd(x.asDouble());
15607 const double yd(y.asDouble());
15608 const double result(xd + yd);
15609
15610 out[0] = fp16type(result).bits();
15611 min[0] = getMin(result, getULPs(in));
15612 max[0] = getMax(result, getULPs(in));
15613
15614 return true;
15615 }
15616 };
15617
15618 struct fp16OpFSub : public fp16PerComponent
15619 {
15620 template <class fp16type>
calcvkt::SpirVAssembly::fp16OpFSub15621 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15622 {
15623 const fp16type x(*in[0]);
15624 const fp16type y(*in[1]);
15625 const double xd(x.asDouble());
15626 const double yd(y.asDouble());
15627 const double result(xd - yd);
15628
15629 out[0] = fp16type(result).bits();
15630 min[0] = getMin(result, getULPs(in));
15631 max[0] = getMax(result, getULPs(in));
15632
15633 return true;
15634 }
15635 };
15636
15637 struct fp16OpFMul : public fp16PerComponent
15638 {
15639 template <class fp16type>
calcvkt::SpirVAssembly::fp16OpFMul15640 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15641 {
15642 const fp16type x(*in[0]);
15643 const fp16type y(*in[1]);
15644 const double xd(x.asDouble());
15645 const double yd(y.asDouble());
15646 const double result(xd * yd);
15647
15648 out[0] = fp16type(result).bits();
15649 min[0] = getMin(result, getULPs(in));
15650 max[0] = getMax(result, getULPs(in));
15651
15652 return true;
15653 }
15654 };
15655
15656 struct fp16OpFDiv : public fp16PerComponent
15657 {
fp16OpFDivvkt::SpirVAssembly::fp16OpFDiv15658 fp16OpFDiv() : fp16PerComponent()
15659 {
15660 flavorNames.push_back("DirectDiv");
15661 flavorNames.push_back("InverseDiv");
15662 }
15663
15664 template <class fp16type>
calcvkt::SpirVAssembly::fp16OpFDiv15665 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15666 {
15667 const fp16type x(*in[0]);
15668 const fp16type y(*in[1]);
15669 const double xd(x.asDouble());
15670 const double yd(y.asDouble());
15671 const double unspecUlp(16.0);
15672 const double ulpCnt(de::inRange(deAbs(yd), deLdExp(1, -14), deLdExp(1, 14)) ? 2.5 : unspecUlp);
15673 double result(0.0);
15674
15675 if (y.isZero())
15676 return false;
15677
15678 if (getFlavor() == 0)
15679 {
15680 result = (xd / yd);
15681 }
15682 else if (getFlavor() == 1)
15683 {
15684 const double invyd(1.0 / yd);
15685 const fp16type invy(invyd);
15686
15687 result = (xd * invy.asDouble());
15688 }
15689 else
15690 {
15691 TCU_THROW(InternalError, "Unknown flavor");
15692 }
15693
15694 out[0] = fp16type(result).bits();
15695 min[0] = getMin(result, ulpCnt);
15696 max[0] = getMax(result, ulpCnt);
15697
15698 return true;
15699 }
15700 };
15701
15702 struct fp16Atan2 : public fp16PerComponent
15703 {
fp16Atan2vkt::SpirVAssembly::fp16Atan215704 fp16Atan2() : fp16PerComponent()
15705 {
15706 flavorNames.push_back("DoubleCalc");
15707 flavorNames.push_back("DoubleCalc_PI");
15708 }
15709
getULPsvkt::SpirVAssembly::fp16Atan215710 virtual double getULPs(vector<const deFloat16 *> &in)
15711 {
15712 DE_UNREF(in);
15713
15714 return 2 * 5.0; // This is not a precision test. Value is not from spec
15715 }
15716
15717 template <class fp16type>
calcvkt::SpirVAssembly::fp16Atan215718 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15719 {
15720 const fp16type x(*in[0]);
15721 const fp16type y(*in[1]);
15722 const double xd(x.asDouble());
15723 const double yd(y.asDouble());
15724 double result(0.0);
15725
15726 if ((x.isZero() && y.isZero()) || (x.isInf() && y.isInf()))
15727 return false;
15728
15729 if (getFlavor() == 0)
15730 {
15731 result = deAtan2(xd, yd);
15732 }
15733 else if (getFlavor() == 1)
15734 {
15735 const double ulps(2.0 * 5.0); // This is not a precision test. Value is not from spec
15736 const double eps(floatFormat16.ulp(DE_PI_DOUBLE, ulps));
15737
15738 result = deAtan2(xd, yd);
15739
15740 if (de::inRange(deAbs(result), DE_PI_DOUBLE - eps, DE_PI_DOUBLE + eps))
15741 result = -result;
15742 }
15743 else
15744 {
15745 TCU_THROW(InternalError, "Unknown flavor");
15746 }
15747
15748 out[0] = fp16type(result).bits();
15749 min[0] = getMin(result, getULPs(in));
15750 max[0] = getMax(result, getULPs(in));
15751
15752 return true;
15753 }
15754 };
15755
15756 struct fp16Pow : public fp16PerComponent
15757 {
fp16Powvkt::SpirVAssembly::fp16Pow15758 fp16Pow() : fp16PerComponent()
15759 {
15760 flavorNames.push_back("Pow");
15761 flavorNames.push_back("PowLog2");
15762 flavorNames.push_back("PowLog2FP16");
15763 }
15764
15765 template <class fp16type>
calcvkt::SpirVAssembly::fp16Pow15766 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15767 {
15768 const fp16type x(*in[0]);
15769 const fp16type y(*in[1]);
15770 const double xd(x.asDouble());
15771 const double yd(y.asDouble());
15772 const double logxeps(de::inRange(deAbs(xd), 0.5, 2.0) ? deLdExp(1.0, -7) : floatFormat16.ulp(deLog2(xd), 3.0));
15773 const double ulps1(1.0 + 4.0 * deAbs(yd * (deLog2(xd) - logxeps)));
15774 const double ulps2(1.0 + 4.0 * deAbs(yd * (deLog2(xd) + logxeps)));
15775 const double ulps(deMax(deAbs(ulps1), deAbs(ulps2)));
15776 double result(0.0);
15777
15778 if (xd < 0.0)
15779 return false;
15780
15781 if (x.isZero() && yd <= 0.0)
15782 return false;
15783
15784 if (getFlavor() == 0)
15785 {
15786 result = dePow(xd, yd);
15787 }
15788 else if (getFlavor() == 1)
15789 {
15790 const double l2d(deLog2(xd));
15791 const double e2d(deExp2(yd * l2d));
15792
15793 result = e2d;
15794 }
15795 else if (getFlavor() == 2)
15796 {
15797 const double l2d(deLog2(xd));
15798 const fp16type l2(l2d);
15799 const double e2d(deExp2(yd * l2.asDouble()));
15800 const fp16type e2(e2d);
15801
15802 result = e2.asDouble();
15803 }
15804 else
15805 {
15806 TCU_THROW(InternalError, "Unknown flavor");
15807 }
15808
15809 out[0] = fp16type(result).bits();
15810 min[0] = getMin(result, ulps);
15811 max[0] = getMax(result, ulps);
15812
15813 return true;
15814 }
15815 };
15816
15817 struct fp16FMin : public fp16PerComponent
15818 {
15819 template <class fp16type>
calcvkt::SpirVAssembly::fp16FMin15820 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15821 {
15822 const fp16type x(*in[0]);
15823 const fp16type y(*in[1]);
15824 const double xd(x.asDouble());
15825 const double yd(y.asDouble());
15826 const double result(deMin(xd, yd));
15827
15828 if (x.isNaN() || y.isNaN())
15829 return false;
15830
15831 out[0] = fp16type(result).bits();
15832 min[0] = getMin(result, getULPs(in));
15833 max[0] = getMax(result, getULPs(in));
15834
15835 return true;
15836 }
15837 };
15838
15839 struct fp16FMax : public fp16PerComponent
15840 {
15841 template <class fp16type>
calcvkt::SpirVAssembly::fp16FMax15842 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15843 {
15844 const fp16type x(*in[0]);
15845 const fp16type y(*in[1]);
15846 const double xd(x.asDouble());
15847 const double yd(y.asDouble());
15848 const double result(deMax(xd, yd));
15849
15850 if (x.isNaN() || y.isNaN())
15851 return false;
15852
15853 out[0] = fp16type(result).bits();
15854 min[0] = getMin(result, getULPs(in));
15855 max[0] = getMax(result, getULPs(in));
15856
15857 return true;
15858 }
15859 };
15860
15861 struct fp16Step : public fp16PerComponent
15862 {
15863 template <class fp16type>
calcvkt::SpirVAssembly::fp16Step15864 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15865 {
15866 const fp16type edge(*in[0]);
15867 const fp16type x(*in[1]);
15868 const double edged(edge.asDouble());
15869 const double xd(x.asDouble());
15870 const double result(deStep(edged, xd));
15871
15872 out[0] = fp16type(result).bits();
15873 min[0] = getMin(result, getULPs(in));
15874 max[0] = getMax(result, getULPs(in));
15875
15876 return true;
15877 }
15878 };
15879
15880 struct fp16Ldexp : public fp16PerComponent
15881 {
15882 template <class fp16type>
calcvkt::SpirVAssembly::fp16Ldexp15883 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15884 {
15885 const fp16type x(*in[0]);
15886 const fp16type y(*in[1]);
15887 const double xd(x.asDouble());
15888 const int yd(static_cast<int>(deTrunc(y.asDouble())));
15889 const double result(deLdExp(xd, yd));
15890
15891 if (y.isNaN() || y.isInf() || y.isDenorm() || yd < -14 || yd > 15)
15892 return false;
15893
15894 // Spec: "If this product is too large to be represented in the floating-point type, the result is undefined."
15895 if (fp16type(result).isInf())
15896 return false;
15897
15898 out[0] = fp16type(result).bits();
15899 min[0] = getMin(result, getULPs(in));
15900 max[0] = getMax(result, getULPs(in));
15901
15902 return true;
15903 }
15904 };
15905
15906 struct fp16FClamp : public fp16PerComponent
15907 {
15908 template <class fp16type>
calcvkt::SpirVAssembly::fp16FClamp15909 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15910 {
15911 const fp16type x(*in[0]);
15912 const fp16type minVal(*in[1]);
15913 const fp16type maxVal(*in[2]);
15914 const double xd(x.asDouble());
15915 const double minVald(minVal.asDouble());
15916 const double maxVald(maxVal.asDouble());
15917 const double result(deClamp(xd, minVald, maxVald));
15918
15919 if (minVal.isNaN() || maxVal.isNaN() || minVald > maxVald)
15920 return false;
15921
15922 out[0] = fp16type(result).bits();
15923 min[0] = getMin(result, getULPs(in));
15924 max[0] = getMax(result, getULPs(in));
15925
15926 return true;
15927 }
15928 };
15929
15930 struct fp16FMix : public fp16PerComponent
15931 {
fp16FMixvkt::SpirVAssembly::fp16FMix15932 fp16FMix() : fp16PerComponent()
15933 {
15934 flavorNames.push_back("DoubleCalc");
15935 flavorNames.push_back("EmulatingFP16");
15936 flavorNames.push_back("EmulatingFP16YminusX");
15937 }
15938
15939 template <class fp16type>
calcvkt::SpirVAssembly::fp16FMix15940 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
15941 {
15942 const fp16type x(*in[0]);
15943 const fp16type y(*in[1]);
15944 const fp16type a(*in[2]);
15945 const double ulps(8.0); // This is not a precision test. Value is not from spec
15946 double result(0.0);
15947
15948 if (getFlavor() == 0)
15949 {
15950 const double xd(x.asDouble());
15951 const double yd(y.asDouble());
15952 const double ad(a.asDouble());
15953 const double xeps(floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15954 const double yeps(floatFormat16.ulp(deAbs(yd * ad), ulps));
15955 const double eps(xeps + yeps);
15956
15957 result = deMix(xd, yd, ad);
15958 min[0] = result - eps;
15959 max[0] = result + eps;
15960 }
15961 else if (getFlavor() == 1)
15962 {
15963 const double xd(x.asDouble());
15964 const double yd(y.asDouble());
15965 const double ad(a.asDouble());
15966 const fp16type am(1.0 - ad);
15967 const double amd(am.asDouble());
15968 const fp16type xam(xd * amd);
15969 const double xamd(xam.asDouble());
15970 const fp16type ya(yd * ad);
15971 const double yad(ya.asDouble());
15972 const double xeps(floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15973 const double yeps(floatFormat16.ulp(deAbs(yd * ad), ulps));
15974 const double eps(xeps + yeps);
15975
15976 result = xamd + yad;
15977 min[0] = result - eps;
15978 max[0] = result + eps;
15979 }
15980 else if (getFlavor() == 2)
15981 {
15982 const double xd(x.asDouble());
15983 const double yd(y.asDouble());
15984 const double ad(a.asDouble());
15985 const fp16type ymx(yd - xd);
15986 const double ymxd(ymx.asDouble());
15987 const fp16type ymxa(ymxd * ad);
15988 const double ymxad(ymxa.asDouble());
15989 const double xeps(floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15990 const double yeps(floatFormat16.ulp(deAbs(yd * ad), ulps));
15991 const double eps(xeps + yeps);
15992
15993 result = xd + ymxad;
15994 min[0] = result - eps;
15995 max[0] = result + eps;
15996 }
15997 else
15998 {
15999 TCU_THROW(InternalError, "Unknown flavor");
16000 }
16001
16002 out[0] = fp16type(result).bits();
16003
16004 return true;
16005 }
16006 };
16007
16008 struct fp16SmoothStep : public fp16PerComponent
16009 {
fp16SmoothStepvkt::SpirVAssembly::fp16SmoothStep16010 fp16SmoothStep() : fp16PerComponent()
16011 {
16012 flavorNames.push_back("FloatCalc");
16013 flavorNames.push_back("EmulatingFP16");
16014 flavorNames.push_back("EmulatingFP16WClamp");
16015 }
16016
getULPsvkt::SpirVAssembly::fp16SmoothStep16017 virtual double getULPs(vector<const deFloat16 *> &in)
16018 {
16019 DE_UNREF(in);
16020
16021 return 4.0; // This is not a precision test. Value is not from spec
16022 }
16023
16024 template <class fp16type>
calcvkt::SpirVAssembly::fp16SmoothStep16025 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16026 {
16027 const fp16type edge0(*in[0]);
16028 const fp16type edge1(*in[1]);
16029 const fp16type x(*in[2]);
16030 double result(0.0);
16031
16032 if (edge0.isNaN() || edge1.isNaN() || x.isNaN() || edge0.asDouble() >= edge1.asDouble())
16033 return false;
16034
16035 if (edge0.isInf() || edge1.isInf() || x.isInf())
16036 return false;
16037
16038 if (getFlavor() == 0)
16039 {
16040 const float edge0d(edge0.asFloat());
16041 const float edge1d(edge1.asFloat());
16042 const float xd(x.asFloat());
16043 const float sstep(deFloatSmoothStep(edge0d, edge1d, xd));
16044
16045 result = sstep;
16046 }
16047 else if (getFlavor() == 1)
16048 {
16049 const double edge0d(edge0.asDouble());
16050 const double edge1d(edge1.asDouble());
16051 const double xd(x.asDouble());
16052
16053 if (xd <= edge0d)
16054 result = 0.0;
16055 else if (xd >= edge1d)
16056 result = 1.0;
16057 else
16058 {
16059 const fp16type a(xd - edge0d);
16060 const fp16type b(edge1d - edge0d);
16061 const fp16type t(a.asDouble() / b.asDouble());
16062 const fp16type t2(2.0 * t.asDouble());
16063 const fp16type t3(3.0 - t2.asDouble());
16064 const fp16type t4(t.asDouble() * t3.asDouble());
16065 const fp16type t5(t.asDouble() * t4.asDouble());
16066
16067 result = t5.asDouble();
16068 }
16069 }
16070 else if (getFlavor() == 2)
16071 {
16072 const double edge0d(edge0.asDouble());
16073 const double edge1d(edge1.asDouble());
16074 const double xd(x.asDouble());
16075 const fp16type a(xd - edge0d);
16076 const fp16type b(edge1d - edge0d);
16077 const fp16type bi(1.0 / b.asDouble());
16078 const fp16type t0(a.asDouble() * bi.asDouble());
16079 const double tc(deClamp(t0.asDouble(), 0.0, 1.0));
16080 const fp16type t(tc);
16081 const fp16type t2(2.0 * t.asDouble());
16082 const fp16type t3(3.0 - t2.asDouble());
16083 const fp16type t4(t.asDouble() * t3.asDouble());
16084 const fp16type t5(t.asDouble() * t4.asDouble());
16085
16086 result = t5.asDouble();
16087 }
16088 else
16089 {
16090 TCU_THROW(InternalError, "Unknown flavor");
16091 }
16092
16093 out[0] = fp16type(result).bits();
16094 min[0] = getMin(result, getULPs(in));
16095 max[0] = getMax(result, getULPs(in));
16096
16097 return true;
16098 }
16099 };
16100
16101 struct fp16Fma : public fp16PerComponent
16102 {
fp16Fmavkt::SpirVAssembly::fp16Fma16103 fp16Fma()
16104 {
16105 flavorNames.push_back("DoubleCalc");
16106 flavorNames.push_back("EmulatingFP16");
16107 }
16108
getULPsvkt::SpirVAssembly::fp16Fma16109 virtual double getULPs(vector<const deFloat16 *> &in)
16110 {
16111 DE_UNREF(in);
16112
16113 return 16.0;
16114 }
16115
16116 template <class fp16type>
calcvkt::SpirVAssembly::fp16Fma16117 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16118 {
16119 DE_ASSERT(in.size() == 3);
16120 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16121 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16122 DE_ASSERT(getArgCompCount(2) == getOutCompCount());
16123 DE_ASSERT(getOutCompCount() > 0);
16124
16125 const fp16type a(*in[0]);
16126 const fp16type b(*in[1]);
16127 const fp16type c(*in[2]);
16128 double result(0.0);
16129
16130 if (getFlavor() == 0)
16131 {
16132 const double ad(a.asDouble());
16133 const double bd(b.asDouble());
16134 const double cd(c.asDouble());
16135
16136 result = deMadd(ad, bd, cd);
16137 }
16138 else if (getFlavor() == 1)
16139 {
16140 const double ad(a.asDouble());
16141 const double bd(b.asDouble());
16142 const double cd(c.asDouble());
16143 const fp16type ab(ad * bd);
16144 const fp16type r(ab.asDouble() + cd);
16145
16146 result = r.asDouble();
16147 }
16148 else
16149 {
16150 TCU_THROW(InternalError, "Unknown flavor");
16151 }
16152
16153 out[0] = fp16type(result).bits();
16154 min[0] = getMin(result, getULPs(in));
16155 max[0] = getMax(result, getULPs(in));
16156
16157 return true;
16158 }
16159 };
16160
16161 struct fp16AllComponents : public fp16PerComponent
16162 {
callOncePerComponentvkt::SpirVAssembly::fp16AllComponents16163 bool callOncePerComponent()
16164 {
16165 return false;
16166 }
16167 };
16168
16169 struct fp16Length : public fp16AllComponents
16170 {
fp16Lengthvkt::SpirVAssembly::fp16Length16171 fp16Length() : fp16AllComponents()
16172 {
16173 flavorNames.push_back("EmulatingFP16");
16174 flavorNames.push_back("DoubleCalc");
16175 }
16176
getULPsvkt::SpirVAssembly::fp16Length16177 virtual double getULPs(vector<const deFloat16 *> &in)
16178 {
16179 DE_UNREF(in);
16180
16181 return 4.0;
16182 }
16183
16184 template <class fp16type>
calcvkt::SpirVAssembly::fp16Length16185 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16186 {
16187 DE_ASSERT(getOutCompCount() == 1);
16188 DE_ASSERT(in.size() == 1);
16189
16190 double result(0.0);
16191
16192 if (getFlavor() == 0)
16193 {
16194 fp16type r(0.0);
16195
16196 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16197 {
16198 const fp16type x(in[0][componentNdx]);
16199 const fp16type q(x.asDouble() * x.asDouble());
16200
16201 r = fp16type(r.asDouble() + q.asDouble());
16202 }
16203
16204 result = deSqrt(r.asDouble());
16205
16206 out[0] = fp16type(result).bits();
16207 }
16208 else if (getFlavor() == 1)
16209 {
16210 double r(0.0);
16211
16212 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16213 {
16214 const fp16type x(in[0][componentNdx]);
16215 const double q(x.asDouble() * x.asDouble());
16216
16217 r += q;
16218 }
16219
16220 result = deSqrt(r);
16221
16222 out[0] = fp16type(result).bits();
16223 }
16224 else
16225 {
16226 TCU_THROW(InternalError, "Unknown flavor");
16227 }
16228
16229 min[0] = getMin(result, getULPs(in));
16230 max[0] = getMax(result, getULPs(in));
16231
16232 return true;
16233 }
16234 };
16235
16236 struct fp16Distance : public fp16AllComponents
16237 {
fp16Distancevkt::SpirVAssembly::fp16Distance16238 fp16Distance() : fp16AllComponents()
16239 {
16240 flavorNames.push_back("EmulatingFP16");
16241 flavorNames.push_back("DoubleCalc");
16242 }
16243
getULPsvkt::SpirVAssembly::fp16Distance16244 virtual double getULPs(vector<const deFloat16 *> &in)
16245 {
16246 DE_UNREF(in);
16247
16248 return 4.0;
16249 }
16250
16251 template <class fp16type>
calcvkt::SpirVAssembly::fp16Distance16252 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16253 {
16254 DE_ASSERT(getOutCompCount() == 1);
16255 DE_ASSERT(in.size() == 2);
16256 DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
16257
16258 double result(0.0);
16259
16260 if (getFlavor() == 0)
16261 {
16262 fp16type r(0.0);
16263
16264 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16265 {
16266 const fp16type x(in[0][componentNdx]);
16267 const fp16type y(in[1][componentNdx]);
16268 const fp16type d(x.asDouble() - y.asDouble());
16269 const fp16type q(d.asDouble() * d.asDouble());
16270
16271 r = fp16type(r.asDouble() + q.asDouble());
16272 }
16273
16274 result = deSqrt(r.asDouble());
16275 }
16276 else if (getFlavor() == 1)
16277 {
16278 double r(0.0);
16279
16280 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16281 {
16282 const fp16type x(in[0][componentNdx]);
16283 const fp16type y(in[1][componentNdx]);
16284 const double d(x.asDouble() - y.asDouble());
16285 const double q(d * d);
16286
16287 r += q;
16288 }
16289
16290 result = deSqrt(r);
16291 }
16292 else
16293 {
16294 TCU_THROW(InternalError, "Unknown flavor");
16295 }
16296
16297 out[0] = fp16type(result).bits();
16298 min[0] = getMin(result, getULPs(in));
16299 max[0] = getMax(result, getULPs(in));
16300
16301 return true;
16302 }
16303 };
16304
16305 struct fp16Cross : public fp16AllComponents
16306 {
fp16Crossvkt::SpirVAssembly::fp16Cross16307 fp16Cross() : fp16AllComponents()
16308 {
16309 flavorNames.push_back("EmulatingFP16");
16310 flavorNames.push_back("DoubleCalc");
16311 }
16312
getULPsvkt::SpirVAssembly::fp16Cross16313 virtual double getULPs(vector<const deFloat16 *> &in)
16314 {
16315 DE_UNREF(in);
16316
16317 return 4.0;
16318 }
16319
16320 template <class fp16type>
calcvkt::SpirVAssembly::fp16Cross16321 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16322 {
16323 DE_ASSERT(getOutCompCount() == 3);
16324 DE_ASSERT(in.size() == 2);
16325 DE_ASSERT(getArgCompCount(0) == 3);
16326 DE_ASSERT(getArgCompCount(1) == 3);
16327
16328 if (getFlavor() == 0)
16329 {
16330 const fp16type x0(in[0][0]);
16331 const fp16type x1(in[0][1]);
16332 const fp16type x2(in[0][2]);
16333 const fp16type y0(in[1][0]);
16334 const fp16type y1(in[1][1]);
16335 const fp16type y2(in[1][2]);
16336 const fp16type x1y2(x1.asDouble() * y2.asDouble());
16337 const fp16type y1x2(y1.asDouble() * x2.asDouble());
16338 const fp16type x2y0(x2.asDouble() * y0.asDouble());
16339 const fp16type y2x0(y2.asDouble() * x0.asDouble());
16340 const fp16type x0y1(x0.asDouble() * y1.asDouble());
16341 const fp16type y0x1(y0.asDouble() * x1.asDouble());
16342
16343 out[0] = fp16type(x1y2.asDouble() - y1x2.asDouble()).bits();
16344 out[1] = fp16type(x2y0.asDouble() - y2x0.asDouble()).bits();
16345 out[2] = fp16type(x0y1.asDouble() - y0x1.asDouble()).bits();
16346 }
16347 else if (getFlavor() == 1)
16348 {
16349 const fp16type x0(in[0][0]);
16350 const fp16type x1(in[0][1]);
16351 const fp16type x2(in[0][2]);
16352 const fp16type y0(in[1][0]);
16353 const fp16type y1(in[1][1]);
16354 const fp16type y2(in[1][2]);
16355 const double x1y2(x1.asDouble() * y2.asDouble());
16356 const double y1x2(y1.asDouble() * x2.asDouble());
16357 const double x2y0(x2.asDouble() * y0.asDouble());
16358 const double y2x0(y2.asDouble() * x0.asDouble());
16359 const double x0y1(x0.asDouble() * y1.asDouble());
16360 const double y0x1(y0.asDouble() * x1.asDouble());
16361
16362 out[0] = fp16type(x1y2 - y1x2).bits();
16363 out[1] = fp16type(x2y0 - y2x0).bits();
16364 out[2] = fp16type(x0y1 - y0x1).bits();
16365 }
16366 else
16367 {
16368 TCU_THROW(InternalError, "Unknown flavor");
16369 }
16370
16371 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16372 min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16373 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16374 max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16375
16376 return true;
16377 }
16378 };
16379
16380 struct fp16Normalize : public fp16AllComponents
16381 {
fp16Normalizevkt::SpirVAssembly::fp16Normalize16382 fp16Normalize() : fp16AllComponents()
16383 {
16384 flavorNames.push_back("EmulatingFP16");
16385 flavorNames.push_back("DoubleCalc");
16386
16387 permutationsFlavorStart = 0;
16388 permutationsFlavorEnd = flavorNames.size();
16389
16390 // flavorNames will be extended later
16391 }
16392
setArgCompCountvkt::SpirVAssembly::fp16Normalize16393 virtual void setArgCompCount(size_t argNo, size_t compCount)
16394 {
16395 DE_ASSERT(argCompCount[argNo] == 0); // Once only
16396
16397 if (argNo == 0 && argCompCount[argNo] == 0)
16398 {
16399 const size_t maxPermutationsCount = 24u; // Equal to 4!
16400 std::vector<int> indices;
16401
16402 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16403 indices.push_back(static_cast<int>(componentNdx));
16404
16405 m_permutations.reserve(maxPermutationsCount);
16406
16407 permutationsFlavorStart = flavorNames.size();
16408
16409 do
16410 {
16411 tcu::UVec4 permutation;
16412 std::string name = "Permutted_";
16413
16414 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16415 {
16416 permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16417 name += de::toString(indices[componentNdx]);
16418 }
16419
16420 m_permutations.push_back(permutation);
16421 flavorNames.push_back(name);
16422
16423 } while (std::next_permutation(indices.begin(), indices.end()));
16424
16425 permutationsFlavorEnd = flavorNames.size();
16426 }
16427
16428 fp16AllComponents::setArgCompCount(argNo, compCount);
16429 }
getULPsvkt::SpirVAssembly::fp16Normalize16430 virtual double getULPs(vector<const deFloat16 *> &in)
16431 {
16432 DE_UNREF(in);
16433
16434 return 8.0;
16435 }
16436
16437 template <class fp16type>
calcvkt::SpirVAssembly::fp16Normalize16438 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16439 {
16440 DE_ASSERT(in.size() == 1);
16441 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16442
16443 if (getFlavor() == 0)
16444 {
16445 fp16type r(0.0);
16446
16447 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16448 {
16449 const fp16type x(in[0][componentNdx]);
16450 const fp16type q(x.asDouble() * x.asDouble());
16451
16452 r = fp16type(r.asDouble() + q.asDouble());
16453 }
16454
16455 r = fp16type(deSqrt(r.asDouble()));
16456
16457 if (r.isZero())
16458 return false;
16459
16460 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16461 {
16462 const fp16type x(in[0][componentNdx]);
16463
16464 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
16465 }
16466 }
16467 else if (getFlavor() == 1)
16468 {
16469 double r(0.0);
16470
16471 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16472 {
16473 const fp16type x(in[0][componentNdx]);
16474 const double q(x.asDouble() * x.asDouble());
16475
16476 r += q;
16477 }
16478
16479 r = deSqrt(r);
16480
16481 if (r == 0)
16482 return false;
16483
16484 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16485 {
16486 const fp16type x(in[0][componentNdx]);
16487
16488 out[componentNdx] = fp16type(x.asDouble() / r).bits();
16489 }
16490 }
16491 else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
16492 {
16493 const int compCount(static_cast<int>(getArgCompCount(0)));
16494 const size_t permutationNdx(getFlavor() - permutationsFlavorStart);
16495 const tcu::UVec4 &permutation(m_permutations[permutationNdx]);
16496 fp16type r(0.0);
16497
16498 for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16499 {
16500 const size_t componentNdx(permutation[permComponentNdx]);
16501 const fp16type x(in[0][componentNdx]);
16502 const fp16type q(x.asDouble() * x.asDouble());
16503
16504 r = fp16type(r.asDouble() + q.asDouble());
16505 }
16506
16507 r = fp16type(deSqrt(r.asDouble()));
16508
16509 if (r.isZero())
16510 return false;
16511
16512 for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16513 {
16514 const size_t componentNdx(permutation[permComponentNdx]);
16515 const fp16type x(in[0][componentNdx]);
16516
16517 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
16518 }
16519 }
16520 else
16521 {
16522 TCU_THROW(InternalError, "Unknown flavor");
16523 }
16524
16525 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16526 min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16527 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16528 max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16529
16530 return true;
16531 }
16532
16533 private:
16534 std::vector<tcu::UVec4> m_permutations;
16535 size_t permutationsFlavorStart;
16536 size_t permutationsFlavorEnd;
16537 };
16538
16539 struct fp16FaceForward : public fp16AllComponents
16540 {
getULPsvkt::SpirVAssembly::fp16FaceForward16541 virtual double getULPs(vector<const deFloat16 *> &in)
16542 {
16543 DE_UNREF(in);
16544
16545 return 4.0;
16546 }
16547
16548 template <class fp16type>
calcvkt::SpirVAssembly::fp16FaceForward16549 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16550 {
16551 DE_ASSERT(in.size() == 3);
16552 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16553 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16554 DE_ASSERT(getArgCompCount(2) == getOutCompCount());
16555
16556 fp16type dp(0.0);
16557
16558 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16559 {
16560 const fp16type x(in[1][componentNdx]);
16561 const fp16type y(in[2][componentNdx]);
16562 const double xd(x.asDouble());
16563 const double yd(y.asDouble());
16564 const fp16type q(xd * yd);
16565
16566 dp = fp16type(dp.asDouble() + q.asDouble());
16567 }
16568
16569 if (dp.isNaN() || dp.isZero())
16570 return false;
16571
16572 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16573 {
16574 const fp16type n(in[0][componentNdx]);
16575
16576 out[componentNdx] = (dp.signBit() == 1) ? n.bits() : fp16type(-n.asDouble()).bits();
16577 }
16578
16579 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16580 min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16581 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16582 max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16583
16584 return true;
16585 }
16586 };
16587
// Reference model for reflection: out = I - 2 * dot(I, N) * N, where in[0]
// supplies I and in[1] supplies N.
//
// Flavors (index into flavorNames):
//   0..3 - bit 0 of the flavor selects "KeepZeroSign", bit 1 selects float
//          arithmetic ("FloatCalc") instead of per-step fp16type rounding.
//   4    - "EmulatingFP16+2Nfirst": N is doubled before it is scaled by the dot product.
//   5    - "EmulatingFP16+2Ifirst": I is doubled before the dot product is taken.
struct fp16Reflect : public fp16AllComponents
{
    fp16Reflect() : fp16AllComponents()
    {
        flavorNames.push_back("EmulatingFP16");
        flavorNames.push_back("EmulatingFP16+KeepZeroSign");
        flavorNames.push_back("FloatCalc");
        flavorNames.push_back("FloatCalc+KeepZeroSign");
        flavorNames.push_back("EmulatingFP16+2Nfirst");
        flavorNames.push_back("EmulatingFP16+2Ifirst");
    }

    // Tolerance (in ULPs) passed to getMin()/getMax(); independent of the operands.
    virtual double getULPs(vector<const deFloat16 *> &in)
    {
        DE_UNREF(in);

        return 256.0; // This is not a precision test. Value is not from spec
    }

    // Writes one reference bit pattern per component to out[] and the accepted
    // interval to min[]/max[]. Returns false when an fp16-emulated dot product
    // is NaN (the float path has no such check); throws InternalError for an
    // unknown flavor.
    template <class fp16type>
    bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
    {
        DE_ASSERT(in.size() == 2);
        DE_ASSERT(getArgCompCount(0) == getOutCompCount());
        DE_ASSERT(getArgCompCount(1) == getOutCompCount());

        if (getFlavor() < 4)
        {
            // NOTE(review): reads the 'flavor' member directly; presumably the same
            // value getFlavor() returns - confirm against the base class.
            const bool keepZeroSign((flavor & 1) != 0 ? true : false);
            const bool floatCalc((flavor & 2) != 0 ? true : false);

            if (floatCalc)
            {
                float dp(0.0f);

                for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
                {
                    const fp16type i(in[0][componentNdx]);
                    const fp16type n(in[1][componentNdx]);
                    const float id(i.asFloat());
                    const float nd(n.asFloat());
                    const float qd(id * nd);

                    // With keepZeroSign the first product seeds the accumulator
                    // instead of being added to the initial +0.0.
                    if (keepZeroSign)
                        dp = (componentNdx == 0) ? qd : dp + qd;
                    else
                        dp = dp + qd;
                }

                for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
                {
                    const fp16type i(in[0][componentNdx]);
                    const fp16type n(in[1][componentNdx]);
                    const float dpnd(dp * n.asFloat());
                    const float dpn2d(2.0f * dpnd);
                    const float idpn2d(i.asFloat() - dpn2d);
                    // Single rounding to fp16type at the very end.
                    const fp16type result(idpn2d);

                    out[componentNdx] = result.bits();
                }
            }
            else
            {
                fp16type dp(0.0);

                for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
                {
                    const fp16type i(in[0][componentNdx]);
                    const fp16type n(in[1][componentNdx]);
                    const double id(i.asDouble());
                    const double nd(n.asDouble());
                    // Product rounded to fp16type before accumulation.
                    const fp16type q(id * nd);

                    // With keepZeroSign the first product seeds the accumulator
                    // instead of being added to the initial +0.0.
                    if (keepZeroSign)
                        dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
                    else
                        dp = fp16type(dp.asDouble() + q.asDouble());
                }

                if (dp.isNaN())
                    return false;

                for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
                {
                    const fp16type i(in[0][componentNdx]);
                    const fp16type n(in[1][componentNdx]);
                    // Order of roundings: dot*N, then *2, then I - (...).
                    const fp16type dpn(dp.asDouble() * n.asDouble());
                    const fp16type dpn2(2 * dpn.asDouble());
                    const fp16type idpn2(i.asDouble() - dpn2.asDouble());

                    out[componentNdx] = idpn2.bits();
                }
            }
        }
        else if (getFlavor() == 4)
        {
            fp16type dp(0.0);

            for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
            {
                const fp16type i(in[0][componentNdx]);
                const fp16type n(in[1][componentNdx]);
                const double id(i.asDouble());
                const double nd(n.asDouble());
                const fp16type q(id * nd);

                dp = fp16type(dp.asDouble() + q.asDouble());
            }

            if (dp.isNaN())
                return false;

            for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
            {
                const fp16type i(in[0][componentNdx]);
                const fp16type n(in[1][componentNdx]);
                // Order of roundings: 2*N, then dot*(2N), then I - (...).
                const fp16type n2(2 * n.asDouble());
                const fp16type dpn2(dp.asDouble() * n2.asDouble());
                const fp16type idpn2(i.asDouble() - dpn2.asDouble());

                out[componentNdx] = idpn2.bits();
            }
        }
        else if (getFlavor() == 5)
        {
            // Accumulates dot(2*I, N), with 2*I rounded to fp16type first.
            fp16type dp2(0.0);

            for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
            {
                const fp16type i(in[0][componentNdx]);
                const fp16type n(in[1][componentNdx]);
                const fp16type i2(2.0 * i.asDouble());
                const double i2d(i2.asDouble());
                const double nd(n.asDouble());
                const fp16type q(i2d * nd);

                dp2 = fp16type(dp2.asDouble() + q.asDouble());
            }

            if (dp2.isNaN())
                return false;

            for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
            {
                const fp16type i(in[0][componentNdx]);
                const fp16type n(in[1][componentNdx]);
                const fp16type dpn2(dp2.asDouble() * n.asDouble());
                const fp16type idpn2(i.asDouble() - dpn2.asDouble());

                out[componentNdx] = idpn2.bits();
            }
        }
        else
        {
            TCU_THROW(InternalError, "Unknown flavor");
        }

        // The interval is centred on the value after rounding to fp16type.
        for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
            min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
        for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
            max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));

        return true;
    }
};
16753
// Reference model for refraction. in[0] supplies I, in[1] supplies N and
// in[2] supplies the scalar eta. With k = 1 - eta^2 * (1 - dot(I, N)^2):
//   k <  0 : every output component is zero,
//   k >= 0 : out = eta * I - (eta * dot(I, N) + sqrt(k)) * N.
//
// Flavors (index into flavorNames): bit 0 selects "KeepZeroSign"; bit 1 selects
// the non-emulated path. That path is named "FloatCalc" in flavorNames but the
// code below evaluates it in double.
struct fp16Refract : public fp16AllComponents
{
    fp16Refract() : fp16AllComponents()
    {
        flavorNames.push_back("EmulatingFP16");
        flavorNames.push_back("EmulatingFP16+KeepZeroSign");
        flavorNames.push_back("FloatCalc");
        flavorNames.push_back("FloatCalc+KeepZeroSign");
    }

    // Tolerance (in ULPs) passed to getMin()/getMax(); independent of the operands.
    virtual double getULPs(vector<const deFloat16 *> &in)
    {
        DE_UNREF(in);

        return 8192.0; // This is not a precision test. Value is not from spec
    }

    // Writes one reference bit pattern per component to out[] and the accepted
    // interval to min[]/max[]. Returns false when a result component is
    // infinite, or - on the fp16-emulated path only - when the dot product or a
    // result component is NaN.
    template <class fp16type>
    bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
    {
        DE_ASSERT(in.size() == 3);
        DE_ASSERT(getArgCompCount(0) == getOutCompCount());
        DE_ASSERT(getArgCompCount(1) == getOutCompCount());
        DE_ASSERT(getArgCompCount(2) == 1);

        // NOTE(review): reads the 'flavor' member directly; presumably the same
        // value getFlavor() returns - confirm against the base class. There is no
        // "Unknown flavor" check here, so higher bits of the flavor are ignored.
        const bool keepZeroSign((flavor & 1) != 0 ? true : false);
        const bool doubleCalc((flavor & 2) != 0 ? true : false);
        const fp16type eta(*in[2]);

        if (doubleCalc)
        {
            double dp(0.0);

            for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
            {
                const fp16type i(in[0][componentNdx]);
                const fp16type n(in[1][componentNdx]);
                const double id(i.asDouble());
                const double nd(n.asDouble());
                const double qd(id * nd);

                // With keepZeroSign the first product seeds the accumulator
                // instead of being added to the initial +0.0.
                if (keepZeroSign)
                    dp = (componentNdx == 0) ? qd : dp + qd;
                else
                    dp = dp + qd;
            }

            // k = 1 - eta^2 * (1 - dp^2), built up one operation at a time.
            const double eta2(eta.asDouble() * eta.asDouble());
            const double dp2(dp * dp);
            const double dp1(1.0 - dp2);
            const double dpe(eta2 * dp1);
            const double k(1.0 - dpe);

            if (k < 0.0)
            {
                const fp16type zero(0.0);

                for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
                    out[componentNdx] = zero.bits();
            }
            else
            {
                const double sk(deSqrt(k));

                for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
                {
                    const fp16type i(in[0][componentNdx]);
                    const fp16type n(in[1][componentNdx]);
                    const double etai(i.asDouble() * eta.asDouble());
                    const double etadp(eta.asDouble() * dp);
                    const double etadpk(etadp + sk);
                    const double etadpkn(etadpk * n.asDouble());
                    const double full(etai - etadpkn);
                    // Single rounding to fp16type at the very end.
                    const fp16type result(full);

                    // Unlike the emulated path below, only infinity is rejected here.
                    if (result.isInf())
                        return false;

                    out[componentNdx] = result.bits();
                }
            }
        }
        else
        {
            fp16type dp(0.0);

            for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
            {
                const fp16type i(in[0][componentNdx]);
                const fp16type n(in[1][componentNdx]);
                const double id(i.asDouble());
                const double nd(n.asDouble());
                // Product rounded to fp16type before accumulation.
                const fp16type q(id * nd);

                // With keepZeroSign the first product seeds the accumulator
                // instead of being added to the initial +0.0.
                if (keepZeroSign)
                    dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
                else
                    dp = fp16type(dp.asDouble() + q.asDouble());
            }

            if (dp.isNaN())
                return false;

            // k = 1 - eta^2 * (1 - dp^2), rounded to fp16type after every operation.
            const fp16type eta2(eta.asDouble() * eta.asDouble());
            const fp16type dp2(dp.asDouble() * dp.asDouble());
            const fp16type dp1(1.0 - dp2.asDouble());
            const fp16type dpe(eta2.asDouble() * dp1.asDouble());
            const fp16type k(1.0 - dpe.asDouble());

            if (k.asDouble() < 0.0)
            {
                const fp16type zero(0.0);

                for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
                    out[componentNdx] = zero.bits();
            }
            else
            {
                const fp16type sk(deSqrt(k.asDouble()));

                for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
                {
                    const fp16type i(in[0][componentNdx]);
                    const fp16type n(in[1][componentNdx]);
                    const fp16type etai(i.asDouble() * eta.asDouble());
                    const fp16type etadp(eta.asDouble() * dp.asDouble());
                    const fp16type etadpk(etadp.asDouble() + sk.asDouble());
                    const fp16type etadpkn(etadpk.asDouble() * n.asDouble());
                    const fp16type full(etai.asDouble() - etadpkn.asDouble());

                    if (full.isNaN() || full.isInf())
                        return false;

                    out[componentNdx] = full.bits();
                }
            }
        }

        // The interval is centred on the value after rounding to fp16type.
        for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
            min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
        for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
            max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));

        return true;
    }
};
16900
16901 struct fp16Dot : public fp16AllComponents
16902 {
fp16Dotvkt::SpirVAssembly::fp16Dot16903 fp16Dot() : fp16AllComponents()
16904 {
16905 flavorNames.push_back("EmulatingFP16");
16906 flavorNames.push_back("FloatCalc");
16907 flavorNames.push_back("DoubleCalc");
16908
16909 permutationsFlavorStart = 0;
16910 permutationsFlavorEnd = flavorNames.size();
16911
16912 // flavorNames will be extended later
16913 }
16914
setArgCompCountvkt::SpirVAssembly::fp16Dot16915 virtual void setArgCompCount(size_t argNo, size_t compCount)
16916 {
16917 DE_ASSERT(argCompCount[argNo] == 0); // Once only
16918
16919 if (argNo == 0 && argCompCount[argNo] == 0)
16920 {
16921 const size_t maxPermutationsCount = 24u; // Equal to 4!
16922 std::vector<int> indices;
16923
16924 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16925 indices.push_back(static_cast<int>(componentNdx));
16926
16927 m_permutations.reserve(maxPermutationsCount);
16928
16929 permutationsFlavorStart = flavorNames.size();
16930
16931 do
16932 {
16933 tcu::UVec4 permutation;
16934 std::string name = "Permutted_";
16935
16936 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16937 {
16938 permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16939 name += de::toString(indices[componentNdx]);
16940 }
16941
16942 m_permutations.push_back(permutation);
16943 flavorNames.push_back(name);
16944
16945 } while (std::next_permutation(indices.begin(), indices.end()));
16946
16947 permutationsFlavorEnd = flavorNames.size();
16948 }
16949
16950 fp16AllComponents::setArgCompCount(argNo, compCount);
16951 }
16952
getULPsvkt::SpirVAssembly::fp16Dot16953 virtual double getULPs(vector<const deFloat16 *> &in)
16954 {
16955 DE_UNREF(in);
16956
16957 return 16.0; // This is not a precision test. Value is not from spec
16958 }
16959
16960 template <class fp16type>
calcvkt::SpirVAssembly::fp16Dot16961 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
16962 {
16963 DE_ASSERT(in.size() == 2);
16964 DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
16965 DE_ASSERT(getOutCompCount() == 1);
16966
16967 double result(0.0);
16968 double eps(0.0);
16969
16970 if (getFlavor() == 0)
16971 {
16972 fp16type dp(0.0);
16973
16974 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16975 {
16976 const fp16type x(in[0][componentNdx]);
16977 const fp16type y(in[1][componentNdx]);
16978 const fp16type q(x.asDouble() * y.asDouble());
16979
16980 dp = fp16type(dp.asDouble() + q.asDouble());
16981 eps += floatFormat16.ulp(q.asDouble(), 2.0);
16982 }
16983
16984 result = dp.asDouble();
16985 }
16986 else if (getFlavor() == 1)
16987 {
16988 float dp(0.0);
16989
16990 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16991 {
16992 const fp16type x(in[0][componentNdx]);
16993 const fp16type y(in[1][componentNdx]);
16994 const float q(x.asFloat() * y.asFloat());
16995
16996 dp += q;
16997 eps += floatFormat16.ulp(static_cast<double>(q), 2.0);
16998 }
16999
17000 result = dp;
17001 }
17002 else if (getFlavor() == 2)
17003 {
17004 double dp(0.0);
17005
17006 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
17007 {
17008 const fp16type x(in[0][componentNdx]);
17009 const fp16type y(in[1][componentNdx]);
17010 const double q(x.asDouble() * y.asDouble());
17011
17012 dp += q;
17013 eps += floatFormat16.ulp(q, 2.0);
17014 }
17015
17016 result = dp;
17017 }
17018 else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
17019 {
17020 const int compCount(static_cast<int>(getArgCompCount(1)));
17021 const size_t permutationNdx(getFlavor() - permutationsFlavorStart);
17022 const tcu::UVec4 &permutation(m_permutations[permutationNdx]);
17023 fp16type dp(0.0);
17024
17025 for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
17026 {
17027 const size_t componentNdx(permutation[permComponentNdx]);
17028 const fp16type x(in[0][componentNdx]);
17029 const fp16type y(in[1][componentNdx]);
17030 const fp16type q(x.asDouble() * y.asDouble());
17031
17032 dp = fp16type(dp.asDouble() + q.asDouble());
17033 eps += floatFormat16.ulp(q.asDouble(), 2.0);
17034 }
17035
17036 result = dp.asDouble();
17037 }
17038 else
17039 {
17040 TCU_THROW(InternalError, "Unknown flavor");
17041 }
17042
17043 out[0] = fp16type(result).bits();
17044 min[0] = result - eps;
17045 max[0] = result + eps;
17046
17047 return true;
17048 }
17049
17050 private:
17051 std::vector<tcu::UVec4> m_permutations;
17052 size_t permutationsFlavorStart;
17053 size_t permutationsFlavorEnd;
17054 };
17055
17056 struct fp16VectorTimesScalar : public fp16AllComponents
17057 {
getULPsvkt::SpirVAssembly::fp16VectorTimesScalar17058 virtual double getULPs(vector<const deFloat16 *> &in)
17059 {
17060 DE_UNREF(in);
17061
17062 return 2.0;
17063 }
17064
17065 template <class fp16type>
calcvkt::SpirVAssembly::fp16VectorTimesScalar17066 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17067 {
17068 DE_ASSERT(in.size() == 2);
17069 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
17070 DE_ASSERT(getArgCompCount(1) == 1);
17071
17072 fp16type s(*in[1]);
17073
17074 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
17075 {
17076 const fp16type x(in[0][componentNdx]);
17077 const double result(s.asDouble() * x.asDouble());
17078 const fp16type m(result);
17079
17080 out[componentNdx] = m.bits();
17081 min[componentNdx] = getMin(result, getULPs(in));
17082 max[componentNdx] = getMax(result, getULPs(in));
17083 }
17084
17085 return true;
17086 }
17087 };
17088
17089 struct fp16MatrixBase : public fp16AllComponents
17090 {
getComponentValidityvkt::SpirVAssembly::fp16MatrixBase17091 uint32_t getComponentValidity()
17092 {
17093 return static_cast<uint32_t>(-1);
17094 }
17095
getNdxvkt::SpirVAssembly::fp16MatrixBase17096 inline size_t getNdx(const size_t rowCount, const size_t col, const size_t row)
17097 {
17098 const size_t minComponentCount = 0;
17099 const size_t maxComponentCount = 3;
17100 const size_t alignedRowsCount = (rowCount == 3) ? 4 : rowCount;
17101
17102 DE_ASSERT(de::inRange(rowCount, minComponentCount + 1, maxComponentCount + 1));
17103 DE_ASSERT(de::inRange(col, minComponentCount, maxComponentCount));
17104 DE_ASSERT(de::inBounds(row, minComponentCount, rowCount));
17105 DE_UNREF(minComponentCount);
17106 DE_UNREF(maxComponentCount);
17107
17108 return col * alignedRowsCount + row;
17109 }
17110
getComponentMatrixValidityMaskvkt::SpirVAssembly::fp16MatrixBase17111 uint32_t getComponentMatrixValidityMask(size_t cols, size_t rows)
17112 {
17113 uint32_t result = 0u;
17114
17115 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17116 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17117 {
17118 const size_t bitNdx = getNdx(rows, colNdx, rowNdx);
17119
17120 DE_ASSERT(bitNdx < sizeof(result) * 8);
17121
17122 result |= (1 << bitNdx);
17123 }
17124
17125 return result;
17126 }
17127 };
17128
17129 template <size_t cols, size_t rows>
17130 struct fp16Transpose : public fp16MatrixBase
17131 {
getULPsvkt::SpirVAssembly::fp16Transpose17132 virtual double getULPs(vector<const deFloat16 *> &in)
17133 {
17134 DE_UNREF(in);
17135
17136 return 1.0;
17137 }
17138
getComponentValidityvkt::SpirVAssembly::fp16Transpose17139 uint32_t getComponentValidity()
17140 {
17141 return getComponentMatrixValidityMask(rows, cols);
17142 }
17143
17144 template <class fp16type>
calcvkt::SpirVAssembly::fp16Transpose17145 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17146 {
17147 DE_ASSERT(in.size() == 1);
17148
17149 const size_t alignedCols = (cols == 3) ? 4 : cols;
17150 const size_t alignedRows = (rows == 3) ? 4 : rows;
17151 vector<deFloat16> output(alignedCols * alignedRows, 0);
17152
17153 DE_ASSERT(output.size() == alignedCols * alignedRows);
17154
17155 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17156 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17157 output[rowNdx * alignedCols + colNdx] = in[0][colNdx * alignedRows + rowNdx];
17158
17159 deMemcpy(out, &output[0], sizeof(deFloat16) * output.size());
17160 deMemcpy(min, &output[0], sizeof(deFloat16) * output.size());
17161 deMemcpy(max, &output[0], sizeof(deFloat16) * output.size());
17162
17163 return true;
17164 }
17165 };
17166
17167 template <size_t cols, size_t rows>
17168 struct fp16MatrixTimesScalar : public fp16MatrixBase
17169 {
getULPsvkt::SpirVAssembly::fp16MatrixTimesScalar17170 virtual double getULPs(vector<const deFloat16 *> &in)
17171 {
17172 DE_UNREF(in);
17173
17174 return 4.0;
17175 }
17176
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesScalar17177 uint32_t getComponentValidity()
17178 {
17179 return getComponentMatrixValidityMask(cols, rows);
17180 }
17181
17182 template <class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesScalar17183 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17184 {
17185 DE_ASSERT(in.size() == 2);
17186 DE_ASSERT(getArgCompCount(1) == 1);
17187
17188 const fp16type y(in[1][0]);
17189 const float scalar(y.asFloat());
17190 const size_t alignedCols = (cols == 3) ? 4 : cols;
17191 const size_t alignedRows = (rows == 3) ? 4 : rows;
17192
17193 DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
17194 DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
17195 DE_UNREF(alignedCols);
17196
17197 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17198 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17199 {
17200 const size_t ndx(colNdx * alignedRows + rowNdx);
17201 const fp16type x(in[0][ndx]);
17202 const double result(scalar * x.asFloat());
17203
17204 out[ndx] = fp16type(result).bits();
17205 min[ndx] = getMin(result, getULPs(in));
17206 max[ndx] = getMax(result, getULPs(in));
17207 }
17208
17209 return true;
17210 }
17211 };
17212
17213 template <size_t cols, size_t rows>
17214 struct fp16VectorTimesMatrix : public fp16MatrixBase
17215 {
fp16VectorTimesMatrixvkt::SpirVAssembly::fp16VectorTimesMatrix17216 fp16VectorTimesMatrix() : fp16MatrixBase()
17217 {
17218 flavorNames.push_back("EmulatingFP16");
17219 flavorNames.push_back("FloatCalc");
17220 }
17221
getULPsvkt::SpirVAssembly::fp16VectorTimesMatrix17222 virtual double getULPs(vector<const deFloat16 *> &in)
17223 {
17224 DE_UNREF(in);
17225
17226 return (8.0 * cols);
17227 }
17228
getComponentValidityvkt::SpirVAssembly::fp16VectorTimesMatrix17229 uint32_t getComponentValidity()
17230 {
17231 return getComponentMatrixValidityMask(cols, 1);
17232 }
17233
17234 template <class fp16type>
calcvkt::SpirVAssembly::fp16VectorTimesMatrix17235 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17236 {
17237 DE_ASSERT(in.size() == 2);
17238
17239 const size_t alignedCols = (cols == 3) ? 4 : cols;
17240 const size_t alignedRows = (rows == 3) ? 4 : rows;
17241
17242 DE_ASSERT(getOutCompCount() == cols);
17243 DE_ASSERT(getArgCompCount(0) == rows);
17244 DE_ASSERT(getArgCompCount(1) == alignedCols * alignedRows);
17245 DE_UNREF(alignedCols);
17246
17247 if (getFlavor() == 0)
17248 {
17249 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17250 {
17251 fp16type s(fp16type::zero(1));
17252
17253 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17254 {
17255 const fp16type v(in[0][rowNdx]);
17256 const float vf(v.asFloat());
17257 const size_t ndx(colNdx * alignedRows + rowNdx);
17258 const fp16type x(in[1][ndx]);
17259 const float xf(x.asFloat());
17260 const fp16type m(vf * xf);
17261
17262 s = fp16type(s.asFloat() + m.asFloat());
17263 }
17264
17265 out[colNdx] = s.bits();
17266 min[colNdx] = getMin(s.asDouble(), getULPs(in));
17267 max[colNdx] = getMax(s.asDouble(), getULPs(in));
17268 }
17269 }
17270 else if (getFlavor() == 1)
17271 {
17272 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17273 {
17274 float s(0.0f);
17275
17276 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17277 {
17278 const fp16type v(in[0][rowNdx]);
17279 const float vf(v.asFloat());
17280 const size_t ndx(colNdx * alignedRows + rowNdx);
17281 const fp16type x(in[1][ndx]);
17282 const float xf(x.asFloat());
17283 const float m(vf * xf);
17284
17285 s += m;
17286 }
17287
17288 out[colNdx] = fp16type(s).bits();
17289 min[colNdx] = getMin(static_cast<double>(s), getULPs(in));
17290 max[colNdx] = getMax(static_cast<double>(s), getULPs(in));
17291 }
17292 }
17293 else
17294 {
17295 TCU_THROW(InternalError, "Unknown flavor");
17296 }
17297
17298 return true;
17299 }
17300 };
17301
17302 template <size_t cols, size_t rows>
17303 struct fp16MatrixTimesVector : public fp16MatrixBase
17304 {
fp16MatrixTimesVectorvkt::SpirVAssembly::fp16MatrixTimesVector17305 fp16MatrixTimesVector() : fp16MatrixBase()
17306 {
17307 flavorNames.push_back("EmulatingFP16");
17308 flavorNames.push_back("FloatCalc");
17309 }
17310
getULPsvkt::SpirVAssembly::fp16MatrixTimesVector17311 virtual double getULPs(vector<const deFloat16 *> &in)
17312 {
17313 DE_UNREF(in);
17314
17315 return (8.0 * rows);
17316 }
17317
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesVector17318 uint32_t getComponentValidity()
17319 {
17320 return getComponentMatrixValidityMask(rows, 1);
17321 }
17322
17323 template <class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesVector17324 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17325 {
17326 DE_ASSERT(in.size() == 2);
17327
17328 const size_t alignedCols = (cols == 3) ? 4 : cols;
17329 const size_t alignedRows = (rows == 3) ? 4 : rows;
17330
17331 DE_ASSERT(getOutCompCount() == rows);
17332 DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
17333 DE_ASSERT(getArgCompCount(1) == cols);
17334 DE_UNREF(alignedCols);
17335
17336 if (getFlavor() == 0)
17337 {
17338 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17339 {
17340 fp16type s(fp16type::zero(1));
17341
17342 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17343 {
17344 const size_t ndx(colNdx * alignedRows + rowNdx);
17345 const fp16type x(in[0][ndx]);
17346 const float xf(x.asFloat());
17347 const fp16type v(in[1][colNdx]);
17348 const float vf(v.asFloat());
17349 const fp16type m(vf * xf);
17350
17351 s = fp16type(s.asFloat() + m.asFloat());
17352 }
17353
17354 out[rowNdx] = s.bits();
17355 min[rowNdx] = getMin(s.asDouble(), getULPs(in));
17356 max[rowNdx] = getMax(s.asDouble(), getULPs(in));
17357 }
17358 }
17359 else if (getFlavor() == 1)
17360 {
17361 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17362 {
17363 float s(0.0f);
17364
17365 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17366 {
17367 const size_t ndx(colNdx * alignedRows + rowNdx);
17368 const fp16type x(in[0][ndx]);
17369 const float xf(x.asFloat());
17370 const fp16type v(in[1][colNdx]);
17371 const float vf(v.asFloat());
17372 const float m(vf * xf);
17373
17374 s += m;
17375 }
17376
17377 out[rowNdx] = fp16type(s).bits();
17378 min[rowNdx] = getMin(static_cast<double>(s), getULPs(in));
17379 max[rowNdx] = getMax(static_cast<double>(s), getULPs(in));
17380 }
17381 }
17382 else
17383 {
17384 TCU_THROW(InternalError, "Unknown flavor");
17385 }
17386
17387 return true;
17388 }
17389 };
17390
17391 template <size_t colsL, size_t rowsL, size_t colsR, size_t rowsR>
17392 struct fp16MatrixTimesMatrix : public fp16MatrixBase
17393 {
fp16MatrixTimesMatrixvkt::SpirVAssembly::fp16MatrixTimesMatrix17394 fp16MatrixTimesMatrix() : fp16MatrixBase()
17395 {
17396 flavorNames.push_back("EmulatingFP16");
17397 flavorNames.push_back("FloatCalc");
17398 }
17399
getULPsvkt::SpirVAssembly::fp16MatrixTimesMatrix17400 virtual double getULPs(vector<const deFloat16 *> &in)
17401 {
17402 DE_UNREF(in);
17403
17404 return 32.0;
17405 }
17406
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesMatrix17407 uint32_t getComponentValidity()
17408 {
17409 return getComponentMatrixValidityMask(colsR, rowsL);
17410 }
17411
17412 template <class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesMatrix17413 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17414 {
17415 DE_STATIC_ASSERT(colsL == rowsR);
17416
17417 DE_ASSERT(in.size() == 2);
17418
17419 const size_t alignedColsL = (colsL == 3) ? 4 : colsL;
17420 const size_t alignedRowsL = (rowsL == 3) ? 4 : rowsL;
17421 const size_t alignedColsR = (colsR == 3) ? 4 : colsR;
17422 const size_t alignedRowsR = (rowsR == 3) ? 4 : rowsR;
17423
17424 DE_ASSERT(getOutCompCount() == alignedColsR * alignedRowsL);
17425 DE_ASSERT(getArgCompCount(0) == alignedColsL * alignedRowsL);
17426 DE_ASSERT(getArgCompCount(1) == alignedColsR * alignedRowsR);
17427 DE_UNREF(alignedColsL);
17428 DE_UNREF(alignedColsR);
17429
17430 if (getFlavor() == 0)
17431 {
17432 for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17433 {
17434 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17435 {
17436 const size_t ndx(colNdx * alignedRowsL + rowNdx);
17437 fp16type s(fp16type::zero(1));
17438
17439 for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17440 {
17441 const size_t ndxl(commonNdx * alignedRowsL + rowNdx);
17442 const fp16type l(in[0][ndxl]);
17443 const float lf(l.asFloat());
17444 const size_t ndxr(colNdx * alignedRowsR + commonNdx);
17445 const fp16type r(in[1][ndxr]);
17446 const float rf(r.asFloat());
17447 const fp16type m(lf * rf);
17448
17449 s = fp16type(s.asFloat() + m.asFloat());
17450 }
17451
17452 out[ndx] = s.bits();
17453 min[ndx] = getMin(s.asDouble(), getULPs(in));
17454 max[ndx] = getMax(s.asDouble(), getULPs(in));
17455 }
17456 }
17457 }
17458 else if (getFlavor() == 1)
17459 {
17460 for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17461 {
17462 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17463 {
17464 const size_t ndx(colNdx * alignedRowsL + rowNdx);
17465 float s(0.0f);
17466
17467 for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17468 {
17469 const size_t ndxl(commonNdx * alignedRowsL + rowNdx);
17470 const fp16type l(in[0][ndxl]);
17471 const float lf(l.asFloat());
17472 const size_t ndxr(colNdx * alignedRowsR + commonNdx);
17473 const fp16type r(in[1][ndxr]);
17474 const float rf(r.asFloat());
17475 const float m(lf * rf);
17476
17477 s += m;
17478 }
17479
17480 out[ndx] = fp16type(s).bits();
17481 min[ndx] = getMin(static_cast<double>(s), getULPs(in));
17482 max[ndx] = getMax(static_cast<double>(s), getULPs(in));
17483 }
17484 }
17485 }
17486 else
17487 {
17488 TCU_THROW(InternalError, "Unknown flavor");
17489 }
17490
17491 return true;
17492 }
17493 };
17494
17495 template <size_t cols, size_t rows>
17496 struct fp16OuterProduct : public fp16MatrixBase
17497 {
getULPsvkt::SpirVAssembly::fp16OuterProduct17498 virtual double getULPs(vector<const deFloat16 *> &in)
17499 {
17500 DE_UNREF(in);
17501
17502 return 2.0;
17503 }
17504
getComponentValidityvkt::SpirVAssembly::fp16OuterProduct17505 uint32_t getComponentValidity()
17506 {
17507 return getComponentMatrixValidityMask(cols, rows);
17508 }
17509
17510 template <class fp16type>
calcvkt::SpirVAssembly::fp16OuterProduct17511 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17512 {
17513 DE_ASSERT(in.size() == 2);
17514
17515 const size_t alignedCols = (cols == 3) ? 4 : cols;
17516 const size_t alignedRows = (rows == 3) ? 4 : rows;
17517
17518 DE_ASSERT(getArgCompCount(0) == rows);
17519 DE_ASSERT(getArgCompCount(1) == cols);
17520 DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
17521 DE_UNREF(alignedCols);
17522
17523 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17524 {
17525 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17526 {
17527 const size_t ndx(colNdx * alignedRows + rowNdx);
17528 const fp16type x(in[0][rowNdx]);
17529 const float xf(x.asFloat());
17530 const fp16type y(in[1][colNdx]);
17531 const float yf(y.asFloat());
17532 const fp16type m(xf * yf);
17533
17534 out[ndx] = m.bits();
17535 min[ndx] = getMin(m.asDouble(), getULPs(in));
17536 max[ndx] = getMax(m.asDouble(), getULPs(in));
17537 }
17538 }
17539
17540 return true;
17541 }
17542 };
17543
17544 template <size_t size>
17545 struct fp16Determinant;
17546
17547 template <>
17548 struct fp16Determinant<2> : public fp16MatrixBase
17549 {
getULPsvkt::SpirVAssembly::fp16Determinant17550 virtual double getULPs(vector<const deFloat16 *> &in)
17551 {
17552 DE_UNREF(in);
17553
17554 return 128.0; // This is not a precision test. Value is not from spec
17555 }
17556
getComponentValidityvkt::SpirVAssembly::fp16Determinant17557 uint32_t getComponentValidity()
17558 {
17559 return 1;
17560 }
17561
17562 template <class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17563 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17564 {
17565 const size_t cols = 2;
17566 const size_t rows = 2;
17567 const size_t alignedCols = (cols == 3) ? 4 : cols;
17568 const size_t alignedRows = (rows == 3) ? 4 : rows;
17569
17570 DE_ASSERT(in.size() == 1);
17571 DE_ASSERT(getOutCompCount() == 1);
17572 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17573 DE_UNREF(alignedCols);
17574 DE_UNREF(alignedRows);
17575
17576 // [ a b ]
17577 // [ c d ]
17578 const float a(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17579 const float b(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17580 const float c(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17581 const float d(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17582 const float ad(a * d);
17583 const fp16type adf16(ad);
17584 const float bc(b * c);
17585 const fp16type bcf16(bc);
17586 const float r(adf16.asFloat() - bcf16.asFloat());
17587 const fp16type rf16(r);
17588
17589 out[0] = rf16.bits();
17590 min[0] = getMin(r, getULPs(in));
17591 max[0] = getMax(r, getULPs(in));
17592
17593 return true;
17594 }
17595 };
17596
17597 template <>
17598 struct fp16Determinant<3> : public fp16MatrixBase
17599 {
getULPsvkt::SpirVAssembly::fp16Determinant17600 virtual double getULPs(vector<const deFloat16 *> &in)
17601 {
17602 DE_UNREF(in);
17603
17604 return 128.0; // This is not a precision test. Value is not from spec
17605 }
17606
getComponentValidityvkt::SpirVAssembly::fp16Determinant17607 uint32_t getComponentValidity()
17608 {
17609 return 1;
17610 }
17611
17612 template <class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17613 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17614 {
17615 const size_t cols = 3;
17616 const size_t rows = 3;
17617 const size_t alignedCols = (cols == 3) ? 4 : cols;
17618 const size_t alignedRows = (rows == 3) ? 4 : rows;
17619
17620 DE_ASSERT(in.size() == 1);
17621 DE_ASSERT(getOutCompCount() == 1);
17622 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17623 DE_UNREF(alignedCols);
17624 DE_UNREF(alignedRows);
17625
17626 // [ a b c ]
17627 // [ d e f ]
17628 // [ g h i ]
17629 const float a(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17630 const float b(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17631 const float c(fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17632 const float d(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17633 const float e(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17634 const float f(fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17635 const float g(fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17636 const float h(fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17637 const float i(fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
17638 const fp16type aei(a * e * i);
17639 const fp16type bfg(b * f * g);
17640 const fp16type cdh(c * d * h);
17641 const fp16type ceg(c * e * g);
17642 const fp16type bdi(b * d * i);
17643 const fp16type afh(a * f * h);
17644 const float r(aei.asFloat() + bfg.asFloat() + cdh.asFloat() - ceg.asFloat() - bdi.asFloat() - afh.asFloat());
17645 const fp16type rf16(r);
17646
17647 out[0] = rf16.bits();
17648 min[0] = getMin(r, getULPs(in));
17649 max[0] = getMax(r, getULPs(in));
17650
17651 return true;
17652 }
17653 };
17654
17655 template <>
17656 struct fp16Determinant<4> : public fp16MatrixBase
17657 {
getULPsvkt::SpirVAssembly::fp16Determinant17658 virtual double getULPs(vector<const deFloat16 *> &in)
17659 {
17660 DE_UNREF(in);
17661
17662 return 128.0; // This is not a precision test. Value is not from spec
17663 }
17664
getComponentValidityvkt::SpirVAssembly::fp16Determinant17665 uint32_t getComponentValidity()
17666 {
17667 return 1;
17668 }
17669
17670 template <class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17671 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17672 {
17673 const size_t rows = 4;
17674 const size_t cols = 4;
17675 const size_t alignedCols = (cols == 3) ? 4 : cols;
17676 const size_t alignedRows = (rows == 3) ? 4 : rows;
17677
17678 DE_ASSERT(in.size() == 1);
17679 DE_ASSERT(getOutCompCount() == 1);
17680 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17681 DE_UNREF(alignedCols);
17682 DE_UNREF(alignedRows);
17683
17684 // [ a b c d ]
17685 // [ e f g h ]
17686 // [ i j k l ]
17687 // [ m n o p ]
17688 const float a(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17689 const float b(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17690 const float c(fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17691 const float d(fp16type(in[0][getNdx(rows, 3, 0)]).asFloat());
17692 const float e(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17693 const float f(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17694 const float g(fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17695 const float h(fp16type(in[0][getNdx(rows, 3, 1)]).asFloat());
17696 const float i(fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17697 const float j(fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17698 const float k(fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
17699 const float l(fp16type(in[0][getNdx(rows, 3, 2)]).asFloat());
17700 const float m(fp16type(in[0][getNdx(rows, 0, 3)]).asFloat());
17701 const float n(fp16type(in[0][getNdx(rows, 1, 3)]).asFloat());
17702 const float o(fp16type(in[0][getNdx(rows, 2, 3)]).asFloat());
17703 const float p(fp16type(in[0][getNdx(rows, 3, 3)]).asFloat());
17704
17705 // [ f g h ]
17706 // [ j k l ]
17707 // [ n o p ]
17708 const fp16type fkp(f * k * p);
17709 const fp16type gln(g * l * n);
17710 const fp16type hjo(h * j * o);
17711 const fp16type hkn(h * k * n);
17712 const fp16type gjp(g * j * p);
17713 const fp16type flo(f * l * o);
17714 const fp16type detA(
17715 a * (fkp.asFloat() + gln.asFloat() + hjo.asFloat() - hkn.asFloat() - gjp.asFloat() - flo.asFloat()));
17716
17717 // [ e g h ]
17718 // [ i k l ]
17719 // [ m o p ]
17720 const fp16type ekp(e * k * p);
17721 const fp16type glm(g * l * m);
17722 const fp16type hio(h * i * o);
17723 const fp16type hkm(h * k * m);
17724 const fp16type gip(g * i * p);
17725 const fp16type elo(e * l * o);
17726 const fp16type detB(
17727 b * (ekp.asFloat() + glm.asFloat() + hio.asFloat() - hkm.asFloat() - gip.asFloat() - elo.asFloat()));
17728
17729 // [ e f h ]
17730 // [ i j l ]
17731 // [ m n p ]
17732 const fp16type ejp(e * j * p);
17733 const fp16type flm(f * l * m);
17734 const fp16type hin(h * i * n);
17735 const fp16type hjm(h * j * m);
17736 const fp16type fip(f * i * p);
17737 const fp16type eln(e * l * n);
17738 const fp16type detC(
17739 c * (ejp.asFloat() + flm.asFloat() + hin.asFloat() - hjm.asFloat() - fip.asFloat() - eln.asFloat()));
17740
17741 // [ e f g ]
17742 // [ i j k ]
17743 // [ m n o ]
17744 const fp16type ejo(e * j * o);
17745 const fp16type fkm(f * k * m);
17746 const fp16type gin(g * i * n);
17747 const fp16type gjm(g * j * m);
17748 const fp16type fio(f * i * o);
17749 const fp16type ekn(e * k * n);
17750 const fp16type detD(
17751 d * (ejo.asFloat() + fkm.asFloat() + gin.asFloat() - gjm.asFloat() - fio.asFloat() - ekn.asFloat()));
17752
17753 const float r(detA.asFloat() - detB.asFloat() + detC.asFloat() - detD.asFloat());
17754 const fp16type rf16(r);
17755
17756 out[0] = rf16.bits();
17757 min[0] = getMin(r, getULPs(in));
17758 max[0] = getMax(r, getULPs(in));
17759
17760 return true;
17761 }
17762 };
17763
17764 template <size_t size>
17765 struct fp16Inverse;
17766
17767 template <>
17768 struct fp16Inverse<2> : public fp16MatrixBase
17769 {
getULPsvkt::SpirVAssembly::fp16Inverse17770 virtual double getULPs(vector<const deFloat16 *> &in)
17771 {
17772 DE_UNREF(in);
17773
17774 return 128.0; // This is not a precision test. Value is not from spec
17775 }
17776
getComponentValidityvkt::SpirVAssembly::fp16Inverse17777 uint32_t getComponentValidity()
17778 {
17779 return getComponentMatrixValidityMask(2, 2);
17780 }
17781
17782 template <class fp16type>
calcvkt::SpirVAssembly::fp16Inverse17783 bool calc(vector<const deFloat16 *> &in, deFloat16 *out, double *min, double *max)
17784 {
17785 const size_t cols = 2;
17786 const size_t rows = 2;
17787 const size_t alignedCols = (cols == 3) ? 4 : cols;
17788 const size_t alignedRows = (rows == 3) ? 4 : rows;
17789
17790 DE_ASSERT(in.size() == 1);
17791 DE_ASSERT(getOutCompCount() == alignedRows * alignedCols);
17792 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17793 DE_UNREF(alignedCols);
17794
17795 // [ a b ]
17796 // [ c d ]
17797 const float a(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17798 const float b(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17799 const float c(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17800 const float d(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17801 const float ad(a * d);
17802 const fp16type adf16(ad);
17803 const float bc(b * c);
17804 const fp16type bcf16(bc);
17805 const float det(adf16.asFloat() - bcf16.asFloat());
17806 const fp16type det16(det);
17807
17808 out[0] = fp16type(d / det16.asFloat()).bits();
17809 out[1] = fp16type(-c / det16.asFloat()).bits();
17810 out[2] = fp16type(-b / det16.asFloat()).bits();
17811 out[3] = fp16type(a / det16.asFloat()).bits();
17812
17813 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17814 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17815 {
17816 const size_t ndx(colNdx * alignedRows + rowNdx);
17817 const fp16type s(out[ndx]);
17818
17819 min[ndx] = getMin(s.asDouble(), getULPs(in));
17820 max[ndx] = getMax(s.asDouble(), getULPs(in));
17821 }
17822
17823 return true;
17824 }
17825 };
17826
fp16ToString(deFloat16 val)17827 inline std::string fp16ToString(deFloat16 val)
17828 {
17829 return tcu::toHex<4>(val).toString() + " (" + de::floatToString(tcu::Float16(val).asFloat(), 10) + ")";
17830 }
17831
17832 template <size_t RES_COMPONENTS, size_t ARG0_COMPONENTS, size_t ARG1_COMPONENTS, size_t ARG2_COMPONENTS,
17833 class TestedArithmeticFunction>
compareFP16ArithmeticFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)17834 bool compareFP16ArithmeticFunc(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
17835 const std::vector<Resource> &expectedOutputs, TestLog &log)
17836 {
17837 if (inputs.size() < 1 || inputs.size() > 3 || outputAllocs.size() != 1 || expectedOutputs.size() != 1)
17838 return false;
17839
17840 const size_t resultStep = (RES_COMPONENTS == 3) ? 4 : RES_COMPONENTS;
17841 const size_t iterationsCount = expectedOutputs[0].getByteSize() / (sizeof(deFloat16) * resultStep);
17842 const size_t inputsSteps[3] = {
17843 (ARG0_COMPONENTS == 3) ? 4 : ARG0_COMPONENTS,
17844 (ARG1_COMPONENTS == 3) ? 4 : ARG1_COMPONENTS,
17845 (ARG2_COMPONENTS == 3) ? 4 : ARG2_COMPONENTS,
17846 };
17847
17848 DE_ASSERT(expectedOutputs[0].getByteSize() > 0);
17849 DE_ASSERT(expectedOutputs[0].getByteSize() == sizeof(deFloat16) * iterationsCount * resultStep);
17850
17851 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17852 {
17853 DE_ASSERT(inputs[inputNdx].getByteSize() > 0);
17854 DE_ASSERT(inputs[inputNdx].getByteSize() == sizeof(deFloat16) * iterationsCount * inputsSteps[inputNdx]);
17855 }
17856
17857 const deFloat16 *const outputAsFP16 = (const deFloat16 *)outputAllocs[0]->getHostPtr();
17858 TestedArithmeticFunction func;
17859
17860 func.setOutCompCount(RES_COMPONENTS);
17861 func.setArgCompCount(0, ARG0_COMPONENTS);
17862 func.setArgCompCount(1, ARG1_COMPONENTS);
17863 func.setArgCompCount(2, ARG2_COMPONENTS);
17864
17865 const bool callOncePerComponent = func.callOncePerComponent();
17866 const uint32_t componentValidityMask = func.getComponentValidity();
17867 const size_t denormModesCount = 2;
17868 const char *denormModes[denormModesCount] = {"keep denormal numbers", "flush to zero"};
17869 const size_t successfulRunsPerComponent = denormModesCount * func.getFlavorCount();
17870 bool success = true;
17871 size_t validatedCount = 0;
17872
17873 vector<uint8_t> inputBytes[3];
17874
17875 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17876 inputs[inputNdx].getBytes(inputBytes[inputNdx]);
17877
17878 const deFloat16 *const inputsAsFP16[3] = {
17879 inputs.size() >= 1 ? (const deFloat16 *)&inputBytes[0][0] : DE_NULL,
17880 inputs.size() >= 2 ? (const deFloat16 *)&inputBytes[1][0] : DE_NULL,
17881 inputs.size() >= 3 ? (const deFloat16 *)&inputBytes[2][0] : DE_NULL,
17882 };
17883
17884 for (size_t idx = 0; idx < iterationsCount; ++idx)
17885 {
17886 std::vector<size_t> successfulRuns(RES_COMPONENTS, successfulRunsPerComponent);
17887 std::vector<std::string> errors(RES_COMPONENTS);
17888 bool iterationValidated(true);
17889
17890 for (size_t denormNdx = 0; denormNdx < 2; ++denormNdx)
17891 {
17892 for (size_t flavorNdx = 0; flavorNdx < func.getFlavorCount(); ++flavorNdx)
17893 {
17894 func.setFlavor(flavorNdx);
17895
17896 const deFloat16 *iterationOutputFP16 = &outputAsFP16[idx * resultStep];
17897 vector<deFloat16> iterationCalculatedFP16(resultStep, 0);
17898 vector<double> iterationEdgeMin(resultStep, 0.0);
17899 vector<double> iterationEdgeMax(resultStep, 0.0);
17900 vector<const deFloat16 *> arguments;
17901
17902 for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
17903 {
17904 std::string error;
17905 bool reportError = false;
17906
17907 if (callOncePerComponent || componentNdx == 0)
17908 {
17909 bool funcCallResult;
17910
17911 arguments.clear();
17912
17913 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17914 arguments.push_back(&inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + componentNdx]);
17915
17916 if (denormNdx == 0)
17917 funcCallResult = func.template calc<tcu::Float16>(
17918 arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx],
17919 &iterationEdgeMax[componentNdx]);
17920 else
17921 funcCallResult = func.template calc<tcu::Float16Denormless>(
17922 arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx],
17923 &iterationEdgeMax[componentNdx]);
17924
17925 if (!funcCallResult)
17926 {
17927 iterationValidated = false;
17928
17929 if (callOncePerComponent)
17930 continue;
17931 else
17932 break;
17933 }
17934 }
17935
17936 if ((componentValidityMask != 0) && (componentValidityMask & (1 << componentNdx)) == 0)
17937 continue;
17938
17939 reportError = !compare16BitFloat(iterationCalculatedFP16[componentNdx],
17940 iterationOutputFP16[componentNdx], error);
17941
17942 if (reportError)
17943 {
17944 tcu::Float16 expected(iterationCalculatedFP16[componentNdx]);
17945 tcu::Float16 outputted(iterationOutputFP16[componentNdx]);
17946 tcu::Float64 edgeMin(iterationEdgeMin[componentNdx]);
17947 tcu::Float64 edgeMax(iterationEdgeMax[componentNdx]);
17948
17949 if (reportError && expected.isNaN())
17950 reportError = false;
17951
17952 if (reportError && !expected.isNaN() && !outputted.isNaN())
17953 {
17954 if (reportError && !expected.isInf() && !outputted.isInf())
17955 {
17956 // Ignore rounding
17957 if (expected.bits() == outputted.bits() + 1 || expected.bits() + 1 == outputted.bits())
17958 reportError = false;
17959 }
17960
17961 if (reportError && expected.isInf())
17962 {
17963 // RTZ rounding mode returns +/-65504 instead of Inf on overflow
17964 if (expected.sign() == 1 && outputted.bits() == 0x7bff &&
17965 edgeMin.asDouble() <= std::numeric_limits<double>::max())
17966 reportError = false;
17967 else if (expected.sign() == -1 && outputted.bits() == 0xfbff &&
17968 edgeMax.asDouble() >= -std::numeric_limits<double>::max())
17969 reportError = false;
17970 }
17971
17972 if (reportError)
17973 {
17974 const double outputtedDouble = outputted.asDouble();
17975
17976 DE_ASSERT(edgeMin.isNaN() || edgeMax.isNaN() ||
17977 (edgeMin.asDouble() <= edgeMax.asDouble()));
17978
17979 if (de::inRange(outputtedDouble, edgeMin.asDouble(), edgeMax.asDouble()))
17980 reportError = false;
17981 }
17982 }
17983
17984 if (reportError)
17985 {
17986 const size_t inputsComps[3] = {
17987 ARG0_COMPONENTS,
17988 ARG1_COMPONENTS,
17989 ARG2_COMPONENTS,
17990 };
17991 string inputsValues("Inputs:");
17992 string flavorName(func.getFlavorCount() == 1 ?
17993 "" :
17994 string(" flavor ") + de::toString(flavorNdx) + " (" +
17995 func.getCurrentFlavorName() + ")");
17996 std::stringstream errStream;
17997
17998 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17999 {
18000 const size_t inputCompsCount = inputsComps[inputNdx];
18001
18002 inputsValues += " [" + de::toString(inputNdx) + "]=(";
18003
18004 for (size_t compNdx = 0; compNdx < inputCompsCount; ++compNdx)
18005 {
18006 const deFloat16 inputComponentValue =
18007 inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + compNdx];
18008
18009 inputsValues += fp16ToString(inputComponentValue) +
18010 ((compNdx + 1 == inputCompsCount) ? ")" : ", ");
18011 }
18012 }
18013
18014 errStream << "At"
18015 << " iteration " << de::toString(idx) << " component "
18016 << de::toString(componentNdx) << " denormMode " << de::toString(denormNdx) << " ("
18017 << denormModes[denormNdx] << ")"
18018 << " " << flavorName << " " << inputsValues
18019 << " outputted:" + fp16ToString(iterationOutputFP16[componentNdx])
18020 << " expected:" + fp16ToString(iterationCalculatedFP16[componentNdx])
18021 << " or in range: [" << iterationEdgeMin[componentNdx] << ", "
18022 << iterationEdgeMax[componentNdx] << "]."
18023 << " " << error << "." << std::endl;
18024
18025 errors[componentNdx] += errStream.str();
18026
18027 successfulRuns[componentNdx]--;
18028 }
18029 }
18030 }
18031 }
18032 }
18033
18034 for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
18035 {
18036 // Check if any component has total failure
18037 if (successfulRuns[componentNdx] == 0)
18038 {
18039 // Test failed in all denorm modes and all flavors for certain component: dump errors
18040 log << TestLog::Message << errors[componentNdx] << TestLog::EndMessage;
18041
18042 success = false;
18043 }
18044 }
18045
18046 if (iterationValidated)
18047 validatedCount++;
18048 }
18049
18050 if (validatedCount < 16)
18051 TCU_THROW(InternalError, "Too few samples have been validated.");
18052
18053 return success;
18054 }
18055
// IEEE-754 floating point numbers:
// +--------+------+----------+-------------+
// | binary | sign | exponent | significand |
// +--------+------+----------+-------------+
// | 16-bit |  1   |    5     |     10      |
// +--------+------+----------+-------------+
// | 32-bit |  1   |    8     |     23      |
// +--------+------+----------+-------------+
//
// 16-bit floats:
//
// 0   000 00   00 0000 0001 (0x0001: 2^-24:         minimum positive denormalized)
// 0   000 00   11 1111 1111 (0x03ff: 2^-14 - 2^-24: maximum positive denormalized)
// 0   000 01   00 0000 0000 (0x0400: 2^-14:         minimum positive normalized)
// 0   111 10   11 1111 1111 (0x7bff: 65504:         maximum positive normalized)
//
// 0   000 00   00 0000 0000 (0x0000: +0)
// 0   111 11   00 0000 0000 (0x7c00: +Inf)
// 0   000 00   11 1111 0000 (0x03f0: +Denorm)
// 0   000 01   00 0000 0001 (0x0401: +Norm)
// 0   111 11   00 0000 1111 (0x7c0f: +SNaN)
// 0   111 11   11 1111 0000 (0x7ff0: +QNaN)
//
// Generate and return 16-bit floats.
//
// The leading values are manually picked (zeros, infinities, range limits and
// some useful constants), while the rest are randomly generated.
// Expected count to be at least the number of manually picked values (numPicks).
getFloat16a(de::Random & rnd,uint32_t count)18082 vector<deFloat16> getFloat16a(de::Random &rnd, uint32_t count)
18083 {
18084 vector<deFloat16> float16;
18085
18086 float16.reserve(count);
18087
18088 // Zero
18089 float16.push_back(uint16_t(0x0000));
18090 float16.push_back(uint16_t(0x8000));
18091 // Infinity
18092 float16.push_back(uint16_t(0x7c00));
18093 float16.push_back(uint16_t(0xfc00));
18094 // Normalized
18095 float16.push_back(uint16_t(0x0401));
18096 float16.push_back(uint16_t(0x8401));
18097 // Some normal number
18098 float16.push_back(uint16_t(0x14cb));
18099 float16.push_back(uint16_t(0x94cb));
18100 // Min/max positive normal
18101 float16.push_back(uint16_t(0x0400));
18102 float16.push_back(uint16_t(0x7bff));
18103 // Min/max negative normal
18104 float16.push_back(uint16_t(0x8400));
18105 float16.push_back(uint16_t(0xfbff));
18106 // PI
18107 float16.push_back(uint16_t(0x4248)); // 3.140625
18108 float16.push_back(uint16_t(0xb248)); // -3.140625
18109 // PI/2
18110 float16.push_back(uint16_t(0x3e48)); // 1.5703125
18111 float16.push_back(uint16_t(0xbe48)); // -1.5703125
18112 float16.push_back(uint16_t(0x3c00)); // 1.0
18113 float16.push_back(uint16_t(0x3800)); // 0.5
18114 // Some useful constants
18115 float16.push_back(tcu::Float16(-2.5f).bits());
18116 float16.push_back(tcu::Float16(-1.0f).bits());
18117 float16.push_back(tcu::Float16(0.4f).bits());
18118 float16.push_back(tcu::Float16(2.5f).bits());
18119
18120 const uint32_t numPicks = static_cast<uint32_t>(float16.size());
18121
18122 DE_ASSERT(count >= numPicks);
18123 count -= numPicks;
18124
18125 for (uint32_t numIdx = 0; numIdx < count; ++numIdx)
18126 {
18127 int sign = (rnd.getUint16() % 2 == 0) ? +1 : -1;
18128 int exponent = (rnd.getUint16() % 29) - 14 + 1;
18129 uint16_t mantissa = static_cast<uint16_t>(2 * (rnd.getUint16() % 512));
18130
18131 // Exclude power of -14 to avoid denorms
18132 DE_ASSERT(de::inRange(exponent, -13, 15));
18133
18134 float16.push_back(tcu::Float16::constructBits(sign, exponent, mantissa).bits());
18135 }
18136
18137 return float16;
18138 }
18139
getInputData1(uint32_t seed,size_t count,size_t argNo)18140 static inline vector<deFloat16> getInputData1(uint32_t seed, size_t count, size_t argNo)
18141 {
18142 DE_UNREF(argNo);
18143
18144 de::Random rnd(seed);
18145
18146 return getFloat16a(rnd, static_cast<uint32_t>(count));
18147 }
18148
getInputData2(uint32_t seed,size_t count,size_t argNo)18149 static inline vector<deFloat16> getInputData2(uint32_t seed, size_t count, size_t argNo)
18150 {
18151 de::Random rnd(seed);
18152 size_t newCount = static_cast<size_t>(deSqrt(double(count)));
18153
18154 DE_ASSERT(newCount * newCount == count);
18155
18156 vector<deFloat16> float16 = getFloat16a(rnd, static_cast<uint32_t>(newCount));
18157
18158 return squarize(float16, static_cast<uint32_t>(argNo));
18159 }
18160
getInputData3(uint32_t seed,size_t count,size_t argNo)18161 static inline vector<deFloat16> getInputData3(uint32_t seed, size_t count, size_t argNo)
18162 {
18163 if (argNo == 0 || argNo == 1)
18164 return getInputData2(seed, count, argNo);
18165 else
18166 return getInputData1(seed << argNo, count, argNo);
18167 }
18168
getInputData(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18169 vector<deFloat16> getInputData(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18170 size_t argNo)
18171 {
18172 DE_UNREF(stride);
18173
18174 vector<deFloat16> result;
18175
18176 switch (argCount)
18177 {
18178 case 1:
18179 result = getInputData1(seed, count, argNo);
18180 break;
18181 case 2:
18182 result = getInputData2(seed, count, argNo);
18183 break;
18184 case 3:
18185 result = getInputData3(seed, count, argNo);
18186 break;
18187 default:
18188 TCU_THROW(InternalError, "Invalid argument count specified");
18189 }
18190
18191 if (compCount == 3)
18192 {
18193 const size_t newCount = (3 * count) / 4;
18194 vector<deFloat16> newResult;
18195
18196 newResult.reserve(result.size());
18197
18198 for (size_t ndx = 0; ndx < newCount; ++ndx)
18199 {
18200 newResult.push_back(result[ndx]);
18201
18202 if (ndx % 3 == 2)
18203 newResult.push_back(0);
18204 }
18205
18206 result = newResult;
18207 }
18208
18209 DE_ASSERT(result.size() == count);
18210
18211 return result;
18212 }
18213
18214 // Generator for functions requiring data in range [1, inf]
getInputDataAC(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18215 vector<deFloat16> getInputDataAC(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18216 size_t argNo)
18217 {
18218 vector<deFloat16> result;
18219
18220 result = getInputData(seed, count, compCount, stride, argCount, argNo);
18221
18222 // Filter out values below 1.0 from upper half of numbers
18223 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
18224 {
18225 const float f = tcu::Float16(result[idx]).asFloat();
18226
18227 if (f < 1.0f)
18228 result[idx] = tcu::Float16(1.0f - f).bits();
18229 }
18230
18231 return result;
18232 }
18233
18234 // Generator for functions requiring data in range [-1, 1]
getInputDataA(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18235 vector<deFloat16> getInputDataA(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18236 size_t argNo)
18237 {
18238 vector<deFloat16> result;
18239
18240 result = getInputData(seed, count, compCount, stride, argCount, argNo);
18241
18242 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
18243 {
18244 const float f = tcu::Float16(result[idx]).asFloat();
18245
18246 if (!de::inRange(f, -1.0f, 1.0f))
18247 result[idx] = tcu::Float16(deFloatFrac(f)).bits();
18248 }
18249
18250 return result;
18251 }
18252
18253 // Generator for functions requiring data in range [-pi, pi]
getInputDataPI(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18254 vector<deFloat16> getInputDataPI(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18255 size_t argNo)
18256 {
18257 vector<deFloat16> result;
18258
18259 result = getInputData(seed, count, compCount, stride, argCount, argNo);
18260
18261 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
18262 {
18263 const float f = tcu::Float16(result[idx]).asFloat();
18264
18265 if (!de::inRange(f, -DE_PI, DE_PI))
18266 result[idx] = tcu::Float16(fmodf(f, DE_PI)).bits();
18267 }
18268
18269 return result;
18270 }
18271
18272 // Generator for functions requiring data in range [0, inf]
getInputDataP(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18273 vector<deFloat16> getInputDataP(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18274 size_t argNo)
18275 {
18276 vector<deFloat16> result;
18277
18278 result = getInputData(seed, count, compCount, stride, argCount, argNo);
18279
18280 if (argNo == 0)
18281 {
18282 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
18283 result[idx] &= static_cast<deFloat16>(~0x8000);
18284 }
18285
18286 return result;
18287 }
18288
getInputDataV(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18289 vector<deFloat16> getInputDataV(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18290 size_t argNo)
18291 {
18292 DE_UNREF(stride);
18293 DE_UNREF(argCount);
18294
18295 vector<deFloat16> result;
18296
18297 if (argNo == 0)
18298 result = getInputData2(seed, count, argNo);
18299 else
18300 {
18301 const size_t alignedCount = (compCount == 3) ? 4 : compCount;
18302 const size_t newCountX = static_cast<size_t>(deSqrt(double(count * alignedCount)));
18303 const size_t newCountY = count / newCountX;
18304 de::Random rnd(seed);
18305 vector<deFloat16> float16 = getFloat16a(rnd, static_cast<uint32_t>(newCountX));
18306
18307 DE_ASSERT(newCountX * newCountX == alignedCount * count);
18308
18309 for (size_t numIdx = 0; numIdx < newCountX; ++numIdx)
18310 {
18311 const vector<deFloat16> tmp(newCountY, float16[numIdx]);
18312
18313 result.insert(result.end(), tmp.begin(), tmp.end());
18314 }
18315 }
18316
18317 DE_ASSERT(result.size() == count);
18318
18319 return result;
18320 }
18321
getInputDataM(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18322 vector<deFloat16> getInputDataM(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18323 size_t argNo)
18324 {
18325 DE_UNREF(compCount);
18326 DE_UNREF(stride);
18327 DE_UNREF(argCount);
18328
18329 de::Random rnd(seed << argNo);
18330 vector<deFloat16> result;
18331
18332 result = getFloat16a(rnd, static_cast<uint32_t>(count));
18333
18334 DE_ASSERT(result.size() == count);
18335
18336 return result;
18337 }
18338
getInputDataD(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18339 vector<deFloat16> getInputDataD(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18340 size_t argNo)
18341 {
18342 DE_UNREF(compCount);
18343 DE_UNREF(argCount);
18344
18345 de::Random rnd(seed << argNo);
18346 vector<deFloat16> result;
18347
18348 for (uint32_t numIdx = 0; numIdx < count; ++numIdx)
18349 {
18350 int num = (rnd.getUint16() % 16) - 8;
18351
18352 result.push_back(tcu::Float16(float(num)).bits());
18353 }
18354
18355 result[0 * stride] = uint16_t(0x7c00); // +Inf
18356 result[1 * stride] = uint16_t(0xfc00); // -Inf
18357
18358 DE_ASSERT(result.size() == count);
18359
18360 return result;
18361 }
18362
18363 // Generator for smoothstep function
getInputDataSS(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18364 vector<deFloat16> getInputDataSS(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18365 size_t argNo)
18366 {
18367 vector<deFloat16> result;
18368
18369 result = getInputDataD(seed, count, compCount, stride, argCount, argNo);
18370
18371 if (argNo == 0)
18372 {
18373 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
18374 {
18375 const float f = tcu::Float16(result[idx]).asFloat();
18376
18377 if (f > 4.0f)
18378 result[idx] = tcu::Float16(-f).bits();
18379 }
18380 }
18381
18382 if (argNo == 1)
18383 {
18384 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
18385 {
18386 const float f = tcu::Float16(result[idx]).asFloat();
18387
18388 if (f < 4.0f)
18389 result[idx] = tcu::Float16(-f).bits();
18390 }
18391 }
18392
18393 return result;
18394 }
18395
18396 // Generates normalized vectors for arguments 0 and 1
getInputDataN(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18397 vector<deFloat16> getInputDataN(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18398 size_t argNo)
18399 {
18400 DE_UNREF(compCount);
18401 DE_UNREF(argCount);
18402
18403 de::Random rnd(seed << argNo);
18404 vector<deFloat16> result;
18405
18406 if (argNo == 0 || argNo == 1)
18407 {
18408 // The input parameters for the incident vector I and the surface normal N must already be normalized
18409 for (size_t numIdx = 0; numIdx < count; numIdx += stride)
18410 {
18411 vector<float> unnormolized;
18412 float sum = 0;
18413
18414 for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18415 unnormolized.push_back(float((rnd.getUint16() % 16) - 8));
18416
18417 for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18418 sum += unnormolized[compIdx] * unnormolized[compIdx];
18419
18420 sum = deFloatSqrt(sum);
18421 if (sum == 0.0f)
18422 unnormolized[0] = sum = 1.0f;
18423
18424 for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18425 result.push_back(tcu::Float16(unnormolized[compIdx] / sum).bits());
18426
18427 for (size_t compIdx = compCount; compIdx < stride; ++compIdx)
18428 result.push_back(0);
18429 }
18430 }
18431 else
18432 {
18433 // Input parameter eta
18434 for (uint32_t numIdx = 0; numIdx < count; ++numIdx)
18435 {
18436 int num = (rnd.getUint16() % 16) - 8;
18437
18438 result.push_back(tcu::Float16(float(num)).bits());
18439 }
18440 }
18441
18442 DE_ASSERT(result.size() == count);
18443
18444 return result;
18445 }
18446
18447 // Data generator for complex matrix functions like determinant and inverse
getInputDataC(uint32_t seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18448 vector<deFloat16> getInputDataC(uint32_t seed, size_t count, size_t compCount, size_t stride, size_t argCount,
18449 size_t argNo)
18450 {
18451 DE_UNREF(compCount);
18452 DE_UNREF(stride);
18453 DE_UNREF(argCount);
18454
18455 de::Random rnd(seed << argNo);
18456 vector<deFloat16> result;
18457
18458 for (uint32_t numIdx = 0; numIdx < count; ++numIdx)
18459 {
18460 int num = (rnd.getUint16() % 16) - 8;
18461
18462 result.push_back(tcu::Float16(float(num)).bits());
18463 }
18464
18465 DE_ASSERT(result.size() == count);
18466
18467 return result;
18468 }
18469
// Describes one float16 data type (scalar, vector or matrix) used by the arithmetic tests.
// Instances are filled positionally in the testTypes table of createFloat16ArithmeticFuncTest,
// so the member order matters.
struct Math16TestType
{
    // SPIR-V type name prefix: "" for scalars, "v2".."v4" for vectors, "m2x2".."m4x4" for matrices
    const char *typePrefix;
    // Number of components for scalars and vectors; the table uses 0 for matrices and the NONE entry
    const size_t typeComponents;
    // Size in bytes of one element when stored in an array (the table uses multiples of sizeof(deFloat16))
    const size_t typeArrayStride;
    // Size in bytes of one element when stored as a struct member
    // NOTE(review): presumably feeds the ${struct_stride} decoration offset - confirm at the use site
    const size_t typeStructStride;
    // Name of the backing storage layout; the table values match the suffixes of the
    // %SSBO_<name> types declared in the test's SPIR-V preamble (e.g. "u32_ndp_2")
    const char *storage_type;
};
18478
// Identifiers of the float16 data types covered by the tests. The values are used as array
// indices (testTypes, load_funcs, store_funcs), so the order must stay in sync with those tables.
enum Math16DataTypes
{
    NONE = 0,   // Placeholder entry
    SCALAR = 1, // For scalars and vectors the value equals the component count
    VEC2 = 2,
    VEC3 = 3,
    VEC4 = 4,
    MAT2X2, // Matrix types continue the numbering after VEC4
    MAT2X3,
    MAT2X4,
    MAT3X2,
    MAT3X3,
    MAT3X4,
    MAT4X2,
    MAT4X3,
    MAT4X4,
    MATH16_TYPE_LAST // Number of entries; used to size the per-type tables
};
18497
// SPIR-V assembly text fragments describing how a tested instruction is invoked.
// Instances are filled positionally (see the argFragment* tables), so the member order matters.
struct Math16ArgFragments
{
    // Instructions executed per data point: load the arguments, run the operation, store the result
    const char *bodies;
    // Extra module-level declarations (types, constants) needed by the body; may be empty
    const char *variables;
    // Extra decorations (e.g. member offsets of a temporary struct); may be empty
    const char *decorations;
    // Extra function-scope OpVariable declarations; may be empty
    const char *funcVariables;
};
18505
// Signature shared by the getInputData* generators above: returns the float16 input values
// for argument argNo of a function taking argCount arguments.
typedef vector<deFloat16> Math16GetInputData(uint32_t seed, size_t count, size_t compCount, size_t stride,
                                             size_t argCount, size_t argNo);
18508
// Describes one tested function or instruction.
struct Math16TestFunc
{
    // Instruction name; names starting with "Op" are treated as core instructions,
    // everything else as an extended instruction
    const char *funcName;
    // Suffix appended to funcName to form the full name (e.g. selects a Modf/Frexp variant)
    const char *funcSuffix;
    // Number of arguments the function takes (1, 2 or 3)
    size_t funcArgsCount;
    // Types of the result and of each argument, as Math16DataTypes values used to index the per-type tables
    size_t typeResult;
    size_t typeArg0;
    size_t typeArg1;
    size_t typeArg2;
    // Generator producing the input data for each argument
    Math16GetInputData *getInputDataFunc;
    // Callback of type VerifyIOFunc (declared elsewhere)
    // NOTE(review): presumably checks the outputs against the inputs - confirm
    VerifyIOFunc verifyFunc;
};
18521
18522 template <class SpecResource>
createFloat16ArithmeticFuncTest(tcu::TestContext & testCtx,tcu::TestCaseGroup & testGroup,const size_t testTypeIdx,const Math16TestFunc & testFunc)18523 void createFloat16ArithmeticFuncTest(tcu::TestContext &testCtx, tcu::TestCaseGroup &testGroup, const size_t testTypeIdx,
18524 const Math16TestFunc &testFunc)
18525 {
18526 const int testSpecificSeed = deStringHash(testGroup.getName());
18527 const int seed = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
18528 const size_t numDataPointsByAxis = 32;
18529 const size_t numDataPoints = numDataPointsByAxis * numDataPointsByAxis;
18530 const char *componentType = "f16";
18531 const Math16TestType testTypes[MATH16_TYPE_LAST] = {
18532 {"", 0, 0, 0, ""},
18533 {"", 1, 1 * sizeof(deFloat16), 2 * sizeof(deFloat16), "u32_half_ndp"},
18534 {"v2", 2, 2 * sizeof(deFloat16), 2 * sizeof(deFloat16), "u32_ndp"},
18535 {"v3", 3, 4 * sizeof(deFloat16), 4 * sizeof(deFloat16), "u32_ndp_2"},
18536 {"v4", 4, 4 * sizeof(deFloat16), 4 * sizeof(deFloat16), "u32_ndp_2"},
18537 {"m2x2", 0, 4 * sizeof(deFloat16), 4 * sizeof(deFloat16), "u32_ndp_2"},
18538 {"m2x3", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_4"},
18539 {"m2x4", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_4"},
18540 {"m3x2", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_3"},
18541 {"m3x3", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6"},
18542 {"m3x4", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6"},
18543 {"m4x2", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_4"},
18544 {"m4x3", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8"},
18545 {"m4x4", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8"},
18546 };
18547
18548 DE_ASSERT(testTypeIdx == testTypes[testTypeIdx].typeComponents);
18549
18550 const StringTemplate preMain(" %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
18551
18552 " %f16 = OpTypeFloat 16\n"
18553 " %v2f16 = OpTypeVector %f16 2\n"
18554 " %v3f16 = OpTypeVector %f16 3\n"
18555 " %v4f16 = OpTypeVector %f16 4\n"
18556 " %m2x2f16 = OpTypeMatrix %v2f16 2\n"
18557 " %m2x3f16 = OpTypeMatrix %v3f16 2\n"
18558 " %m2x4f16 = OpTypeMatrix %v4f16 2\n"
18559 " %m3x2f16 = OpTypeMatrix %v2f16 3\n"
18560 " %m3x3f16 = OpTypeMatrix %v3f16 3\n"
18561 " %m3x4f16 = OpTypeMatrix %v4f16 3\n"
18562 " %m4x2f16 = OpTypeMatrix %v2f16 4\n"
18563 " %m4x3f16 = OpTypeMatrix %v3f16 4\n"
18564 " %m4x4f16 = OpTypeMatrix %v4f16 4\n"
18565
18566 " %fp_v2i32 = OpTypePointer Function %v2i32\n"
18567 " %fp_v3i32 = OpTypePointer Function %v3i32\n"
18568 " %fp_v4i32 = OpTypePointer Function %v4i32\n"
18569
18570 " %c_u32_ndp = OpConstant %u32 ${num_data_points}\n"
18571 " %c_u32_half_ndp = OpSpecConstantOp %u32 UDiv %c_i32_ndp %c_u32_2\n"
18572 " %c_u32_5 = OpConstant %u32 5\n"
18573 " %c_u32_6 = OpConstant %u32 6\n"
18574 " %c_u32_7 = OpConstant %u32 7\n"
18575 " %c_u32_8 = OpConstant %u32 8\n"
18576 " %c_f16_0 = OpConstant %f16 0\n"
18577 " %c_f16_1 = OpConstant %f16 1\n"
18578 " %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
18579 " %up_u32 = OpTypePointer Uniform %u32\n"
18580 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
18581 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
18582
18583 " %ra_u32_half_ndp = OpTypeArray %u32 %c_u32_half_ndp\n"
18584 " %SSBO_u32_half_ndp = OpTypeStruct %ra_u32_half_ndp\n"
18585 "%up_SSBO_u32_half_ndp = OpTypePointer Uniform %SSBO_u32_half_ndp\n"
18586 " %ra_u32_ndp = OpTypeArray %u32 %c_u32_ndp\n"
18587 " %SSBO_u32_ndp = OpTypeStruct %ra_u32_ndp\n"
18588 " %up_SSBO_u32_ndp = OpTypePointer Uniform %SSBO_u32_ndp\n"
18589 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
18590 " %up_ra_u32_2 = OpTypePointer Uniform %ra_u32_2\n"
18591 " %ra_ra_u32_ndp = OpTypeArray %ra_u32_2 %c_u32_ndp\n"
18592 " %SSBO_u32_ndp_2 = OpTypeStruct %ra_ra_u32_ndp\n"
18593 " %up_SSBO_u32_ndp_2 = OpTypePointer Uniform %SSBO_u32_ndp_2\n"
18594 " %ra_u32_4 = OpTypeArray %u32 %c_u32_4\n"
18595 " %up_ra_u32_4 = OpTypePointer Uniform %ra_u32_4\n"
18596 " %ra_ra_u32_4 = OpTypeArray %ra_u32_4 %c_u32_ndp\n"
18597 " %SSBO_u32_ndp_4 = OpTypeStruct %ra_ra_u32_4\n"
18598 " %up_SSBO_u32_ndp_4 = OpTypePointer Uniform %SSBO_u32_ndp_4\n"
18599 " %ra_u32_3 = OpTypeArray %u32 %c_u32_3\n"
18600 " %up_ra_u32_3 = OpTypePointer Uniform %ra_u32_3\n"
18601 " %ra_ra_u32_3 = OpTypeArray %ra_u32_3 %c_u32_ndp\n"
18602 " %SSBO_u32_ndp_3 = OpTypeStruct %ra_ra_u32_3\n"
18603 " %up_SSBO_u32_ndp_3 = OpTypePointer Uniform %SSBO_u32_ndp_3\n"
18604 " %ra_u32_6 = OpTypeArray %u32 %c_u32_6\n"
18605 " %up_ra_u32_6 = OpTypePointer Uniform %ra_u32_6\n"
18606 " %ra_ra_u32_6 = OpTypeArray %ra_u32_6 %c_u32_ndp\n"
18607 " %SSBO_u32_ndp_6 = OpTypeStruct %ra_ra_u32_6\n"
18608 " %up_SSBO_u32_ndp_6 = OpTypePointer Uniform %SSBO_u32_ndp_6\n"
18609 " %ra_u32_8 = OpTypeArray %u32 %c_u32_8\n"
18610 " %up_ra_u32_8 = OpTypePointer Uniform %ra_u32_8\n"
18611 " %ra_ra_u32_8 = OpTypeArray %ra_u32_8 %c_u32_ndp\n"
18612 " %SSBO_u32_ndp_8 = OpTypeStruct %ra_ra_u32_8\n"
18613 " %up_SSBO_u32_ndp_8 = OpTypePointer Uniform %SSBO_u32_ndp_8\n"
18614
18615 " %f16_i32_fn = OpTypeFunction %f16 %i32\n"
18616 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
18617 " %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
18618 " %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
18619 " %m2x2f16_i32_fn = OpTypeFunction %m2x2f16 %i32\n"
18620 " %m2x3f16_i32_fn = OpTypeFunction %m2x3f16 %i32\n"
18621 " %m2x4f16_i32_fn = OpTypeFunction %m2x4f16 %i32\n"
18622 " %m3x2f16_i32_fn = OpTypeFunction %m3x2f16 %i32\n"
18623 " %m3x3f16_i32_fn = OpTypeFunction %m3x3f16 %i32\n"
18624 " %m3x4f16_i32_fn = OpTypeFunction %m3x4f16 %i32\n"
18625 " %m4x2f16_i32_fn = OpTypeFunction %m4x2f16 %i32\n"
18626 " %m4x3f16_i32_fn = OpTypeFunction %m4x3f16 %i32\n"
18627 " %m4x4f16_i32_fn = OpTypeFunction %m4x4f16 %i32\n"
18628 " %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
18629 " %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
18630 " %void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
18631 " %void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
18632 "%void_m2x2f16_i32_fn = OpTypeFunction %void %m2x2f16 %i32\n"
18633 "%void_m2x3f16_i32_fn = OpTypeFunction %void %m2x3f16 %i32\n"
18634 "%void_m2x4f16_i32_fn = OpTypeFunction %void %m2x4f16 %i32\n"
18635 "%void_m3x2f16_i32_fn = OpTypeFunction %void %m3x2f16 %i32\n"
18636 "%void_m3x3f16_i32_fn = OpTypeFunction %void %m3x3f16 %i32\n"
18637 "%void_m3x4f16_i32_fn = OpTypeFunction %void %m3x4f16 %i32\n"
18638 "%void_m4x2f16_i32_fn = OpTypeFunction %void %m4x2f16 %i32\n"
18639 "%void_m4x3f16_i32_fn = OpTypeFunction %void %m4x3f16 %i32\n"
18640 "%void_m4x4f16_i32_fn = OpTypeFunction %void %m4x4f16 %i32\n"
18641 "${arg_vars}");
18642
18643 const StringTemplate decoration("OpDecorate %ra_u32_half_ndp ArrayStride 4\n"
18644 "OpMemberDecorate %SSBO_u32_half_ndp 0 Offset 0\n"
18645 "OpDecorate %SSBO_u32_half_ndp BufferBlock\n"
18646
18647 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
18648 "OpMemberDecorate %SSBO_u32_ndp 0 Offset 0\n"
18649 "OpDecorate %SSBO_u32_ndp BufferBlock\n"
18650
18651 "OpDecorate %ra_u32_2 ArrayStride 4\n"
18652 "OpDecorate %ra_ra_u32_ndp ArrayStride 8\n"
18653 "OpMemberDecorate %SSBO_u32_ndp_2 0 Offset 0\n"
18654 "OpDecorate %SSBO_u32_ndp_2 BufferBlock\n"
18655
18656 "OpDecorate %ra_u32_4 ArrayStride 4\n"
18657 "OpDecorate %ra_ra_u32_4 ArrayStride 16\n"
18658 "OpMemberDecorate %SSBO_u32_ndp_4 0 Offset 0\n"
18659 "OpDecorate %SSBO_u32_ndp_4 BufferBlock\n"
18660
18661 "OpDecorate %ra_u32_3 ArrayStride 4\n"
18662 "OpDecorate %ra_ra_u32_3 ArrayStride 16\n"
18663 "OpMemberDecorate %SSBO_u32_ndp_3 0 Offset 0\n"
18664 "OpDecorate %SSBO_u32_ndp_3 BufferBlock\n"
18665
18666 "OpDecorate %ra_u32_6 ArrayStride 4\n"
18667 "OpDecorate %ra_ra_u32_6 ArrayStride 32\n"
18668 "OpMemberDecorate %SSBO_u32_ndp_6 0 Offset 0\n"
18669 "OpDecorate %SSBO_u32_ndp_6 BufferBlock\n"
18670
18671 "OpDecorate %ra_u32_8 ArrayStride 4\n"
18672 "OpDecorate %ra_ra_u32_8 ArrayStride 32\n"
18673 "OpMemberDecorate %SSBO_u32_ndp_8 0 Offset 0\n"
18674 "OpDecorate %SSBO_u32_ndp_8 BufferBlock\n"
18675
18676 "${arg_decorations}");
18677
18678 const StringTemplate testFun("%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
18679 " %param = OpFunctionParameter %v4f32\n"
18680 " %entry = OpLabel\n"
18681
18682 " %i = OpVariable %fp_i32 Function\n"
18683 "${arg_infunc_vars}"
18684 " OpStore %i %c_i32_0\n"
18685 " OpBranch %loop\n"
18686
18687 " %loop = OpLabel\n"
18688 " %i_cmp = OpLoad %i32 %i\n"
18689 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
18690 " OpLoopMerge %merge %next None\n"
18691 " OpBranchConditional %lt %write %merge\n"
18692
18693 " %write = OpLabel\n"
18694 " %ndx = OpLoad %i32 %i\n"
18695
18696 "${arg_func_call}"
18697
18698 " OpBranch %next\n"
18699
18700 " %next = OpLabel\n"
18701 " %i_cur = OpLoad %i32 %i\n"
18702 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
18703 " OpStore %i %i_new\n"
18704 " OpBranch %loop\n"
18705
18706 " %merge = OpLabel\n"
18707 " OpReturnValue %param\n"
18708 " OpFunctionEnd\n");
18709
18710 const Math16ArgFragments argFragment1 = {
18711 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18712 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0\n"
18713 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18714 "",
18715 "",
18716 "",
18717 };
18718
18719 const Math16ArgFragments argFragment2 = {
18720 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18721 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18722 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1\n"
18723 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18724 "",
18725 "",
18726 "",
18727 };
18728
18729 const Math16ArgFragments argFragment3 = {
18730 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18731 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18732 " %val_src2 = OpFunctionCall %${t2} %ld_arg_ssbo_src2 %ndx\n"
18733 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1 %val_src2\n"
18734 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18735 "",
18736 "",
18737 "",
18738 };
18739
18740 const Math16ArgFragments argFragmentLdExp = {
18741 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18742 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18743 "%val_src1i = OpConvertFToS %${dr}i32 %val_src1\n"
18744 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1i\n"
18745 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18746
18747 "",
18748
18749 "",
18750
18751 "",
18752 };
18753
18754 const Math16ArgFragments argFragmentModfFrac = {
18755 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18756 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18757 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18758
18759 " %fp_tmp = OpTypePointer Function %${tr}\n",
18760
18761 "",
18762
18763 " %tmp = OpVariable %fp_tmp Function\n",
18764 };
18765
18766 const Math16ArgFragments argFragmentModfInt = {
18767 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18768 "%val_unused = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18769 " %tmp0 = OpAccessChain %fp_tmp %tmp\n"
18770 " %val_dst = OpLoad %${tr} %tmp0\n"
18771 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18772
18773 " %fp_tmp = OpTypePointer Function %${tr}\n",
18774
18775 "",
18776
18777 " %tmp = OpVariable %fp_tmp Function\n",
18778 };
18779
18780 const Math16ArgFragments argFragmentModfStruct = {
18781 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18782 " %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18783 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18784 " OpStore %tmp_ptr_s %val_tmp\n"
18785 "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_${struct_member}\n"
18786 " %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18787 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18788
18789 " %fp_${tr} = OpTypePointer Function %${tr}\n"
18790 " %st_tmp = OpTypeStruct %${tr} %${tr}\n"
18791 " %fp_tmp = OpTypePointer Function %st_tmp\n"
18792 " %c_frac = OpConstant %i32 0\n"
18793 " %c_int = OpConstant %i32 1\n",
18794
18795 "OpMemberDecorate %st_tmp 0 Offset 0\n"
18796 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18797
18798 " %tmp = OpVariable %fp_tmp Function\n",
18799 };
18800
18801 const Math16ArgFragments argFragmentFrexpStructS = {
18802 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18803 " %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18804 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18805 " OpStore %tmp_ptr_s %val_tmp\n"
18806 "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_i32_0\n"
18807 " %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18808 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18809
18810 " %fp_${tr} = OpTypePointer Function %${tr}\n"
18811 " %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18812 " %fp_tmp = OpTypePointer Function %st_tmp\n",
18813
18814 "OpMemberDecorate %st_tmp 0 Offset 0\n"
18815 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18816
18817 " %tmp = OpVariable %fp_tmp Function\n",
18818 };
18819
18820 const Math16ArgFragments argFragmentFrexpStructE = {
18821 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18822 " %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18823 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18824 " OpStore %tmp_ptr_s %val_tmp\n"
18825 "%tmp_ptr_l = OpAccessChain %fp_${dr}i32 %tmp %c_i32_1\n"
18826 "%val_dst_i = OpLoad %${dr}i32 %tmp_ptr_l\n"
18827 " %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
18828 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18829
18830 " %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18831 " %fp_tmp = OpTypePointer Function %st_tmp\n",
18832
18833 "OpMemberDecorate %st_tmp 0 Offset 0\n"
18834 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18835
18836 " %tmp = OpVariable %fp_tmp Function\n",
18837 };
18838
18839 const Math16ArgFragments argFragmentFrexpS = {
18840 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18841 " %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
18842 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18843 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18844
18845 "",
18846
18847 "",
18848
18849 " %tmp = OpVariable %fp_${dr}i32 Function\n",
18850 };
18851
18852 const Math16ArgFragments argFragmentFrexpE = {
18853 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18854 " %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
18855 "%val_unused = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18856 "%val_dst_i = OpLoad %${dr}i32 %out_exp\n"
18857 " %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
18858 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18859
18860 "",
18861
18862 "",
18863
18864 " %tmp = OpVariable %fp_${dr}i32 Function\n",
18865 };
18866
18867 string load_funcs[MATH16_TYPE_LAST];
18868 load_funcs[SCALAR] = loadScalarF16FromUint;
18869 load_funcs[VEC2] = loadV2F16FromUint;
18870 load_funcs[VEC3] = loadV3F16FromUints;
18871 load_funcs[VEC4] = loadV4F16FromUints;
18872 load_funcs[MAT2X2] = loadM2x2F16FromUints;
18873 load_funcs[MAT2X3] = loadM2x3F16FromUints;
18874 load_funcs[MAT2X4] = loadM2x4F16FromUints;
18875 load_funcs[MAT3X2] = loadM3x2F16FromUints;
18876 load_funcs[MAT3X3] = loadM3x3F16FromUints;
18877 load_funcs[MAT3X4] = loadM3x4F16FromUints;
18878 load_funcs[MAT4X2] = loadM4x2F16FromUints;
18879 load_funcs[MAT4X3] = loadM4x3F16FromUints;
18880 load_funcs[MAT4X4] = loadM4x4F16FromUints;
18881
18882 string store_funcs[MATH16_TYPE_LAST];
18883 store_funcs[SCALAR] = storeScalarF16AsUint;
18884 store_funcs[VEC2] = storeV2F16AsUint;
18885 store_funcs[VEC3] = storeV3F16AsUints;
18886 store_funcs[VEC4] = storeV4F16AsUints;
18887 store_funcs[MAT2X2] = storeM2x2F16AsUints;
18888 store_funcs[MAT2X3] = storeM2x3F16AsUints;
18889 store_funcs[MAT2X4] = storeM2x4F16AsUints;
18890 store_funcs[MAT3X2] = storeM3x2F16AsUints;
18891 store_funcs[MAT3X3] = storeM3x3F16AsUints;
18892 store_funcs[MAT3X4] = storeM3x4F16AsUints;
18893 store_funcs[MAT4X2] = storeM4x2F16AsUints;
18894 store_funcs[MAT4X3] = storeM4x3F16AsUints;
18895 store_funcs[MAT4X4] = storeM4x4F16AsUints;
18896
18897 const Math16TestType &testType = testTypes[testTypeIdx];
18898 const string funcNameString = string(testFunc.funcName) + string(testFunc.funcSuffix);
18899 const string testName = de::toLower(funcNameString);
18900 const Math16ArgFragments *argFragments = DE_NULL;
18901 const size_t typeStructStride = testType.typeStructStride;
18902 const bool extInst = !(testFunc.funcName[0] == 'O' && testFunc.funcName[1] == 'p');
18903 const size_t numFloatsPerArg0Type = testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16);
18904 const size_t iterations = numDataPoints / numFloatsPerArg0Type;
18905 const size_t numFloatsPerResultType = testTypes[testFunc.typeResult].typeArrayStride / sizeof(deFloat16);
18906 const vector<deFloat16> float16UnusedOutput(iterations * numFloatsPerResultType, 0);
18907 VulkanFeatures features;
18908 SpecResource specResource;
18909 map<string, string> specs;
18910 map<string, string> fragments;
18911 vector<string> extensions;
18912 string funcCall;
18913 string funcVariables;
18914 string variables;
18915 string declarations;
18916 string decorations;
18917 string functions;
18918
18919 switch (testFunc.funcArgsCount)
18920 {
18921 case 1:
18922 {
18923 argFragments = &argFragment1;
18924
18925 if (funcNameString == "ModfFrac")
18926 argFragments = &argFragmentModfFrac;
18927 if (funcNameString == "ModfInt")
18928 argFragments = &argFragmentModfInt;
18929 if (funcNameString == "ModfStructFrac")
18930 argFragments = &argFragmentModfStruct;
18931 if (funcNameString == "ModfStructInt")
18932 argFragments = &argFragmentModfStruct;
18933 if (funcNameString == "FrexpS")
18934 argFragments = &argFragmentFrexpS;
18935 if (funcNameString == "FrexpE")
18936 argFragments = &argFragmentFrexpE;
18937 if (funcNameString == "FrexpStructS")
18938 argFragments = &argFragmentFrexpStructS;
18939 if (funcNameString == "FrexpStructE")
18940 argFragments = &argFragmentFrexpStructE;
18941
18942 break;
18943 }
18944 case 2:
18945 {
18946 argFragments = &argFragment2;
18947
18948 if (funcNameString == "Ldexp")
18949 argFragments = &argFragmentLdExp;
18950
18951 break;
18952 }
18953 case 3:
18954 {
18955 argFragments = &argFragment3;
18956
18957 break;
18958 }
18959 default:
18960 {
18961 TCU_THROW(InternalError, "Invalid number of arguments");
18962 }
18963 }
18964
18965 functions = StringTemplate(store_funcs[testFunc.typeResult]).specialize({{"var", "ssbo_dst"}});
18966 if (testFunc.funcArgsCount == 1)
18967 {
18968 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18969 variables += " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18970 " %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18971
18972 decorations += "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18973 "OpDecorate %ssbo_src0 Binding 0\n"
18974 "OpDecorate %ssbo_dst DescriptorSet 0\n"
18975 "OpDecorate %ssbo_dst Binding 1\n";
18976 }
18977 else if (testFunc.funcArgsCount == 2)
18978 {
18979 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18980 functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18981 variables += " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18982 " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18983 " %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18984
18985 decorations += "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18986 "OpDecorate %ssbo_src0 Binding 0\n"
18987 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
18988 "OpDecorate %ssbo_src1 Binding 1\n"
18989 "OpDecorate %ssbo_dst DescriptorSet 0\n"
18990 "OpDecorate %ssbo_dst Binding 2\n";
18991 }
18992 else if (testFunc.funcArgsCount == 3)
18993 {
18994 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18995 functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18996 functions += StringTemplate(load_funcs[testFunc.typeArg2]).specialize({{"var", "ssbo_src2"}});
18997 variables += " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18998 " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18999 " %ssbo_src2 = OpVariable %up_SSBO_${store_t2} Uniform\n"
19000 " %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
19001
19002 decorations += "OpDecorate %ssbo_src0 DescriptorSet 0\n"
19003 "OpDecorate %ssbo_src0 Binding 0\n"
19004 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
19005 "OpDecorate %ssbo_src1 Binding 1\n"
19006 "OpDecorate %ssbo_src2 DescriptorSet 0\n"
19007 "OpDecorate %ssbo_src2 Binding 2\n"
19008 "OpDecorate %ssbo_dst DescriptorSet 0\n"
19009 "OpDecorate %ssbo_dst Binding 3\n";
19010 }
19011 else
19012 {
19013 TCU_THROW(InternalError, "Invalid number of function arguments");
19014 }
19015
19016 variables += argFragments->variables;
19017 decorations += argFragments->decorations;
19018
19019 specs["dr"] = testTypes[testFunc.typeResult].typePrefix;
19020 specs["d0"] = testTypes[testFunc.typeArg0].typePrefix;
19021 specs["d1"] = testTypes[testFunc.typeArg1].typePrefix;
19022 specs["d2"] = testTypes[testFunc.typeArg2].typePrefix;
19023 specs["tr"] = string(testTypes[testFunc.typeResult].typePrefix) + componentType;
19024 specs["t0"] = string(testTypes[testFunc.typeArg0].typePrefix) + componentType;
19025 specs["t1"] = string(testTypes[testFunc.typeArg1].typePrefix) + componentType;
19026 specs["t2"] = string(testTypes[testFunc.typeArg2].typePrefix) + componentType;
19027 specs["store_tr"] = string(testTypes[testFunc.typeResult].storage_type);
19028 specs["store_t0"] = string(testTypes[testFunc.typeArg0].storage_type);
19029 specs["store_t1"] = string(testTypes[testFunc.typeArg1].storage_type);
19030 specs["store_t2"] = string(testTypes[testFunc.typeArg2].storage_type);
19031 specs["struct_stride"] = de::toString(typeStructStride);
19032 specs["op"] = extInst ? "OpExtInst" : testFunc.funcName;
19033 specs["ext_inst"] = extInst ? string("%ext_import ") + testFunc.funcName : "";
19034 specs["struct_member"] = de::toLower(testFunc.funcSuffix);
19035
19036 variables = StringTemplate(variables).specialize(specs);
19037 decorations = StringTemplate(decorations).specialize(specs);
19038 funcVariables = StringTemplate(argFragments->funcVariables).specialize(specs);
19039 funcCall = StringTemplate(argFragments->bodies).specialize(specs);
19040
19041 specs["num_data_points"] = de::toString(iterations);
19042 specs["arg_vars"] = variables;
19043 specs["arg_decorations"] = decorations;
19044 specs["arg_infunc_vars"] = funcVariables;
19045 specs["arg_func_call"] = funcCall;
19046
19047 fragments["extension"] = "%ext_import = OpExtInstImport \"GLSL.std.450\"";
19048 fragments["capability"] = "OpCapability Matrix\nOpCapability Float16\n";
19049 fragments["decoration"] = decoration.specialize(specs);
19050 fragments["pre_main"] = preMain.specialize(specs) + functions;
19051 fragments["testfun"] = testFun.specialize(specs);
19052
19053 for (size_t inputArgNdx = 0; inputArgNdx < testFunc.funcArgsCount; ++inputArgNdx)
19054 {
19055 const size_t numFloatsPerItem =
19056 (inputArgNdx == 0) ? testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16) :
19057 (inputArgNdx == 1) ? testTypes[testFunc.typeArg1].typeArrayStride / sizeof(deFloat16) :
19058 (inputArgNdx == 2) ? testTypes[testFunc.typeArg2].typeArrayStride / sizeof(deFloat16) :
19059 -1;
19060 const vector<deFloat16> inputData = testFunc.getInputDataFunc(
19061 seed, numFloatsPerItem * iterations, testTypeIdx, numFloatsPerItem, testFunc.funcArgsCount, inputArgNdx);
19062
19063 specResource.inputs.push_back(
19064 Resource(BufferSp(new Float16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
19065 }
19066
19067 specResource.outputs.push_back(
19068 Resource(BufferSp(new Float16Buffer(float16UnusedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
19069 specResource.verifyIO = testFunc.verifyFunc;
19070
19071 extensions.push_back("VK_KHR_shader_float16_int8");
19072
19073 features.extFloat16Int8.shaderFloat16 = true;
19074 if (specResource.graphicsFeaturesRequired)
19075 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
19076
19077 finalizeTestsCreation(specResource, fragments, testCtx, testGroup, testName, features, extensions, IVec3(1, 1, 1));
19078 }
19079
19080 template <size_t C, class SpecResource>
createFloat16ArithmeticSet(tcu::TestContext & testCtx)19081 tcu::TestCaseGroup *createFloat16ArithmeticSet(tcu::TestContext &testCtx)
19082 {
19083 DE_STATIC_ASSERT(C >= 1 && C <= 4);
19084
19085 const std::string testGroupName(string("arithmetic_") + de::toString(C));
19086 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, testGroupName.c_str()));
19087 const Math16TestFunc testFuncs[] = {
19088 {"OpFNegate", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16OpFNegate>},
19089 {"Round", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Round>},
19090 {"RoundEven", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16RoundEven>},
19091 {"Trunc", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Trunc>},
19092 {"FAbs", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16FAbs>},
19093 {"FSign", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16FSign>},
19094 {"Floor", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Floor>},
19095 {"Ceil", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Ceil>},
19096 {"Fract", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Fract>},
19097 {"Radians", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Radians>},
19098 {"Degrees", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Degrees>},
19099 {"Sin", "", 1, C, C, 0, 0, &getInputDataPI, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Sin>},
19100 {"Cos", "", 1, C, C, 0, 0, &getInputDataPI, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Cos>},
19101 {"Tan", "", 1, C, C, 0, 0, &getInputDataPI, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Tan>},
19102 {"Asin", "", 1, C, C, 0, 0, &getInputDataA, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Asin>},
19103 {"Acos", "", 1, C, C, 0, 0, &getInputDataA, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Acos>},
19104 {"Atan", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Atan>},
19105 {"Sinh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Sinh>},
19106 {"Cosh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Cosh>},
19107 {"Tanh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Tanh>},
19108 {"Asinh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Asinh>},
19109 {"Acosh", "", 1, C, C, 0, 0, &getInputDataAC, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Acosh>},
19110 {"Atanh", "", 1, C, C, 0, 0, &getInputDataA, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Atanh>},
19111 {"Exp", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Exp>},
19112 {"Log", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Log>},
19113 {"Exp2", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Exp2>},
19114 {"Log2", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Log2>},
19115 {"Sqrt", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Sqrt>},
19116 {"InverseSqrt", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc<C, C, 0, 0, fp16InverseSqrt>},
19117 {"Modf", "Frac", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16ModfFrac>},
19118 {"Modf", "Int", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16ModfInt>},
19119 {"ModfStruct", "Frac", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16ModfFrac>},
19120 {"ModfStruct", "Int", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16ModfInt>},
19121 {"Frexp", "S", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16FrexpS>},
19122 {"Frexp", "E", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16FrexpE>},
19123 {"FrexpStruct", "S", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16FrexpS>},
19124 {"FrexpStruct", "E", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16FrexpE>},
19125 {"OpFAdd", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16OpFAdd>},
19126 {"OpFSub", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16OpFSub>},
19127 {"OpFMul", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16OpFMul>},
19128 {"OpFDiv", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16OpFDiv>},
19129 {"Atan2", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16Atan2>},
19130 {"Pow", "", 2, C, C, C, 0, &getInputDataP, compareFP16ArithmeticFunc<C, C, C, 0, fp16Pow>},
19131 {"FMin", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16FMin>},
19132 {"FMax", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16FMax>},
19133 {"Step", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16Step>},
19134 {"Ldexp", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc<C, C, C, 0, fp16Ldexp>},
19135 {"FClamp", "", 3, C, C, C, C, &getInputData, compareFP16ArithmeticFunc<C, C, C, C, fp16FClamp>},
19136 {"FMix", "", 3, C, C, C, C, &getInputDataD, compareFP16ArithmeticFunc<C, C, C, C, fp16FMix>},
19137 {"SmoothStep", "", 3, C, C, C, C, &getInputDataSS, compareFP16ArithmeticFunc<C, C, C, C, fp16SmoothStep>},
19138 {"Fma", "", 3, C, C, C, C, &getInputData, compareFP16ArithmeticFunc<C, C, C, C, fp16Fma>},
19139 {"Length", "", 1, 1, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<1, C, 0, 0, fp16Length>},
19140 {"Distance", "", 2, 1, C, C, 0, &getInputData, compareFP16ArithmeticFunc<1, C, C, 0, fp16Distance>},
19141 {"Cross", "", 2, C, C, C, 0, &getInputDataD, compareFP16ArithmeticFunc<C, C, C, 0, fp16Cross>},
19142 {"Normalize", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc<C, C, 0, 0, fp16Normalize>},
19143 {"FaceForward", "", 3, C, C, C, C, &getInputDataD, compareFP16ArithmeticFunc<C, C, C, C, fp16FaceForward>},
19144 {"Reflect", "", 2, C, C, C, 0, &getInputDataD, compareFP16ArithmeticFunc<C, C, C, 0, fp16Reflect>},
19145 {"Refract", "", 3, C, C, C, 1, &getInputDataN, compareFP16ArithmeticFunc<C, C, C, 1, fp16Refract>},
19146 {"OpDot", "", 2, 1, C, C, 0, &getInputDataD, compareFP16ArithmeticFunc<1, C, C, 0, fp16Dot>},
19147 {"OpVectorTimesScalar", "", 2, C, C, 1, 0, &getInputDataV,
19148 compareFP16ArithmeticFunc<C, C, 1, 0, fp16VectorTimesScalar>},
19149 };
19150
19151 for (uint32_t testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
19152 {
19153 const Math16TestFunc &testFunc = testFuncs[testFuncIdx];
19154 const string funcNameString = testFunc.funcName;
19155
19156 if ((C != 3) && funcNameString == "Cross")
19157 continue;
19158
19159 if ((C < 2) && funcNameString == "OpDot")
19160 continue;
19161
19162 if ((C < 2) && funcNameString == "OpVectorTimesScalar")
19163 continue;
19164
19165 createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), C, testFunc);
19166 }
19167
19168 return testGroup.release();
19169 }
19170
19171 template <class SpecResource>
createFloat16ArithmeticSet(tcu::TestContext & testCtx)19172 tcu::TestCaseGroup *createFloat16ArithmeticSet(tcu::TestContext &testCtx)
19173 {
19174 const std::string testGroupName("arithmetic");
19175 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, testGroupName.c_str()));
19176 const Math16TestFunc testFuncs[] = {
19177 {"OpTranspose", "2x2", 1, MAT2X2, MAT2X2, 0, 0, &getInputDataM,
19178 compareFP16ArithmeticFunc<4, 4, 0, 0, fp16Transpose<2, 2>>},
19179 {"OpTranspose", "3x2", 1, MAT2X3, MAT3X2, 0, 0, &getInputDataM,
19180 compareFP16ArithmeticFunc<8, 8, 0, 0, fp16Transpose<3, 2>>},
19181 {"OpTranspose", "4x2", 1, MAT2X4, MAT4X2, 0, 0, &getInputDataM,
19182 compareFP16ArithmeticFunc<8, 8, 0, 0, fp16Transpose<4, 2>>},
19183 {"OpTranspose", "2x3", 1, MAT3X2, MAT2X3, 0, 0, &getInputDataM,
19184 compareFP16ArithmeticFunc<8, 8, 0, 0, fp16Transpose<2, 3>>},
19185 {"OpTranspose", "3x3", 1, MAT3X3, MAT3X3, 0, 0, &getInputDataM,
19186 compareFP16ArithmeticFunc<16, 16, 0, 0, fp16Transpose<3, 3>>},
19187 {"OpTranspose", "4x3", 1, MAT3X4, MAT4X3, 0, 0, &getInputDataM,
19188 compareFP16ArithmeticFunc<16, 16, 0, 0, fp16Transpose<4, 3>>},
19189 {"OpTranspose", "2x4", 1, MAT4X2, MAT2X4, 0, 0, &getInputDataM,
19190 compareFP16ArithmeticFunc<8, 8, 0, 0, fp16Transpose<2, 4>>},
19191 {"OpTranspose", "3x4", 1, MAT4X3, MAT3X4, 0, 0, &getInputDataM,
19192 compareFP16ArithmeticFunc<16, 16, 0, 0, fp16Transpose<3, 4>>},
19193 {"OpTranspose", "4x4", 1, MAT4X4, MAT4X4, 0, 0, &getInputDataM,
19194 compareFP16ArithmeticFunc<16, 16, 0, 0, fp16Transpose<4, 4>>},
19195 {"OpMatrixTimesScalar", "2x2", 2, MAT2X2, MAT2X2, 1, 0, &getInputDataD,
19196 compareFP16ArithmeticFunc<4, 4, 1, 0, fp16MatrixTimesScalar<2, 2>>},
19197 {"OpMatrixTimesScalar", "2x3", 2, MAT2X3, MAT2X3, 1, 0, &getInputDataD,
19198 compareFP16ArithmeticFunc<8, 8, 1, 0, fp16MatrixTimesScalar<2, 3>>},
19199 {"OpMatrixTimesScalar", "2x4", 2, MAT2X4, MAT2X4, 1, 0, &getInputDataD,
19200 compareFP16ArithmeticFunc<8, 8, 1, 0, fp16MatrixTimesScalar<2, 4>>},
19201 {"OpMatrixTimesScalar", "3x2", 2, MAT3X2, MAT3X2, 1, 0, &getInputDataD,
19202 compareFP16ArithmeticFunc<8, 8, 1, 0, fp16MatrixTimesScalar<3, 2>>},
19203 {"OpMatrixTimesScalar", "3x3", 2, MAT3X3, MAT3X3, 1, 0, &getInputDataD,
19204 compareFP16ArithmeticFunc<16, 16, 1, 0, fp16MatrixTimesScalar<3, 3>>},
19205 {"OpMatrixTimesScalar", "3x4", 2, MAT3X4, MAT3X4, 1, 0, &getInputDataD,
19206 compareFP16ArithmeticFunc<16, 16, 1, 0, fp16MatrixTimesScalar<3, 4>>},
19207 {"OpMatrixTimesScalar", "4x2", 2, MAT4X2, MAT4X2, 1, 0, &getInputDataD,
19208 compareFP16ArithmeticFunc<8, 8, 1, 0, fp16MatrixTimesScalar<4, 2>>},
19209 {"OpMatrixTimesScalar", "4x3", 2, MAT4X3, MAT4X3, 1, 0, &getInputDataD,
19210 compareFP16ArithmeticFunc<16, 16, 1, 0, fp16MatrixTimesScalar<4, 3>>},
19211 {"OpMatrixTimesScalar", "4x4", 2, MAT4X4, MAT4X4, 1, 0, &getInputDataD,
19212 compareFP16ArithmeticFunc<16, 16, 1, 0, fp16MatrixTimesScalar<4, 4>>},
19213 {"OpVectorTimesMatrix", "2x2", 2, VEC2, VEC2, MAT2X2, 0, &getInputDataD,
19214 compareFP16ArithmeticFunc<2, 2, 4, 0, fp16VectorTimesMatrix<2, 2>>},
19215 {"OpVectorTimesMatrix", "2x3", 2, VEC2, VEC3, MAT2X3, 0, &getInputDataD,
19216 compareFP16ArithmeticFunc<2, 3, 8, 0, fp16VectorTimesMatrix<2, 3>>},
19217 {"OpVectorTimesMatrix", "2x4", 2, VEC2, VEC4, MAT2X4, 0, &getInputDataD,
19218 compareFP16ArithmeticFunc<2, 4, 8, 0, fp16VectorTimesMatrix<2, 4>>},
19219 {"OpVectorTimesMatrix", "3x2", 2, VEC3, VEC2, MAT3X2, 0, &getInputDataD,
19220 compareFP16ArithmeticFunc<3, 2, 8, 0, fp16VectorTimesMatrix<3, 2>>},
19221 {"OpVectorTimesMatrix", "3x3", 2, VEC3, VEC3, MAT3X3, 0, &getInputDataD,
19222 compareFP16ArithmeticFunc<3, 3, 16, 0, fp16VectorTimesMatrix<3, 3>>},
19223 {"OpVectorTimesMatrix", "3x4", 2, VEC3, VEC4, MAT3X4, 0, &getInputDataD,
19224 compareFP16ArithmeticFunc<3, 4, 16, 0, fp16VectorTimesMatrix<3, 4>>},
19225 {"OpVectorTimesMatrix", "4x2", 2, VEC4, VEC2, MAT4X2, 0, &getInputDataD,
19226 compareFP16ArithmeticFunc<4, 2, 8, 0, fp16VectorTimesMatrix<4, 2>>},
19227 {"OpVectorTimesMatrix", "4x3", 2, VEC4, VEC3, MAT4X3, 0, &getInputDataD,
19228 compareFP16ArithmeticFunc<4, 3, 16, 0, fp16VectorTimesMatrix<4, 3>>},
19229 {"OpVectorTimesMatrix", "4x4", 2, VEC4, VEC4, MAT4X4, 0, &getInputDataD,
19230 compareFP16ArithmeticFunc<4, 4, 16, 0, fp16VectorTimesMatrix<4, 4>>},
19231 {"OpMatrixTimesVector", "2x2", 2, VEC2, MAT2X2, VEC2, 0, &getInputDataD,
19232 compareFP16ArithmeticFunc<2, 4, 2, 0, fp16MatrixTimesVector<2, 2>>},
19233 {"OpMatrixTimesVector", "2x3", 2, VEC3, MAT2X3, VEC2, 0, &getInputDataD,
19234 compareFP16ArithmeticFunc<3, 8, 2, 0, fp16MatrixTimesVector<2, 3>>},
19235 {"OpMatrixTimesVector", "2x4", 2, VEC4, MAT2X4, VEC2, 0, &getInputDataD,
19236 compareFP16ArithmeticFunc<4, 8, 2, 0, fp16MatrixTimesVector<2, 4>>},
19237 {"OpMatrixTimesVector", "3x2", 2, VEC2, MAT3X2, VEC3, 0, &getInputDataD,
19238 compareFP16ArithmeticFunc<2, 8, 3, 0, fp16MatrixTimesVector<3, 2>>},
19239 {"OpMatrixTimesVector", "3x3", 2, VEC3, MAT3X3, VEC3, 0, &getInputDataD,
19240 compareFP16ArithmeticFunc<3, 16, 3, 0, fp16MatrixTimesVector<3, 3>>},
19241 {"OpMatrixTimesVector", "3x4", 2, VEC4, MAT3X4, VEC3, 0, &getInputDataD,
19242 compareFP16ArithmeticFunc<4, 16, 3, 0, fp16MatrixTimesVector<3, 4>>},
19243 {"OpMatrixTimesVector", "4x2", 2, VEC2, MAT4X2, VEC4, 0, &getInputDataD,
19244 compareFP16ArithmeticFunc<2, 8, 4, 0, fp16MatrixTimesVector<4, 2>>},
19245 {"OpMatrixTimesVector", "4x3", 2, VEC3, MAT4X3, VEC4, 0, &getInputDataD,
19246 compareFP16ArithmeticFunc<3, 16, 4, 0, fp16MatrixTimesVector<4, 3>>},
19247 {"OpMatrixTimesVector", "4x4", 2, VEC4, MAT4X4, VEC4, 0, &getInputDataD,
19248 compareFP16ArithmeticFunc<4, 16, 4, 0, fp16MatrixTimesVector<4, 4>>},
19249 {"OpMatrixTimesMatrix", "2x2_2x2", 2, MAT2X2, MAT2X2, MAT2X2, 0, &getInputDataD,
19250 compareFP16ArithmeticFunc<4, 4, 4, 0, fp16MatrixTimesMatrix<2, 2, 2, 2>>},
19251 {"OpMatrixTimesMatrix", "2x2_3x2", 2, MAT3X2, MAT2X2, MAT3X2, 0, &getInputDataD,
19252 compareFP16ArithmeticFunc<8, 4, 8, 0, fp16MatrixTimesMatrix<2, 2, 3, 2>>},
19253 {"OpMatrixTimesMatrix", "2x2_4x2", 2, MAT4X2, MAT2X2, MAT4X2, 0, &getInputDataD,
19254 compareFP16ArithmeticFunc<8, 4, 8, 0, fp16MatrixTimesMatrix<2, 2, 4, 2>>},
19255 {"OpMatrixTimesMatrix", "2x3_2x2", 2, MAT2X3, MAT2X3, MAT2X2, 0, &getInputDataD,
19256 compareFP16ArithmeticFunc<8, 8, 4, 0, fp16MatrixTimesMatrix<2, 3, 2, 2>>},
19257 {"OpMatrixTimesMatrix", "2x3_3x2", 2, MAT3X3, MAT2X3, MAT3X2, 0, &getInputDataD,
19258 compareFP16ArithmeticFunc<16, 8, 8, 0, fp16MatrixTimesMatrix<2, 3, 3, 2>>},
19259 {"OpMatrixTimesMatrix", "2x3_4x2", 2, MAT4X3, MAT2X3, MAT4X2, 0, &getInputDataD,
19260 compareFP16ArithmeticFunc<16, 8, 8, 0, fp16MatrixTimesMatrix<2, 3, 4, 2>>},
19261 {"OpMatrixTimesMatrix", "2x4_2x2", 2, MAT2X4, MAT2X4, MAT2X2, 0, &getInputDataD,
19262 compareFP16ArithmeticFunc<8, 8, 4, 0, fp16MatrixTimesMatrix<2, 4, 2, 2>>},
19263 {"OpMatrixTimesMatrix", "2x4_3x2", 2, MAT3X4, MAT2X4, MAT3X2, 0, &getInputDataD,
19264 compareFP16ArithmeticFunc<16, 8, 8, 0, fp16MatrixTimesMatrix<2, 4, 3, 2>>},
19265 {"OpMatrixTimesMatrix", "2x4_4x2", 2, MAT4X4, MAT2X4, MAT4X2, 0, &getInputDataD,
19266 compareFP16ArithmeticFunc<16, 8, 8, 0, fp16MatrixTimesMatrix<2, 4, 4, 2>>},
19267 {"OpMatrixTimesMatrix", "3x2_2x3", 2, MAT2X2, MAT3X2, MAT2X3, 0, &getInputDataD,
19268 compareFP16ArithmeticFunc<4, 8, 8, 0, fp16MatrixTimesMatrix<3, 2, 2, 3>>},
19269 {"OpMatrixTimesMatrix", "3x2_3x3", 2, MAT3X2, MAT3X2, MAT3X3, 0, &getInputDataD,
19270 compareFP16ArithmeticFunc<8, 8, 16, 0, fp16MatrixTimesMatrix<3, 2, 3, 3>>},
19271 {"OpMatrixTimesMatrix", "3x2_4x3", 2, MAT4X2, MAT3X2, MAT4X3, 0, &getInputDataD,
19272 compareFP16ArithmeticFunc<8, 8, 16, 0, fp16MatrixTimesMatrix<3, 2, 4, 3>>},
19273 {"OpMatrixTimesMatrix", "3x3_2x3", 2, MAT2X3, MAT3X3, MAT2X3, 0, &getInputDataD,
19274 compareFP16ArithmeticFunc<8, 16, 8, 0, fp16MatrixTimesMatrix<3, 3, 2, 3>>},
19275 {"OpMatrixTimesMatrix", "3x3_3x3", 2, MAT3X3, MAT3X3, MAT3X3, 0, &getInputDataD,
19276 compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<3, 3, 3, 3>>},
19277 {"OpMatrixTimesMatrix", "3x3_4x3", 2, MAT4X3, MAT3X3, MAT4X3, 0, &getInputDataD,
19278 compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<3, 3, 4, 3>>},
19279 {"OpMatrixTimesMatrix", "3x4_2x3", 2, MAT2X4, MAT3X4, MAT2X3, 0, &getInputDataD,
19280 compareFP16ArithmeticFunc<8, 16, 8, 0, fp16MatrixTimesMatrix<3, 4, 2, 3>>},
19281 {"OpMatrixTimesMatrix", "3x4_3x3", 2, MAT3X4, MAT3X4, MAT3X3, 0, &getInputDataD,
19282 compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<3, 4, 3, 3>>},
19283 {"OpMatrixTimesMatrix", "3x4_4x3", 2, MAT4X4, MAT3X4, MAT4X3, 0, &getInputDataD,
19284 compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<3, 4, 4, 3>>},
19285 {"OpMatrixTimesMatrix", "4x2_2x4", 2, MAT2X2, MAT4X2, MAT2X4, 0, &getInputDataD,
19286 compareFP16ArithmeticFunc<4, 8, 8, 0, fp16MatrixTimesMatrix<4, 2, 2, 4>>},
19287 {"OpMatrixTimesMatrix", "4x2_3x4", 2, MAT3X2, MAT4X2, MAT3X4, 0, &getInputDataD,
19288 compareFP16ArithmeticFunc<8, 8, 16, 0, fp16MatrixTimesMatrix<4, 2, 3, 4>>},
19289 {"OpMatrixTimesMatrix", "4x2_4x4", 2, MAT4X2, MAT4X2, MAT4X4, 0, &getInputDataD,
19290 compareFP16ArithmeticFunc<8, 8, 16, 0, fp16MatrixTimesMatrix<4, 2, 4, 4>>},
19291 {"OpMatrixTimesMatrix", "4x3_2x4", 2, MAT2X3, MAT4X3, MAT2X4, 0, &getInputDataD,
19292 compareFP16ArithmeticFunc<8, 16, 8, 0, fp16MatrixTimesMatrix<4, 3, 2, 4>>},
19293 {"OpMatrixTimesMatrix", "4x3_3x4", 2, MAT3X3, MAT4X3, MAT3X4, 0, &getInputDataD,
19294 compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<4, 3, 3, 4>>},
19295 {"OpMatrixTimesMatrix", "4x3_4x4", 2, MAT4X3, MAT4X3, MAT4X4, 0, &getInputDataD,
19296 compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<4, 3, 4, 4>>},
19297 {"OpMatrixTimesMatrix", "4x4_2x4", 2, MAT2X4, MAT4X4, MAT2X4, 0, &getInputDataD,
19298 compareFP16ArithmeticFunc<8, 16, 8, 0, fp16MatrixTimesMatrix<4, 4, 2, 4>>},
19299 {"OpMatrixTimesMatrix", "4x4_3x4", 2, MAT3X4, MAT4X4, MAT3X4, 0, &getInputDataD,
19300 compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<4, 4, 3, 4>>},
19301 {"OpMatrixTimesMatrix", "4x4_4x4", 2, MAT4X4, MAT4X4, MAT4X4, 0, &getInputDataD,
19302 compareFP16ArithmeticFunc<16, 16, 16, 0, fp16MatrixTimesMatrix<4, 4, 4, 4>>},
19303 {"OpOuterProduct", "2x2", 2, MAT2X2, VEC2, VEC2, 0, &getInputDataD,
19304 compareFP16ArithmeticFunc<4, 2, 2, 0, fp16OuterProduct<2, 2>>},
19305 {"OpOuterProduct", "2x3", 2, MAT2X3, VEC3, VEC2, 0, &getInputDataD,
19306 compareFP16ArithmeticFunc<8, 3, 2, 0, fp16OuterProduct<2, 3>>},
19307 {"OpOuterProduct", "2x4", 2, MAT2X4, VEC4, VEC2, 0, &getInputDataD,
19308 compareFP16ArithmeticFunc<8, 4, 2, 0, fp16OuterProduct<2, 4>>},
19309 {"OpOuterProduct", "3x2", 2, MAT3X2, VEC2, VEC3, 0, &getInputDataD,
19310 compareFP16ArithmeticFunc<8, 2, 3, 0, fp16OuterProduct<3, 2>>},
19311 {"OpOuterProduct", "3x3", 2, MAT3X3, VEC3, VEC3, 0, &getInputDataD,
19312 compareFP16ArithmeticFunc<16, 3, 3, 0, fp16OuterProduct<3, 3>>},
19313 {"OpOuterProduct", "3x4", 2, MAT3X4, VEC4, VEC3, 0, &getInputDataD,
19314 compareFP16ArithmeticFunc<16, 4, 3, 0, fp16OuterProduct<3, 4>>},
19315 {"OpOuterProduct", "4x2", 2, MAT4X2, VEC2, VEC4, 0, &getInputDataD,
19316 compareFP16ArithmeticFunc<8, 2, 4, 0, fp16OuterProduct<4, 2>>},
19317 {"OpOuterProduct", "4x3", 2, MAT4X3, VEC3, VEC4, 0, &getInputDataD,
19318 compareFP16ArithmeticFunc<16, 3, 4, 0, fp16OuterProduct<4, 3>>},
19319 {"OpOuterProduct", "4x4", 2, MAT4X4, VEC4, VEC4, 0, &getInputDataD,
19320 compareFP16ArithmeticFunc<16, 4, 4, 0, fp16OuterProduct<4, 4>>},
19321 {"Determinant", "2x2", 1, SCALAR, MAT2X2, NONE, 0, &getInputDataC,
19322 compareFP16ArithmeticFunc<1, 4, 0, 0, fp16Determinant<2>>},
19323 {"Determinant", "3x3", 1, SCALAR, MAT3X3, NONE, 0, &getInputDataC,
19324 compareFP16ArithmeticFunc<1, 16, 0, 0, fp16Determinant<3>>},
19325 {"Determinant", "4x4", 1, SCALAR, MAT4X4, NONE, 0, &getInputDataC,
19326 compareFP16ArithmeticFunc<1, 16, 0, 0, fp16Determinant<4>>},
19327 {"MatrixInverse", "2x2", 1, MAT2X2, MAT2X2, NONE, 0, &getInputDataC,
19328 compareFP16ArithmeticFunc<4, 4, 0, 0, fp16Inverse<2>>},
19329 };
19330
19331 for (uint32_t testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
19332 {
19333 const Math16TestFunc &testFunc = testFuncs[testFuncIdx];
19334
19335 createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), 0, testFunc);
19336 }
19337
19338 return testGroup.release();
19339 }
19340
// One Amber-based comparison test, as listed in the float32 comparison sets.
struct ComparisonCase
{
    // Test name; the comparison sets also use it as the prefix of the .amber file name.
    string name;
    // Short description; the comparison sets append it to "Compare output of ".
    string desc;
};
19346
19347 template <size_t C>
createFloat32ComparisonComputeSet(tcu::TestContext & testCtx)19348 tcu::TestCaseGroup *createFloat32ComparisonComputeSet(tcu::TestContext &testCtx)
19349 {
19350 const string testGroupName("comparison_" + de::toString(C));
19351 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, testGroupName.c_str()));
19352 #ifndef CTS_USES_VULKANSC
19353 const char *dataDir = "spirv_assembly/instruction/float32/comparison";
19354
19355 const ComparisonCase amberTests[] = {{"modfstruct", "modf and modfStruct"},
19356 {"frexpstruct", "frexp and frexpStruct"}};
19357
19358 for (ComparisonCase test : amberTests)
19359 {
19360 const string caseDesc("Compare output of " + test.desc);
19361 const string fileName(test.name + "_" + de::toString(C) + "_comp.amber");
19362
19363 testGroup->addChild(
19364 cts_amber::createAmberTestCase(testCtx, test.name.c_str(), caseDesc.c_str(), dataDir, fileName));
19365 }
19366 #endif
19367 return testGroup.release();
19368 }
19369
// A shader stage for which the graphics comparison tests are generated.
struct ShaderStage
{
    // Stage tag (e.g. "vert", "frag"); used in both the case name and the .amber file name.
    string name;
    // Requirement strings handed to createAmberTestCase for tests of this stage.
    vector<string> requirement;
};
19375
19376 template <size_t C>
createFloat32ComparisonGraphicsSet(tcu::TestContext & testCtx)19377 tcu::TestCaseGroup *createFloat32ComparisonGraphicsSet(tcu::TestContext &testCtx)
19378 {
19379 const string testGroupName("comparison_" + de::toString(C));
19380 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, testGroupName.c_str()));
19381 #ifndef CTS_USES_VULKANSC
19382 const char *dataDir = "spirv_assembly/instruction/float32/comparison";
19383
19384 const ShaderStage stages[] = {
19385 {"vert", vector<string>(1, "Features.vertexPipelineStoresAndAtomics")},
19386 {"tesc", vector<string>({"Features.vertexPipelineStoresAndAtomics", "Features.tessellationShader"})},
19387 {"tese", vector<string>({"Features.vertexPipelineStoresAndAtomics", "Features.tessellationShader"})},
19388 {"geom", vector<string>({"Features.vertexPipelineStoresAndAtomics", "Features.geometryShader"})},
19389 {"frag", vector<string>(0)}};
19390
19391 const ComparisonCase amberTests[] = {{"modfstruct", "modf and modfStruct"},
19392 {"frexpstruct", "frexp and frexpStruct"}};
19393
19394 for (ComparisonCase test : amberTests)
19395 for (ShaderStage stage : stages)
19396 {
19397 const string caseName(test.name + "_" + stage.name);
19398 const string caseDesc("Compare output of " + test.desc);
19399 const string fileName(test.name + "_" + de::toString(C) + "_" + stage.name + ".amber");
19400
19401 testGroup->addChild(cts_amber::createAmberTestCase(testCtx, caseName.c_str(), caseDesc.c_str(), dataDir,
19402 fileName, stage.requirement));
19403 }
19404 #endif
19405
19406 return testGroup.release();
19407 }
19408
getNumberTypeName(const NumberType type)19409 const string getNumberTypeName(const NumberType type)
19410 {
19411 if (type == NUMBERTYPE_INT32)
19412 {
19413 return "int";
19414 }
19415 else if (type == NUMBERTYPE_UINT32)
19416 {
19417 return "uint";
19418 }
19419 else if (type == NUMBERTYPE_FLOAT32)
19420 {
19421 return "float";
19422 }
19423 else
19424 {
19425 DE_ASSERT(false);
19426 return "";
19427 }
19428 }
19429
getInt(de::Random & rnd)19430 int32_t getInt(de::Random &rnd)
19431 {
19432 return rnd.getInt(std::numeric_limits<int>::min(), std::numeric_limits<int>::max());
19433 }
19434
// Returns str concatenated with itself `times` times.
// A zero or negative count produces an empty string.
const string repeatString(const string &str, int times)
{
    string repeated;

    for (int remaining = times; remaining > 0; --remaining)
        repeated.append(str);

    return repeated;
}
19444
getRandomConstantString(const NumberType type,de::Random & rnd)19445 const string getRandomConstantString(const NumberType type, de::Random &rnd)
19446 {
19447 if (type == NUMBERTYPE_INT32)
19448 {
19449 return numberToString<int32_t>(getInt(rnd));
19450 }
19451 else if (type == NUMBERTYPE_UINT32)
19452 {
19453 return numberToString<uint32_t>(rnd.getUint32());
19454 }
19455 else if (type == NUMBERTYPE_FLOAT32)
19456 {
19457 return numberToString<float>(rnd.getFloat());
19458 }
19459 else
19460 {
19461 DE_ASSERT(false);
19462 return "";
19463 }
19464 }
19465
createVectorCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19466 void createVectorCompositeCases(vector<map<string, string>> &testCases, de::Random &rnd, const NumberType type)
19467 {
19468 map<string, string> params;
19469
19470 // Vec2 to Vec4
19471 for (int width = 2; width <= 4; ++width)
19472 {
19473 const string randomConst = numberToString(getInt(rnd));
19474 const string widthStr = numberToString(width);
19475 const string composite_type = "${customType}vec" + widthStr;
19476 const int index = rnd.getInt(0, width - 1);
19477
19478 params["type"] = "vec";
19479 params["name"] = params["type"] + "_" + widthStr;
19480 params["compositeDecl"] = composite_type + " = OpTypeVector ${customType} " + widthStr + "\n";
19481 params["compositeType"] = composite_type;
19482 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19483 params["compositeConstruct"] =
19484 "%instance = OpCompositeConstruct " + composite_type + repeatString(" %filler", width) + "\n";
19485 params["indexes"] = numberToString(index);
19486 testCases.push_back(params);
19487 }
19488 }
19489
createArrayCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19490 void createArrayCompositeCases(vector<map<string, string>> &testCases, de::Random &rnd, const NumberType type)
19491 {
19492 const int limit = 10;
19493 map<string, string> params;
19494
19495 for (int width = 2; width <= limit; ++width)
19496 {
19497 string randomConst = numberToString(getInt(rnd));
19498 string widthStr = numberToString(width);
19499 int index = rnd.getInt(0, width - 1);
19500
19501 params["type"] = "array";
19502 params["name"] = params["type"] + "_" + widthStr;
19503 params["compositeDecl"] = string("%arraywidth = OpConstant %u32 " + widthStr + "\n") +
19504 "%composite = OpTypeArray ${customType} %arraywidth\n";
19505 params["compositeType"] = "%composite";
19506 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19507 params["compositeConstruct"] =
19508 "%instance = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19509 params["indexes"] = numberToString(index);
19510 testCases.push_back(params);
19511 }
19512 }
19513
createStructCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19514 void createStructCompositeCases(vector<map<string, string>> &testCases, de::Random &rnd, const NumberType type)
19515 {
19516 const int limit = 10;
19517 map<string, string> params;
19518
19519 for (int width = 2; width <= limit; ++width)
19520 {
19521 string randomConst = numberToString(getInt(rnd));
19522 int index = rnd.getInt(0, width - 1);
19523
19524 params["type"] = "struct";
19525 params["name"] = params["type"] + "_" + numberToString(width);
19526 params["compositeDecl"] = "%composite = OpTypeStruct" + repeatString(" ${customType}", width) + "\n";
19527 params["compositeType"] = "%composite";
19528 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19529 params["compositeConstruct"] =
19530 "%instance = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19531 params["indexes"] = numberToString(index);
19532 testCases.push_back(params);
19533 }
19534 }
19535
createMatrixCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19536 void createMatrixCompositeCases(vector<map<string, string>> &testCases, de::Random &rnd, const NumberType type)
19537 {
19538 map<string, string> params;
19539
19540 // Vec2 to Vec4
19541 for (int width = 2; width <= 4; ++width)
19542 {
19543 string widthStr = numberToString(width);
19544
19545 for (int column = 2; column <= 4; ++column)
19546 {
19547 int index_0 = rnd.getInt(0, column - 1);
19548 int index_1 = rnd.getInt(0, width - 1);
19549 string columnStr = numberToString(column);
19550
19551 params["type"] = "matrix";
19552 params["name"] = params["type"] + "_" + widthStr + "x" + columnStr;
19553 params["compositeDecl"] = string("%vectype = OpTypeVector ${customType} " + widthStr + "\n") +
19554 "%composite = OpTypeMatrix %vectype " + columnStr + "\n";
19555 params["compositeType"] = "%composite";
19556
19557 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) +
19558 "\n" + "%fillerVec = OpConstantComposite %vectype" + repeatString(" %filler", width) +
19559 "\n";
19560
19561 params["compositeConstruct"] =
19562 "%instance = OpCompositeConstruct %composite" + repeatString(" %fillerVec", column) + "\n";
19563 params["indexes"] = numberToString(index_0) + " " + numberToString(index_1);
19564 testCases.push_back(params);
19565 }
19566 }
19567 }
19568
createCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19569 void createCompositeCases(vector<map<string, string>> &testCases, de::Random &rnd, const NumberType type)
19570 {
19571 createVectorCompositeCases(testCases, rnd, type);
19572 createArrayCompositeCases(testCases, rnd, type);
19573 createStructCompositeCases(testCases, rnd, type);
19574 // Matrix only supports float types
19575 if (type == NUMBERTYPE_FLOAT32)
19576 {
19577 createMatrixCompositeCases(testCases, rnd, type);
19578 }
19579 }
19580
getAssemblyTypeDeclaration(const NumberType type)19581 const string getAssemblyTypeDeclaration(const NumberType type)
19582 {
19583 switch (type)
19584 {
19585 case NUMBERTYPE_INT32:
19586 return "OpTypeInt 32 1";
19587 case NUMBERTYPE_UINT32:
19588 return "OpTypeInt 32 0";
19589 case NUMBERTYPE_FLOAT32:
19590 return "OpTypeFloat 32";
19591 default:
19592 DE_ASSERT(false);
19593 return "";
19594 }
19595 }
19596
getAssemblyTypeName(const NumberType type)19597 const string getAssemblyTypeName(const NumberType type)
19598 {
19599 switch (type)
19600 {
19601 case NUMBERTYPE_INT32:
19602 return "%i32";
19603 case NUMBERTYPE_UINT32:
19604 return "%u32";
19605 case NUMBERTYPE_FLOAT32:
19606 return "%f32";
19607 default:
19608 DE_ASSERT(false);
19609 return "";
19610 }
19611 }
19612
specializeCompositeInsertShaderTemplate(const NumberType type,const map<string,string> & params)19613 const string specializeCompositeInsertShaderTemplate(const NumberType type, const map<string, string> ¶ms)
19614 {
19615 map<string, string> parameters(params);
19616
19617 const string customType = getAssemblyTypeName(type);
19618 map<string, string> substCustomType;
19619 substCustomType["customType"] = customType;
19620 parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
19621 parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
19622 parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
19623 parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
19624 parameters["customType"] = customType;
19625 parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
19626
19627 if (parameters.at("compositeType") != "%u32vec3")
19628 {
19629 parameters["u32vec3Decl"] = "%u32vec3 = OpTypeVector %u32 3\n";
19630 }
19631
19632 return StringTemplate("OpCapability Shader\n"
19633 "OpCapability Matrix\n"
19634 "OpMemoryModel Logical GLSL450\n"
19635 "OpEntryPoint GLCompute %main \"main\" %id\n"
19636 "OpExecutionMode %main LocalSize 1 1 1\n"
19637
19638 "OpSource GLSL 430\n"
19639 "OpName %main \"main\"\n"
19640 "OpName %id \"gl_GlobalInvocationID\"\n"
19641
19642 // Decorators
19643 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19644 "OpDecorate %buf BufferBlock\n"
19645 "OpDecorate %indata DescriptorSet 0\n"
19646 "OpDecorate %indata Binding 0\n"
19647 "OpDecorate %outdata DescriptorSet 0\n"
19648 "OpDecorate %outdata Binding 1\n"
19649 "OpDecorate %customarr ArrayStride 4\n"
19650 "${compositeDecorator}"
19651 "OpMemberDecorate %buf 0 Offset 0\n"
19652
19653 // General types
19654 "%void = OpTypeVoid\n"
19655 "%voidf = OpTypeFunction %void\n"
19656 "%u32 = OpTypeInt 32 0\n"
19657 "%i32 = OpTypeInt 32 1\n"
19658 "%f32 = OpTypeFloat 32\n"
19659
19660 // Composite declaration
19661 "${compositeDecl}"
19662
19663 // Constants
19664 "${filler}"
19665
19666 "${u32vec3Decl:opt}"
19667 "%uvec3ptr = OpTypePointer Input %u32vec3\n"
19668
19669 // Inherited from custom
19670 "%customptr = OpTypePointer Uniform ${customType}\n"
19671 "%customarr = OpTypeRuntimeArray ${customType}\n"
19672 "%buf = OpTypeStruct %customarr\n"
19673 "%bufptr = OpTypePointer Uniform %buf\n"
19674
19675 "%indata = OpVariable %bufptr Uniform\n"
19676 "%outdata = OpVariable %bufptr Uniform\n"
19677
19678 "%id = OpVariable %uvec3ptr Input\n"
19679 "%zero = OpConstant %i32 0\n"
19680
19681 "%main = OpFunction %void None %voidf\n"
19682 "%label = OpLabel\n"
19683 "%idval = OpLoad %u32vec3 %id\n"
19684 "%x = OpCompositeExtract %u32 %idval 0\n"
19685
19686 "%inloc = OpAccessChain %customptr %indata %zero %x\n"
19687 "%outloc = OpAccessChain %customptr %outdata %zero %x\n"
19688 // Read the input value
19689 "%inval = OpLoad ${customType} %inloc\n"
19690 // Create the composite and fill it
19691 "${compositeConstruct}"
19692 // Insert the input value to a place
19693 "%instance2 = OpCompositeInsert ${compositeType} %inval %instance ${indexes}\n"
19694 // Read back the value from the position
19695 "%out_val = OpCompositeExtract ${customType} %instance2 ${indexes}\n"
19696 // Store it in the output position
19697 " OpStore %outloc %out_val\n"
19698 " OpReturn\n"
19699 " OpFunctionEnd\n")
19700 .specialize(parameters);
19701 }
19702
19703 template <typename T>
createCompositeBuffer(T number)19704 BufferSp createCompositeBuffer(T number)
19705 {
19706 return BufferSp(new Buffer<T>(vector<T>(1, number)));
19707 }
19708
createOpCompositeInsertGroup(tcu::TestContext & testCtx)19709 tcu::TestCaseGroup *createOpCompositeInsertGroup(tcu::TestContext &testCtx)
19710 {
19711 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opcompositeinsert"));
19712 de::Random rnd(deStringHash(group->getName()));
19713
19714 for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19715 {
19716 NumberType numberType = NumberType(type);
19717 const string typeName = getNumberTypeName(numberType);
19718 de::MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(testCtx, typeName.c_str()));
19719 vector<map<string, string>> testCases;
19720
19721 createCompositeCases(testCases, rnd, numberType);
19722
19723 for (vector<map<string, string>>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19724 {
19725 ComputeShaderSpec spec;
19726
19727 spec.assembly = specializeCompositeInsertShaderTemplate(numberType, *test);
19728
19729 switch (numberType)
19730 {
19731 case NUMBERTYPE_INT32:
19732 {
19733 int32_t number = getInt(rnd);
19734 spec.inputs.push_back(createCompositeBuffer<int32_t>(number));
19735 spec.outputs.push_back(createCompositeBuffer<int32_t>(number));
19736 break;
19737 }
19738 case NUMBERTYPE_UINT32:
19739 {
19740 uint32_t number = rnd.getUint32();
19741 spec.inputs.push_back(createCompositeBuffer<uint32_t>(number));
19742 spec.outputs.push_back(createCompositeBuffer<uint32_t>(number));
19743 break;
19744 }
19745 case NUMBERTYPE_FLOAT32:
19746 {
19747 float number = rnd.getFloat();
19748 spec.inputs.push_back(createCompositeBuffer<float>(number));
19749 spec.outputs.push_back(createCompositeBuffer<float>(number));
19750 break;
19751 }
19752 default:
19753 DE_ASSERT(false);
19754 }
19755
19756 spec.numWorkGroups = IVec3(1, 1, 1);
19757 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), spec));
19758 }
19759 group->addChild(subGroup.release());
19760 }
19761 return group.release();
19762 }
19763
// Describes the wrapper struct used by the OpInBoundsAccessChain shader template.
struct AssemblyStructInfo
{
    uint32_t components; // Number of members the wrapper struct is declared with.
    uint32_t index;      // Member index used as the first index when accessing the struct.

    AssemblyStructInfo(const uint32_t comp, const uint32_t idx) : components(comp), index(idx)
    {
    }
};
19773
specializeInBoundsShaderTemplate(const NumberType type,const AssemblyStructInfo & structInfo,const map<string,string> & params)19774 const string specializeInBoundsShaderTemplate(const NumberType type, const AssemblyStructInfo &structInfo,
19775 const map<string, string> ¶ms)
19776 {
19777 // Create the full index string
19778 string fullIndex = numberToString(structInfo.index) + " " + params.at("indexes");
19779 // Convert it to list of indexes
19780 vector<string> indexes = de::splitString(fullIndex, ' ');
19781
19782 map<string, string> parameters(params);
19783 parameters["structType"] = repeatString(" ${compositeType}", structInfo.components);
19784 parameters["structConstruct"] = repeatString(" %instance", structInfo.components);
19785 parameters["insertIndexes"] = fullIndex;
19786
19787 // In matrix cases the last two index is the CompositeExtract indexes
19788 const uint32_t extractIndexes = (parameters["type"] == "matrix") ? 2 : 1;
19789
19790 // Construct the extractIndex
19791 for (vector<string>::const_iterator index = indexes.end() - extractIndexes; index != indexes.end(); ++index)
19792 {
19793 parameters["extractIndexes"] += " " + *index;
19794 }
19795
19796 // Remove the last 1 or 2 element depends on matrix case or not
19797 indexes.erase(indexes.end() - extractIndexes, indexes.end());
19798
19799 uint32_t id = 0;
19800 // Generate AccessChain index expressions (except for the last one, because we use ptr to the composite)
19801 for (vector<string>::const_iterator index = indexes.begin(); index != indexes.end(); ++index)
19802 {
19803 string indexId = "%index_" + numberToString(id++);
19804 parameters["accessChainConstDeclaration"] += indexId + " = OpConstant %u32 " + *index + "\n";
19805 parameters["accessChainIndexes"] += " " + indexId;
19806 }
19807
19808 parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
19809
19810 const string customType = getAssemblyTypeName(type);
19811 map<string, string> substCustomType;
19812 substCustomType["customType"] = customType;
19813 parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
19814 parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
19815 parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
19816 parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
19817 parameters["customType"] = customType;
19818
19819 const string compositeType = parameters.at("compositeType");
19820 map<string, string> substCompositeType;
19821 substCompositeType["compositeType"] = compositeType;
19822 parameters["structType"] = StringTemplate(parameters.at("structType")).specialize(substCompositeType);
19823 if (compositeType != "%u32vec3")
19824 {
19825 parameters["u32vec3Decl"] = "%u32vec3 = OpTypeVector %u32 3\n";
19826 }
19827
19828 return StringTemplate("OpCapability Shader\n"
19829 "OpCapability Matrix\n"
19830 "OpMemoryModel Logical GLSL450\n"
19831 "OpEntryPoint GLCompute %main \"main\" %id\n"
19832 "OpExecutionMode %main LocalSize 1 1 1\n"
19833
19834 "OpSource GLSL 430\n"
19835 "OpName %main \"main\"\n"
19836 "OpName %id \"gl_GlobalInvocationID\"\n"
19837 // Decorators
19838 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19839 "OpDecorate %buf BufferBlock\n"
19840 "OpDecorate %indata DescriptorSet 0\n"
19841 "OpDecorate %indata Binding 0\n"
19842 "OpDecorate %outdata DescriptorSet 0\n"
19843 "OpDecorate %outdata Binding 1\n"
19844 "OpDecorate %customarr ArrayStride 4\n"
19845 "${compositeDecorator}"
19846 "OpMemberDecorate %buf 0 Offset 0\n"
19847 // General types
19848 "%void = OpTypeVoid\n"
19849 "%voidf = OpTypeFunction %void\n"
19850 "%i32 = OpTypeInt 32 1\n"
19851 "%u32 = OpTypeInt 32 0\n"
19852 "%f32 = OpTypeFloat 32\n"
19853 // Custom types
19854 "${compositeDecl}"
19855 // %u32vec3 if not already declared in ${compositeDecl}
19856 "${u32vec3Decl:opt}"
19857 "%uvec3ptr = OpTypePointer Input %u32vec3\n"
19858 // Inherited from composite
19859 "%composite_p = OpTypePointer Function ${compositeType}\n"
19860 "%struct_t = OpTypeStruct${structType}\n"
19861 "%struct_p = OpTypePointer Function %struct_t\n"
19862 // Constants
19863 "${filler}"
19864 "${accessChainConstDeclaration}"
19865 // Inherited from custom
19866 "%customptr = OpTypePointer Uniform ${customType}\n"
19867 "%customarr = OpTypeRuntimeArray ${customType}\n"
19868 "%buf = OpTypeStruct %customarr\n"
19869 "%bufptr = OpTypePointer Uniform %buf\n"
19870 "%indata = OpVariable %bufptr Uniform\n"
19871 "%outdata = OpVariable %bufptr Uniform\n"
19872
19873 "%id = OpVariable %uvec3ptr Input\n"
19874 "%zero = OpConstant %u32 0\n"
19875 "%main = OpFunction %void None %voidf\n"
19876 "%label = OpLabel\n"
19877 "%struct_v = OpVariable %struct_p Function\n"
19878 "%idval = OpLoad %u32vec3 %id\n"
19879 "%x = OpCompositeExtract %u32 %idval 0\n"
19880 // Create the input/output type
19881 "%inloc = OpInBoundsAccessChain %customptr %indata %zero %x\n"
19882 "%outloc = OpInBoundsAccessChain %customptr %outdata %zero %x\n"
19883 // Read the input value
19884 "%inval = OpLoad ${customType} %inloc\n"
19885 // Create the composite and fill it
19886 "${compositeConstruct}"
19887 // Create the struct and fill it with the composite
19888 "%struct = OpCompositeConstruct %struct_t${structConstruct}\n"
19889 // Insert the value
19890 "%comp_obj = OpCompositeInsert %struct_t %inval %struct ${insertIndexes}\n"
19891 // Store the object
19892 " OpStore %struct_v %comp_obj\n"
19893 // Get deepest possible composite pointer
19894 "%inner_ptr = OpInBoundsAccessChain %composite_p %struct_v${accessChainIndexes}\n"
19895 "%read_obj = OpLoad ${compositeType} %inner_ptr\n"
19896 // Read back the stored value
19897 "%read_val = OpCompositeExtract ${customType} %read_obj${extractIndexes}\n"
19898 " OpStore %outloc %read_val\n"
19899 " OpReturn\n"
19900 " OpFunctionEnd\n")
19901 .specialize(parameters);
19902 }
19903
createOpInBoundsAccessChainGroup(tcu::TestContext & testCtx)19904 tcu::TestCaseGroup *createOpInBoundsAccessChainGroup(tcu::TestContext &testCtx)
19905 {
19906 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opinboundsaccesschain"));
19907 de::Random rnd(deStringHash(group->getName()));
19908
19909 for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19910 {
19911 NumberType numberType = NumberType(type);
19912 const string typeName = getNumberTypeName(numberType);
19913 de::MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(testCtx, typeName.c_str()));
19914
19915 vector<map<string, string>> testCases;
19916 createCompositeCases(testCases, rnd, numberType);
19917
19918 for (vector<map<string, string>>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19919 {
19920 ComputeShaderSpec spec;
19921
19922 // Number of components inside of a struct
19923 uint32_t structComponents = rnd.getInt(2, 8);
19924 // Component index value
19925 uint32_t structIndex = rnd.getInt(0, structComponents - 1);
19926 AssemblyStructInfo structInfo(structComponents, structIndex);
19927
19928 spec.assembly = specializeInBoundsShaderTemplate(numberType, structInfo, *test);
19929
19930 switch (numberType)
19931 {
19932 case NUMBERTYPE_INT32:
19933 {
19934 int32_t number = getInt(rnd);
19935 spec.inputs.push_back(createCompositeBuffer<int32_t>(number));
19936 spec.outputs.push_back(createCompositeBuffer<int32_t>(number));
19937 break;
19938 }
19939 case NUMBERTYPE_UINT32:
19940 {
19941 uint32_t number = rnd.getUint32();
19942 spec.inputs.push_back(createCompositeBuffer<uint32_t>(number));
19943 spec.outputs.push_back(createCompositeBuffer<uint32_t>(number));
19944 break;
19945 }
19946 case NUMBERTYPE_FLOAT32:
19947 {
19948 float number = rnd.getFloat();
19949 spec.inputs.push_back(createCompositeBuffer<float>(number));
19950 spec.outputs.push_back(createCompositeBuffer<float>(number));
19951 break;
19952 }
19953 default:
19954 DE_ASSERT(false);
19955 }
19956 spec.numWorkGroups = IVec3(1, 1, 1);
19957 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), spec));
19958 }
19959 group->addChild(subGroup.release());
19960 }
19961 return group.release();
19962 }
19963
19964 // If the params missing, uninitialized case
19965 const string specializeDefaultOutputShaderTemplate(const NumberType type,
19966 const map<string, string> ¶ms = map<string, string>())
19967 {
19968 map<string, string> parameters(params);
19969
19970 parameters["customType"] = getAssemblyTypeName(type);
19971
19972 // Declare the const value, and use it in the initializer
19973 if (params.find("constValue") != params.end())
19974 {
19975 parameters["variableInitializer"] = " %const";
19976 }
19977 // Uninitialized case
19978 else
19979 {
19980 parameters["commentDecl"] = ";";
19981 }
19982
19983 return StringTemplate("OpCapability Shader\n"
19984 "OpMemoryModel Logical GLSL450\n"
19985 "OpEntryPoint GLCompute %main \"main\" %id\n"
19986 "OpExecutionMode %main LocalSize 1 1 1\n"
19987 "OpSource GLSL 430\n"
19988 "OpName %main \"main\"\n"
19989 "OpName %id \"gl_GlobalInvocationID\"\n"
19990 // Decorators
19991 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19992 "OpDecorate %indata DescriptorSet 0\n"
19993 "OpDecorate %indata Binding 0\n"
19994 "OpDecorate %outdata DescriptorSet 0\n"
19995 "OpDecorate %outdata Binding 1\n"
19996 "OpDecorate %in_arr ArrayStride 4\n"
19997 "OpDecorate %in_buf BufferBlock\n"
19998 "OpMemberDecorate %in_buf 0 Offset 0\n"
19999 // Base types
20000 "%void = OpTypeVoid\n"
20001 "%voidf = OpTypeFunction %void\n"
20002 "%u32 = OpTypeInt 32 0\n"
20003 "%i32 = OpTypeInt 32 1\n"
20004 "%f32 = OpTypeFloat 32\n"
20005 "%uvec3 = OpTypeVector %u32 3\n"
20006 "%uvec3ptr = OpTypePointer Input %uvec3\n"
20007 "${commentDecl:opt}%const = OpConstant ${customType} ${constValue:opt}\n"
20008 // Derived types
20009 "%in_ptr = OpTypePointer Uniform ${customType}\n"
20010 "%in_arr = OpTypeRuntimeArray ${customType}\n"
20011 "%in_buf = OpTypeStruct %in_arr\n"
20012 "%in_bufptr = OpTypePointer Uniform %in_buf\n"
20013 "%indata = OpVariable %in_bufptr Uniform\n"
20014 "%outdata = OpVariable %in_bufptr Uniform\n"
20015 "%id = OpVariable %uvec3ptr Input\n"
20016 "%var_ptr = OpTypePointer Function ${customType}\n"
20017 // Constants
20018 "%zero = OpConstant %i32 0\n"
20019 // Main function
20020 "%main = OpFunction %void None %voidf\n"
20021 "%label = OpLabel\n"
20022 "%out_var = OpVariable %var_ptr Function${variableInitializer:opt}\n"
20023 "%idval = OpLoad %uvec3 %id\n"
20024 "%x = OpCompositeExtract %u32 %idval 0\n"
20025 "%inloc = OpAccessChain %in_ptr %indata %zero %x\n"
20026 "%outloc = OpAccessChain %in_ptr %outdata %zero %x\n"
20027
20028 "%outval = OpLoad ${customType} %out_var\n"
20029 " OpStore %outloc %outval\n"
20030 " OpReturn\n"
20031 " OpFunctionEnd\n")
20032 .specialize(parameters);
20033 }
20034
compareFloats(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)20035 bool compareFloats(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
20036 const std::vector<Resource> &expectedOutputs, TestLog &log)
20037 {
20038 DE_ASSERT(outputAllocs.size() != 0);
20039 DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
20040
20041 // Use custom epsilon because of the float->string conversion
20042 const float epsilon = 0.00001f;
20043
20044 for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
20045 {
20046 vector<uint8_t> expectedBytes;
20047 float expected;
20048 float actual;
20049
20050 expectedOutputs[outputNdx].getBytes(expectedBytes);
20051 memcpy(&expected, &expectedBytes.front(), expectedBytes.size());
20052 memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size());
20053
20054 // Test with epsilon
20055 if (fabs(expected - actual) > epsilon)
20056 {
20057 log << TestLog::Message << "Error: The actual and expected values not matching."
20058 << " Expected: " << expected << " Actual: " << actual << " Epsilon: " << epsilon << TestLog::EndMessage;
20059 return false;
20060 }
20061 }
20062 return true;
20063 }
20064
20065 // Checks if the driver crash with uninitialized cases
passthruVerify(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)20066 bool passthruVerify(const std::vector<Resource> &, const vector<AllocationSp> &outputAllocs,
20067 const std::vector<Resource> &expectedOutputs, TestLog &)
20068 {
20069 DE_ASSERT(outputAllocs.size() != 0);
20070 DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
20071
20072 // Copy and discard the result.
20073 for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
20074 {
20075 vector<uint8_t> expectedBytes;
20076 expectedOutputs[outputNdx].getBytes(expectedBytes);
20077
20078 const size_t width = expectedBytes.size();
20079 vector<char> data(width);
20080
20081 memcpy(&data[0], outputAllocs[outputNdx]->getHostPtr(), width);
20082 }
20083 return true;
20084 }
20085
createShaderDefaultOutputGroup(tcu::TestContext & testCtx)20086 tcu::TestCaseGroup *createShaderDefaultOutputGroup(tcu::TestContext &testCtx)
20087 {
20088 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "shader_default_output"));
20089 de::Random rnd(deStringHash(group->getName()));
20090
20091 for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
20092 {
20093 NumberType numberType = NumberType(type);
20094 const string typeName = getNumberTypeName(numberType);
20095 de::MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(testCtx, typeName.c_str()));
20096
20097 // 2 similar subcases (initialized and uninitialized)
20098 for (int subCase = 0; subCase < 2; ++subCase)
20099 {
20100 ComputeShaderSpec spec;
20101 spec.numWorkGroups = IVec3(1, 1, 1);
20102
20103 map<string, string> params;
20104
20105 switch (numberType)
20106 {
20107 case NUMBERTYPE_INT32:
20108 {
20109 int32_t number = getInt(rnd);
20110 spec.inputs.push_back(createCompositeBuffer<int32_t>(number));
20111 spec.outputs.push_back(createCompositeBuffer<int32_t>(number));
20112 params["constValue"] = numberToString(number);
20113 break;
20114 }
20115 case NUMBERTYPE_UINT32:
20116 {
20117 uint32_t number = rnd.getUint32();
20118 spec.inputs.push_back(createCompositeBuffer<uint32_t>(number));
20119 spec.outputs.push_back(createCompositeBuffer<uint32_t>(number));
20120 params["constValue"] = numberToString(number);
20121 break;
20122 }
20123 case NUMBERTYPE_FLOAT32:
20124 {
20125 float number = rnd.getFloat();
20126 spec.inputs.push_back(createCompositeBuffer<float>(number));
20127 spec.outputs.push_back(createCompositeBuffer<float>(number));
20128 spec.verifyIO = &compareFloats;
20129 params["constValue"] = numberToString(number);
20130 break;
20131 }
20132 default:
20133 DE_ASSERT(false);
20134 }
20135
20136 // Initialized subcase
20137 if (!subCase)
20138 {
20139 spec.assembly = specializeDefaultOutputShaderTemplate(numberType, params);
20140 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "initialized", spec));
20141 }
20142 // Uninitialized subcase
20143 else
20144 {
20145 spec.assembly = specializeDefaultOutputShaderTemplate(numberType);
20146 spec.verifyIO = &passthruVerify;
20147 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "uninitialized", spec));
20148 }
20149 }
20150 group->addChild(subGroup.release());
20151 }
20152 return group.release();
20153 }
20154
createOpNopTests(tcu::TestContext & testCtx)20155 tcu::TestCaseGroup *createOpNopTests(tcu::TestContext &testCtx)
20156 {
20157 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "opnop"));
20158 RGBA defaultColors[4];
20159 map<string, string> opNopFragments;
20160
20161 getDefaultColors(defaultColors);
20162
20163 opNopFragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20164 "%param1 = OpFunctionParameter %v4f32\n"
20165 "%label_testfun = OpLabel\n"
20166 "OpNop\n"
20167 "OpNop\n"
20168 "OpNop\n"
20169 "OpNop\n"
20170 "OpNop\n"
20171 "OpNop\n"
20172 "OpNop\n"
20173 "OpNop\n"
20174 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20175 "%b = OpFAdd %f32 %a %a\n"
20176 "OpNop\n"
20177 "%c = OpFSub %f32 %b %a\n"
20178 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
20179 "OpNop\n"
20180 "OpNop\n"
20181 "OpReturnValue %ret\n"
20182 "OpFunctionEnd\n";
20183
20184 createTestsForAllStages("opnop", defaultColors, defaultColors, opNopFragments, testGroup.get());
20185
20186 return testGroup.release();
20187 }
20188
createOpNameTests(tcu::TestContext & testCtx)20189 tcu::TestCaseGroup *createOpNameTests(tcu::TestContext &testCtx)
20190 {
20191 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "opname"));
20192 RGBA defaultColors[4];
20193 map<string, string> opNameFragments;
20194
20195 getDefaultColors(defaultColors);
20196
20197 opNameFragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20198 "%param1 = OpFunctionParameter %v4f32\n"
20199 "%label_func = OpLabel\n"
20200 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20201 "%b = OpFAdd %f32 %a %a\n"
20202 "%c = OpFSub %f32 %b %a\n"
20203 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
20204 "OpReturnValue %ret\n"
20205 "OpFunctionEnd\n";
20206
20207 opNameFragments["debug"] = "OpName %BP_main \"not_main\"";
20208
20209 createTestsForAllStages("opname", defaultColors, defaultColors, opNameFragments, testGroup.get());
20210
20211 return testGroup.release();
20212 }
20213
createFloat16Tests(tcu::TestContext & testCtx)20214 tcu::TestCaseGroup *createFloat16Tests(tcu::TestContext &testCtx)
20215 {
20216 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "float16"));
20217
20218 testGroup->addChild(createOpConstantFloat16Tests(testCtx));
20219 testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITH_NAN));
20220 testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITHOUT_NAN));
20221 testGroup->addChild(createFloat16FuncSet<GraphicsResources>(testCtx));
20222 testGroup->addChild(createFloat16VectorExtractSet<GraphicsResources>(testCtx));
20223 testGroup->addChild(createFloat16VectorInsertSet<GraphicsResources>(testCtx));
20224 testGroup->addChild(createFloat16VectorShuffleSet<GraphicsResources>(testCtx));
20225 testGroup->addChild(createFloat16CompositeConstructSet<GraphicsResources>(testCtx));
20226 testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeExtract"));
20227 testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeInsert"));
20228 testGroup->addChild(createFloat16ArithmeticSet<GraphicsResources>(testCtx));
20229 testGroup->addChild(createFloat16ArithmeticSet<1, GraphicsResources>(testCtx));
20230 testGroup->addChild(createFloat16ArithmeticSet<2, GraphicsResources>(testCtx));
20231 testGroup->addChild(createFloat16ArithmeticSet<3, GraphicsResources>(testCtx));
20232 testGroup->addChild(createFloat16ArithmeticSet<4, GraphicsResources>(testCtx));
20233
20234 return testGroup.release();
20235 }
20236
createFloat32Tests(tcu::TestContext & testCtx)20237 tcu::TestCaseGroup *createFloat32Tests(tcu::TestContext &testCtx)
20238 {
20239 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "float32"));
20240
20241 testGroup->addChild(createFloat32ComparisonGraphicsSet<1>(testCtx));
20242 testGroup->addChild(createFloat32ComparisonGraphicsSet<2>(testCtx));
20243 testGroup->addChild(createFloat32ComparisonGraphicsSet<3>(testCtx));
20244 testGroup->addChild(createFloat32ComparisonGraphicsSet<4>(testCtx));
20245
20246 return testGroup.release();
20247 }
20248
createFloat16Group(tcu::TestContext & testCtx)20249 tcu::TestCaseGroup *createFloat16Group(tcu::TestContext &testCtx)
20250 {
20251 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "float16"));
20252
20253 testGroup->addChild(createFloat16OpConstantCompositeGroup(testCtx));
20254 testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITH_NAN));
20255 testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITHOUT_NAN));
20256 testGroup->addChild(createFloat16FuncSet<ComputeShaderSpec>(testCtx));
20257 testGroup->addChild(createFloat16VectorExtractSet<ComputeShaderSpec>(testCtx));
20258 testGroup->addChild(createFloat16VectorInsertSet<ComputeShaderSpec>(testCtx));
20259 testGroup->addChild(createFloat16VectorShuffleSet<ComputeShaderSpec>(testCtx));
20260 testGroup->addChild(createFloat16CompositeConstructSet<ComputeShaderSpec>(testCtx));
20261 testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeExtract"));
20262 testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeInsert"));
20263 testGroup->addChild(createFloat16ArithmeticSet<ComputeShaderSpec>(testCtx));
20264 testGroup->addChild(createFloat16ArithmeticSet<1, ComputeShaderSpec>(testCtx));
20265 testGroup->addChild(createFloat16ArithmeticSet<2, ComputeShaderSpec>(testCtx));
20266 testGroup->addChild(createFloat16ArithmeticSet<3, ComputeShaderSpec>(testCtx));
20267 testGroup->addChild(createFloat16ArithmeticSet<4, ComputeShaderSpec>(testCtx));
20268
20269 return testGroup.release();
20270 }
20271
createFloat32Group(tcu::TestContext & testCtx)20272 tcu::TestCaseGroup *createFloat32Group(tcu::TestContext &testCtx)
20273 {
20274 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "float32"));
20275
20276 testGroup->addChild(createFloat32ComparisonComputeSet<1>(testCtx));
20277 testGroup->addChild(createFloat32ComparisonComputeSet<2>(testCtx));
20278 testGroup->addChild(createFloat32ComparisonComputeSet<3>(testCtx));
20279 testGroup->addChild(createFloat32ComparisonComputeSet<4>(testCtx));
20280
20281 return testGroup.release();
20282 }
20283
createBoolMixedBitSizeGroup(tcu::TestContext & testCtx)20284 tcu::TestCaseGroup *createBoolMixedBitSizeGroup(tcu::TestContext &testCtx)
20285 {
20286 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "mixed_bitsize"));
20287
20288 de::Random rnd(deStringHash(group->getName()));
20289 const int numElements = 100;
20290 vector<float> inputData(numElements, 0);
20291 vector<float> outputData(numElements, 0);
20292 fillRandomScalars(rnd, 0.0f, 100.0f, &inputData[0], 100);
20293
20294 const StringTemplate shaderTemplate("${CAPS}\n"
20295 "OpMemoryModel Logical GLSL450\n"
20296 "OpEntryPoint GLCompute %main \"main\" %id\n"
20297 "OpExecutionMode %main LocalSize 1 1 1\n"
20298 "OpSource GLSL 430\n"
20299 "OpName %main \"main\"\n"
20300 "OpName %id \"gl_GlobalInvocationID\"\n"
20301
20302 "OpDecorate %id BuiltIn GlobalInvocationId\n"
20303
20304 + string(getComputeAsmInputOutputBufferTraits()) +
20305 string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
20306
20307 "%id = OpVariable %uvec3ptr Input\n"
20308 "${CONST}\n"
20309 "%main = OpFunction %void None %voidf\n"
20310 "%label = OpLabel\n"
20311 "%idval = OpLoad %uvec3 %id\n"
20312 "%x = OpCompositeExtract %u32 %idval 0\n"
20313 "%inloc = OpAccessChain %f32ptr %indata %c0i32 %x\n"
20314
20315 "${TEST}\n"
20316
20317 "%outloc = OpAccessChain %f32ptr %outdata %c0i32 %x\n"
20318 " OpStore %outloc %res\n"
20319 " OpReturn\n"
20320 " OpFunctionEnd\n");
20321
20322 // Each test case produces 4 boolean values, and we want each of these values
20323 // to come froma different combination of the available bit-sizes, so compute
20324 // all possible combinations here.
20325 vector<uint32_t> widths;
20326 widths.push_back(32);
20327 widths.push_back(16);
20328 widths.push_back(8);
20329
20330 vector<IVec4> cases;
20331 for (size_t width0 = 0; width0 < widths.size(); width0++)
20332 {
20333 for (size_t width1 = 0; width1 < widths.size(); width1++)
20334 {
20335 for (size_t width2 = 0; width2 < widths.size(); width2++)
20336 {
20337 for (size_t width3 = 0; width3 < widths.size(); width3++)
20338 {
20339 cases.push_back(IVec4(widths[width0], widths[width1], widths[width2], widths[width3]));
20340 }
20341 }
20342 }
20343 }
20344
20345 for (size_t caseNdx = 0; caseNdx < cases.size(); caseNdx++)
20346 {
20347 /// Skip cases where all bitsizes are the same, we are only interested in testing booleans produced from instructions with different native bit-sizes
20348 if (cases[caseNdx][0] == cases[caseNdx][1] && cases[caseNdx][0] == cases[caseNdx][2] &&
20349 cases[caseNdx][0] == cases[caseNdx][3])
20350 continue;
20351
20352 map<string, string> specializations;
20353 ComputeShaderSpec spec;
20354
20355 // Inject appropriate capabilities and reference constants depending
20356 // on the bit-sizes required by this test case
20357 bool hasFloat32 =
20358 cases[caseNdx][0] == 32 || cases[caseNdx][1] == 32 || cases[caseNdx][2] == 32 || cases[caseNdx][3] == 32;
20359 bool hasFloat16 =
20360 cases[caseNdx][0] == 16 || cases[caseNdx][1] == 16 || cases[caseNdx][2] == 16 || cases[caseNdx][3] == 16;
20361 bool hasInt8 =
20362 cases[caseNdx][0] == 8 || cases[caseNdx][1] == 8 || cases[caseNdx][2] == 8 || cases[caseNdx][3] == 8;
20363
20364 string capsStr = "OpCapability Shader\n";
20365 string constStr = "%c0i32 = OpConstant %i32 0\n"
20366 "%c1f32 = OpConstant %f32 1.0\n"
20367 "%c0f32 = OpConstant %f32 0.0\n";
20368
20369 if (hasFloat32)
20370 {
20371 constStr += "%c10f32 = OpConstant %f32 10.0\n"
20372 "%c25f32 = OpConstant %f32 25.0\n"
20373 "%c50f32 = OpConstant %f32 50.0\n"
20374 "%c90f32 = OpConstant %f32 90.0\n";
20375 }
20376
20377 if (hasFloat16)
20378 {
20379 capsStr += "OpCapability Float16\n";
20380 constStr += "%f16 = OpTypeFloat 16\n"
20381 "%c10f16 = OpConstant %f16 10.0\n"
20382 "%c25f16 = OpConstant %f16 25.0\n"
20383 "%c50f16 = OpConstant %f16 50.0\n"
20384 "%c90f16 = OpConstant %f16 90.0\n";
20385 }
20386
20387 if (hasInt8)
20388 {
20389 capsStr += "OpCapability Int8\n";
20390 constStr += "%i8 = OpTypeInt 8 1\n"
20391 "%c10i8 = OpConstant %i8 10\n"
20392 "%c25i8 = OpConstant %i8 25\n"
20393 "%c50i8 = OpConstant %i8 50\n"
20394 "%c90i8 = OpConstant %i8 90\n";
20395 }
20396
20397 // Each invocation reads a different float32 value as input. Depending on
20398 // the bit-sizes required by the particular test case, we also produce
20399 // float16 and/or and int8 values by converting from the 32-bit float.
20400 string testStr = "";
20401 testStr += "%inval32 = OpLoad %f32 %inloc\n";
20402 if (hasFloat16)
20403 testStr += "%inval16 = OpFConvert %f16 %inval32\n";
20404 if (hasInt8)
20405 testStr += "%inval8 = OpConvertFToS %i8 %inval32\n";
20406
20407 // Because conversions from Float to Int round towards 0 we want our "greater" comparisons to be >=,
20408 // that way a float32/float16 comparison such as 50.6f >= 50.0f will preserve its result
20409 // when converted to int8, since FtoS(50.6f) results in 50. For "less" comparisons, it is the
20410 // other way around, so in this case we want < instead of <=.
20411 if (cases[caseNdx][0] == 32)
20412 testStr += "%cmp1 = OpFOrdGreaterThanEqual %bool %inval32 %c25f32\n";
20413 else if (cases[caseNdx][0] == 16)
20414 testStr += "%cmp1 = OpFOrdGreaterThanEqual %bool %inval16 %c25f16\n";
20415 else
20416 testStr += "%cmp1 = OpSGreaterThanEqual %bool %inval8 %c25i8\n";
20417
20418 if (cases[caseNdx][1] == 32)
20419 testStr += "%cmp2 = OpFOrdLessThan %bool %inval32 %c50f32\n";
20420 else if (cases[caseNdx][1] == 16)
20421 testStr += "%cmp2 = OpFOrdLessThan %bool %inval16 %c50f16\n";
20422 else
20423 testStr += "%cmp2 = OpSLessThan %bool %inval8 %c50i8\n";
20424
20425 if (cases[caseNdx][2] == 32)
20426 testStr += "%cmp3 = OpFOrdLessThan %bool %inval32 %c10f32\n";
20427 else if (cases[caseNdx][2] == 16)
20428 testStr += "%cmp3 = OpFOrdLessThan %bool %inval16 %c10f16\n";
20429 else
20430 testStr += "%cmp3 = OpSLessThan %bool %inval8 %c10i8\n";
20431
20432 if (cases[caseNdx][3] == 32)
20433 testStr += "%cmp4 = OpFOrdGreaterThanEqual %bool %inval32 %c90f32\n";
20434 else if (cases[caseNdx][3] == 16)
20435 testStr += "%cmp4 = OpFOrdGreaterThanEqual %bool %inval16 %c90f16\n";
20436 else
20437 testStr += "%cmp4 = OpSGreaterThanEqual %bool %inval8 %c90i8\n";
20438
20439 testStr += "%and1 = OpLogicalAnd %bool %cmp1 %cmp2\n";
20440 testStr += "%or1 = OpLogicalOr %bool %cmp3 %cmp4\n";
20441 testStr += "%or2 = OpLogicalOr %bool %and1 %or1\n";
20442 testStr += "%not1 = OpLogicalNot %bool %or2\n";
20443 testStr += "%res = OpSelect %f32 %not1 %c1f32 %c0f32\n";
20444
20445 specializations["CAPS"] = capsStr;
20446 specializations["CONST"] = constStr;
20447 specializations["TEST"] = testStr;
20448
20449 // Compute expected result by evaluating the boolean expression computed in the shader for each input value
20450 for (size_t ndx = 0; ndx < numElements; ++ndx)
20451 outputData[ndx] = !((inputData[ndx] >= 25.0f && inputData[ndx] < 50.0f) ||
20452 (inputData[ndx] < 10.0f || inputData[ndx] >= 90.0f));
20453
20454 spec.assembly = shaderTemplate.specialize(specializations);
20455 spec.inputs.push_back(BufferSp(new Float32Buffer(inputData)));
20456 spec.outputs.push_back(BufferSp(new Float32Buffer(outputData)));
20457 spec.numWorkGroups = IVec3(numElements, 1, 1);
20458 if (hasFloat16)
20459 spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
20460 if (hasInt8)
20461 spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
20462 spec.extensions.push_back("VK_KHR_shader_float16_int8");
20463
20464 string testName = "b" + de::toString(cases[caseNdx][0]) + "b" + de::toString(cases[caseNdx][1]) + "b" +
20465 de::toString(cases[caseNdx][2]) + "b" + de::toString(cases[caseNdx][3]);
20466 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
20467 }
20468
20469 return group.release();
20470 }
20471
createBoolGroup(tcu::TestContext & testCtx)20472 tcu::TestCaseGroup *createBoolGroup(tcu::TestContext &testCtx)
20473 {
20474 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "bool"));
20475
20476 testGroup->addChild(createBoolMixedBitSizeGroup(testCtx));
20477
20478 return testGroup.release();
20479 }
20480
createOpNameAbuseTests(tcu::TestContext & testCtx)20481 tcu::TestCaseGroup *createOpNameAbuseTests(tcu::TestContext &testCtx)
20482 {
20483 de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opname_abuse"));
20484 vector<CaseParameter> abuseCases;
20485 RGBA defaultColors[4];
20486 map<string, string> opNameFragments;
20487
20488 getOpNameAbuseCases(abuseCases);
20489 getDefaultColors(defaultColors);
20490
20491 opNameFragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20492 "%param1 = OpFunctionParameter %v4f32\n"
20493 "%label_func = OpLabel\n"
20494 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20495 "%b = OpFAdd %f32 %a %a\n"
20496 "%c = OpFSub %f32 %b %a\n"
20497 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
20498 "OpReturnValue %ret\n"
20499 "OpFunctionEnd\n";
20500
20501 for (unsigned int i = 0; i < abuseCases.size(); i++)
20502 {
20503 string casename;
20504 casename = string("main") + abuseCases[i].name;
20505
20506 opNameFragments["debug"] = "OpName %BP_main \"" + abuseCases[i].param + "\"";
20507
20508 createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20509 }
20510
20511 for (unsigned int i = 0; i < abuseCases.size(); i++)
20512 {
20513 string casename;
20514 casename = string("b") + abuseCases[i].name;
20515
20516 opNameFragments["debug"] = "OpName %b \"" + abuseCases[i].param + "\"";
20517
20518 createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20519 }
20520
20521 {
20522 opNameFragments["debug"] = "OpName %test_code \"name1\"\n"
20523 "OpName %param1 \"name2\"\n"
20524 "OpName %a \"name3\"\n"
20525 "OpName %b \"name4\"\n"
20526 "OpName %c \"name5\"\n"
20527 "OpName %ret \"name6\"\n";
20528
20529 createTestsForAllStages("everything_named", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20530 }
20531
20532 {
20533 opNameFragments["debug"] = "OpName %test_code \"the_same\"\n"
20534 "OpName %param1 \"the_same\"\n"
20535 "OpName %a \"the_same\"\n"
20536 "OpName %b \"the_same\"\n"
20537 "OpName %c \"the_same\"\n"
20538 "OpName %ret \"the_same\"\n";
20539
20540 createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opNameFragments,
20541 abuseGroup.get());
20542 }
20543
20544 {
20545 opNameFragments["debug"] = "OpName %BP_main \"to_be\"\n"
20546 "OpName %BP_main \"or_not\"\n"
20547 "OpName %BP_main \"to_be\"\n";
20548
20549 createTestsForAllStages("main_has_multiple_names", defaultColors, defaultColors, opNameFragments,
20550 abuseGroup.get());
20551 }
20552
20553 {
20554 opNameFragments["debug"] = "OpName %b \"to_be\"\n"
20555 "OpName %b \"or_not\"\n"
20556 "OpName %b \"to_be\"\n";
20557
20558 createTestsForAllStages("b_has_multiple_names", defaultColors, defaultColors, opNameFragments,
20559 abuseGroup.get());
20560 }
20561
20562 return abuseGroup.release();
20563 }
20564
createOpMemberNameAbuseTests(tcu::TestContext & testCtx)20565 tcu::TestCaseGroup *createOpMemberNameAbuseTests(tcu::TestContext &testCtx)
20566 {
20567 de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opmembername_abuse"));
20568 vector<CaseParameter> abuseCases;
20569 RGBA defaultColors[4];
20570 map<string, string> opMemberNameFragments;
20571
20572 getOpNameAbuseCases(abuseCases);
20573 getDefaultColors(defaultColors);
20574
20575 opMemberNameFragments["pre_main"] = "%f3str = OpTypeStruct %f32 %f32 %f32\n";
20576
20577 opMemberNameFragments["testfun"] = "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20578 "%param1 = OpFunctionParameter %v4f32\n"
20579 "%label_func = OpLabel\n"
20580 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20581 "%b = OpFAdd %f32 %a %a\n"
20582 "%c = OpFSub %f32 %b %a\n"
20583 "%cstr = OpCompositeConstruct %f3str %c %c %c\n"
20584 "%d = OpCompositeExtract %f32 %cstr 0\n"
20585 "%ret = OpVectorInsertDynamic %v4f32 %param1 %d %c_i32_0\n"
20586 "OpReturnValue %ret\n"
20587 "OpFunctionEnd\n";
20588
20589 for (unsigned int i = 0; i < abuseCases.size(); i++)
20590 {
20591 string casename;
20592 casename = string("f3str_x") + abuseCases[i].name;
20593
20594 opMemberNameFragments["debug"] = "OpMemberName %f3str 0 \"" + abuseCases[i].param + "\"";
20595
20596 createTestsForAllStages(casename, defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20597 }
20598
20599 {
20600 opMemberNameFragments["debug"] = "OpMemberName %f3str 0 \"name1\"\n"
20601 "OpMemberName %f3str 1 \"name2\"\n"
20602 "OpMemberName %f3str 2 \"name3\"\n";
20603
20604 createTestsForAllStages("everything_named", defaultColors, defaultColors, opMemberNameFragments,
20605 abuseGroup.get());
20606 }
20607
20608 {
20609 opMemberNameFragments["debug"] = "OpMemberName %f3str 0 \"the_same\"\n"
20610 "OpMemberName %f3str 1 \"the_same\"\n"
20611 "OpMemberName %f3str 2 \"the_same\"\n";
20612
20613 createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opMemberNameFragments,
20614 abuseGroup.get());
20615 }
20616
20617 {
20618 opMemberNameFragments["debug"] = "OpMemberName %f3str 0 \"to_be\"\n"
20619 "OpMemberName %f3str 1 \"or_not\"\n"
20620 "OpMemberName %f3str 0 \"to_be\"\n"
20621 "OpMemberName %f3str 2 \"makes_no\"\n"
20622 "OpMemberName %f3str 0 \"difference\"\n"
20623 "OpMemberName %f3str 0 \"to_me\"\n";
20624
20625 createTestsForAllStages("f3str_x_has_multiple_names", defaultColors, defaultColors, opMemberNameFragments,
20626 abuseGroup.get());
20627 }
20628
20629 return abuseGroup.release();
20630 }
20631
getSparseIdsAbuseData(const uint32_t numDataPoints,const uint32_t seed)20632 vector<uint32_t> getSparseIdsAbuseData(const uint32_t numDataPoints, const uint32_t seed)
20633 {
20634 vector<uint32_t> result;
20635 de::Random rnd(seed);
20636
20637 result.reserve(numDataPoints);
20638
20639 for (uint32_t dataPointNdx = 0; dataPointNdx < numDataPoints; ++dataPointNdx)
20640 result.push_back(rnd.getUint32());
20641
20642 return result;
20643 }
20644
// Returns the element-wise sum (with 32-bit unsigned wrap-around) of the two inputs.
// The result has inData1.size() elements; inData2 is indexed over that same range.
vector<uint32_t> getSparseIdsAbuseResults(const vector<uint32_t> &inData1, const vector<uint32_t> &inData2)
{
    const size_t numValues = inData1.size();
    vector<uint32_t> sums(numValues);

    for (size_t i = 0; i != numValues; ++i)
        sums[i] = inData1[i] + inData2[i];

    return sums;
}
20656
20657 template <class SpecResource>
createSparseIdsAbuseTest(tcu::TestContext & testCtx,de::MovePtr<tcu::TestCaseGroup> & testGroup)20658 void createSparseIdsAbuseTest(tcu::TestContext &testCtx, de::MovePtr<tcu::TestCaseGroup> &testGroup)
20659 {
20660 const uint32_t numDataPoints = 16;
20661 const std::string testName("sparse_ids");
20662 const uint32_t seed(deStringHash(testName.c_str()));
20663 const vector<uint32_t> inData1(getSparseIdsAbuseData(numDataPoints, seed + 1));
20664 const vector<uint32_t> inData2(getSparseIdsAbuseData(numDataPoints, seed + 2));
20665 const vector<uint32_t> outData(getSparseIdsAbuseResults(inData1, inData2));
20666 const StringTemplate preMain("%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20667 " %up_u32 = OpTypePointer Uniform %u32\n"
20668 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20669 " %SSBO32 = OpTypeStruct %ra_u32\n"
20670 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20671 "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20672 "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20673 " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n");
20674 const StringTemplate decoration("OpDecorate %ra_u32 ArrayStride 4\n"
20675 "OpMemberDecorate %SSBO32 0 Offset 0\n"
20676 "OpDecorate %SSBO32 BufferBlock\n"
20677 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
20678 "OpDecorate %ssbo_src0 Binding 0\n"
20679 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
20680 "OpDecorate %ssbo_src1 Binding 1\n"
20681 "OpDecorate %ssbo_dst DescriptorSet 0\n"
20682 "OpDecorate %ssbo_dst Binding 2\n");
20683 const StringTemplate testFun(
20684 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20685 " %param = OpFunctionParameter %v4f32\n"
20686
20687 " %entry = OpLabel\n"
20688 " %i = OpVariable %fp_i32 Function\n"
20689 " OpStore %i %c_i32_0\n"
20690 " OpBranch %loop\n"
20691
20692 " %loop = OpLabel\n"
20693 " %i_cmp = OpLoad %i32 %i\n"
20694 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20695 " OpLoopMerge %merge %next None\n"
20696 " OpBranchConditional %lt %write %merge\n"
20697
20698 " %write = OpLabel\n"
20699 " %ndx = OpLoad %i32 %i\n"
20700
20701 " %127 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20702 " %128 = OpLoad %u32 %127\n"
20703
20704 // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20705 " %4194000 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20706 " %4194001 = OpLoad %u32 %4194000\n"
20707
20708 " %2097151 = OpIAdd %u32 %128 %4194001\n"
20709 " %2097152 = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20710 " OpStore %2097152 %2097151\n"
20711 " OpBranch %next\n"
20712
20713 " %next = OpLabel\n"
20714 " %i_cur = OpLoad %i32 %i\n"
20715 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20716 " OpStore %i %i_new\n"
20717 " OpBranch %loop\n"
20718
20719 " %merge = OpLabel\n"
20720 " OpReturnValue %param\n"
20721
20722 " OpFunctionEnd\n");
20723 SpecResource specResource;
20724 map<string, string> specs;
20725 VulkanFeatures features;
20726 map<string, string> fragments;
20727 vector<string> extensions;
20728
20729 specs["num_data_points"] = de::toString(numDataPoints);
20730
20731 fragments["decoration"] = decoration.specialize(specs);
20732 fragments["pre_main"] = preMain.specialize(specs);
20733 fragments["testfun"] = testFun.specialize(specs);
20734
20735 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20736 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20737 specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20738
20739 if (std::is_base_of<GraphicsResources, SpecResource>::value)
20740 {
20741 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
20742 features.coreFeatures.fragmentStoresAndAtomics = true;
20743 }
20744
20745 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
20746 IVec3(1, 1, 1));
20747 }
20748
getLotsIdsAbuseData(const uint32_t numDataPoints,const uint32_t seed)20749 vector<uint32_t> getLotsIdsAbuseData(const uint32_t numDataPoints, const uint32_t seed)
20750 {
20751 vector<uint32_t> result;
20752 de::Random rnd(seed);
20753
20754 result.reserve(numDataPoints);
20755
20756 // Fixed value
20757 result.push_back(1u);
20758
20759 // Random values
20760 for (uint32_t dataPointNdx = 1; dataPointNdx < numDataPoints; ++dataPointNdx)
20761 result.push_back(rnd.getUint8());
20762
20763 return result;
20764 }
20765
// Returns, per element, inData1[i] + count * inData2[i] (32-bit unsigned arithmetic).
// The result has inData1.size() elements; inData2 is indexed over that same range.
vector<uint32_t> getLotsIdsAbuseResults(const vector<uint32_t> &inData1, const vector<uint32_t> &inData2,
                                        const uint32_t count)
{
    const size_t numValues = inData1.size();
    vector<uint32_t> combined(numValues);

    for (size_t i = 0; i != numValues; ++i)
        combined[i] = inData1[i] + count * inData2[i];

    return combined;
}
20778
20779 template <class SpecResource>
createLotsIdsAbuseTest(tcu::TestContext & testCtx,de::MovePtr<tcu::TestCaseGroup> & testGroup)20780 void createLotsIdsAbuseTest(tcu::TestContext &testCtx, de::MovePtr<tcu::TestCaseGroup> &testGroup)
20781 {
20782 const uint32_t numDataPoints = 16;
20783 const uint32_t firstNdx = 100u;
20784 const uint32_t sequenceCount = 10000u;
20785 const std::string testName("lots_ids");
20786 const uint32_t seed(deStringHash(testName.c_str()));
20787 const vector<uint32_t> inData1(getLotsIdsAbuseData(numDataPoints, seed + 1));
20788 const vector<uint32_t> inData2(getLotsIdsAbuseData(numDataPoints, seed + 2));
20789 const vector<uint32_t> outData(getLotsIdsAbuseResults(inData1, inData2, sequenceCount));
20790 const StringTemplate preMain("%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20791 " %up_u32 = OpTypePointer Uniform %u32\n"
20792 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20793 " %SSBO32 = OpTypeStruct %ra_u32\n"
20794 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20795 "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20796 "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20797 " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n");
20798 const StringTemplate decoration("OpDecorate %ra_u32 ArrayStride 4\n"
20799 "OpMemberDecorate %SSBO32 0 Offset 0\n"
20800 "OpDecorate %SSBO32 BufferBlock\n"
20801 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
20802 "OpDecorate %ssbo_src0 Binding 0\n"
20803 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
20804 "OpDecorate %ssbo_src1 Binding 1\n"
20805 "OpDecorate %ssbo_dst DescriptorSet 0\n"
20806 "OpDecorate %ssbo_dst Binding 2\n");
20807 const StringTemplate testFun(
20808 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20809 " %param = OpFunctionParameter %v4f32\n"
20810
20811 " %entry = OpLabel\n"
20812 " %i = OpVariable %fp_i32 Function\n"
20813 " OpStore %i %c_i32_0\n"
20814 " OpBranch %loop\n"
20815
20816 " %loop = OpLabel\n"
20817 " %i_cmp = OpLoad %i32 %i\n"
20818 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20819 " OpLoopMerge %merge %next None\n"
20820 " OpBranchConditional %lt %write %merge\n"
20821
20822 " %write = OpLabel\n"
20823 " %ndx = OpLoad %i32 %i\n"
20824
20825 " %90 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20826 " %91 = OpLoad %u32 %90\n"
20827
20828 " %98 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20829 " %${zeroth_id} = OpLoad %u32 %98\n"
20830
20831 "${seq}\n"
20832
20833 // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20834 " %dst = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20835 " OpStore %dst %${last_id}\n"
20836 " OpBranch %next\n"
20837
20838 " %next = OpLabel\n"
20839 " %i_cur = OpLoad %i32 %i\n"
20840 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20841 " OpStore %i %i_new\n"
20842 " OpBranch %loop\n"
20843
20844 " %merge = OpLabel\n"
20845 " OpReturnValue %param\n"
20846
20847 " OpFunctionEnd\n");
20848 uint32_t lastId = firstNdx;
20849 SpecResource specResource;
20850 map<string, string> specs;
20851 VulkanFeatures features;
20852 map<string, string> fragments;
20853 vector<string> extensions;
20854 std::string sequence;
20855
20856 for (uint32_t sequenceNdx = 0; sequenceNdx < sequenceCount; ++sequenceNdx)
20857 {
20858 const uint32_t sequenceId = sequenceNdx + firstNdx;
20859 const std::string sequenceIdStr = de::toString(sequenceId);
20860
20861 sequence += "%" + sequenceIdStr + " = OpIAdd %u32 %91 %" + de::toString(sequenceId - 1) + "\n";
20862 lastId = sequenceId;
20863
20864 if (sequenceNdx == 0)
20865 sequence.reserve((10 + sequence.length()) * sequenceCount);
20866 }
20867
20868 specs["num_data_points"] = de::toString(numDataPoints);
20869 specs["zeroth_id"] = de::toString(firstNdx - 1);
20870 specs["last_id"] = de::toString(lastId);
20871 specs["seq"] = sequence;
20872
20873 fragments["decoration"] = decoration.specialize(specs);
20874 fragments["pre_main"] = preMain.specialize(specs);
20875 fragments["testfun"] = testFun.specialize(specs);
20876
20877 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20878 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20879 specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20880
20881 if (std::is_base_of<GraphicsResources, SpecResource>::value)
20882 {
20883 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
20884 features.coreFeatures.fragmentStoresAndAtomics = true;
20885 }
20886
20887 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions,
20888 IVec3(1, 1, 1));
20889 }
20890
createSpirvIdsAbuseTests(tcu::TestContext & testCtx)20891 tcu::TestCaseGroup *createSpirvIdsAbuseTests(tcu::TestContext &testCtx)
20892 {
20893 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse"));
20894
20895 createSparseIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20896 createLotsIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20897
20898 return testGroup.release();
20899 }
20900
createSpirvIdsAbuseGroup(tcu::TestContext & testCtx)20901 tcu::TestCaseGroup *createSpirvIdsAbuseGroup(tcu::TestContext &testCtx)
20902 {
20903 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse"));
20904
20905 createSparseIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20906 createLotsIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20907
20908 return testGroup.release();
20909 }
20910
createFunctionParamsGroup(tcu::TestContext & testCtx)20911 tcu::TestCaseGroup *createFunctionParamsGroup(tcu::TestContext &testCtx)
20912 {
20913 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "function_params"));
20914 #ifndef CTS_USES_VULKANSC
20915 static const char data_dir[] = "spirv_assembly/instruction/function_params";
20916
20917 static const struct
20918 {
20919 const std::string name;
20920 const std::string desc;
20921 } cases[] = {
20922 {"sampler_param", "Test combined image sampler as function parameter"},
20923 };
20924
20925 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
20926 {
20927 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(
20928 testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), data_dir, cases[i].name + ".amber");
20929 testGroup->addChild(testCase);
20930 }
20931 #endif
20932 return testGroup.release();
20933 }
20934
createEarlyFragmentTests(tcu::TestContext & testCtx)20935 tcu::TestCaseGroup *createEarlyFragmentTests(tcu::TestContext &testCtx)
20936 {
20937 de::MovePtr<tcu::TestCaseGroup> earlyFragTests(new tcu::TestCaseGroup(testCtx, "early_fragment"));
20938
20939 #ifndef CTS_USES_VULKANSC
20940 static const char dataDir[] = "spirv_assembly/instruction/graphics/early_fragment";
20941
20942 static const struct Case
20943 {
20944 const string name;
20945 const string desc;
20946 } cases[] = {// Overwriting the gl_FragDepth should be ignored, when Early Fragment Test Mode is enabled.
20947 {"depth_less", "gl_FragDepth > CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."},
20948 {"depth_greater", "gl_FragDepth < CLEAR_DEPTH. Polygon depth > CLEAR_DEPTH."},
20949 {"depth_less_or_equal", "gl_FragDepth > CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."},
20950 {"depth_greater_or_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."},
20951 {"depth_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."},
20952 {"depth_not_equal", "gl_FragDepth == CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."}};
20953
20954 for (const auto &tCase : cases)
20955 {
20956 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(
20957 testCtx, tCase.name.c_str(), tCase.desc.c_str(), dataDir, tCase.name + ".amber");
20958
20959 earlyFragTests->addChild(testCase);
20960 }
20961 #endif // CTS_USES_VULKANSC
20962
20963 return earlyFragTests.release();
20964 }
20965
createEarlyAndLateFragmentTests(tcu::TestContext & testCtx)20966 tcu::TestCaseGroup *createEarlyAndLateFragmentTests(tcu::TestContext &testCtx)
20967 {
20968 de::MovePtr<tcu::TestCaseGroup> earlyLateFragTests(new tcu::TestCaseGroup(testCtx, "early_and_late_fragment"));
20969 #ifndef CTS_USES_VULKANSC
20970 static const char dataDir[] = "spirv_assembly/instruction/graphics/early_and_late_fragment";
20971
20972 static const struct Case
20973 {
20974 const string name;
20975 const string desc;
20976 } cases[] = {{"depth_less", "gl_FragDepth < CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."},
20977 {"depth_greater", "gl_FragDepth > CLEAR_DEPTH. Polygon depth > CLEAR_DEPTH."},
20978 {"depth_less_or_equal", "gl_FragDepth > CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."},
20979 {"depth_greater_or_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."},
20980 {"depth_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."},
20981 {"depth_not_equal", "gl_FragDepth == CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."}};
20982
20983 for (const auto &tCase : cases)
20984 {
20985 cts_amber::AmberTestCase *testCase =
20986 cts_amber::createAmberTestCase(testCtx, tCase.name.c_str(), tCase.desc.c_str(), dataDir,
20987 tCase.name + ".amber", {"VK_AMD_shader_early_and_late_fragment_tests"});
20988
20989 earlyLateFragTests->addChild(testCase);
20990 }
20991 #endif
20992
20993 return earlyLateFragTests.release();
20994 }
20995
createOpExecutionModeTests(tcu::TestContext & testCtx)20996 tcu::TestCaseGroup *createOpExecutionModeTests(tcu::TestContext &testCtx)
20997 {
20998 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "execution_mode"));
20999
21000 #ifndef CTS_USES_VULKANSC
21001 static const char dataDir[] = "spirv_assembly/instruction/graphics/execution_mode";
21002
21003 static const struct Case
21004 {
21005 const string name;
21006 const string desc;
21007 } cases[] = {
21008 {"depthless_0", "FragDepth < Polygon depth: depth test should pass."},
21009 {"depthless_1", "FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit "
21010 "depth, but the depth test should pass."},
21011 {"depthless_2", "FragDepth < Polygon depth: depth test should fail."},
21012 {"depthless_3", "FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit "
21013 "depth, the depth test should fail."},
21014 {"depthless_4", "FragDepth < Polygon depth: depth test should pass."},
21015 {"depthgreater_0", "FragDepth > Polygon depth: depth test should pass."},
21016 {"depthgreater_1", "FragDepth < Polygon depth: violates the promise that FragDepth is greater than the "
21017 "implicit depth, but the depth test should pass."},
21018 {"depthgreater_2", "FragDepth > Polygon depth: depth test should fail."},
21019 {"depthgreater_3", "FragDepth > Polygon depth: violates the promise that FragDepth is greater than the "
21020 "implicit depth, the depth test should fail."},
21021 {"depthgreater_4", "FragDepth > Polygon depth: depth test should pass."},
21022 {"depthunchanged_0", "FragDepth == Polygon depth: depth test should pass."},
21023 {"depthunchanged_1", "FragDepth == Polygon depth: depth test should fail."},
21024 {"depthunchanged_2", "FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit "
21025 "depth, the depth test should pass."},
21026 {"depthunchanged_3", "FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit "
21027 "depth, the depth test should fail."},
21028 };
21029
21030 for (const auto &case_ : cases)
21031 {
21032 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(
21033 testCtx, case_.name.c_str(), case_.desc.c_str(), dataDir, case_.name + ".amber");
21034 testGroup->addChild(testCase);
21035 }
21036 #endif // CTS_USES_VULKANSC
21037
21038 return testGroup.release();
21039 }
21040
createOpMulExtendedGroup(tcu::TestContext & testCtx)21041 tcu::TestCaseGroup *createOpMulExtendedGroup(tcu::TestContext &testCtx)
21042 {
21043 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "mul_extended"));
21044
21045 #ifndef CTS_USES_VULKANSC
21046 static const char dataDir[] = "spirv_assembly/instruction/compute/mul_extended";
21047
21048 static const struct Case
21049 {
21050 const string name;
21051 const vector<string> features;
21052 } cases[] = {{"signed_16bit", {"Features.shaderInt16", "Storage16BitFeatures.storageBuffer16BitAccess"}},
21053 {"signed_32bit", {}},
21054 {"signed_64bit", {"Features.shaderInt64"}},
21055 {"signed_8bit", {"Float16Int8Features.shaderInt8", "Storage8BitFeatures.storageBuffer8BitAccess"}},
21056 {"unsigned_16bit", {"Features.shaderInt16", "Storage16BitFeatures.storageBuffer16BitAccess"}},
21057 {"unsigned_32bit", {}},
21058 {"unsigned_64bit", {"Features.shaderInt64"}},
21059 {"unsigned_8bit", {"Float16Int8Features.shaderInt8", "Storage8BitFeatures.storageBuffer8BitAccess"}}};
21060
21061 for (const auto &test : cases)
21062 {
21063 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx, test.name.c_str(), "", dataDir,
21064 test.name + ".amber", test.features);
21065 testGroup->addChild(testCase);
21066 }
21067 #endif // CTS_USES_VULKANSC
21068
21069 return testGroup.release();
21070 }
21071
createQueryGroup(tcu::TestContext & testCtx)21072 tcu::TestCaseGroup *createQueryGroup(tcu::TestContext &testCtx)
21073 {
21074 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "image_query"));
21075
21076 #ifndef CTS_USES_VULKANSC
21077 static const char data_dir[] = "spirv_assembly/instruction/image_query";
21078
21079 static const struct
21080 {
21081 const std::string name;
21082 const std::string desc;
21083 } cases[] = {
21084 {"samples_storage", "Test samples query can be used on storage images"},
21085 };
21086
21087 vector<string> requirements(1, "Features.shaderStorageImageMultisample");
21088
21089 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
21090 {
21091 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(
21092 testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), data_dir, cases[i].name + ".amber", requirements);
21093 testGroup->addChild(testCase);
21094 }
21095 #endif // CTS_USES_VULKANSC
21096
21097 return testGroup.release();
21098 }
21099
createInstructionTests(tcu::TestContext & testCtx)21100 tcu::TestCaseGroup *createInstructionTests(tcu::TestContext &testCtx)
21101 {
21102 const bool testComputePipeline = true;
21103
21104 de::MovePtr<tcu::TestCaseGroup> instructionTests(new tcu::TestCaseGroup(testCtx, "instruction"));
21105 de::MovePtr<tcu::TestCaseGroup> computeTests(new tcu::TestCaseGroup(testCtx, "compute"));
21106 de::MovePtr<tcu::TestCaseGroup> graphicsTests(new tcu::TestCaseGroup(testCtx, "graphics"));
21107
21108 computeTests->addChild(createSpivVersionCheckTests(testCtx, testComputePipeline));
21109 computeTests->addChild(createLocalSizeGroup(testCtx, false));
21110 computeTests->addChild(createLocalSizeGroup(testCtx, true));
21111 computeTests->addChild(createNonSemanticInfoGroup(testCtx));
21112 computeTests->addChild(createOpNopGroup(testCtx));
21113 computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITHOUT_NAN));
21114 computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITH_NAN));
21115 computeTests->addChild(createOpAtomicGroup(testCtx, false));
21116 computeTests->addChild(createOpAtomicGroup(testCtx, true)); // Using new StorageBuffer decoration
21117 computeTests->addChild(createOpAtomicGroup(testCtx, false, 1024, true)); // Return value validation
21118 computeTests->addChild(createOpAtomicGroup(testCtx, true, 65535, false, true)); // volatile atomics
21119 computeTests->addChild(createOpLineGroup(testCtx));
21120 computeTests->addChild(createOpModuleProcessedGroup(testCtx));
21121 computeTests->addChild(createOpNoLineGroup(testCtx));
21122 computeTests->addChild(createOpConstantNullGroup(testCtx));
21123 computeTests->addChild(createOpConstantCompositeGroup(testCtx));
21124 computeTests->addChild(createOpConstantUsageGroup(testCtx));
21125 computeTests->addChild(createSpecConstantGroup(testCtx));
21126 computeTests->addChild(createOpSourceGroup(testCtx));
21127 computeTests->addChild(createOpSourceExtensionGroup(testCtx));
21128 computeTests->addChild(createDecorationGroupGroup(testCtx));
21129 computeTests->addChild(createOpPhiGroup(testCtx));
21130 computeTests->addChild(createLoopControlGroup(testCtx));
21131 computeTests->addChild(createFunctionControlGroup(testCtx));
21132 computeTests->addChild(createSelectionControlGroup(testCtx));
21133 computeTests->addChild(createBlockOrderGroup(testCtx));
21134 computeTests->addChild(createMultipleShaderGroup(testCtx));
21135 computeTests->addChild(createMultipleShaderExtendedGroup(testCtx));
21136 computeTests->addChild(createMemoryAccessGroup(testCtx));
21137 computeTests->addChild(createOpCopyMemoryGroup(testCtx));
21138 computeTests->addChild(createOpCopyObjectGroup(testCtx));
21139 computeTests->addChild(createNoContractionGroup(testCtx));
21140 computeTests->addChild(createOpUndefGroup(testCtx));
21141 computeTests->addChild(createOpUnreachableGroup(testCtx));
21142 computeTests->addChild(createOpQuantizeToF16Group(testCtx));
21143 computeTests->addChild(createOpFRemGroup(testCtx));
21144 computeTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_PASS));
21145 computeTests->addChild(createOpSRemComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
21146 computeTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_PASS));
21147 computeTests->addChild(createOpSModComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
21148 #ifndef CTS_USES_VULKANSC
21149 computeTests->addChild(createOpSDotKHRComputeGroup(testCtx));
21150 computeTests->addChild(createOpUDotKHRComputeGroup(testCtx));
21151 computeTests->addChild(createOpSUDotKHRComputeGroup(testCtx));
21152 computeTests->addChild(createOpSDotAccSatKHRComputeGroup(testCtx));
21153 computeTests->addChild(createOpUDotAccSatKHRComputeGroup(testCtx));
21154 computeTests->addChild(createOpSUDotAccSatKHRComputeGroup(testCtx));
21155 #endif // CTS_USES_VULKANSC
21156 computeTests->addChild(createConvertComputeTests(testCtx, "OpSConvert", "sconvert"));
21157 computeTests->addChild(createConvertComputeTests(testCtx, "OpUConvert", "uconvert"));
21158 computeTests->addChild(createConvertComputeTests(testCtx, "OpFConvert", "fconvert"));
21159 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertSToF", "convertstof"));
21160 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToS", "convertftos"));
21161 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertUToF", "convertutof"));
21162 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToU", "convertftou"));
21163 computeTests->addChild(createOpCompositeInsertGroup(testCtx));
21164 computeTests->addChild(createOpInBoundsAccessChainGroup(testCtx));
21165 computeTests->addChild(createShaderDefaultOutputGroup(testCtx));
21166 computeTests->addChild(createOpNMinGroup(testCtx));
21167 computeTests->addChild(createOpNMaxGroup(testCtx));
21168 computeTests->addChild(createOpNClampGroup(testCtx));
21169 computeTests->addChild(createFloatControlsExtensionlessGroup(testCtx));
21170 {
21171 de::MovePtr<tcu::TestCaseGroup> computeAndroidTests(new tcu::TestCaseGroup(testCtx, "android"));
21172
21173 computeAndroidTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
21174 computeAndroidTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
21175
21176 computeTests->addChild(computeAndroidTests.release());
21177 }
21178
21179 computeTests->addChild(create8BitStorageComputeGroup(testCtx));
21180 computeTests->addChild(create16BitStorageComputeGroup(testCtx));
21181 computeTests->addChild(createFloatControlsComputeGroup(testCtx));
21182 #ifndef CTS_USES_VULKANSC
21183 computeTests->addChild(createFloatControls2ComputeGroup(testCtx));
21184 #endif // CTS_USES_VULKANSC
21185 computeTests->addChild(createUboMatrixPaddingComputeGroup(testCtx));
21186 computeTests->addChild(createCompositeInsertComputeGroup(testCtx));
21187 computeTests->addChild(createVariableInitComputeGroup(testCtx));
21188 computeTests->addChild(createConditionalBranchComputeGroup(testCtx));
21189 computeTests->addChild(createIndexingComputeGroup(testCtx));
21190 computeTests->addChild(createVariablePointersComputeGroup(testCtx));
21191 computeTests->addChild(createPhysicalPointersComputeGroup(testCtx));
21192 computeTests->addChild(createImageSamplerComputeGroup(testCtx));
21193 computeTests->addChild(createOpNameGroup(testCtx));
21194 computeTests->addChild(createOpMemberNameGroup(testCtx));
21195 computeTests->addChild(createPointerParameterComputeGroup(testCtx));
21196 computeTests->addChild(createFloat16Group(testCtx));
21197 #ifndef CTS_USES_VULKANSC
21198 computeTests->addChild(createFloat32Group(testCtx));
21199 #endif // CTS_USES_VULKANSC
21200 computeTests->addChild(createBoolGroup(testCtx));
21201 computeTests->addChild(createWorkgroupMemoryComputeGroup(testCtx));
21202 computeTests->addChild(createSpirvIdsAbuseGroup(testCtx));
21203 #ifndef CTS_USES_VULKANSC
21204 computeTests->addChild(createSignedIntCompareGroup(testCtx));
21205 computeTests->addChild(createSignedOpTestsGroup(testCtx));
21206 #endif // CTS_USES_VULKANSC
21207 computeTests->addChild(createUnusedVariableComputeTests(testCtx));
21208 #ifndef CTS_USES_VULKANSC
21209 computeTests->addChild(createPtrAccessChainGroup(testCtx));
21210 computeTests->addChild(createVectorShuffleGroup(testCtx));
21211 #endif // CTS_USES_VULKANSC
21212 computeTests->addChild(createHlslComputeGroup(testCtx));
21213 computeTests->addChild(createEmptyStructComputeGroup(testCtx));
21214 computeTests->addChild(create64bitCompareComputeGroup(testCtx));
21215 #ifndef CTS_USES_VULKANSC
21216 computeTests->addChild(createOpArrayLengthComputeGroup(testCtx));
21217 #endif // CTS_USES_VULKANSC
21218 computeTests->addChild(createPhysicalStorageBufferTestGroup(testCtx));
21219 computeTests->addChild(createOpMulExtendedGroup(testCtx));
21220
21221 graphicsTests->addChild(createCrossStageInterfaceTests(testCtx));
21222 graphicsTests->addChild(createSpivVersionCheckTests(testCtx, !testComputePipeline));
21223 graphicsTests->addChild(createOpNopTests(testCtx));
21224 graphicsTests->addChild(createOpSourceTests(testCtx));
21225 graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
21226 graphicsTests->addChild(createOpModuleProcessedTests(testCtx));
21227 graphicsTests->addChild(createOpLineTests(testCtx));
21228 graphicsTests->addChild(createOpNoLineTests(testCtx));
21229 graphicsTests->addChild(createOpConstantNullTests(testCtx));
21230 graphicsTests->addChild(createOpConstantCompositeTests(testCtx));
21231 graphicsTests->addChild(createMemoryAccessTests(testCtx));
21232 graphicsTests->addChild(createOpUndefTests(testCtx));
21233 graphicsTests->addChild(createSelectionBlockOrderTests(testCtx));
21234 graphicsTests->addChild(createModuleTests(testCtx));
21235 graphicsTests->addChild(createUnusedVariableTests(testCtx));
21236 graphicsTests->addChild(createSwitchBlockOrderTests(testCtx));
21237 graphicsTests->addChild(createOpPhiTests(testCtx));
21238 graphicsTests->addChild(createNoContractionTests(testCtx));
21239 graphicsTests->addChild(createOpQuantizeTests(testCtx));
21240 graphicsTests->addChild(createLoopTests(testCtx));
21241 graphicsTests->addChild(createSpecConstantTests(testCtx));
21242 graphicsTests->addChild(createSpecConstantOpQuantizeToF16Group(testCtx));
21243 graphicsTests->addChild(createBarrierTests(testCtx));
21244 graphicsTests->addChild(createDecorationGroupTests(testCtx));
21245 graphicsTests->addChild(createFRemTests(testCtx));
21246 graphicsTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
21247 graphicsTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
21248
21249 {
21250 de::MovePtr<tcu::TestCaseGroup> graphicsAndroidTests(new tcu::TestCaseGroup(testCtx, "android"));
21251
21252 graphicsAndroidTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
21253 graphicsAndroidTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
21254
21255 graphicsTests->addChild(graphicsAndroidTests.release());
21256 }
21257
21258 graphicsTests->addChild(createOpNameTests(testCtx));
21259 graphicsTests->addChild(createOpNameAbuseTests(testCtx));
21260 graphicsTests->addChild(createOpMemberNameAbuseTests(testCtx));
21261
21262 graphicsTests->addChild(create8BitStorageGraphicsGroup(testCtx));
21263 graphicsTests->addChild(create16BitStorageGraphicsGroup(testCtx));
21264 graphicsTests->addChild(createFloatControlsGraphicsGroup(testCtx));
21265 #ifndef CTS_USES_VULKANSC
21266 graphicsTests->addChild(createFloatControls2GraphicsGroup(testCtx));
21267 #endif // CTS_USES_VULKANSC
21268 graphicsTests->addChild(createUboMatrixPaddingGraphicsGroup(testCtx));
21269 graphicsTests->addChild(createCompositeInsertGraphicsGroup(testCtx));
21270 graphicsTests->addChild(createVariableInitGraphicsGroup(testCtx));
21271 graphicsTests->addChild(createConditionalBranchGraphicsGroup(testCtx));
21272 graphicsTests->addChild(createIndexingGraphicsGroup(testCtx));
21273 graphicsTests->addChild(createVariablePointersGraphicsGroup(testCtx));
21274 graphicsTests->addChild(createImageSamplerGraphicsGroup(testCtx));
21275 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpSConvert", "sconvert"));
21276 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpUConvert", "uconvert"));
21277 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpFConvert", "fconvert"));
21278 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertSToF", "convertstof"));
21279 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToS", "convertftos"));
21280 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertUToF", "convertutof"));
21281 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToU", "convertftou"));
21282 graphicsTests->addChild(createPointerParameterGraphicsGroup(testCtx));
21283 graphicsTests->addChild(createVaryingNameGraphicsGroup(testCtx));
21284 graphicsTests->addChild(createFloat16Tests(testCtx));
21285 #ifndef CTS_USES_VULKANSC
21286 graphicsTests->addChild(createFloat32Tests(testCtx));
21287 #endif // CTS_USES_VULKANSC
21288 graphicsTests->addChild(createSpirvIdsAbuseTests(testCtx));
21289 graphicsTests->addChild(create64bitCompareGraphicsGroup(testCtx));
21290 graphicsTests->addChild(createEarlyFragmentTests(testCtx));
21291 graphicsTests->addChild(createEarlyAndLateFragmentTests(testCtx));
21292 graphicsTests->addChild(createOpExecutionModeTests(testCtx));
21293
21294 instructionTests->addChild(computeTests.release());
21295 instructionTests->addChild(graphicsTests.release());
21296 #ifndef CTS_USES_VULKANSC
21297 instructionTests->addChild(createSpirvVersion1p4Group(testCtx));
21298 instructionTests->addChild(createFunctionParamsGroup(testCtx));
21299 #endif // CTS_USES_VULKANSC
21300 instructionTests->addChild(createQueryGroup(testCtx));
21301 instructionTests->addChild(createTrinaryMinMaxGroup(testCtx));
21302 instructionTests->addChild(createTerminateInvocationGroup(testCtx));
21303
21304 return instructionTests.release();
21305 }
21306
21307 } // namespace SpirVAssembly
21308 } // namespace vkt
21309