1 /*-------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2018 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief VK_KHR_shader_float_controls tests.
22 *//*--------------------------------------------------------------------*/
23
24 #define _USE_MATH_DEFINES
25
26 #include "vktSpvAsmFloatControlsTests.hpp"
27 #include "vktSpvAsmComputeShaderCase.hpp"
28 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
29 #include "vktTestGroupUtil.hpp"
30 #include "tcuFloat.hpp"
31 #include "tcuFloatFormat.hpp"
32 #include "tcuStringTemplate.hpp"
33 #include "deUniquePtr.hpp"
34 #include "deFloat16.h"
35 #include "vkQueryUtil.hpp"
36 #include "vkRefUtil.hpp"
37 #include <cstring>
38 #include <vector>
39 #include <limits>
40 #include <cstdint>
41 #include <fenv.h>
42 #include <cstdint>
43 #include <cmath>
44
45 namespace vkt
46 {
47 namespace SpirVAssembly
48 {
49
50 namespace
51 {
52
53 using namespace std;
54 using namespace tcu;
55
// Scalar types that test variables can have: three float widths followed by
// unsigned and signed 32/64-bit integers. Values are sequential starting at 0.
enum VariableType
{
    FP16   = 0,
    FP32   = 1,
    FP64   = 2,
    UINT32 = 3,
    UINT64 = 4,
    INT32  = 5,
    INT64  = 6
};
66
// Float width of the data stored in a buffer; DATA_UNKNOWN (0) is the
// "not specified" value. Remaining enumerators count up from it.
enum class BufferDataType
{
    DATA_UNKNOWN = 0,
    DATA_FP16,
    DATA_FP32,
    DATA_FP64,
};
74
// How a float type is used by a test.
enum FloatUsage
{
    // The type is only loaded/stored. For 16-bit floats this usage is covered
    // by VK_KHR_16bit_storage.
    FLOAT_STORAGE_ONLY = 0,

    // The type is used for more than storage, i.e. beyond what
    // VK_KHR_16bit_storage provides.
    FLOAT_ARITHMETIC = 1
};
83
// Single-bit flags, one per enumerator, grouped in runs of four
// (generic FLOAT, FP16, FP32, FP64) for each statement category.
// Combinations are stored in FloatStatementUsageFlags.
enum FloatStatementUsageBits
{
    // ARGS_CONST group
    B_STATEMENT_USAGE_ARGS_CONST_FLOAT     = 0x00000001,
    B_STATEMENT_USAGE_ARGS_CONST_FP16      = 0x00000002,
    B_STATEMENT_USAGE_ARGS_CONST_FP32      = 0x00000004,
    B_STATEMENT_USAGE_ARGS_CONST_FP64      = 0x00000008,
    // TYPES_TYPE group
    B_STATEMENT_USAGE_TYPES_TYPE_FLOAT     = 0x00000010,
    B_STATEMENT_USAGE_TYPES_TYPE_FP16      = 0x00000020,
    B_STATEMENT_USAGE_TYPES_TYPE_FP32      = 0x00000040,
    B_STATEMENT_USAGE_TYPES_TYPE_FP64      = 0x00000080,
    // CONSTS_TYPE group
    B_STATEMENT_USAGE_CONSTS_TYPE_FLOAT    = 0x00000100,
    B_STATEMENT_USAGE_CONSTS_TYPE_FP16     = 0x00000200,
    B_STATEMENT_USAGE_CONSTS_TYPE_FP32     = 0x00000400,
    B_STATEMENT_USAGE_CONSTS_TYPE_FP64     = 0x00000800,
    // COMMANDS_CONST group
    B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT = 0x00001000,
    B_STATEMENT_USAGE_COMMANDS_CONST_FP16  = 0x00002000,
    B_STATEMENT_USAGE_COMMANDS_CONST_FP32  = 0x00004000,
    B_STATEMENT_USAGE_COMMANDS_CONST_FP64  = 0x00008000,
    // COMMANDS_TYPE group
    B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT  = 0x00010000,
    B_STATEMENT_USAGE_COMMANDS_TYPE_FP16   = 0x00020000,
    B_STATEMENT_USAGE_COMMANDS_TYPE_FP32   = 0x00040000,
    B_STATEMENT_USAGE_COMMANDS_TYPE_FP64   = 0x00080000,
};

// Bitwise OR of FloatStatementUsageBits values.
using FloatStatementUsageFlags = uint32_t;
109
// Float behaviors that it is possible to test. Each enumerator is a single bit
// so several behaviors can be combined in a BehaviorFlags value.
enum BehaviorFlagBits
{
    B_DENORM_PRESERVE = (1 << 0), // DenormPreserve
    B_DENORM_FLUSH    = (1 << 1), // DenormFlushToZero
    B_ZIN_PRESERVE    = (1 << 2), // SignedZeroInfNanPreserve
    B_RTE_ROUNDING    = (1 << 3), // RoundingModeRTE
    B_RTZ_ROUNDING    = (1 << 4)  // RoundingModeRTZ
};

// Bitwise OR of BehaviorFlagBits values.
using BehaviorFlags = uint32_t;
121
// Codes for all float values used in tests as arguments and operation results.
// Referring to values by code allows the same test definition to be reused with
// different float types, reducing the complexity of the test implementation.
// The concrete value behind each code is defined per float type in the
// TypeValues<...> constructors.
enum ValueId
{
    // common values used as both arguments and results
    V_UNUSED = 0, // used to mark arguments that are not used in an operation,
                  // or results of test cases that should be skipped
    V_MINUS_INF,  // -infinity
    V_MINUS_ONE,  // -1.0
    V_MINUS_ZERO, // -0.0
    V_ZERO,       // 0.0
    V_HALF,       // 0.5
    V_ONE,        // 1.0
    V_INF,        // +infinity
    V_DENORM,     // a denormalized value
    V_NAN,        // not-a-number

    // arguments for rounding mode tests - used only when arguments are passed from input
    V_ADD_ARG_A,
    V_ADD_ARG_B,
    V_SUB_ARG_A,
    V_SUB_ARG_B,
    V_MUL_ARG_A,
    V_MUL_ARG_B,
    V_DOT_ARG_A,
    V_DOT_ARG_B,

    // arguments of conversion operations - used only when arguments are passed from input
    // Subcases are:
    // ...UP: rounds away from zero, e.g. trailing bits are 101..
    // ...DOWN: rounds toward zero, e.g. trailing bits are 011..
    // ...TIE_UP: rounds up to even, e.g. preserved bit is 1, trailing are 10*
    // ...TIE_DOWN: rounds down to even, e.g. preserved bit is 0, trailing are 10*
    V_CONV_FROM_FP32_TO_FP16_UP_ARG,
    V_CONV_FROM_FP32_TO_FP16_DOWN_ARG,
    V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG,
    V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG,
    V_CONV_FROM_FP64_TO_FP16_UP_ARG,
    V_CONV_FROM_FP64_TO_FP16_DOWN_ARG,
    V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG,
    V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG,
    V_CONV_FROM_FP64_TO_FP32_UP_ARG,
    V_CONV_FROM_FP64_TO_FP32_DOWN_ARG,
    V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG,
    V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG,

    // arguments of integer conversion rounding; not all values can be represented by all
    // integer sizes and only those that can will be used for testing
    // Subcases are:
    // ...UP: rounds away from zero, e.g. the integer's value is closer to the higher float value
    // ...DOWN: rounds towards zero, e.g. the integer's value is closer to the lower float value
    // ...TIE: rounds towards zero, e.g. the integer's value is equidistant to the lower and higher float values
    // 16 bit values can only use width-conversions -> No rounding testing
    V_CONV_FROM_UINT_TO_FP32_UP_ARG,
    V_CONV_FROM_UINT_TO_FP32_DOWN_ARG,
    V_CONV_FROM_UINT_TO_FP32_TIE_ARG,
    V_CONV_FROM_UINT_TO_FP64_UP_ARG,
    V_CONV_FROM_UINT_TO_FP64_DOWN_ARG,
    V_CONV_FROM_UINT_TO_FP64_TIE_ARG,

    // Same as UINT but will only test with negative values
    V_CONV_FROM_INT_TO_FP32_UP_ARG,
    V_CONV_FROM_INT_TO_FP32_DOWN_ARG,
    V_CONV_FROM_INT_TO_FP32_TIE_ARG,
    V_CONV_FROM_INT_TO_FP64_UP_ARG,
    V_CONV_FROM_INT_TO_FP64_DOWN_ARG,
    V_CONV_FROM_INT_TO_FP64_TIE_ARG,

    // results of rounding mode tests (one RTZ and one RTE result per operation)
    V_ADD_RTZ_RESULT,
    V_ADD_RTE_RESULT,
    V_SUB_RTZ_RESULT,
    V_SUB_RTE_RESULT,
    V_MUL_RTZ_RESULT,
    V_MUL_RTE_RESULT,
    V_DOT_RTZ_RESULT,
    V_DOT_RTE_RESULT,

    // non-common results of some operations - corner cases
    V_ZERO_OR_DENORM_TIMES_TWO, // fp16 addition of non-flushed denorm with itself (or equivalent dot-product or vector-matrix multiply)
    V_MINUS_ONE_OR_CLOSE,       // value used only for fp16 subtraction result of preserved denorm and one
    V_PI_DIV_2,
    V_ZERO_OR_MINUS_ZERO,          // both +0 and -0 are accepted
    V_ZERO_OR_ONE,                 // both +0 and 1 are accepted
    V_ZERO_OR_FP16_DENORM_TO_FP32, // both 0 and fp32 representation of fp16 denorm are accepted
    V_ZERO_OR_FP16_DENORM_TO_FP64,
    V_ZERO_OR_FP32_DENORM_TO_FP64,
    V_DENORM_TIMES_TWO,
    V_DEGREES_DENORM,
    V_TRIG_ONE, // 1.0 trigonometric operations, including precision margin
    V_MINUS_INF_OR_LOG_DENORM,
    V_MINUS_INF_OR_LOG2_DENORM,
    V_ZERO_OR_SQRT_DENORM,
    V_INF_OR_INV_SQRT_DENORM,

    // Results of float narrowing conversion operations: RTZ
    V_CONV_FROM_FP32_TO_FP16_UP_RTZ_RESULT,
    V_CONV_FROM_FP32_TO_FP16_DOWN_RTZ_RESULT,
    V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTZ_RESULT,
    V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP16_UP_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP16_DOWN_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP32_UP_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP32_DOWN_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTZ_RESULT,
    // Results of float narrowing conversion operations: RTE
    V_CONV_FROM_FP32_TO_FP16_UP_RTE_RESULT,
    V_CONV_FROM_FP32_TO_FP16_DOWN_RTE_RESULT,
    V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTE_RESULT,
    V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP16_UP_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP16_DOWN_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP32_UP_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP32_DOWN_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTE_RESULT,

    // Results of unsigned integer conversion operations: RTZ
    // 16 bit values can only use width-conversions -> No rounding testing
    V_CONV_FROM_UINT32_UP_RTZ_RESULT,
    V_CONV_FROM_UINT32_DOWN_RTZ_RESULT,
    V_CONV_FROM_UINT32_TIE_RTZ_RESULT,
    V_CONV_FROM_UINT64_UP_RTZ_RESULT,
    V_CONV_FROM_UINT64_DOWN_RTZ_RESULT,
    V_CONV_FROM_UINT64_TIE_RTZ_RESULT,
    // Results of unsigned integer conversion operations: RTE
    // 16 bit values can only use width-conversions -> No rounding testing
    V_CONV_FROM_UINT32_UP_RTE_RESULT,
    V_CONV_FROM_UINT32_DOWN_RTE_RESULT,
    V_CONV_FROM_UINT32_TIE_RTE_RESULT,
    V_CONV_FROM_UINT64_UP_RTE_RESULT,
    V_CONV_FROM_UINT64_DOWN_RTE_RESULT,
    V_CONV_FROM_UINT64_TIE_RTE_RESULT,

    // Same as UINT but will only test with negative values
    // Results of signed integer conversion operations: RTZ
    V_CONV_FROM_INT32_UP_RTZ_RESULT,
    V_CONV_FROM_INT32_DOWN_RTZ_RESULT,
    V_CONV_FROM_INT32_TIE_RTZ_RESULT,
    V_CONV_FROM_INT64_UP_RTZ_RESULT,
    V_CONV_FROM_INT64_DOWN_RTZ_RESULT,
    V_CONV_FROM_INT64_TIE_RTZ_RESULT,
    // Results of signed integer conversion operations: RTE
    V_CONV_FROM_INT32_UP_RTE_RESULT,
    V_CONV_FROM_INT32_DOWN_RTE_RESULT,
    V_CONV_FROM_INT32_TIE_RTE_RESULT,
    V_CONV_FROM_INT64_UP_RTE_RESULT,
    V_CONV_FROM_INT64_DOWN_RTE_RESULT,
    V_CONV_FROM_INT64_TIE_RTE_RESULT,

    V_CONV_DENORM_SMALLER, // used e.g. when converting fp16 denorm to fp32
    V_CONV_DENORM_BIGGER,
};
279
// Enum containing all tested operations. Operations are defined in a generic way so that
// they can be used to generate tests operating on arguments with different values of
// the specified float type.
enum OperationId
{
    // spir-v unary operations
    OID_NEGATE = 0,
    OID_COMPOSITE,
    OID_COMPOSITE_INS,
    OID_COPY,
    OID_D_EXTRACT,
    OID_D_INSERT,
    OID_SHUFFLE,
    OID_TRANSPOSE,
    OID_CONV_FROM_UINT_TO_FP32,
    OID_CONV_FROM_UINT_TO_FP64,
    OID_CONV_FROM_INT_TO_FP32,
    OID_CONV_FROM_INT_TO_FP64,
    // No SCONST_CONV_FROM_UINT since it requires Kernel Capability and Vulkan does not expose it
    OID_CONV_FROM_FP16,
    OID_CONV_FROM_FP32,
    OID_CONV_FROM_FP64,
    // Narrowing conversions with a suffix naming the Round case being exercised.
    OID_SCONST_CONV_FROM_FP32_TO_FP16_UP,       // Round::UP case
    OID_SCONST_CONV_FROM_FP32_TO_FP16_DOWN,     // Round::DOWN case
    OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_UP,   // Round::TIE_UP case
    OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_DOWN, // Round::TIE_DOWN case
    OID_SCONST_CONV_FROM_FP64_TO_FP32_UP,
    OID_SCONST_CONV_FROM_FP64_TO_FP32_DOWN,
    OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_UP,
    OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_DOWN,
    OID_SCONST_CONV_FROM_FP64_TO_FP16_UP,
    OID_SCONST_CONV_FROM_FP64_TO_FP16_DOWN,
    OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_UP,
    OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_DOWN,
    OID_RETURN_VAL,

    // spir-v binary operations
    OID_ADD,
    OID_SUB,
    OID_MUL,
    OID_DIV,
    OID_REM,
    OID_MOD,
    OID_PHI,
    OID_SELECT,
    OID_DOT,
    OID_VEC_MUL_S,
    OID_VEC_MUL_M,
    OID_MAT_MUL_S,
    OID_MAT_MUL_V,
    OID_MAT_MUL_M,
    OID_OUT_PROD,
    OID_ORD_EQ,
    OID_UORD_EQ,
    OID_ORD_NEQ,
    OID_UORD_NEQ,
    OID_ORD_LS,
    OID_UORD_LS,
    OID_ORD_GT,
    OID_UORD_GT,
    OID_ORD_LE,
    OID_UORD_LE,
    OID_ORD_GE,
    OID_UORD_GE,

    // glsl unary operations
    OID_ROUND,
    OID_ROUND_EV,
    OID_TRUNC,
    OID_ABS,
    OID_SIGN,
    OID_FLOOR,
    OID_CEIL,
    OID_FRACT,
    OID_RADIANS,
    OID_DEGREES,
    OID_SIN,
    OID_COS,
    OID_TAN,
    OID_ASIN,
    OID_ACOS,
    OID_ATAN,
    OID_SINH,
    OID_COSH,
    OID_TANH,
    OID_ASINH,
    OID_ACOSH,
    OID_ATANH,
    OID_EXP,
    OID_LOG,
    OID_EXP2,
    OID_LOG2,
    OID_SQRT,
    OID_INV_SQRT,
    OID_MODF,
    OID_MODF_ST,
    OID_FREXP,
    OID_FREXP_ST,
    OID_LENGTH,
    OID_NORMALIZE,
    OID_REFLECT,
    OID_REFRACT,
    OID_MAT_DET,
    OID_MAT_INV,
    OID_PH_DENORM, // PackHalf2x16
    OID_UPH_DENORM,
    OID_PD_DENORM, // PackDouble2x32
    OID_UPD_DENORM_FLUSH,
    OID_UPD_DENORM_PRESERVE,

    // glsl binary operations
    OID_ATAN2,
    OID_POW,
    OID_MIX,
    OID_FMA,
    OID_MIN,
    OID_MAX,
    OID_CLAMP,
    OID_STEP,
    OID_SSTEP,
    OID_DIST,
    OID_CROSS,
    OID_FACE_FWD,
    OID_NMIN,
    OID_NMAX,
    OID_NCLAMP,

    // NOTE(review): presumably operations used by the RTE / RTZ rounding-mode tests - confirm against their users
    OID_ORTE_ROUND,
    OID_ORTZ_ROUND
};
410
// Structures storing data required to test DenormPreserve and DenormFlushToZero modes.
// Operations are separated into binary and unary lists because binary operations can be tested with
// two arguments and thus denorms can be tested in combination with value, denorm, inf and nan.
// Unary operations are only tested with denorms.

// Expected results of one binary operation.
// NOTE(review): the four result fields presumably hold the expected result when a denorm is
// combined with a regular value, a denorm, an infinity and a NaN respectively - confirm
// against the tables that instantiate this struct.
struct BinaryCase
{
    OperationId operationId; // operation under test
    ValueId opVarResult;
    ValueId opDenormResult;
    ValueId opInfResult;
    ValueId opNanResult;
};
// Pairs a single-argument operation with the value code of its expected result.
struct UnaryCase
{
    OperationId operationId; // operation under test
    ValueId result;          // code of the expected result value
};
428
// Returns a copy of 'str' in which every non-overlapping occurrence of 'from'
// has been replaced with 'to'.
//
// To keep spir-v code clean and easier to read, parts of it are processed with
// this function instead of StringTemplate; the main usage is the replacement of
// "float_" with "f16_", "f32_" or "f64_" depending on the test case.
//
// Text inserted by a replacement is never rescanned, so 'to' may contain 'from'.
// An empty 'from' matches nothing and 'str' is returned unchanged.
std::string replace(std::string str, const std::string &from, const std::string &to)
{
    // find("") succeeds at every position, so without this guard the loop
    // below would never terminate for an empty pattern.
    if (from.empty())
        return str;

    size_t matchPos = str.find(from);
    while (matchPos != std::string::npos)
    {
        str.replace(matchPos, from.length(), to);
        // Resume searching just past the text that was inserted.
        matchPos = str.find(from, matchPos + to.length());
    }
    return str;
}
444
// Helper exposing a union that overlays a float-typed member (fp) with an
// integer-typed member (ui); used for bit-level int type <-> float type conversion.
template <typename FloatT, typename UintT>
struct RawConvert
{
    union Value
    {
        FloatT fp; // value viewed as the float type
        UintT ui;  // value viewed as the integer type
    };
};
455
// Trait mapping a float type to the unsigned integer type that can store it.
// The primary template yields uint16_t; float and double are specialized to
// uint32_t and uint64_t respectively.
template <typename T>
struct GetCoresponding
{
    using uint_type = uint16_t;
};

template <>
struct GetCoresponding<float>
{
    using uint_type = uint32_t;
};

template <>
struct GetCoresponding<double>
{
    using uint_type = uint64_t;
};
472
473 // All values used for arguments and operation results are stored in single map.
474 // Each float type (fp16, fp32, fp64) has its own map that is used during
475 // test setup and during verification. TypeValuesBase is interface to that map.
476 class TypeValuesBase
477 {
478 public:
479 TypeValuesBase();
480 virtual ~TypeValuesBase() = default;
481
482 virtual BufferSp constructInputBuffer(const ValueId *twoArguments) const = 0;
483 virtual BufferSp constructOutputBuffer(ValueId result) const = 0;
484 virtual void fillInputData(const ValueId *twoArguments, vector<uint8_t> &bufferData, uint32_t &offset) const = 0;
485 };
486
TypeValuesBase()487 TypeValuesBase::TypeValuesBase()
488 {
489 }
490
491 typedef de::SharedPtr<TypeValuesBase> TypeValuesSP;
492
493 template <typename FLOAT_TYPE>
494 class TypeValues : public TypeValuesBase
495 {
496 public:
497 TypeValues();
498
499 BufferSp constructInputBuffer(const ValueId *twoArguments) const override;
500 BufferSp constructOutputBuffer(ValueId result) const override;
501 void fillInputData(const ValueId *twoArguments, vector<uint8_t> &bufferData, uint32_t &offset) const override;
502
503 FLOAT_TYPE getValue(ValueId id) const;
504
505 template <typename UINT_TYPE>
506 FLOAT_TYPE exactByteEquivalent(UINT_TYPE byteValue) const;
507
508 private:
509 typedef map<ValueId, FLOAT_TYPE> ValueMap;
510 ValueMap m_valueIdToVariableType;
511 };
512
513 template <typename FLOAT_TYPE>
constructInputBuffer(const ValueId * twoArguments) const514 BufferSp TypeValues<FLOAT_TYPE>::constructInputBuffer(const ValueId *twoArguments) const
515 {
516 std::vector<FLOAT_TYPE> inputData(2);
517 inputData[0] = m_valueIdToVariableType.at(twoArguments[0]);
518 inputData[1] = m_valueIdToVariableType.at(twoArguments[1]);
519 return BufferSp(new Buffer<FLOAT_TYPE>(inputData));
520 }
521
522 template <typename FLOAT_TYPE>
constructOutputBuffer(ValueId result) const523 BufferSp TypeValues<FLOAT_TYPE>::constructOutputBuffer(ValueId result) const
524 {
525 // note: we are not doing maping here, ValueId is directly saved in
526 // float type in order to be able to retireve it during verification
527
528 typedef typename GetCoresponding<FLOAT_TYPE>::uint_type uint_t;
529 uint_t value = static_cast<uint_t>(result);
530
531 // For FP16 we increase the buffer size to hold an unsigned integer, as
532 // we can be in the no 16bit_storage case.
533 const uint_t outputSize = sizeof(FLOAT_TYPE) == 2u ? 2u : 1u;
534 std::vector<FLOAT_TYPE> outputData(outputSize, exactByteEquivalent<uint_t>(value));
535 return BufferSp(new Buffer<FLOAT_TYPE>(outputData));
536 }
537
538 template <typename FLOAT_TYPE>
fillInputData(const ValueId * twoArguments,vector<uint8_t> & bufferData,uint32_t & offset) const539 void TypeValues<FLOAT_TYPE>::fillInputData(const ValueId *twoArguments, vector<uint8_t> &bufferData,
540 uint32_t &offset) const
541 {
542 uint32_t typeSize = sizeof(FLOAT_TYPE);
543
544 FLOAT_TYPE argA = getValue(twoArguments[0]);
545 deMemcpy(&bufferData[offset], &argA, typeSize);
546 offset += typeSize;
547
548 FLOAT_TYPE argB = getValue(twoArguments[1]);
549 deMemcpy(&bufferData[offset], &argB, typeSize);
550 offset += typeSize;
551 }
552
553 template <typename FLOAT_TYPE>
getValue(ValueId id) const554 FLOAT_TYPE TypeValues<FLOAT_TYPE>::getValue(ValueId id) const
555 {
556 return m_valueIdToVariableType.at(id);
557 }
558
559 template <typename FLOAT_TYPE>
560 template <typename UINT_TYPE>
exactByteEquivalent(UINT_TYPE byteValue) const561 FLOAT_TYPE TypeValues<FLOAT_TYPE>::exactByteEquivalent(UINT_TYPE byteValue) const
562 {
563 typename RawConvert<FLOAT_TYPE, UINT_TYPE>::Value value;
564 value.ui = byteValue;
565 return value.fp;
566 }
567
568 // For floating point conversions, rounding modes only matter when
569 // doing a narrowing conversion, i.e. from more mantissa bits
570 // to fewer.
571 //
572 // There are four rounding cases, depending on the value of the
573 // least significant mantissa bit that is preserved, and the
574 // mantissa bits that are eliminated:
575 //
576 // Least significant | Eliminated bit | Produces which
577 // retained bit | string | Rounding Case
578 // -------------------|--------------------|-----------------
579 // don't care | 0y, y is anything | DOWN: Round toward zero
580 // don't care | 1y, y is non-zero | UP: Round away from zero
581 // 0 | 1y, y is zero | TIE_DOWN: Round toward zero
582 // 1 | 1y, y is zero | TIE_UP: Round away from zero
// Rounding case exercised by a narrowing conversion test.
enum class Round
{
    DOWN     = 0, // eliminated bits are below the half-way point
    UP       = 1, // eliminated bits are above the half-way point
    TIE_DOWN = 2, // exactly half-way, least significant retained bit is 0
    TIE_UP   = 3  // exactly half-way, least significant retained bit is 1
};
590
591 template <typename FROM_FLOAT_TYPE, typename TO_FLOAT_TYPE>
592 struct conversionDetail
593 {
594 typedef typename FROM_FLOAT_TYPE::StorageType FromInt;
595 typedef typename TO_FLOAT_TYPE::StorageType ToInt;
596
597 // How many bits will be removed from the mantissa by the conversion?
598 static const int excessWidth = FROM_FLOAT_TYPE::MANTISSA_BITS - TO_FLOAT_TYPE::MANTISSA_BITS;
599
600 // 'tie' contains the bits for the "1y, y is 0" case in RoundCase table.
601 // All the positions in tie32 will be thrown away, but help determine
602 // the rounding direction.
603 static const FromInt tie = ((FromInt)1) << (excessWidth - 1);
604 static const FromInt down = tie - 1; // bits to trigger down case
605 static const FromInt up = tie + 1; // bits to trigger up case
606 static const FromInt tieDown = tie; // bits to trigger tie-down case
607 static const FromInt tieUp = (tie << 1) | tie; // bits to trigger tie-up case
608 static const int exampleSign = 1; // Could be -1
609 static const int exampleExponent = TO_FLOAT_TYPE::EXPONENT_BIAS;
610
611 // Not all platforms will support 16 or 64 bit values. We need to detect those cases
612 // and make the tests pass through since we cannot validate them.
hasExcessBitsvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail613 static bool hasExcessBits(void)
614 {
615 return 0 < excessWidth;
616 }
617
618 // Returns arbitrary but nontrivial bits for the mantissa of the conversion
619 // result. This has TO_FLOAT_TYPE::MANTISSA_BITS. The bottom bit must be
620 // zero so it can be filled in later.
exampleMSBBitsvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail621 static ToInt exampleMSBBits(void)
622 {
623 switch (int(TO_FLOAT_TYPE::MANTISSA_BITS))
624 {
625 case 10: // Float16
626 // The Mantissa has 10 explicitly represented bits, and 1 bit
627 // that is normally hidden, but required here.
628 // The upper 9 are arbitrary, and the bottom bit is 0, to be filled
629 // in later.
630 return static_cast<ToInt>((1 << 10) | 0x39a);
631 case 23: // Float32
632 // The Mantissa has 23 explicitly represented bits, and 1 bit
633 // that is normally hidden, but required here.
634 // The upper 22 are arbitrary, and the bottom bit is 0, to be filled
635 // in later.
636 return static_cast<ToInt>((1 << 23) | 0x3a5a5a);
637 }
638 DE_ASSERT(false && "Expected Float16 or Float32");
639 return 0;
640 }
641
inputMantissavkt::SpirVAssembly::__anon1f0d25030111::conversionDetail642 static FromInt inputMantissa(Round r)
643 {
644 const FromInt base = static_cast<FromInt>(exampleMSBBits()) << excessWidth;
645 switch (r)
646 {
647 case Round::DOWN:
648 return base | down;
649 case Round::UP:
650 return base | up;
651 case Round::TIE_DOWN:
652 return base | tieDown;
653 case Round::TIE_UP:
654 return base | tieUp;
655 }
656 DE_ASSERT(false);
657 return 0; // Unreachable
658 }
659
outputMantissavkt::SpirVAssembly::__anon1f0d25030111::conversionDetail660 static ToInt outputMantissa(FromInt mantissa, Round r)
661 {
662 const ToInt base = static_cast<ToInt>(mantissa >> excessWidth);
663 switch (r)
664 {
665 case Round::DOWN:
666 case Round::TIE_DOWN:
667 return base;
668 case Round::UP:
669 case Round::TIE_UP:
670 return static_cast<ToInt>(base + 1);
671 }
672 DE_ASSERT(false);
673 return 0; // Unreachable
674 }
675
676 // Returns the value for the sample input, for an intended rounding outcome.
fromvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail677 static FROM_FLOAT_TYPE from(Round r)
678 {
679 return FROM_FLOAT_TYPE::construct(exampleSign, exampleExponent, inputMantissa(r));
680 }
681
682 // Returns the value of from(r) in string form as a sequence of 32 bit words.
fromStrvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail683 static std::string fromStr(Round r)
684 {
685 const FromInt value = from(r).bits();
686 switch (sizeof(FromInt))
687 {
688 case 8:
689 // Return low word first, high word second
690 return to_string(value & 0xFFFFFFFFu) + " " + to_string(value >> 16 >> 16);
691 case 4:
692 return to_string(value);
693 }
694 DE_ASSERT(false);
695 return "";
696 }
697
698 // Return the float value expected for a RTZ conversion.
resultRTZvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail699 static TO_FLOAT_TYPE resultRTZ(Round r)
700 {
701 // Reconstruct the original input, then round toward zero.
702 const ToInt mantissa = outputMantissa(inputMantissa(r), Round::DOWN);
703 return TO_FLOAT_TYPE::construct(exampleSign, exampleExponent, mantissa);
704 }
705 // Return the bits for the float value expected for a RTZ conversion.
resultRTZBitsvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail706 static ToInt resultRTZBits(Round r)
707 {
708 return resultRTZ(r).bits();
709 }
710 // Return the float value expected for a RTE conversion.
resultRTEvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail711 static TO_FLOAT_TYPE resultRTE(Round r)
712 {
713 // Reconstruct the original input, then round as specified.
714 const ToInt mantissa = outputMantissa(inputMantissa(r), r);
715 return TO_FLOAT_TYPE::construct(exampleSign, exampleExponent, mantissa);
716 }
717 // Return the bits for the float value expected for a RTE conversion.
resultRTEBitsvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail718 static ToInt resultRTEBits(Round r)
719 {
720 return resultRTE(r).bits();
721 }
722 };
723
724 template <>
TypeValues()725 TypeValues<deFloat16>::TypeValues() : TypeValuesBase()
726 {
727 // NOTE: when updating entries in m_valueIdToVariableType make sure to
728 // update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
729 ValueMap &vm = m_valueIdToVariableType;
730 vm[V_UNUSED] = deFloat32To16(0.0f);
731 vm[V_MINUS_INF] = 0xfc00;
732 vm[V_MINUS_ONE] = deFloat32To16(-1.0f);
733 vm[V_MINUS_ZERO] = 0x8000;
734 vm[V_ZERO] = 0x0000;
735 vm[V_HALF] = deFloat32To16(0.5f);
736 vm[V_ONE] = deFloat32To16(1.0f);
737 vm[V_INF] = 0x7c00;
738 vm[V_DENORM] = 0x03f0; // this value should be the same as the result of denormBase - epsilon
739 vm[V_NAN] = 0x7cf0;
740
741 vm[V_PI_DIV_2] = deFloat32To16((float)M_PI_2);
742 vm[V_DENORM_TIMES_TWO] = 0x07e0;
743 vm[V_DEGREES_DENORM] = 0x1b0c;
744
745 vm[V_ADD_ARG_A] = 0x3c03;
746 vm[V_ADD_ARG_B] = vm[V_ONE];
747 vm[V_SUB_ARG_A] = vm[V_ADD_ARG_A];
748 vm[V_SUB_ARG_B] = 0x4203;
749 vm[V_MUL_ARG_A] = vm[V_ADD_ARG_A];
750 vm[V_MUL_ARG_B] = 0x1900;
751 vm[V_DOT_ARG_A] = vm[V_ADD_ARG_A];
752 vm[V_DOT_ARG_B] = vm[V_MUL_ARG_B];
753
754 // Float16 is not the source type for a narrowing conversion, so these
755 // entries are unused.
756 vm[V_CONV_FROM_FP32_TO_FP16_UP_ARG] = vm[V_UNUSED];
757 vm[V_CONV_FROM_FP32_TO_FP16_DOWN_ARG] = vm[V_UNUSED];
758 vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG] = vm[V_UNUSED];
759 vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG] = vm[V_UNUSED];
760 vm[V_CONV_FROM_FP64_TO_FP16_UP_ARG] = vm[V_UNUSED];
761 vm[V_CONV_FROM_FP64_TO_FP16_DOWN_ARG] = vm[V_UNUSED];
762 vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG] = vm[V_UNUSED];
763 vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG] = vm[V_UNUSED];
764 vm[V_CONV_FROM_FP64_TO_FP32_UP_ARG] = vm[V_UNUSED];
765 vm[V_CONV_FROM_FP64_TO_FP32_DOWN_ARG] = vm[V_UNUSED];
766 vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG] = vm[V_UNUSED];
767 vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG] = vm[V_UNUSED];
768
769 // 16 values can only be used for width-conversions
770 vm[V_CONV_FROM_UINT_TO_FP32_UP_ARG] = vm[V_UNUSED];
771 vm[V_CONV_FROM_UINT_TO_FP32_DOWN_ARG] = vm[V_UNUSED];
772 vm[V_CONV_FROM_UINT_TO_FP32_TIE_ARG] = vm[V_UNUSED];
773 vm[V_CONV_FROM_UINT_TO_FP64_UP_ARG] = vm[V_UNUSED];
774 vm[V_CONV_FROM_UINT_TO_FP64_DOWN_ARG] = vm[V_UNUSED];
775 vm[V_CONV_FROM_UINT_TO_FP64_TIE_ARG] = vm[V_UNUSED];
776
777 vm[V_CONV_FROM_INT_TO_FP32_UP_ARG] = vm[V_UNUSED];
778 vm[V_CONV_FROM_INT_TO_FP32_DOWN_ARG] = vm[V_UNUSED];
779 vm[V_CONV_FROM_INT_TO_FP32_TIE_ARG] = vm[V_UNUSED];
780 vm[V_CONV_FROM_INT_TO_FP64_UP_ARG] = vm[V_UNUSED];
781 vm[V_CONV_FROM_INT_TO_FP64_DOWN_ARG] = vm[V_UNUSED];
782 vm[V_CONV_FROM_INT_TO_FP64_TIE_ARG] = vm[V_UNUSED];
783
784 vm[V_ADD_RTZ_RESULT] = 0x4001; // deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rtz)
785 vm[V_SUB_RTZ_RESULT] = 0xc001; // deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rtz)
786 vm[V_MUL_RTZ_RESULT] = 0x1903; // deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rtz)
787 vm[V_DOT_RTZ_RESULT] = 0x1d03;
788
789 vm[V_ADD_RTE_RESULT] = 0x4002; // deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rte)
790 vm[V_SUB_RTE_RESULT] = 0xc002; // deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rte)
791 vm[V_MUL_RTE_RESULT] = 0x1904; // deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rte)
792 vm[V_DOT_RTE_RESULT] = 0x1d04;
793
794 typedef conversionDetail<Float32, Float16> from32;
795 typedef conversionDetail<Float64, Float16> from64;
796 vm[V_CONV_FROM_FP32_TO_FP16_UP_RTZ_RESULT] =
797 from32::hasExcessBits() ? from32::resultRTZBits(Round::UP) : vm[V_UNUSED];
798 vm[V_CONV_FROM_FP32_TO_FP16_DOWN_RTZ_RESULT] =
799 from32::hasExcessBits() ? from32::resultRTZBits(Round::DOWN) : vm[V_UNUSED];
800 vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTZ_RESULT] =
801 from32::hasExcessBits() ? from32::resultRTZBits(Round::TIE_UP) : vm[V_UNUSED];
802 vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTZ_RESULT] =
803 from32::hasExcessBits() ? from32::resultRTZBits(Round::TIE_DOWN) : vm[V_UNUSED];
804 vm[V_CONV_FROM_FP64_TO_FP16_UP_RTZ_RESULT] =
805 from64::hasExcessBits() ? from64::resultRTZBits(Round::UP) : vm[V_UNUSED];
806 vm[V_CONV_FROM_FP64_TO_FP16_DOWN_RTZ_RESULT] =
807 from64::hasExcessBits() ? from64::resultRTZBits(Round::DOWN) : vm[V_UNUSED];
808 vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTZ_RESULT] =
809 from64::hasExcessBits() ? from64::resultRTZBits(Round::TIE_UP) : vm[V_UNUSED];
810 vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTZ_RESULT] =
811 from64::hasExcessBits() ? from64::resultRTZBits(Round::TIE_DOWN) : vm[V_UNUSED];
812 vm[V_CONV_FROM_FP64_TO_FP32_UP_RTZ_RESULT] = vm[V_UNUSED];
813 vm[V_CONV_FROM_FP64_TO_FP32_DOWN_RTZ_RESULT] = vm[V_UNUSED];
814 vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTZ_RESULT] = vm[V_UNUSED];
815 vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTZ_RESULT] = vm[V_UNUSED];
816
817 vm[V_CONV_FROM_FP32_TO_FP16_UP_RTE_RESULT] =
818 from32::hasExcessBits() ? from32::resultRTEBits(Round::UP) : vm[V_UNUSED];
819 vm[V_CONV_FROM_FP32_TO_FP16_DOWN_RTE_RESULT] =
820 from32::hasExcessBits() ? from32::resultRTEBits(Round::DOWN) : vm[V_UNUSED];
821 vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTE_RESULT] =
822 from32::hasExcessBits() ? from32::resultRTEBits(Round::TIE_UP) : vm[V_UNUSED];
823 vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTE_RESULT] =
824 from32::hasExcessBits() ? from32::resultRTEBits(Round::TIE_DOWN) : vm[V_UNUSED];
825 vm[V_CONV_FROM_FP64_TO_FP16_UP_RTE_RESULT] =
826 from64::hasExcessBits() ? from64::resultRTEBits(Round::UP) : vm[V_UNUSED];
827 vm[V_CONV_FROM_FP64_TO_FP16_DOWN_RTE_RESULT] =
828 from64::hasExcessBits() ? from64::resultRTEBits(Round::DOWN) : vm[V_UNUSED];
829 vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTE_RESULT] =
830 from64::hasExcessBits() ? from64::resultRTEBits(Round::TIE_UP) : vm[V_UNUSED];
831 vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTE_RESULT] =
832 from64::hasExcessBits() ? from64::resultRTEBits(Round::TIE_DOWN) : vm[V_UNUSED];
833 vm[V_CONV_FROM_FP64_TO_FP32_UP_RTE_RESULT] = vm[V_UNUSED];
834 vm[V_CONV_FROM_FP64_TO_FP32_DOWN_RTE_RESULT] = vm[V_UNUSED];
835 vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTE_RESULT] = vm[V_UNUSED];
836 vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTE_RESULT] = vm[V_UNUSED];
837
838 // 16 values can only be used for width-conversions
839 vm[V_CONV_FROM_UINT32_UP_RTZ_RESULT] = vm[V_UNUSED];
840 vm[V_CONV_FROM_UINT32_DOWN_RTZ_RESULT] = vm[V_UNUSED];
841 vm[V_CONV_FROM_UINT32_TIE_RTZ_RESULT] = vm[V_UNUSED];
842 vm[V_CONV_FROM_UINT64_UP_RTZ_RESULT] = vm[V_UNUSED];
843 vm[V_CONV_FROM_UINT64_DOWN_RTZ_RESULT] = vm[V_UNUSED];
844 vm[V_CONV_FROM_UINT64_TIE_RTZ_RESULT] = vm[V_UNUSED];
845
846 vm[V_CONV_FROM_UINT32_UP_RTE_RESULT] = vm[V_UNUSED];
847 vm[V_CONV_FROM_UINT32_DOWN_RTE_RESULT] = vm[V_UNUSED];
848 vm[V_CONV_FROM_UINT32_TIE_RTE_RESULT] = vm[V_UNUSED];
849 vm[V_CONV_FROM_UINT64_UP_RTE_RESULT] = vm[V_UNUSED];
850 vm[V_CONV_FROM_UINT64_DOWN_RTE_RESULT] = vm[V_UNUSED];
851 vm[V_CONV_FROM_UINT64_TIE_RTE_RESULT] = vm[V_UNUSED];
852
853 vm[V_CONV_FROM_INT32_UP_RTZ_RESULT] = vm[V_UNUSED];
854 vm[V_CONV_FROM_INT32_DOWN_RTZ_RESULT] = vm[V_UNUSED];
855 vm[V_CONV_FROM_INT32_TIE_RTZ_RESULT] = vm[V_UNUSED];
856 vm[V_CONV_FROM_INT64_UP_RTZ_RESULT] = vm[V_UNUSED];
857 vm[V_CONV_FROM_INT64_DOWN_RTZ_RESULT] = vm[V_UNUSED];
858 vm[V_CONV_FROM_INT64_TIE_RTZ_RESULT] = vm[V_UNUSED];
859
860 vm[V_CONV_FROM_INT32_UP_RTE_RESULT] = vm[V_UNUSED];
861 vm[V_CONV_FROM_INT32_DOWN_RTE_RESULT] = vm[V_UNUSED];
862 vm[V_CONV_FROM_INT32_TIE_RTE_RESULT] = vm[V_UNUSED];
863 vm[V_CONV_FROM_INT64_UP_RTE_RESULT] = vm[V_UNUSED];
864 vm[V_CONV_FROM_INT64_DOWN_RTE_RESULT] = vm[V_UNUSED];
865 vm[V_CONV_FROM_INT64_TIE_RTE_RESULT] = vm[V_UNUSED];
866
867 // there is no precision to store fp32 denorm nor fp64 denorm
868 vm[V_CONV_DENORM_SMALLER] = vm[V_ZERO];
869 vm[V_CONV_DENORM_BIGGER] = vm[V_ZERO];
870 }
871
872 template <>
TypeValues()873 TypeValues<float>::TypeValues() : TypeValuesBase()
874 {
875 // NOTE: when updating entries in m_valueIdToVariableType make sure to
876 // update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
877 ValueMap &vm = m_valueIdToVariableType;
878 vm[V_UNUSED] = 0.0f;
879 vm[V_MINUS_INF] = -std::numeric_limits<float>::infinity();
880 vm[V_MINUS_ONE] = -1.0f;
881 vm[V_MINUS_ZERO] = -0.0f;
882 vm[V_ZERO] = 0.0f;
883 vm[V_HALF] = 0.5f;
884 vm[V_ONE] = 1.0f;
885 vm[V_INF] = std::numeric_limits<float>::infinity();
886 vm[V_DENORM] = static_cast<float>(1.413e-42); // 0x000003f0
887 vm[V_NAN] = std::numeric_limits<float>::quiet_NaN();
888
889 vm[V_PI_DIV_2] = static_cast<float>(M_PI_2);
890 vm[V_DENORM_TIMES_TWO] = vm[V_DENORM] + vm[V_DENORM];
891 vm[V_DEGREES_DENORM] = deFloatDegrees(vm[V_DENORM]);
892
893 float e = std::numeric_limits<float>::epsilon();
894 vm[V_ADD_ARG_A] = 1.0f + 3 * e;
895 vm[V_ADD_ARG_B] = 1.0f;
896 vm[V_SUB_ARG_A] = vm[V_ADD_ARG_A];
897 vm[V_SUB_ARG_B] = 3.0f + 6 * e;
898 vm[V_MUL_ARG_A] = vm[V_ADD_ARG_A];
899 vm[V_MUL_ARG_B] = 5 * e;
900 vm[V_DOT_ARG_A] = vm[V_ADD_ARG_A];
901 vm[V_DOT_ARG_B] = 5 * e;
902
903 // Float32 is the source of a narrowing conversionsto Float16.
904 typedef conversionDetail<Float32, Float16> from32;
905 vm[V_CONV_FROM_FP32_TO_FP16_UP_ARG] = from32::hasExcessBits() ? from32::from(Round::UP).asFloat() : vm[V_UNUSED];
906 vm[V_CONV_FROM_FP32_TO_FP16_DOWN_ARG] =
907 from32::hasExcessBits() ? from32::from(Round::DOWN).asFloat() : vm[V_UNUSED];
908 vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG] =
909 from32::hasExcessBits() ? from32::from(Round::TIE_UP).asFloat() : vm[V_UNUSED];
910 vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG] =
911 from32::hasExcessBits() ? from32::from(Round::TIE_DOWN).asFloat() : vm[V_UNUSED];
912 vm[V_CONV_FROM_FP64_TO_FP16_UP_ARG] = vm[V_UNUSED];
913 vm[V_CONV_FROM_FP64_TO_FP16_DOWN_ARG] = vm[V_UNUSED];
914 vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG] = vm[V_UNUSED];
915 vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG] = vm[V_UNUSED];
916 vm[V_CONV_FROM_FP64_TO_FP32_UP_ARG] = vm[V_UNUSED];
917 vm[V_CONV_FROM_FP64_TO_FP32_DOWN_ARG] = vm[V_UNUSED];
918 vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG] = vm[V_UNUSED];
919 vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG] = vm[V_UNUSED];
920
921 vm[V_CONV_FROM_UINT_TO_FP32_UP_ARG] = exactByteEquivalent(0x02000003); // 33554435
922 vm[V_CONV_FROM_UINT_TO_FP32_DOWN_ARG] = exactByteEquivalent(0x02000001); // 33554433
923 vm[V_CONV_FROM_UINT_TO_FP32_TIE_ARG] = exactByteEquivalent(0x02000002); // 33554434
924 vm[V_CONV_FROM_UINT_TO_FP64_UP_ARG] = vm[V_UNUSED];
925 vm[V_CONV_FROM_UINT_TO_FP64_DOWN_ARG] = vm[V_UNUSED];
926 vm[V_CONV_FROM_UINT_TO_FP64_TIE_ARG] = vm[V_UNUSED];
927
928 vm[V_CONV_FROM_INT_TO_FP32_UP_ARG] = exactByteEquivalent(0xfdfffffd); // -33554435
929 vm[V_CONV_FROM_INT_TO_FP32_DOWN_ARG] = exactByteEquivalent(0xfdffffff); // -33554433
930 vm[V_CONV_FROM_INT_TO_FP32_TIE_ARG] = exactByteEquivalent(0xfdfffffe); // -33554434
931 vm[V_CONV_FROM_INT_TO_FP64_UP_ARG] = vm[V_UNUSED];
932 vm[V_CONV_FROM_INT_TO_FP64_DOWN_ARG] = vm[V_UNUSED];
933 vm[V_CONV_FROM_INT_TO_FP64_TIE_ARG] = vm[V_UNUSED];
934
935 int prevRound = fegetround();
936 fesetround(FE_TOWARDZERO);
937 vm[V_ADD_RTZ_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
938 vm[V_SUB_RTZ_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
939 vm[V_MUL_RTZ_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
940 vm[V_DOT_RTZ_RESULT] = vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
941
942 fesetround(FE_TONEAREST);
943 vm[V_ADD_RTE_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
944 vm[V_SUB_RTE_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
945 vm[V_MUL_RTE_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
946 vm[V_DOT_RTE_RESULT] = vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
947 fesetround(prevRound);
948
949 typedef conversionDetail<Float64, Float32> from64;
950 vm[V_CONV_FROM_FP32_TO_FP16_UP_RTZ_RESULT] = vm[V_UNUSED];
951 vm[V_CONV_FROM_FP32_TO_FP16_DOWN_RTZ_RESULT] = vm[V_UNUSED];
952 vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTZ_RESULT] = vm[V_UNUSED];
953 vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTZ_RESULT] = vm[V_UNUSED];
954 vm[V_CONV_FROM_FP64_TO_FP16_UP_RTZ_RESULT] = vm[V_UNUSED];
955 vm[V_CONV_FROM_FP64_TO_FP16_DOWN_RTZ_RESULT] = vm[V_UNUSED];
956 vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTZ_RESULT] = vm[V_UNUSED];
957 vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTZ_RESULT] = vm[V_UNUSED];
958 vm[V_CONV_FROM_FP64_TO_FP32_UP_RTZ_RESULT] =
959 from64::hasExcessBits() ? from64::resultRTZ(Round::UP).asFloat() : vm[V_UNUSED];
960 vm[V_CONV_FROM_FP64_TO_FP32_DOWN_RTZ_RESULT] =
961 from64::hasExcessBits() ? from64::resultRTZ(Round::DOWN).asFloat() : vm[V_UNUSED];
962 vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTZ_RESULT] =
963 from64::hasExcessBits() ? from64::resultRTZ(Round::TIE_UP).asFloat() : vm[V_UNUSED];
964 vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTZ_RESULT] =
965 from64::hasExcessBits() ? from64::resultRTZ(Round::TIE_DOWN).asFloat() : vm[V_UNUSED];
966
967 vm[V_CONV_FROM_FP32_TO_FP16_UP_RTE_RESULT] = vm[V_UNUSED];
968 vm[V_CONV_FROM_FP32_TO_FP16_DOWN_RTE_RESULT] = vm[V_UNUSED];
969 vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTE_RESULT] = vm[V_UNUSED];
970 vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTE_RESULT] = vm[V_UNUSED];
971 vm[V_CONV_FROM_FP64_TO_FP16_UP_RTE_RESULT] = vm[V_UNUSED];
972 vm[V_CONV_FROM_FP64_TO_FP16_DOWN_RTE_RESULT] = vm[V_UNUSED];
973 vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTE_RESULT] = vm[V_UNUSED];
974 vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTE_RESULT] = vm[V_UNUSED];
975 vm[V_CONV_FROM_FP64_TO_FP32_UP_RTE_RESULT] =
976 from64::hasExcessBits() ? from64::resultRTE(Round::UP).asFloat() : vm[V_UNUSED];
977 vm[V_CONV_FROM_FP64_TO_FP32_DOWN_RTE_RESULT] =
978 from64::hasExcessBits() ? from64::resultRTE(Round::DOWN).asFloat() : vm[V_UNUSED];
979 vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTE_RESULT] =
980 from64::hasExcessBits() ? from64::resultRTE(Round::TIE_UP).asFloat() : vm[V_UNUSED];
981 vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTE_RESULT] =
982 from64::hasExcessBits() ? from64::resultRTE(Round::TIE_DOWN).asFloat() : vm[V_UNUSED];
983
984 vm[V_CONV_FROM_UINT32_UP_RTZ_RESULT] = exactByteEquivalent(0x4c000000); // 33554432.0
985 vm[V_CONV_FROM_UINT32_DOWN_RTZ_RESULT] = exactByteEquivalent(0x4c000000); // 33554432.0
986 vm[V_CONV_FROM_UINT32_TIE_RTZ_RESULT] = exactByteEquivalent(0x4c000000); // 33554432.0
987 vm[V_CONV_FROM_UINT64_UP_RTZ_RESULT] = exactByteEquivalent(0x4c000000); // 33554432.0
988 vm[V_CONV_FROM_UINT64_DOWN_RTZ_RESULT] = exactByteEquivalent(0x4c000000); // 33554432.0
989 vm[V_CONV_FROM_UINT64_TIE_RTZ_RESULT] = exactByteEquivalent(0x4c000000); // 33554432.0
990
991 vm[V_CONV_FROM_UINT32_UP_RTE_RESULT] = exactByteEquivalent(0x4c000001); // 33554434.0
992 vm[V_CONV_FROM_UINT32_DOWN_RTE_RESULT] = exactByteEquivalent(0x4c000000); // 33554432.0
993 vm[V_CONV_FROM_UINT32_TIE_RTE_RESULT] = exactByteEquivalent(0x4c000000); // 33554432.0
994 vm[V_CONV_FROM_UINT64_UP_RTE_RESULT] = exactByteEquivalent(0x4c000001); // 33554434.0
995 vm[V_CONV_FROM_UINT64_DOWN_RTE_RESULT] = exactByteEquivalent(0x4c000000); // 33554432.0
996 vm[V_CONV_FROM_UINT64_TIE_RTE_RESULT] = exactByteEquivalent(0x4c000000); // 33554432.0
997
998 vm[V_CONV_FROM_INT32_UP_RTZ_RESULT] = exactByteEquivalent(0xcc000000); // -33554432.0
999 vm[V_CONV_FROM_INT32_DOWN_RTZ_RESULT] = exactByteEquivalent(0xcc000000); // -33554432.0
1000 vm[V_CONV_FROM_INT32_TIE_RTZ_RESULT] = exactByteEquivalent(0xcc000000); // -33554432.0
1001 vm[V_CONV_FROM_INT64_UP_RTZ_RESULT] = exactByteEquivalent(0xcc000000); // -33554432.0
1002 vm[V_CONV_FROM_INT64_DOWN_RTZ_RESULT] = exactByteEquivalent(0xcc000000); // -33554432.0
1003 vm[V_CONV_FROM_INT64_TIE_RTZ_RESULT] = exactByteEquivalent(0xcc000000); // -33554432.0
1004
1005 vm[V_CONV_FROM_INT32_UP_RTE_RESULT] = exactByteEquivalent(0xcc000001); // -33554434.0
1006 vm[V_CONV_FROM_INT32_DOWN_RTE_RESULT] = exactByteEquivalent(0xcc000000); // -33554432.0
1007 vm[V_CONV_FROM_INT32_TIE_RTE_RESULT] = exactByteEquivalent(0xcc000000); // -33554432.0
1008 vm[V_CONV_FROM_INT64_UP_RTE_RESULT] = exactByteEquivalent(0xcc000001); // -33554434.0
1009 vm[V_CONV_FROM_INT64_DOWN_RTE_RESULT] = exactByteEquivalent(0xcc000000); // -33554432.0
1010 vm[V_CONV_FROM_INT64_TIE_RTE_RESULT] = exactByteEquivalent(0xcc000000); // -33554432.0
1011
1012 // there is no precision to store fp64 denorm
1013 vm[V_CONV_DENORM_SMALLER] = exactByteEquivalent<uint32_t>(0x387c0000); // fp16 denorm
1014 vm[V_CONV_DENORM_BIGGER] = vm[V_ZERO];
1015 }
1016
1017 template <>
TypeValues()1018 TypeValues<double>::TypeValues() : TypeValuesBase()
1019 {
1020 // NOTE: when updating entries in m_valueIdToVariableType make sure to
1021 // update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
1022 ValueMap &vm = m_valueIdToVariableType;
1023 vm[V_UNUSED] = 0.0;
1024 vm[V_MINUS_INF] = -std::numeric_limits<double>::infinity();
1025 vm[V_MINUS_ONE] = -1.0;
1026 vm[V_MINUS_ZERO] = -0.0;
1027 vm[V_ZERO] = 0.0;
1028 vm[V_HALF] = 0.5;
1029 vm[V_ONE] = 1.0;
1030 vm[V_INF] = std::numeric_limits<double>::infinity();
1031 vm[V_DENORM] = 4.98e-321; // 0x00000000000003F0
1032 vm[V_NAN] = std::numeric_limits<double>::quiet_NaN();
1033
1034 vm[V_PI_DIV_2] = M_PI_2;
1035 vm[V_DENORM_TIMES_TWO] = vm[V_DENORM] + vm[V_DENORM];
1036 vm[V_DEGREES_DENORM] = vm[V_UNUSED];
1037
1038 double e = std::numeric_limits<double>::epsilon();
1039 vm[V_ADD_ARG_A] = 1.0 + 3 * e;
1040 vm[V_ADD_ARG_B] = 1.0;
1041 vm[V_SUB_ARG_A] = vm[V_ADD_ARG_A];
1042 vm[V_SUB_ARG_B] = 3.0 + 6 * e;
1043 vm[V_MUL_ARG_A] = vm[V_ADD_ARG_A];
1044 vm[V_MUL_ARG_B] = 5 * e;
1045 vm[V_DOT_ARG_A] = vm[V_ADD_ARG_A];
1046 vm[V_DOT_ARG_B] = 5 * e;
1047
1048 // Float64 is the source of narrowing conversions to Float32 and Float16.
1049 typedef conversionDetail<Float64, Float16> to16;
1050 typedef conversionDetail<Float64, Float32> to32;
1051 vm[V_CONV_FROM_FP32_TO_FP16_UP_ARG] = vm[V_UNUSED];
1052 vm[V_CONV_FROM_FP32_TO_FP16_DOWN_ARG] = vm[V_UNUSED];
1053 vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG] = vm[V_UNUSED];
1054 vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG] = vm[V_UNUSED];
1055 vm[V_CONV_FROM_FP64_TO_FP16_UP_ARG] = to16::hasExcessBits() ? to16::from(Round::UP).asDouble() : vm[V_UNUSED];
1056 vm[V_CONV_FROM_FP64_TO_FP16_DOWN_ARG] = to16::hasExcessBits() ? to16::from(Round::DOWN).asDouble() : vm[V_UNUSED];
1057 vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG] =
1058 to16::hasExcessBits() ? to16::from(Round::TIE_UP).asDouble() : vm[V_UNUSED];
1059 vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG] =
1060 to16::hasExcessBits() ? to16::from(Round::TIE_DOWN).asDouble() : vm[V_UNUSED];
1061 vm[V_CONV_FROM_FP64_TO_FP32_UP_ARG] = to32::hasExcessBits() ? to32::from(Round::UP).asDouble() : vm[V_UNUSED];
1062 vm[V_CONV_FROM_FP64_TO_FP32_DOWN_ARG] = to32::hasExcessBits() ? to32::from(Round::DOWN).asDouble() : vm[V_UNUSED];
1063 vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG] =
1064 to32::hasExcessBits() ? to32::from(Round::TIE_UP).asDouble() : vm[V_UNUSED];
1065 vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG] =
1066 to32::hasExcessBits() ? to32::from(Round::TIE_DOWN).asDouble() : vm[V_UNUSED];
1067
1068 vm[V_CONV_FROM_UINT_TO_FP32_UP_ARG] = exactByteEquivalent(static_cast<uint64_t>(0x0000000002000003)); // 33554435
1069 vm[V_CONV_FROM_UINT_TO_FP32_DOWN_ARG] = exactByteEquivalent(static_cast<uint64_t>(0x0000000002000001)); // 33554433
1070 vm[V_CONV_FROM_UINT_TO_FP32_TIE_ARG] = exactByteEquivalent(static_cast<uint64_t>(0x0000000002000002)); // 33554434
1071 vm[V_CONV_FROM_UINT_TO_FP64_UP_ARG] =
1072 exactByteEquivalent(static_cast<uint64_t>(0x0040000000000003)); // 18014398509481987
1073 vm[V_CONV_FROM_UINT_TO_FP64_DOWN_ARG] =
1074 exactByteEquivalent(static_cast<uint64_t>(0x0040000000000001)); // 18014398509481985
1075 vm[V_CONV_FROM_UINT_TO_FP64_TIE_ARG] =
1076 exactByteEquivalent(static_cast<uint64_t>(0x0040000000000002)); // 18014398509481986
1077
1078 vm[V_CONV_FROM_INT_TO_FP32_UP_ARG] = exactByteEquivalent(static_cast<uint64_t>(0xfffffffffdfffffd)); // -33554435
1079 vm[V_CONV_FROM_INT_TO_FP32_DOWN_ARG] = exactByteEquivalent(static_cast<uint64_t>(0xfffffffffdffffff)); // -33554433
1080 vm[V_CONV_FROM_INT_TO_FP32_TIE_ARG] = exactByteEquivalent(static_cast<uint64_t>(0xfffffffffdfffffe)); // -33554434
1081 vm[V_CONV_FROM_INT_TO_FP64_UP_ARG] =
1082 exactByteEquivalent(static_cast<uint64_t>(0xffbffffffffffffd)); // -18014398509481987
1083 vm[V_CONV_FROM_INT_TO_FP64_DOWN_ARG] =
1084 exactByteEquivalent(static_cast<uint64_t>(0xffbfffffffffffff)); // -18014398509481985
1085 vm[V_CONV_FROM_INT_TO_FP64_TIE_ARG] =
1086 exactByteEquivalent(static_cast<uint64_t>(0xffbffffffffffffe)); // -18014398509481986
1087
1088 int prevRound = fegetround();
1089 fesetround(FE_TOWARDZERO);
1090 vm[V_ADD_RTZ_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
1091 vm[V_SUB_RTZ_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
1092 vm[V_MUL_RTZ_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
1093 vm[V_DOT_RTZ_RESULT] = vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
1094
1095 fesetround(FE_TONEAREST);
1096 vm[V_ADD_RTE_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
1097 vm[V_SUB_RTE_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
1098 vm[V_MUL_RTE_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
1099 vm[V_DOT_RTE_RESULT] = vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
1100 fesetround(prevRound);
1101
1102 // Float64 is not the destination of any narrowing conversions.
1103 vm[V_CONV_FROM_FP32_TO_FP16_UP_RTZ_RESULT] = vm[V_UNUSED];
1104 vm[V_CONV_FROM_FP32_TO_FP16_DOWN_RTZ_RESULT] = vm[V_UNUSED];
1105 vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTZ_RESULT] = vm[V_UNUSED];
1106 vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTZ_RESULT] = vm[V_UNUSED];
1107 vm[V_CONV_FROM_FP64_TO_FP16_UP_RTZ_RESULT] = vm[V_UNUSED];
1108 vm[V_CONV_FROM_FP64_TO_FP16_DOWN_RTZ_RESULT] = vm[V_UNUSED];
1109 vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTZ_RESULT] = vm[V_UNUSED];
1110 vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTZ_RESULT] = vm[V_UNUSED];
1111 vm[V_CONV_FROM_FP64_TO_FP32_UP_RTZ_RESULT] = vm[V_UNUSED];
1112 vm[V_CONV_FROM_FP64_TO_FP32_DOWN_RTZ_RESULT] = vm[V_UNUSED];
1113 vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTZ_RESULT] = vm[V_UNUSED];
1114 vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTZ_RESULT] = vm[V_UNUSED];
1115
1116 vm[V_CONV_FROM_FP32_TO_FP16_UP_RTE_RESULT] = vm[V_UNUSED];
1117 vm[V_CONV_FROM_FP32_TO_FP16_DOWN_RTE_RESULT] = vm[V_UNUSED];
1118 vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTE_RESULT] = vm[V_UNUSED];
1119 vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTE_RESULT] = vm[V_UNUSED];
1120 vm[V_CONV_FROM_FP64_TO_FP16_UP_RTE_RESULT] = vm[V_UNUSED];
1121 vm[V_CONV_FROM_FP64_TO_FP16_DOWN_RTE_RESULT] = vm[V_UNUSED];
1122 vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTE_RESULT] = vm[V_UNUSED];
1123 vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTE_RESULT] = vm[V_UNUSED];
1124 vm[V_CONV_FROM_FP64_TO_FP32_UP_RTE_RESULT] = vm[V_UNUSED];
1125 vm[V_CONV_FROM_FP64_TO_FP32_DOWN_RTE_RESULT] = vm[V_UNUSED];
1126 vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTE_RESULT] = vm[V_UNUSED];
1127 vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTE_RESULT] = vm[V_UNUSED];
1128
1129 vm[V_CONV_FROM_UINT32_UP_RTZ_RESULT] = vm[V_UNUSED];
1130 vm[V_CONV_FROM_UINT32_DOWN_RTZ_RESULT] = vm[V_UNUSED];
1131 vm[V_CONV_FROM_UINT32_TIE_RTZ_RESULT] = vm[V_UNUSED];
1132 vm[V_CONV_FROM_UINT64_UP_RTZ_RESULT] =
1133 exactByteEquivalent(static_cast<uint64_t>(0x4350000000000000)); // 18014398509481984.0
1134 vm[V_CONV_FROM_UINT64_DOWN_RTZ_RESULT] =
1135 exactByteEquivalent(static_cast<uint64_t>(0x4350000000000000)); // 18014398509481984.0
1136 vm[V_CONV_FROM_UINT64_TIE_RTZ_RESULT] =
1137 exactByteEquivalent(static_cast<uint64_t>(0x4350000000000000)); // 18014398509481984.0
1138
1139 vm[V_CONV_FROM_UINT32_UP_RTE_RESULT] = vm[V_UNUSED];
1140 vm[V_CONV_FROM_UINT32_DOWN_RTE_RESULT] = vm[V_UNUSED];
1141 vm[V_CONV_FROM_UINT32_TIE_RTE_RESULT] = vm[V_UNUSED];
1142 vm[V_CONV_FROM_UINT64_UP_RTE_RESULT] =
1143 exactByteEquivalent(static_cast<uint64_t>(0x4350000000000001)); // 18014398509481988.0
1144 vm[V_CONV_FROM_UINT64_DOWN_RTE_RESULT] =
1145 exactByteEquivalent(static_cast<uint64_t>(0x4350000000000000)); // 18014398509481984.0
1146 vm[V_CONV_FROM_UINT64_TIE_RTE_RESULT] =
1147 exactByteEquivalent(static_cast<uint64_t>(0x4350000000000000)); // 18014398509481984.0
1148
1149 vm[V_CONV_FROM_INT32_UP_RTZ_RESULT] = vm[V_UNUSED];
1150 vm[V_CONV_FROM_INT32_DOWN_RTZ_RESULT] = vm[V_UNUSED];
1151 vm[V_CONV_FROM_INT32_TIE_RTZ_RESULT] = vm[V_UNUSED];
1152 vm[V_CONV_FROM_INT64_UP_RTZ_RESULT] =
1153 exactByteEquivalent(static_cast<uint64_t>(0xc350000000000000)); // -18014398509481984.0
1154 vm[V_CONV_FROM_INT64_DOWN_RTZ_RESULT] =
1155 exactByteEquivalent(static_cast<uint64_t>(0xc350000000000000)); // -18014398509481984.0
1156 vm[V_CONV_FROM_INT64_TIE_RTZ_RESULT] =
1157 exactByteEquivalent(static_cast<uint64_t>(0xc350000000000000)); // -18014398509481984.0
1158
1159 vm[V_CONV_FROM_INT32_UP_RTE_RESULT] = vm[V_UNUSED];
1160 vm[V_CONV_FROM_INT32_DOWN_RTE_RESULT] = vm[V_UNUSED];
1161 vm[V_CONV_FROM_INT32_TIE_RTE_RESULT] = vm[V_UNUSED];
1162 vm[V_CONV_FROM_INT64_UP_RTE_RESULT] =
1163 exactByteEquivalent(static_cast<uint64_t>(0xc350000000000001)); // -18014398509481988.0
1164 vm[V_CONV_FROM_INT64_DOWN_RTE_RESULT] =
1165 exactByteEquivalent(static_cast<uint64_t>(0xc350000000000000)); // -18014398509481984.0
1166 vm[V_CONV_FROM_INT64_TIE_RTE_RESULT] =
1167 exactByteEquivalent(static_cast<uint64_t>(0xc350000000000000)); // -18014398509481984.0
1168
1169 vm[V_CONV_DENORM_SMALLER] = exactByteEquivalent<uint64_t>(0x3f0f800000000000); // 0x03f0 is fp16 denorm
1170 vm[V_CONV_DENORM_BIGGER] = exactByteEquivalent<uint64_t>(0x373f800000000000); // 0x000003f0 is fp32 denorm
1171 }
1172
1173 // Each type (fp16, fp32, fp64, uint16, uint32, uint64, int16, int32, int64)
1174 // has specific set of SPIR-V snippets that was extracted to separate template
1175 // specialization. Those snippets are used to compose final test shaders.
1176 // With this approach parameterization can be done just once per type and reused
1177 // for many tests.
1178 class TypeSnippetsBase
1179 {
1180 public:
TypeSnippetsBase(bool floatType,bool signedInteger)1181 TypeSnippetsBase(bool floatType, bool signedInteger) : isFloatType(floatType), isSignedInteger(signedInteger)
1182 {
1183 }
1184
1185 virtual ~TypeSnippetsBase() = default;
1186
getValueTypeString() const1187 const char *getValueTypeString() const
1188 {
1189 return isFloatType ? "f" : (isSignedInteger ? "i" : "u");
1190 }
1191
1192 protected:
1193 void updateSpirvSnippets();
1194
1195 public: // Type specific data:
1196 // Number of bits consumed by float type
1197 string bitWidth;
1198
1199 // Minimum positive normal
1200 string epsilon;
1201
1202 // denormBase is a normal value (found empirically) used to generate denorm value.
1203 // Denorm is generated by substracting epsilon from denormBase.
1204 // denormBase is not a denorm - it is used to create denorm.
1205 // This value is needed when operations are tested with arguments that were
1206 // generated in the code. Generated denorm should be the same as denorm
1207 // used when arguments are passed via input (m_valueIdToVariableType[V_DENORM]).
1208 // This is required as result of some operations depends on actual denorm value
1209 // e.g. OpRadians(0x0001) is 0 but OpRadians(0x03f0) is denorm.
1210 string denormBase;
1211
1212 string capabilities;
1213 string extensions;
1214 string capabilitiesFp16Without16BitStorage;
1215 string extensionsFp16Without16BitStorage;
1216 string arrayStride;
1217
1218 bool loadStoreRequiresShaderFloat16;
1219 bool isFloatType;
1220 bool isSignedInteger;
1221
1222 public: // Type specific spir-v snippets:
1223 // Common annotations
1224 string typeAnnotationsSnippet;
1225
1226 // Definitions of all types commonly used by operation tests
1227 string typeDefinitionsSnippet;
1228
1229 // Definitions of all types commonly used by settings tests
1230 string minTypeDefinitionsSnippet;
1231
1232 // Definitions of all constants commonly used by tests
1233 string constantsDefinitionsSnippet;
1234
1235 // Map that stores instructions that generate arguments of specified value.
1236 // Every test that uses generated inputod will select up to two items from this map
1237 typedef map<ValueId, string> SnippetMap;
1238 SnippetMap valueIdToSnippetArgMap;
1239
1240 // Spir-v snippets that read argument from SSBO
1241 string argumentsFromInputSnippet;
1242 string multiArgumentsFromInputSnippet;
1243
1244 // SSBO with stage input/output definitions
1245 string inputAnnotationsSnippet;
1246 string inputDefinitionsSnippet;
1247 string outputAnnotationsSnippet;
1248 string multiOutputAnnotationsSnippet;
1249 string outputDefinitionsSnippet;
1250 string multiOutputDefinitionsSnippet;
1251
1252 // Varying is required to pass result from vertex stage to fragment stage,
1253 // one of requirements was to not use SSBO writes in vertex stage so we
1254 // need to do that in fragment stage; we also cant pass operation result
1255 // directly because of interpolation, to avoid it we do a bitcast to uint
1256 string varyingsTypesSnippet;
1257 string inputVaryingsSnippet;
1258 string outputVaryingsSnippet;
1259 string storeVertexResultSnippet;
1260 string loadVertexResultSnippet;
1261
1262 string storeResultsSnippet;
1263 string multiStoreResultsSnippet;
1264
1265 string argumentsFromInputFp16Snippet;
1266 string storeResultsFp16Snippet;
1267 string multiArgumentsFromInputFp16Snippet;
1268 string multiOutputAnnotationsFp16Snippet;
1269 string multiStoreResultsFp16Snippet;
1270 string multiOutputDefinitionsFp16Snippet;
1271 string inputDefinitionsFp16Snippet;
1272 string outputDefinitionsFp16Snippet;
1273 string typeAnnotationsFp16Snippet;
1274 string typeDefinitionsFp16Snippet;
1275 };
1276
updateSpirvSnippets()1277 void TypeSnippetsBase::updateSpirvSnippets()
1278 {
1279 // annotations to types that are commonly used by tests
1280 const string typeAnnotationsTemplate = "OpDecorate %type_valueType_arr_1 ArrayStride " + arrayStride +
1281 "\n"
1282 "OpDecorate %type_valueType_arr_2 ArrayStride " +
1283 arrayStride + "\n";
1284
1285 // definition off all types that are commonly used by tests
1286 const string floatTypeDefinition = "%type_valueType = OpTypeFloat " + bitWidth +
1287 "\n"
1288 "%type_valueType_uptr = OpTypePointer Uniform %type_valueType\n"
1289 "%type_valueType_fptr = OpTypePointer Function %type_valueType\n"
1290 "%type_valueType_vec2 = OpTypeVector %type_valueType 2\n"
1291 "%type_valueType_vec3 = OpTypeVector %type_valueType 3\n"
1292 "%type_valueType_vec4 = OpTypeVector %type_valueType 4\n"
1293 "%type_valueType_vec4_iptr = OpTypePointer Input %type_valueType_vec4\n"
1294 "%type_valueType_vec4_optr = OpTypePointer Output %type_valueType_vec4\n"
1295 "%type_valueType_mat2x2 = OpTypeMatrix %type_valueType_vec2 2\n"
1296 "%type_valueType_arr_1 = OpTypeArray %type_valueType %c_i32_1\n"
1297 "%type_valueType_arr_2 = OpTypeArray %type_valueType %c_i32_2\n";
1298 const string uintTypeDefinition =
1299 (bitWidth == "32" ? "" : // 32 bit values are already defined
1300 "%type_valueType = OpTypeInt " + bitWidth + " " + (isSignedInteger ? "1" : "0") + "\n") +
1301 "%type_valueType_uptr = OpTypePointer Uniform %type_valueType\n" +
1302 (bitWidth == "32" ? "" : // 32 bit values are already defined
1303 "%type_valueType_fptr = OpTypePointer Function %type_valueType\n"
1304 "%type_valueType_vec2 = OpTypeVector %type_valueType 2\n"
1305 "%type_valueType_vec3 = OpTypeVector %type_valueType 3\n") +
1306 "%type_valueType_vec4 = OpTypeVector %type_valueType 4\n"
1307 "%type_valueType_vec4_iptr = OpTypePointer Input %type_valueType_vec4\n"
1308 "%type_valueType_vec4_optr = OpTypePointer Output %type_valueType_vec4\n"
1309 "%type_valueType_arr_1 = OpTypeArray %type_valueType %c_i32_1\n"
1310 "%type_valueType_arr_2 = OpTypeArray %type_valueType %c_i32_2\n";
1311
1312 const string typeDefinitionsTemplate = isFloatType ? floatTypeDefinition : uintTypeDefinition;
1313
1314 // minimal type definition set that is used by settings tests
1315 const string minTypeDefinitionsTemplate = "%type_valueType = OpTypeFloat " + bitWidth +
1316 "\n"
1317 "%type_valueType_uptr = OpTypePointer Uniform %type_valueType\n"
1318 "%type_valueType_arr_2 = OpTypeArray %type_valueType %c_i32_2\n";
1319
1320 // definition off all constants that are used by tests
1321 const string constantsDefinitionsTemplate = "%c_valueType_n1 = OpConstant %type_valueType -1\n"
1322 "%c_valueType_0 = OpConstant %type_valueType 0.0\n"
1323 "%c_valueType_0_5 = OpConstant %type_valueType 0.5\n"
1324 "%c_valueType_1 = OpConstant %type_valueType 1\n"
1325 "%c_valueType_2 = OpConstant %type_valueType 2\n"
1326 "%c_valueType_3 = OpConstant %type_valueType 3\n"
1327 "%c_valueType_4 = OpConstant %type_valueType 4\n"
1328 "%c_valueType_5 = OpConstant %type_valueType 5\n"
1329 "%c_valueType_6 = OpConstant %type_valueType 6\n"
1330 "%c_valueType_eps = OpConstant %type_valueType " +
1331 epsilon +
1332 "\n"
1333 "%c_valueType_denorm_base = OpConstant %type_valueType " +
1334 denormBase + "\n";
1335
1336 // when arguments are read from SSBO this snipped is placed in main function
1337 const string argumentsFromInputTemplate =
1338 "%arg1loc = OpAccessChain %type_valueType_uptr %ssbo_in %c_i32_0 %c_i32_0\n"
1339 "%arg1 = OpLoad %type_valueType %arg1loc\n"
1340 "%arg2loc = OpAccessChain %type_valueType_uptr %ssbo_in %c_i32_0 %c_i32_1\n"
1341 "%arg2 = OpLoad %type_valueType %arg2loc\n";
1342
1343 const string multiArgumentsFromInputTemplate =
1344 "%arg1_valueType_loc = OpAccessChain %type_valueType_uptr %ssbo_in %c_i32_${attr} %c_i32_0\n"
1345 "%arg2_valueType_loc = OpAccessChain %type_valueType_uptr %ssbo_in %c_i32_${attr} %c_i32_1\n"
1346 "%arg1_valueType = OpLoad %type_valueType %arg1_valueType_loc\n"
1347 "%arg2_valueType = OpLoad %type_valueType %arg2_valueType_loc\n";
1348
1349 // when tested shader stage reads from SSBO it has to have this snippet
1350 inputAnnotationsSnippet = "OpMemberDecorate %SSBO_in 0 Offset 0\n"
1351 "OpDecorate %SSBO_in BufferBlock\n"
1352 "OpDecorate %ssbo_in DescriptorSet 0\n"
1353 "OpDecorate %ssbo_in Binding 0\n"
1354 "OpDecorate %ssbo_in NonWritable\n";
1355
1356 const string inputDefinitionsTemplate = "%SSBO_in = OpTypeStruct %type_valueType_arr_2\n"
1357 "%up_SSBO_in = OpTypePointer Uniform %SSBO_in\n"
1358 "%ssbo_in = OpVariable %up_SSBO_in Uniform\n";
1359
1360 outputAnnotationsSnippet = "OpMemberDecorate %SSBO_out 0 Offset 0\n"
1361 "OpDecorate %SSBO_out BufferBlock\n"
1362 "OpDecorate %ssbo_out DescriptorSet 0\n"
1363 "OpDecorate %ssbo_out Binding 1\n";
1364
1365 const string multiOutputAnnotationsTemplate = "OpMemberDecorate %SSBO_valueType_out 0 Offset 0\n"
1366 "OpDecorate %type_valueType_arr_2 ArrayStride " +
1367 arrayStride +
1368 "\n"
1369 "OpDecorate %SSBO_valueType_out BufferBlock\n"
1370 "OpDecorate %ssbo_valueType_out DescriptorSet 0\n";
1371
1372 const string outputDefinitionsTemplate = "%SSBO_out = OpTypeStruct %type_valueType_arr_1\n"
1373 "%up_SSBO_out = OpTypePointer Uniform %SSBO_out\n"
1374 "%ssbo_out = OpVariable %up_SSBO_out Uniform\n";
1375
1376 const string multiOutputDefinitionsTemplate =
1377 "%SSBO_valueType_out = OpTypeStruct %type_valueType\n"
1378 "%up_SSBO_valueType_out = OpTypePointer Uniform %SSBO_valueType_out\n"
1379 "%ssbo_valueType_out = OpVariable %up_SSBO_valueType_out Uniform\n";
1380
1381 // this snippet is used by compute and fragment stage but not by vertex stage
1382 const string storeResultsTemplate =
1383 "%outloc = OpAccessChain %type_valueType_uptr %ssbo_out %c_i32_0 %c_i32_0\n"
1384 "OpStore %outloc %result\n";
1385
1386 const string multiStoreResultsTemplate =
1387 "%outloc" + bitWidth +
1388 " = OpAccessChain %type_valueType_uptr %ssbo_valueType_out %c_i32_0\n"
1389 " OpStore %outloc" +
1390 bitWidth + " %result" + bitWidth + "\n";
1391
1392 const string typeToken = "_valueType";
1393 const string typeName = string("_") + getValueTypeString() + bitWidth;
1394
1395 typeAnnotationsSnippet = replace(typeAnnotationsTemplate, typeToken, typeName);
1396 typeDefinitionsSnippet = replace(typeDefinitionsTemplate, typeToken, typeName);
1397 minTypeDefinitionsSnippet = replace(minTypeDefinitionsTemplate, typeToken, typeName);
1398 constantsDefinitionsSnippet = isFloatType ? replace(constantsDefinitionsTemplate, typeToken, typeName) :
1399 ""; // Not needed for int conversion tests
1400 argumentsFromInputSnippet = replace(argumentsFromInputTemplate, typeToken, typeName);
1401 multiArgumentsFromInputSnippet = replace(multiArgumentsFromInputTemplate, typeToken, typeName);
1402 inputDefinitionsSnippet = replace(inputDefinitionsTemplate, typeToken, typeName);
1403 multiOutputAnnotationsSnippet = replace(multiOutputAnnotationsTemplate, typeToken, typeName);
1404 outputDefinitionsSnippet = replace(outputDefinitionsTemplate, typeToken, typeName);
1405 multiOutputDefinitionsSnippet = replace(multiOutputDefinitionsTemplate, typeToken, typeName);
1406 storeResultsSnippet = replace(storeResultsTemplate, typeToken, typeName);
1407 multiStoreResultsSnippet = replace(multiStoreResultsTemplate, typeToken, typeName);
1408
1409 argumentsFromInputFp16Snippet = "";
1410 storeResultsFp16Snippet = "";
1411 multiArgumentsFromInputFp16Snippet = "";
1412 multiOutputAnnotationsFp16Snippet = "";
1413 multiStoreResultsFp16Snippet = "";
1414 multiOutputDefinitionsFp16Snippet = "";
1415 inputDefinitionsFp16Snippet = "";
1416 typeAnnotationsFp16Snippet = "";
1417 outputDefinitionsFp16Snippet = "";
1418 typeDefinitionsFp16Snippet = "";
1419
1420 if (bitWidth.compare("16") == 0)
1421 {
1422 typeDefinitionsFp16Snippet = "%type_u32_uptr = OpTypePointer Uniform %type_u32\n"
1423 "%type_u32_arr_1 = OpTypeArray %type_u32 %c_i32_1\n";
1424
1425 typeAnnotationsFp16Snippet = "OpDecorate %type_u32_arr_1 ArrayStride 4\n";
1426 const string inputToken = "_f16_arr_2";
1427 const string inputName = "_u32_arr_1";
1428 inputDefinitionsFp16Snippet = replace(inputDefinitionsSnippet, inputToken, inputName);
1429
1430 argumentsFromInputFp16Snippet = "%argloc = OpAccessChain %type_u32_uptr %ssbo_in %c_i32_0 %c_i32_0\n"
1431 "%inval = OpLoad %type_u32 %argloc\n"
1432 "%arg = OpBitcast %type_f16_vec2 %inval\n"
1433 "%arg1 = OpCompositeExtract %type_f16 %arg 0\n"
1434 "%arg2 = OpCompositeExtract %type_f16 %arg 1\n";
1435
1436 const string outputToken = "_f16_arr_1";
1437 const string outputName = "_u32_arr_1";
1438 outputDefinitionsFp16Snippet = replace(outputDefinitionsSnippet, outputToken, outputName);
1439
1440 storeResultsFp16Snippet = "%result_f16_vec2 = OpCompositeConstruct %type_f16_vec2 %result %c_f16_0\n"
1441 "%result_u32 = OpBitcast %type_u32 %result_f16_vec2\n"
1442 "%outloc = OpAccessChain %type_u32_uptr %ssbo_out %c_i32_0 %c_i32_0\n"
1443 "OpStore %outloc %result_u32\n";
1444
1445 multiArgumentsFromInputFp16Snippet =
1446 "%arg_u32_loc = OpAccessChain %type_u32_uptr %ssbo_in %c_i32_${attr} %c_i32_0\n"
1447 "%arg_u32 = OpLoad %type_u32 %arg_u32_loc\n"
1448 "%arg_f16_vec2 = OpBitcast %type_f16_vec2 %arg_u32\n"
1449 "%arg1_f16 = OpCompositeExtract %type_f16 %arg_f16_vec2 0\n"
1450 "%arg2_f16 = OpCompositeExtract %type_f16 %arg_f16_vec2 1\n";
1451
1452 multiOutputAnnotationsFp16Snippet = "OpMemberDecorate %SSBO_u32_out 0 Offset 0\n"
1453 "OpDecorate %type_u32_arr_1 ArrayStride 4\n"
1454 "OpDecorate %SSBO_u32_out BufferBlock\n"
1455 "OpDecorate %ssbo_u32_out DescriptorSet 0\n";
1456
1457 multiStoreResultsFp16Snippet = "%outloc_u32 = OpAccessChain %type_u32_uptr %ssbo_u32_out %c_i32_0\n"
1458 "%result16_vec2 = OpCompositeConstruct %type_f16_vec2 %result16 %c_f16_0\n"
1459 "%result_u32 = OpBitcast %type_u32 %result16_vec2\n"
1460 " OpStore %outloc_u32 %result_u32\n";
1461
1462 multiOutputDefinitionsFp16Snippet = "%c_f16_0 = OpConstant %type_f16 0.0\n"
1463 "%SSBO_u32_out = OpTypeStruct %type_u32\n"
1464 "%up_SSBO_u32_out = OpTypePointer Uniform %SSBO_u32_out\n"
1465 "%ssbo_u32_out = OpVariable %up_SSBO_u32_out Uniform\n";
1466 }
1467
    // NOTE: only values used as _generated_ arguments in test operations
    //       need to be in this map; arguments that are only used by tests
    //       that grab arguments from input do not need to be in this map
    // NOTE: when updating entries in valueIdToSnippetArgMap make
    //       sure to also update m_valueIdToVariableType for all valueType widths
1473 SnippetMap &sm = valueIdToSnippetArgMap;
1474 sm[V_UNUSED] = "OpFSub %type_valueType %c_valueType_0 %c_valueType_0\n";
1475 sm[V_MINUS_INF] = "OpFDiv %type_valueType %c_valueType_n1 %c_valueType_0\n";
1476 sm[V_MINUS_ONE] = "OpFAdd %type_valueType %c_valueType_n1 %c_valueType_0\n";
1477 sm[V_MINUS_ZERO] = "OpFMul %type_valueType %c_valueType_n1 %c_valueType_0\n";
1478 sm[V_ZERO] = "OpFMul %type_valueType %c_valueType_0 %c_valueType_0\n";
1479 sm[V_HALF] = "OpFAdd %type_valueType %c_valueType_0_5 %c_valueType_0\n";
1480 sm[V_ONE] = "OpFAdd %type_valueType %c_valueType_1 %c_valueType_0\n";
1481 sm[V_INF] = "OpFDiv %type_valueType %c_valueType_1 %c_valueType_0\n"; // x / 0 == Inf
1482 sm[V_DENORM] = "OpFSub %type_valueType %c_valueType_denorm_base %c_valueType_eps\n";
1483 sm[V_NAN] = "OpFDiv %type_valueType %c_valueType_0 %c_valueType_0\n"; // 0 / 0 == Nan
1484
1485 map<ValueId, string>::iterator it;
1486 for (it = sm.begin(); it != sm.end(); it++)
1487 sm[it->first] = replace(it->second, typeToken, typeName);
1488 }
1489
1490 typedef de::SharedPtr<TypeSnippetsBase> TypeSnippetsSP;
1491
1492 template <typename FLOAT_TYPE>
1493 class TypeSnippets : public TypeSnippetsBase
1494 {
1495 public:
1496 TypeSnippets(bool floatType = true, bool signedInteger = false);
1497 };
1498
1499 template <>
TypeSnippets(bool floatType,bool signedInteger)1500 TypeSnippets<deFloat16>::TypeSnippets(bool floatType, bool signedInteger) : TypeSnippetsBase(floatType, signedInteger)
1501 {
1502 bitWidth = "16";
1503 epsilon = "6.104e-5"; // 2^-14 = 0x0400
1504
1505 // 1.2113e-4 is 0x07f0 which after substracting epsilon will give 0x03f0 (same as vm[V_DENORM])
1506 // NOTE: constants in SPIR-V cant be specified as exact fp16 - there is conversion from double to fp16
1507 denormBase = "1.2113e-4";
1508
1509 capabilities = "OpCapability StorageUniform16\n";
1510 extensions = "OpExtension \"SPV_KHR_16bit_storage\"\n";
1511
1512 capabilitiesFp16Without16BitStorage = "OpCapability Float16\n";
1513 extensionsFp16Without16BitStorage = "";
1514
1515 arrayStride = "2";
1516
1517 varyingsTypesSnippet = "%type_u32_iptr = OpTypePointer Input %type_u32\n"
1518 "%type_u32_optr = OpTypePointer Output %type_u32\n";
1519 inputVaryingsSnippet = "%BP_vertex_result = OpVariable %type_u32_iptr Input\n";
1520 outputVaryingsSnippet = "%BP_vertex_result = OpVariable %type_u32_optr Output\n";
1521 storeVertexResultSnippet = "%tmp_vec2 = OpCompositeConstruct %type_f16_vec2 %result %c_f16_0\n"
1522 "%packed_result = OpBitcast %type_u32 %tmp_vec2\n"
1523 "OpStore %BP_vertex_result %packed_result\n";
1524 loadVertexResultSnippet = "%packed_result = OpLoad %type_u32 %BP_vertex_result\n"
1525 "%tmp_vec2 = OpBitcast %type_f16_vec2 %packed_result\n"
1526 "%result = OpCompositeExtract %type_f16 %tmp_vec2 0\n";
1527
1528 loadStoreRequiresShaderFloat16 = true;
1529
1530 updateSpirvSnippets();
1531 }
1532
1533 template <>
TypeSnippets(bool floatType,bool signedInteger)1534 TypeSnippets<float>::TypeSnippets(bool floatType, bool signedInteger) : TypeSnippetsBase(floatType, signedInteger)
1535 {
1536 bitWidth = "32";
1537 epsilon = "1.175494351e-38";
1538 denormBase = "1.1756356e-38";
1539 capabilities = "";
1540 extensions = "";
1541 capabilitiesFp16Without16BitStorage = "";
1542 extensionsFp16Without16BitStorage = "";
1543 arrayStride = "4";
1544
1545 varyingsTypesSnippet = "%type_u32_iptr = OpTypePointer Input %type_u32\n"
1546 "%type_u32_optr = OpTypePointer Output %type_u32\n";
1547 inputVaryingsSnippet = "%BP_vertex_result = OpVariable %type_u32_iptr Input\n";
1548 outputVaryingsSnippet = "%BP_vertex_result = OpVariable %type_u32_optr Output\n";
1549 storeVertexResultSnippet = "%packed_result = OpBitcast %type_u32 %result\n"
1550 "OpStore %BP_vertex_result %packed_result\n";
1551 loadVertexResultSnippet = "%packed_result = OpLoad %type_u32 %BP_vertex_result\n"
1552 "%result = OpBitcast %type_f32 %packed_result\n";
1553
1554 loadStoreRequiresShaderFloat16 = false;
1555
1556 updateSpirvSnippets();
1557 }
1558
1559 template <>
TypeSnippets(bool floatType,bool signedInteger)1560 TypeSnippets<double>::TypeSnippets(bool floatType, bool signedInteger) : TypeSnippetsBase(floatType, signedInteger)
1561 {
1562 const string float64Capability = "OpCapability Float64\n";
1563 const string int64Capability = "OpCapability Int64\n";
1564 bitWidth = "64";
1565 epsilon = "2.2250738585072014e-308"; // 0x0010000000000000
1566 denormBase = "2.2250738585076994e-308"; // 0x00100000000003F0
1567 capabilities = floatType ? float64Capability : int64Capability;
1568 extensions = "";
1569 capabilitiesFp16Without16BitStorage = "";
1570 extensionsFp16Without16BitStorage = "";
1571 arrayStride = "8";
1572
1573 varyingsTypesSnippet = "%type_u32_vec2_iptr = OpTypePointer Input %type_u32_vec2\n"
1574 "%type_u32_vec2_optr = OpTypePointer Output %type_u32_vec2\n";
1575 inputVaryingsSnippet = "%BP_vertex_result = OpVariable %type_u32_vec2_iptr Input\n";
1576 outputVaryingsSnippet = "%BP_vertex_result = OpVariable %type_u32_vec2_optr Output\n";
1577 storeVertexResultSnippet = "%packed_result = OpBitcast %type_u32_vec2 %result\n"
1578 "OpStore %BP_vertex_result %packed_result\n";
1579 loadVertexResultSnippet = "%packed_result = OpLoad %type_u32_vec2 %BP_vertex_result\n"
1580 "%result = OpBitcast %type_f64 %packed_result\n";
1581
1582 loadStoreRequiresShaderFloat16 = false;
1583
1584 updateSpirvSnippets();
1585 }
1586
1587 class TypeTestResultsBase
1588 {
1589 public:
~TypeTestResultsBase()1590 virtual ~TypeTestResultsBase()
1591 {
1592 }
1593 VariableType variableType() const;
1594
1595 protected:
1596 VariableType m_variableType;
1597
1598 public:
1599 // Vectors containing test data for float controls
1600 vector<BinaryCase> binaryOpFTZ;
1601 vector<UnaryCase> unaryOpFTZ;
1602 vector<BinaryCase> binaryOpDenormPreserve;
1603 vector<UnaryCase> unaryOpDenormPreserve;
1604 };
1605
variableType() const1606 VariableType TypeTestResultsBase::variableType() const
1607 {
1608 return m_variableType;
1609 }
1610
1611 typedef de::SharedPtr<TypeTestResultsBase> TypeTestResultsSP;
1612
1613 template <typename FLOAT_TYPE>
1614 class TypeTestResults : public TypeTestResultsBase
1615 {
1616 public:
1617 TypeTestResults();
1618 };
1619
1620 template <>
TypeTestResults()1621 TypeTestResults<deFloat16>::TypeTestResults()
1622 {
1623 m_variableType = FP16;
1624
1625 // note: there are many FTZ test cases that can produce diferent result depending
1626 // on input denorm being flushed or not; because of that FTZ tests can be limited
1627 // to those that return denorm as those are the ones affected by tested extension
1628 const BinaryCase binaryOpFTZArr[] = {
1629 //operation den op one den op den den op inf den op nan
1630 {OID_ADD, V_ONE, V_ZERO_OR_DENORM_TIMES_TWO, V_INF, V_UNUSED},
1631 {OID_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_UNUSED},
1632 {OID_MUL, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1633 {OID_DIV, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED},
1634 {OID_REM, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED},
1635 {OID_MOD, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED},
1636 {OID_VEC_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1637 {OID_VEC_MUL_M, V_ZERO_OR_DENORM_TIMES_TWO, V_ZERO, V_UNUSED, V_UNUSED},
1638 {OID_MAT_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1639 {OID_MAT_MUL_V, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1640 {OID_MAT_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1641 {OID_OUT_PROD, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1642 {OID_DOT, V_ZERO_OR_DENORM_TIMES_TWO, V_ZERO, V_UNUSED, V_UNUSED},
1643 {OID_ATAN2, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED},
1644 {OID_POW, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED},
1645 {OID_MIX, V_HALF, V_ZERO, V_INF, V_UNUSED},
1646 {OID_MIN, V_ZERO, V_ZERO, V_ZERO, V_UNUSED},
1647 {OID_MAX, V_ONE, V_ZERO, V_INF, V_UNUSED},
1648 {OID_CLAMP, V_ONE, V_ZERO, V_INF, V_UNUSED},
1649 {OID_STEP, V_ONE, V_ONE, V_ONE, V_UNUSED},
1650 {OID_SSTEP, V_HALF, V_ONE, V_ZERO, V_UNUSED},
1651 {OID_FMA, V_HALF, V_HALF, V_UNUSED, V_UNUSED},
1652 {OID_FACE_FWD, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE},
1653 {OID_NMIN, V_ZERO, V_ZERO, V_ZERO, V_ZERO},
1654 {OID_NMAX, V_ONE, V_ZERO, V_INF, V_ZERO},
1655 {OID_NCLAMP, V_ONE, V_ZERO, V_INF, V_ZERO},
1656 {OID_DIST, V_ONE, V_ZERO, V_INF, V_UNUSED},
1657 {OID_CROSS, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1658 };
1659
1660 const UnaryCase unaryOpFTZArr[] = {
1661 //operation op den
1662 {OID_NEGATE, V_MINUS_ZERO},
1663 {OID_ROUND, V_ZERO},
1664 {OID_ROUND_EV, V_ZERO},
1665 {OID_TRUNC, V_ZERO},
1666 {OID_ABS, V_ZERO},
1667 {OID_FLOOR, V_ZERO},
1668 {OID_CEIL, V_ZERO_OR_ONE},
1669 {OID_FRACT, V_ZERO},
1670 {OID_RADIANS, V_ZERO},
1671 {OID_DEGREES, V_ZERO},
1672 {OID_SIN, V_ZERO},
1673 {OID_COS, V_TRIG_ONE},
1674 {OID_TAN, V_ZERO},
1675 {OID_ASIN, V_ZERO},
1676 {OID_ACOS, V_PI_DIV_2},
1677 {OID_ATAN, V_ZERO},
1678 {OID_SINH, V_ZERO},
1679 {OID_COSH, V_ONE},
1680 {OID_TANH, V_ZERO},
1681 {OID_ASINH, V_ZERO},
1682 {OID_ACOSH, V_UNUSED},
1683 {OID_ATANH, V_ZERO},
1684 {OID_EXP, V_ONE},
1685 {OID_LOG, V_MINUS_INF_OR_LOG_DENORM},
1686 {OID_EXP2, V_ONE},
1687 {OID_LOG2, V_MINUS_INF_OR_LOG2_DENORM},
1688 {OID_SQRT, V_ZERO_OR_SQRT_DENORM},
1689 {OID_INV_SQRT, V_INF_OR_INV_SQRT_DENORM},
1690 {OID_MAT_DET, V_ZERO},
1691 {OID_MAT_INV, V_ZERO_OR_MINUS_ZERO},
1692 {OID_MODF, V_ZERO},
1693 {OID_MODF_ST, V_ZERO},
1694 {OID_NORMALIZE, V_ZERO},
1695 {OID_REFLECT, V_ZERO},
1696 {OID_REFRACT, V_ZERO},
1697 {OID_LENGTH, V_ZERO},
1698 };
1699
1700 const BinaryCase binaryOpDenormPreserveArr[] = {
1701 //operation den op one den op den den op inf den op nan
1702 {OID_PHI, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1703 {OID_SELECT, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1704 {OID_ADD, V_ONE, V_DENORM_TIMES_TWO, V_INF, V_NAN},
1705 {OID_SUB, V_MINUS_ONE_OR_CLOSE, V_ZERO, V_MINUS_INF, V_NAN},
1706 {OID_MUL, V_DENORM, V_ZERO, V_INF, V_NAN},
1707 {OID_VEC_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN},
1708 {OID_VEC_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1709 {OID_MAT_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN},
1710 {OID_MAT_MUL_V, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1711 {OID_MAT_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1712 {OID_OUT_PROD, V_DENORM, V_ZERO, V_INF, V_NAN},
1713 {OID_DOT, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1714 {OID_MIX, V_HALF, V_DENORM, V_INF, V_NAN},
1715 {OID_FMA, V_HALF, V_HALF, V_INF, V_NAN},
1716 {OID_MIN, V_DENORM, V_DENORM, V_DENORM, V_UNUSED},
1717 {OID_MAX, V_ONE, V_DENORM, V_INF, V_UNUSED},
1718 {OID_CLAMP, V_ONE, V_DENORM, V_INF, V_UNUSED},
1719 {OID_NMIN, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1720 {OID_NMAX, V_ONE, V_DENORM, V_INF, V_DENORM},
1721 {OID_NCLAMP, V_ONE, V_DENORM, V_INF, V_DENORM},
1722 };
1723
1724 const UnaryCase unaryOpDenormPreserveArr[] = {
1725 //operation op den
1726 {OID_RETURN_VAL, V_DENORM},
1727 {OID_D_EXTRACT, V_DENORM},
1728 {OID_D_INSERT, V_DENORM},
1729 {OID_SHUFFLE, V_DENORM},
1730 {OID_COMPOSITE, V_DENORM},
1731 {OID_COMPOSITE_INS, V_DENORM},
1732 {OID_COPY, V_DENORM},
1733 {OID_TRANSPOSE, V_DENORM},
1734 {OID_NEGATE, V_DENORM},
1735 {OID_ABS, V_DENORM},
1736 {OID_SIGN, V_ONE},
1737 {OID_RADIANS, V_DENORM},
1738 {OID_DEGREES, V_DEGREES_DENORM},
1739 };
1740
1741 binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr, binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1742 unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr, unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1743 binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1744 binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1745 unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1746 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1747 }
1748
1749 template <>
TypeTestResults()1750 TypeTestResults<float>::TypeTestResults()
1751 {
1752 m_variableType = FP32;
1753
1754 const BinaryCase binaryOpFTZArr[] = {
1755 //operation den op one den op den den op inf den op nan
1756 {OID_ADD, V_ONE, V_ZERO, V_INF, V_UNUSED},
1757 {OID_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_UNUSED},
1758 {OID_MUL, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1759 {OID_DIV, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED},
1760 {OID_REM, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED},
1761 {OID_MOD, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED},
1762 {OID_VEC_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1763 {OID_VEC_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1764 {OID_MAT_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1765 {OID_MAT_MUL_V, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1766 {OID_MAT_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1767 {OID_OUT_PROD, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1768 {OID_DOT, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1769 {OID_ATAN2, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED},
1770 {OID_POW, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED},
1771 {OID_MIX, V_HALF, V_ZERO, V_INF, V_UNUSED},
1772 {OID_MIN, V_ZERO, V_ZERO, V_ZERO, V_UNUSED},
1773 {OID_MAX, V_ONE, V_ZERO, V_INF, V_UNUSED},
1774 {OID_CLAMP, V_ONE, V_ZERO, V_INF, V_UNUSED},
1775 {OID_STEP, V_ONE, V_ONE, V_ONE, V_UNUSED},
1776 {OID_SSTEP, V_HALF, V_ONE, V_ZERO, V_UNUSED},
1777 {OID_FMA, V_HALF, V_HALF, V_UNUSED, V_UNUSED},
1778 {OID_FACE_FWD, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE},
1779 {OID_NMIN, V_ZERO, V_ZERO, V_ZERO, V_ZERO},
1780 {OID_NMAX, V_ONE, V_ZERO, V_INF, V_ZERO},
1781 {OID_NCLAMP, V_ONE, V_ZERO, V_INF, V_ZERO},
1782 {OID_DIST, V_ONE, V_ZERO, V_INF, V_UNUSED},
1783 {OID_CROSS, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1784 };
1785
1786 const UnaryCase unaryOpFTZArr[] = {
1787 //operation op den
1788 {OID_NEGATE, V_MINUS_ZERO},
1789 {OID_ROUND, V_ZERO},
1790 {OID_ROUND_EV, V_ZERO},
1791 {OID_TRUNC, V_ZERO},
1792 {OID_ABS, V_ZERO},
1793 {OID_FLOOR, V_ZERO},
1794 {OID_CEIL, V_ZERO_OR_ONE},
1795 {OID_FRACT, V_ZERO},
1796 {OID_RADIANS, V_ZERO},
1797 {OID_DEGREES, V_ZERO},
1798 {OID_SIN, V_ZERO},
1799 {OID_COS, V_TRIG_ONE},
1800 {OID_TAN, V_ZERO},
1801 {OID_ASIN, V_ZERO},
1802 {OID_ACOS, V_PI_DIV_2},
1803 {OID_ATAN, V_ZERO},
1804 {OID_SINH, V_ZERO},
1805 {OID_COSH, V_ONE},
1806 {OID_TANH, V_ZERO},
1807 {OID_ASINH, V_ZERO},
1808 {OID_ACOSH, V_UNUSED},
1809 {OID_ATANH, V_ZERO},
1810 {OID_EXP, V_ONE},
1811 {OID_LOG, V_MINUS_INF_OR_LOG_DENORM},
1812 {OID_EXP2, V_ONE},
1813 {OID_LOG2, V_MINUS_INF_OR_LOG2_DENORM},
1814 {OID_SQRT, V_ZERO_OR_SQRT_DENORM},
1815 {OID_INV_SQRT, V_INF_OR_INV_SQRT_DENORM},
1816 {OID_MAT_DET, V_ZERO},
1817 {OID_MAT_INV, V_ZERO_OR_MINUS_ZERO},
1818 {OID_MODF, V_ZERO},
1819 {OID_MODF_ST, V_ZERO},
1820 {OID_NORMALIZE, V_ZERO},
1821 {OID_REFLECT, V_ZERO},
1822 {OID_REFRACT, V_ZERO},
1823 {OID_LENGTH, V_ZERO},
1824 };
1825
1826 const BinaryCase binaryOpDenormPreserveArr[] = {
1827 //operation den op one den op den den op inf den op nan
1828 {OID_PHI, V_DENORM, V_DENORM, V_DENORM, V_DENORM}, {OID_SELECT, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1829 {OID_ADD, V_ONE, V_DENORM_TIMES_TWO, V_INF, V_NAN}, {OID_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_NAN},
1830 {OID_MUL, V_DENORM, V_ZERO, V_INF, V_NAN}, {OID_VEC_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN},
1831 {OID_VEC_MUL_M, V_DENORM, V_ZERO, V_INF, V_NAN}, {OID_MAT_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN},
1832 {OID_MAT_MUL_V, V_DENORM, V_ZERO, V_INF, V_NAN}, {OID_MAT_MUL_M, V_DENORM, V_ZERO, V_INF, V_NAN},
1833 {OID_OUT_PROD, V_DENORM, V_ZERO, V_INF, V_NAN}, {OID_DOT, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1834 {OID_MIX, V_HALF, V_DENORM, V_INF, V_NAN}, {OID_FMA, V_HALF, V_HALF, V_INF, V_NAN},
1835 {OID_MIN, V_DENORM, V_DENORM, V_DENORM, V_UNUSED}, {OID_MAX, V_ONE, V_DENORM, V_INF, V_UNUSED},
1836 {OID_CLAMP, V_ONE, V_DENORM, V_INF, V_UNUSED}, {OID_NMIN, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1837 {OID_NMAX, V_ONE, V_DENORM, V_INF, V_DENORM}, {OID_NCLAMP, V_ONE, V_DENORM, V_INF, V_DENORM},
1838 };
1839
1840 const UnaryCase unaryOpDenormPreserveArr[] = {
1841 //operation op den
1842 {OID_RETURN_VAL, V_DENORM},
1843 {OID_D_EXTRACT, V_DENORM},
1844 {OID_D_INSERT, V_DENORM},
1845 {OID_SHUFFLE, V_DENORM},
1846 {OID_COMPOSITE, V_DENORM},
1847 {OID_COMPOSITE_INS, V_DENORM},
1848 {OID_COPY, V_DENORM},
1849 {OID_TRANSPOSE, V_DENORM},
1850 {OID_NEGATE, V_DENORM},
1851 {OID_ABS, V_DENORM},
1852 {OID_SIGN, V_ONE},
1853 {OID_RADIANS, V_DENORM},
1854 {OID_DEGREES, V_DEGREES_DENORM},
1855 };
1856
1857 binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr, binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1858 unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr, unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1859 binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1860 binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1861 unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1862 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1863 }
1864
1865 template <>
TypeTestResults()1866 TypeTestResults<double>::TypeTestResults()
1867 {
1868 m_variableType = FP64;
1869
1870 // fp64 is supported by fewer operations then fp16 and fp32
1871 // e.g. Radians and Degrees functions are not supported
1872 const BinaryCase binaryOpFTZArr[] = {
1873 //operation den op one den op den den op inf den op nan
1874 {OID_ADD, V_ONE, V_ZERO, V_INF, V_UNUSED},
1875 {OID_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_UNUSED},
1876 {OID_MUL, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1877 {OID_DIV, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED},
1878 {OID_REM, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED},
1879 {OID_MOD, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED},
1880 {OID_VEC_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1881 {OID_VEC_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1882 {OID_MAT_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1883 {OID_MAT_MUL_V, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1884 {OID_MAT_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1885 {OID_OUT_PROD, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1886 {OID_DOT, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1887 {OID_MIX, V_HALF, V_ZERO, V_INF, V_UNUSED},
1888 {OID_MIN, V_ZERO, V_ZERO, V_ZERO, V_UNUSED},
1889 {OID_MAX, V_ONE, V_ZERO, V_INF, V_UNUSED},
1890 {OID_CLAMP, V_ONE, V_ZERO, V_INF, V_UNUSED},
1891 {OID_STEP, V_ONE, V_ONE, V_ONE, V_UNUSED},
1892 {OID_SSTEP, V_HALF, V_ONE, V_ZERO, V_UNUSED},
1893 {OID_FMA, V_HALF, V_HALF, V_UNUSED, V_UNUSED},
1894 {OID_FACE_FWD, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE},
1895 {OID_NMIN, V_ZERO, V_ZERO, V_ZERO, V_ZERO},
1896 {OID_NMAX, V_ONE, V_ZERO, V_INF, V_ZERO},
1897 {OID_NCLAMP, V_ONE, V_ZERO, V_INF, V_ZERO},
1898 {OID_DIST, V_ONE, V_ZERO, V_INF, V_UNUSED},
1899 {OID_CROSS, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1900 };
1901
1902 const UnaryCase unaryOpFTZArr[] = {
1903 //operation op den
1904 {OID_NEGATE, V_MINUS_ZERO},
1905 {OID_ROUND, V_ZERO},
1906 {OID_ROUND_EV, V_ZERO},
1907 {OID_TRUNC, V_ZERO},
1908 {OID_ABS, V_ZERO},
1909 {OID_FLOOR, V_ZERO},
1910 {OID_CEIL, V_ZERO_OR_ONE},
1911 {OID_FRACT, V_ZERO},
1912 {OID_SQRT, V_ZERO_OR_SQRT_DENORM},
1913 {OID_INV_SQRT, V_INF_OR_INV_SQRT_DENORM},
1914 {OID_MAT_DET, V_ZERO},
1915 {OID_MAT_INV, V_ZERO_OR_MINUS_ZERO},
1916 {OID_MODF, V_ZERO},
1917 {OID_MODF_ST, V_ZERO},
1918 {OID_NORMALIZE, V_ZERO},
1919 {OID_REFLECT, V_ZERO},
1920 {OID_LENGTH, V_ZERO},
1921 };
1922
1923 const BinaryCase binaryOpDenormPreserveArr[] = {
1924 //operation den op one den op den den op inf den op nan
1925 {OID_PHI, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1926 {OID_SELECT, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1927 {OID_ADD, V_ONE, V_DENORM_TIMES_TWO, V_INF, V_NAN},
1928 {OID_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_NAN},
1929 {OID_MUL, V_DENORM, V_ZERO, V_INF, V_NAN},
1930 {OID_VEC_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN},
1931 {OID_VEC_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1932 {OID_MAT_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN},
1933 {OID_MAT_MUL_V, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1934 {OID_MAT_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1935 {OID_OUT_PROD, V_DENORM, V_ZERO, V_INF, V_NAN},
1936 {OID_DOT, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1937 {OID_MIX, V_HALF, V_DENORM, V_INF, V_NAN},
1938 {OID_FMA, V_HALF, V_HALF, V_INF, V_NAN},
1939 {OID_MIN, V_DENORM, V_DENORM, V_DENORM, V_UNUSED},
1940 {OID_MAX, V_ONE, V_DENORM, V_INF, V_UNUSED},
1941 {OID_CLAMP, V_ONE, V_DENORM, V_INF, V_UNUSED},
1942 {OID_NMIN, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1943 {OID_NMAX, V_ONE, V_DENORM, V_INF, V_DENORM},
1944 {OID_NCLAMP, V_ONE, V_DENORM, V_INF, V_DENORM},
1945 };
1946
1947 const UnaryCase unaryOpDenormPreserveArr[] = {
1948 //operation op den
1949 {OID_RETURN_VAL, V_DENORM}, {OID_D_EXTRACT, V_DENORM}, {OID_D_INSERT, V_DENORM}, {OID_SHUFFLE, V_DENORM},
1950 {OID_COMPOSITE, V_DENORM}, {OID_COMPOSITE_INS, V_DENORM}, {OID_COPY, V_DENORM}, {OID_TRANSPOSE, V_DENORM},
1951 {OID_NEGATE, V_DENORM}, {OID_ABS, V_DENORM}, {OID_SIGN, V_ONE},
1952 };
1953
1954 binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr, binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1955 unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr, unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1956 binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1957 binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1958 unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1959 unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1960 }
1961
1962 // Operation structure holds data needed to test specified SPIR-V operation. This class contains
1963 // additional annotations, additional types and aditional constants that should be properly included
1964 // in SPIR-V code. Commands attribute in this structure contains code that performs tested operation
1965 // on given arguments, in some cases verification is also performed there.
1966 // All snipets stroed in this structure are generic and can be specialized for fp16, fp32 or fp64,
1967 // thanks to that this data can be shared by many OperationTestCase instances (testing diferent
1968 // float behaviors on diferent float widths).
1969 struct Operation
1970 {
1971 // operation name is included in test case name
1972 const char *name;
1973
1974 // How extensively is the floating point type used?
1975 FloatUsage floatUsage;
1976
1977 // operation specific spir-v snippets that will be
1978 // placed in proper places in final test shader
1979 const char *annotations;
1980 const char *types;
1981 const char *constants;
1982 const char *variables;
1983 const char *functions;
1984 const char *commands;
1985
1986 // conversion operations operate on one float type and produce float
1987 // type with different bit width; restrictedInputType is used only when
1988 // isInputTypeRestricted is set to true and it restricts usage of this
1989 // operation to specified input type
1990 bool isInputTypeRestricted;
1991 VariableType restrictedInputType;
1992
1993 // arguments for OpSpecConstant need to be specified also as constant
1994 bool isSpecConstant;
1995
1996 // set if c_float* constant is used in operation
1997 FloatStatementUsageFlags statementUsageFlags;
1998
Operationvkt::SpirVAssembly::__anon1f0d25030111::Operation1999 Operation()
2000 {
2001 }
2002
2003 // Minimal constructor - used by most of operations
Operationvkt::SpirVAssembly::__anon1f0d25030111::Operation2004 Operation(const char *_name, FloatUsage _floatUsage, const char *_commands,
2005 const FloatStatementUsageFlags _statementUsageFlags = 0)
2006 : name(_name)
2007 , floatUsage(_floatUsage)
2008 , annotations("")
2009 , types("")
2010 , constants("")
2011 , variables("")
2012 , functions("")
2013 , commands(_commands)
2014 , isInputTypeRestricted(false)
2015 , restrictedInputType(FP16) // not used as isInputTypeRestricted is false
2016 , isSpecConstant(false)
2017 , statementUsageFlags(_statementUsageFlags)
2018 {
2019 }
2020
2021 // Conversion operations constructor (used also by conversions done in SpecConstantOp)
Operationvkt::SpirVAssembly::__anon1f0d25030111::Operation2022 Operation(const char *_name, FloatUsage _floatUsage, bool specConstant, VariableType _inputType,
2023 const char *_constants, const char *_commands, const FloatStatementUsageFlags _statementUsageFlags = 0)
2024 : name(_name)
2025 , floatUsage(_floatUsage)
2026 , annotations("")
2027 , types("")
2028 , constants(_constants)
2029 , variables("")
2030 , functions("")
2031 , commands(_commands)
2032 , isInputTypeRestricted(true)
2033 , restrictedInputType(_inputType)
2034 , isSpecConstant(specConstant)
2035 , statementUsageFlags(_statementUsageFlags)
2036 {
2037 }
2038
2039 // Full constructor - used by few operations, that are more complex to test
Operationvkt::SpirVAssembly::__anon1f0d25030111::Operation2040 Operation(const char *_name, FloatUsage _floatUsage, const char *_annotations, const char *_types,
2041 const char *_constants, const char *_variables, const char *_functions, const char *_commands,
2042 const FloatStatementUsageFlags _statementUsageFlags = 0)
2043 : name(_name)
2044 , floatUsage(_floatUsage)
2045 , annotations(_annotations)
2046 , types(_types)
2047 , constants(_constants)
2048 , variables(_variables)
2049 , functions(_functions)
2050 , commands(_commands)
2051 , isInputTypeRestricted(false)
2052 , restrictedInputType(FP16) // not used as isInputTypeRestricted is false
2053 , isSpecConstant(false)
2054 , statementUsageFlags(_statementUsageFlags)
2055 {
2056 }
2057
2058 // Full constructor - used by rounding override cases
Operationvkt::SpirVAssembly::__anon1f0d25030111::Operation2059 Operation(const char *_name, FloatUsage _floatUsage, VariableType _inputType, const char *_annotations,
2060 const char *_types, const char *_constants, const char *_commands,
2061 const FloatStatementUsageFlags _statementUsageFlags = 0)
2062 : name(_name)
2063 , floatUsage(_floatUsage)
2064 , annotations(_annotations)
2065 , types(_types)
2066 , constants(_constants)
2067 , variables("")
2068 , functions("")
2069 , commands(_commands)
2070 , isInputTypeRestricted(true)
2071 , restrictedInputType(_inputType)
2072 , isSpecConstant(false)
2073 , statementUsageFlags(_statementUsageFlags)
2074 {
2075 }
2076 };
2077
2078 // Class storing input that will be passed to operation and expected
2079 // output that should be generated for specified behaviour.
2080 class OperationTestCase
2081 {
2082 public:
OperationTestCase()2083 OperationTestCase()
2084 {
2085 }
2086
OperationTestCase(const char * _baseName,BehaviorFlags _behaviorFlags,OperationId _operationId,ValueId _input1,ValueId _input2,ValueId _expectedOutput,bool _fp16Without16BitStorage=false)2087 OperationTestCase(const char *_baseName, BehaviorFlags _behaviorFlags, OperationId _operationId, ValueId _input1,
2088 ValueId _input2, ValueId _expectedOutput, bool _fp16Without16BitStorage = false)
2089 : behaviorFlags(_behaviorFlags)
2090 , operationId(_operationId)
2091 , expectedOutput(_expectedOutput)
2092 , fp16Without16BitStorage(_fp16Without16BitStorage)
2093 {
2094 baseName = _baseName;
2095 if (fp16Without16BitStorage)
2096 baseName += "_nostorage";
2097 input[0] = _input1;
2098 input[1] = _input2;
2099 }
2100
2101 public:
2102 string baseName;
2103 BehaviorFlags behaviorFlags;
2104 OperationId operationId;
2105 ValueId input[2];
2106 ValueId expectedOutput;
2107 bool fp16Without16BitStorage;
2108 };
2109
// Helper structure used to store specialized operation
// data. This data is ready to be used during shader assembly.
struct SpecializedOperation
{
    // Specialized SPIR-V snippet strings.
    // NOTE(review): presumably these mirror the Operation fields of the same
    // names after type specialization - confirm against the code that fills them.
    string constants;
    string annotations;
    string types;
    string arguments;
    string variables;
    string functions;
    string commands;

    // Variable type of the operation input.
    VariableType inVariableType;
    // Type snippets for the input and the output side of the operation.
    TypeSnippetsSP inTypeSnippets;
    TypeSnippetsSP outTypeSnippets;
    // Float statement usage flags recorded for the argument snippets.
    FloatStatementUsageFlags argumentsUsesFloatConstant;
};
2127
2128 // Class responsible for constructing list of test cases for specified
2129 // float type and specified way of preparation of arguments.
2130 // Arguments can be either read from input SSBO or generated via math
2131 // operations in spir-v code.
2132 class TestCasesBuilder
2133 {
2134 public:
2135 void init();
2136 void build(vector<OperationTestCase> &testCases, TypeTestResultsSP typeTestResults, bool argumentsFromInput);
2137 const Operation &getOperation(OperationId id) const;
2138
2139 private:
2140 void createUnaryTestCases(vector<OperationTestCase> &testCases, OperationId operationId,
2141 ValueId denormPreserveResult, ValueId denormFTZResult,
2142 bool fp16WithoutStorage = false) const;
2143
2144 private:
2145 // Operations are shared betwean test cases so they are
2146 // passed to them as pointers to data stored in TestCasesBuilder.
2147 typedef OperationTestCase OTC;
2148 typedef Operation Op;
2149 map<int, Op> m_operations;
2150 // SPIR-V assembly snippets that are used in m_operations
2151 vector<std::string> m_saved_strings;
2152
2153 // We expect 12 strings: 3 kinds of narrowing conversions, with
2154 // 4 cases each.
2155 const size_t m_num_expected_strings = 12;
2156 // Saves the given string in m_strings, and returns a pointer to its data.
save(std::string str)2157 const char *save(std::string str)
2158 {
2159 m_saved_strings.emplace_back(std::move(str));
2160 return m_saved_strings.back().data();
2161 }
2162 };
2163
init()2164 void TestCasesBuilder::init()
2165 {
2166 map<int, Op> &mo = m_operations;
2167 m_saved_strings.reserve(m_num_expected_strings);
2168
2169 // predefine operations repeatedly used in tests; note that "_valueType"
2170 // in every operation command will be replaced with either "_f16",
2171 // "_f32", "_f64", "_ui16", "ui32", "_ui64", "_i16", "_i32", "_i64"
2172 // StringTemplate is not used here because it would make code less
2173 // readable m_operations contains generic operation definitions that
2174 // can be used for all float types
2175
2176 mo[OID_NEGATE] = Op("negate", FLOAT_ARITHMETIC, "%result = OpFNegate %type_valueType %arg1\n",
2177 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2178 mo[OID_COMPOSITE] = Op("composite", FLOAT_ARITHMETIC,
2179 "%vec1 = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2180 "%result = OpCompositeExtract %type_valueType %vec1 0\n",
2181 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2182 mo[OID_COMPOSITE_INS] =
2183 Op("comp_ins", FLOAT_ARITHMETIC,
2184 "%vec1 = OpCompositeConstruct %type_valueType_vec2 %c_valueType_0 %c_valueType_0\n"
2185 "%vec2 = OpCompositeInsert %type_valueType_vec2 %arg1 %vec1 0\n"
2186 "%result = OpCompositeExtract %type_valueType %vec2 0\n",
2187 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2188 mo[OID_COPY] = Op("copy", FLOAT_STORAGE_ONLY, "%result = OpCopyObject %type_valueType %arg1\n",
2189 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2190 mo[OID_D_EXTRACT] = Op("extract", FLOAT_ARITHMETIC,
2191 "%vec1 = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2192 "%result = OpVectorExtractDynamic %type_valueType %vec1 %c_i32_0\n",
2193 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2194 mo[OID_D_INSERT] =
2195 Op("insert", FLOAT_ARITHMETIC,
2196 "%tmpVec = OpCompositeConstruct %type_valueType_vec2 %c_valueType_2 %c_valueType_2\n"
2197 "%vec1 = OpVectorInsertDynamic %type_valueType_vec2 %tmpVec %arg1 %c_i32_0\n"
2198 "%result = OpCompositeExtract %type_valueType %vec1 0\n",
2199 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2200 mo[OID_SHUFFLE] = Op(
2201 "shuffle", FLOAT_ARITHMETIC,
2202 "%tmpVec1 = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2203 "%tmpVec2 = OpCompositeConstruct %type_valueType_vec2 %c_valueType_2 "
2204 "%c_valueType_2\n" // NOTE: its impossible to test shuffle with denorms flushed
2205 "%vec1 = OpVectorShuffle %type_valueType_vec2 %tmpVec1 %tmpVec2 0 2\n" // to zero as this will be done by earlier operation
2206 "%result = OpCompositeExtract %type_valueType %vec1 0\n", // (this also applies to few other operations)
2207 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2208 mo[OID_TRANSPOSE] = Op("transpose", FLOAT_ARITHMETIC,
2209 "%col = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2210 "%mat = OpCompositeConstruct %type_valueType_mat2x2 %col %col\n"
2211 "%tmat = OpTranspose %type_valueType_mat2x2 %mat\n"
2212 "%tcol = OpCompositeExtract %type_valueType_vec2 %tmat 0\n"
2213 "%result = OpCompositeExtract %type_valueType %tcol 0\n",
2214 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2215 mo[OID_RETURN_VAL] = Op("ret_val", FLOAT_ARITHMETIC, "",
2216 "%type_test_fun = OpTypeFunction %type_valueType %type_valueType\n", "", "",
2217 "%test_fun = OpFunction %type_valueType None %type_test_fun\n"
2218 "%param = OpFunctionParameter %type_valueType\n"
2219 "%entry = OpLabel\n"
2220 "OpReturnValue %param\n"
2221 "OpFunctionEnd\n",
2222 "%result = OpFunctionCall %type_valueType %test_fun %arg1\n",
2223 B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2224
2225 // conversion operations that are meant to be used only for single output type (defined by the second number in name)
2226 const char *convertSource = "%result = OpFConvert %type_valueType %arg1\n";
2227 mo[OID_CONV_FROM_FP16] =
2228 Op("conv_from_fp16", FLOAT_STORAGE_ONLY, false, FP16, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2229 mo[OID_CONV_FROM_FP32] =
2230 Op("conv_from_fp32", FLOAT_STORAGE_ONLY, false, FP32, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2231 mo[OID_CONV_FROM_FP64] =
2232 Op("conv_from_fp64", FLOAT_STORAGE_ONLY, false, FP64, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2233
2234 const char *convertFromUintSource = "%result = OpConvertUToF %type_valueType %arg1\n";
2235 mo[OID_CONV_FROM_UINT_TO_FP32] = Op("conv_uint_to_fp32", FLOAT_STORAGE_ONLY, false, UINT32, "",
2236 convertFromUintSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2237 mo[OID_CONV_FROM_UINT_TO_FP64] = Op("conv_uint_to_fp64", FLOAT_STORAGE_ONLY, false, UINT64, "",
2238 convertFromUintSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2239 const char *convertFromIntSource = "%result = OpConvertSToF %type_valueType %arg1\n";
2240 mo[OID_CONV_FROM_INT_TO_FP32] = Op("conv_uint_to_fp32", FLOAT_STORAGE_ONLY, false, INT32, "", convertFromIntSource,
2241 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2242 mo[OID_CONV_FROM_INT_TO_FP64] = Op("conv_uint_to_fp64", FLOAT_STORAGE_ONLY, false, INT64, "", convertFromIntSource,
2243 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2244
2245 // Of all opcodes supported by OpSpecConstantOp we can only test the FConvert opcode with literals, as everything
2246 // else requires the Kernel capability (OpenCL); values of literals used in SPIR-V code must be equivalent to
2247 // the values V_CONV_FROM_.... This uses the feature of the SPIR-V assembler where ! is used to inject raw integer
2248 // words into the SPIR-V binary.
2249
2250 // fp32 -> fp16 with cases UP, DOWN, TIE_UP, TIE_DOWN
2251 typedef conversionDetail<Float32, Float16> conv32to16;
2252 mo[OID_SCONST_CONV_FROM_FP32_TO_FP16_UP] =
2253 Op("sconst_conv_from_fp32_up", FLOAT_ARITHMETIC, true, FP32,
2254 save("%c_arg = OpConstant %type_f32 !" + conv32to16::fromStr(Round::UP) +
2255 "\n"
2256 "%result = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2257 "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP32);
2258 mo[OID_SCONST_CONV_FROM_FP32_TO_FP16_DOWN] =
2259 Op("sconst_conv_from_fp32_down", FLOAT_ARITHMETIC, true, FP32,
2260 save("%c_arg = OpConstant %type_f32 !" + conv32to16::fromStr(Round::DOWN) +
2261 "\n"
2262 "%result = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2263 "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP32);
2264 mo[OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_UP] =
2265 Op("sconst_conv_from_fp32_tie_up", FLOAT_ARITHMETIC, true, FP32,
2266 save("%c_arg = OpConstant %type_f32 !" + conv32to16::fromStr(Round::TIE_UP) +
2267 "\n"
2268 "%result = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2269 "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP32);
2270 mo[OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_DOWN] =
2271 Op("sconst_conv_from_fp32_tie_down", FLOAT_ARITHMETIC, true, FP32,
2272 save("%c_arg = OpConstant %type_f32 !" + conv32to16::fromStr(Round::TIE_DOWN) +
2273 "\n"
2274 "%result = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2275 "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP32);
2276
2277 // fp64 -> fp32 with cases UP, DOWN, TIE_UP, TIE_DOWN
2278 // To inject a 64 bit value, inject 2 32-bit words.
2279 typedef conversionDetail<Float64, Float32> conv64to32;
2280 mo[OID_SCONST_CONV_FROM_FP64_TO_FP32_UP] =
2281 Op("sconst_conv_from_fp64_up", FLOAT_ARITHMETIC, true, FP64,
2282 save("%c_arg = OpConstant %type_f64 !" + conv64to32::fromStr(Round::UP) +
2283 "\n"
2284 "%result = OpSpecConstantOp %type_f32 FConvert %c_arg\n"),
2285 "", B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2286 mo[OID_SCONST_CONV_FROM_FP64_TO_FP32_DOWN] =
2287 Op("sconst_conv_from_fp64_down", FLOAT_ARITHMETIC, true, FP64,
2288 save("%c_arg = OpConstant %type_f64 !" + conv64to32::fromStr(Round::DOWN) +
2289 "\n"
2290 "%result = OpSpecConstantOp %type_f32 FConvert %c_arg\n"),
2291 "", B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2292 mo[OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_UP] =
2293 Op("sconst_conv_from_fp64_tie_up", FLOAT_ARITHMETIC, true, FP64,
2294 save("%c_arg = OpConstant %type_f64 !" + conv64to32::fromStr(Round::TIE_UP) +
2295 "\n"
2296 "%result = OpSpecConstantOp %type_f32 FConvert %c_arg\n"),
2297 "", B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2298 mo[OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_DOWN] =
2299 Op("sconst_conv_from_fp64_tie_down", FLOAT_ARITHMETIC, true, FP64,
2300 save("%c_arg = OpConstant %type_f64 !" + conv64to32::fromStr(Round::TIE_DOWN) +
2301 "\n"
2302 "%result = OpSpecConstantOp %type_f32 FConvert %c_arg\n"),
2303 "", B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2304
2305 // fp64 -> fp16 with cases UP, DOWN, TIE_UP, TIE_DOWN
2306 typedef conversionDetail<Float64, Float16> conv64to16;
2307 mo[OID_SCONST_CONV_FROM_FP64_TO_FP16_UP] =
2308 Op("sconst_conv_from_fp64_up", FLOAT_ARITHMETIC, true, FP64,
2309 save("%c_arg = OpConstant %type_f64 !" + conv64to16::fromStr(Round::UP) +
2310 "\n"
2311 "%result = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2312 "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2313 mo[OID_SCONST_CONV_FROM_FP64_TO_FP16_DOWN] =
2314 Op("sconst_conv_from_fp64_down", FLOAT_ARITHMETIC, true, FP64,
2315 save("%c_arg = OpConstant %type_f64 !" + conv64to16::fromStr(Round::DOWN) +
2316 "\n"
2317 "%result = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2318 "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2319 mo[OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_UP] =
2320 Op("sconst_conv_from_fp64_tie_up", FLOAT_ARITHMETIC, true, FP64,
2321 save("%c_arg = OpConstant %type_f64 !" + conv64to16::fromStr(Round::TIE_UP) +
2322 "\n"
2323 "%result = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2324 "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2325 mo[OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_DOWN] =
2326 Op("sconst_conv_from_fp64_tie_down", FLOAT_ARITHMETIC, true, FP64,
2327 save("%c_arg = OpConstant %type_f64 !" + conv64to16::fromStr(Round::TIE_DOWN) +
2328 "\n"
2329 "%result = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2330 "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2331
2332 mo[OID_ADD] = Op("add", FLOAT_ARITHMETIC, "%result = OpFAdd %type_valueType %arg1 %arg2\n",
2333 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2334 mo[OID_SUB] = Op("sub", FLOAT_ARITHMETIC, "%result = OpFSub %type_valueType %arg1 %arg2\n",
2335 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2336 mo[OID_MUL] = Op("mul", FLOAT_ARITHMETIC, "%result = OpFMul %type_valueType %arg1 %arg2\n",
2337 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2338 mo[OID_DIV] = Op("div", FLOAT_ARITHMETIC, "%result = OpFDiv %type_valueType %arg1 %arg2\n",
2339 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2340 mo[OID_REM] = Op("rem", FLOAT_ARITHMETIC, "%result = OpFRem %type_valueType %arg1 %arg2\n",
2341 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2342 mo[OID_MOD] = Op("mod", FLOAT_ARITHMETIC, "%result = OpFMod %type_valueType %arg1 %arg2\n",
2343 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2344 mo[OID_PHI] = Op("phi", FLOAT_ARITHMETIC,
2345 "%comp = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
2346 " OpSelectionMerge %comp_merge None\n"
2347 " OpBranchConditional %comp %true_branch %false_branch\n"
2348 "%true_branch = OpLabel\n"
2349 " OpBranch %comp_merge\n"
2350 "%false_branch = OpLabel\n"
2351 " OpBranch %comp_merge\n"
2352 "%comp_merge = OpLabel\n"
2353 "%result = OpPhi %type_valueType %arg2 %true_branch %arg1 %false_branch\n",
2354 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2355 mo[OID_SELECT] = Op("select", FLOAT_ARITHMETIC,
2356 "%always_true = OpFOrdGreaterThan %type_bool %c_valueType_1 %c_valueType_0\n"
2357 "%result = OpSelect %type_valueType %always_true %arg1 %arg2\n",
2358 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2359 mo[OID_DOT] = Op("dot", FLOAT_ARITHMETIC,
2360 "%vec1 = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2361 "%vec2 = OpCompositeConstruct %type_valueType_vec2 %arg2 %arg2\n"
2362 "%result = OpDot %type_valueType %vec1 %vec2\n",
2363 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2364 mo[OID_VEC_MUL_S] = Op("vmuls", FLOAT_ARITHMETIC,
2365 "%vec = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2366 "%tmpVec = OpVectorTimesScalar %type_valueType_vec2 %vec %arg2\n"
2367 "%result = OpCompositeExtract %type_valueType %tmpVec 0\n",
2368 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2369 mo[OID_VEC_MUL_M] = Op("vmulm", FLOAT_ARITHMETIC,
2370 "%col = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2371 "%mat = OpCompositeConstruct %type_valueType_mat2x2 %col %col\n"
2372 "%vec = OpCompositeConstruct %type_valueType_vec2 %arg2 %arg2\n"
2373 "%tmpVec = OpVectorTimesMatrix %type_valueType_vec2 %vec %mat\n"
2374 "%result = OpCompositeExtract %type_valueType %tmpVec 0\n",
2375 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2376 mo[OID_MAT_MUL_S] = Op("mmuls", FLOAT_ARITHMETIC,
2377 "%col = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2378 "%mat = OpCompositeConstruct %type_valueType_mat2x2 %col %col\n"
2379 "%mulMat = OpMatrixTimesScalar %type_valueType_mat2x2 %mat %arg2\n"
2380 "%extCol = OpCompositeExtract %type_valueType_vec2 %mulMat 0\n"
2381 "%result = OpCompositeExtract %type_valueType %extCol 0\n",
2382 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2383 mo[OID_MAT_MUL_V] = Op("mmulv", FLOAT_ARITHMETIC,
2384 "%col = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2385 "%mat = OpCompositeConstruct %type_valueType_mat2x2 %col %col\n"
2386 "%vec = OpCompositeConstruct %type_valueType_vec2 %arg2 %arg2\n"
2387 "%mulVec = OpMatrixTimesVector %type_valueType_vec2 %mat %vec\n"
2388 "%result = OpCompositeExtract %type_valueType %mulVec 0\n",
2389 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2390 mo[OID_MAT_MUL_M] = Op("mmulm", FLOAT_ARITHMETIC,
2391 "%col1 = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2392 "%mat1 = OpCompositeConstruct %type_valueType_mat2x2 %col1 %col1\n"
2393 "%col2 = OpCompositeConstruct %type_valueType_vec2 %arg2 %arg2\n"
2394 "%mat2 = OpCompositeConstruct %type_valueType_mat2x2 %col2 %col2\n"
2395 "%mulMat = OpMatrixTimesMatrix %type_valueType_mat2x2 %mat1 %mat2\n"
2396 "%extCol = OpCompositeExtract %type_valueType_vec2 %mulMat 0\n"
2397 "%result = OpCompositeExtract %type_valueType %extCol 0\n",
2398 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2399 mo[OID_OUT_PROD] = Op("out_prod", FLOAT_ARITHMETIC,
2400 "%vec1 = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2401 "%vec2 = OpCompositeConstruct %type_valueType_vec2 %arg2 %arg2\n"
2402 "%mulMat = OpOuterProduct %type_valueType_mat2x2 %vec1 %vec2\n"
2403 "%extCol = OpCompositeExtract %type_valueType_vec2 %mulMat 0\n"
2404 "%result = OpCompositeExtract %type_valueType %extCol 0\n",
2405 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2406
2407 // comparison operations
2408 mo[OID_ORD_EQ] = Op("ord_eq", FLOAT_ARITHMETIC,
2409 "%boolVal = OpFOrdEqual %type_bool %arg1 %arg2\n"
2410 "%result = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2411 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2412 mo[OID_UORD_EQ] = Op("uord_eq", FLOAT_ARITHMETIC,
2413 "%boolVal = OpFUnordEqual %type_bool %arg1 %arg2\n"
2414 "%result = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2415 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2416 mo[OID_ORD_NEQ] = Op("ord_neq", FLOAT_ARITHMETIC,
2417 "%boolVal = OpFOrdNotEqual %type_bool %arg1 %arg2\n"
2418 "%result = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2419 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2420 mo[OID_UORD_NEQ] = Op("uord_neq", FLOAT_ARITHMETIC,
2421 "%boolVal = OpFUnordNotEqual %type_bool %arg1 %arg2\n"
2422 "%result = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2423 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2424 mo[OID_ORD_LS] = Op("ord_ls", FLOAT_ARITHMETIC,
2425 "%boolVal = OpFOrdLessThan %type_bool %arg1 %arg2\n"
2426 "%result = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2427 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2428 mo[OID_UORD_LS] = Op("uord_ls", FLOAT_ARITHMETIC,
2429 "%boolVal = OpFUnordLessThan %type_bool %arg1 %arg2\n"
2430 "%result = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2431 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2432 mo[OID_ORD_GT] = Op("ord_gt", FLOAT_ARITHMETIC,
2433 "%boolVal = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
2434 "%result = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2435 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2436 mo[OID_UORD_GT] = Op("uord_gt", FLOAT_ARITHMETIC,
2437 "%boolVal = OpFUnordGreaterThan %type_bool %arg1 %arg2\n"
2438 "%result = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2439 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2440 mo[OID_ORD_LE] = Op("ord_le", FLOAT_ARITHMETIC,
2441 "%boolVal = OpFOrdLessThanEqual %type_bool %arg1 %arg2\n"
2442 "%result = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2443 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2444 mo[OID_UORD_LE] = Op("uord_le", FLOAT_ARITHMETIC,
2445 "%boolVal = OpFUnordLessThanEqual %type_bool %arg1 %arg2\n"
2446 "%result = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2447 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2448 mo[OID_ORD_GE] = Op("ord_ge", FLOAT_ARITHMETIC,
2449 "%boolVal = OpFOrdGreaterThanEqual %type_bool %arg1 %arg2\n"
2450 "%result = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2451 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2452 mo[OID_UORD_GE] = Op("uord_ge", FLOAT_ARITHMETIC,
2453 "%boolVal = OpFUnordGreaterThanEqual %type_bool %arg1 %arg2\n"
2454 "%result = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2455 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2456
2457 mo[OID_ATAN2] =
2458 Op("atan2", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Atan2 %arg1 %arg2\n",
2459 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2460 mo[OID_POW] =
2461 Op("pow", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Pow %arg1 %arg2\n",
2462 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2463 mo[OID_MIX] = Op("mix", FLOAT_ARITHMETIC,
2464 "%result = OpExtInst %type_valueType %std450 FMix %arg1 %arg2 %c_valueType_0_5\n",
2465 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2466 mo[OID_FMA] = Op("fma", FLOAT_ARITHMETIC,
2467 "%result = OpExtInst %type_valueType %std450 Fma %arg1 %arg2 %c_valueType_0_5\n",
2468 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2469 mo[OID_MIN] =
2470 Op("min", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 FMin %arg1 %arg2\n",
2471 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2472 mo[OID_MAX] =
2473 Op("max", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 FMax %arg1 %arg2\n",
2474 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2475 mo[OID_CLAMP] = Op("clamp", FLOAT_ARITHMETIC,
2476 "%result = OpExtInst %type_valueType %std450 FClamp %arg1 %arg2 %arg2\n",
2477 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2478 mo[OID_STEP] =
2479 Op("step", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Step %arg1 %arg2\n",
2480 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2481 mo[OID_SSTEP] =
2482 Op("sstep", FLOAT_ARITHMETIC,
2483 "%result = OpExtInst %type_valueType %std450 SmoothStep %arg1 %arg2 %c_valueType_0_5\n",
2484 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2485 mo[OID_DIST] = Op("distance", FLOAT_ARITHMETIC,
2486 "%result = OpExtInst %type_valueType %std450 Distance %arg1 %arg2\n",
2487 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2488 mo[OID_CROSS] = Op("cross", FLOAT_ARITHMETIC,
2489 "%vec1 = OpCompositeConstruct %type_valueType_vec3 %arg1 %arg1 %arg1\n"
2490 "%vec2 = OpCompositeConstruct %type_valueType_vec3 %arg2 %arg2 %arg2\n"
2491 "%tmpVec = OpExtInst %type_valueType_vec3 %std450 Cross %vec1 %vec2\n"
2492 "%result = OpCompositeExtract %type_valueType %tmpVec 0\n",
2493 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2494 mo[OID_FACE_FWD] =
2495 Op("face_fwd", FLOAT_ARITHMETIC,
2496 "%result = OpExtInst %type_valueType %std450 FaceForward %c_valueType_1 %arg1 %arg2\n",
2497 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2498 mo[OID_NMIN] =
2499 Op("nmin", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 NMin %arg1 %arg2\n",
2500 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2501 mo[OID_NMAX] =
2502 Op("nmax", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 NMax %arg1 %arg2\n",
2503 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2504 mo[OID_NCLAMP] = Op("nclamp", FLOAT_ARITHMETIC,
2505 "%result = OpExtInst %type_valueType %std450 NClamp %arg2 %arg1 %arg2\n",
2506 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2507
2508 mo[OID_ROUND] =
2509 Op("round", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Round %arg1\n",
2510 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2511 mo[OID_ROUND_EV] =
2512 Op("round_ev", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 RoundEven %arg1\n",
2513 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2514 mo[OID_TRUNC] =
2515 Op("trunc", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Trunc %arg1\n",
2516 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2517 mo[OID_ABS] = Op("abs", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 FAbs %arg1\n",
2518 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2519 mo[OID_SIGN] = Op("sign", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 FSign %arg1\n",
2520 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2521 mo[OID_FLOOR] =
2522 Op("floor", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Floor %arg1\n",
2523 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2524 mo[OID_CEIL] = Op("ceil", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Ceil %arg1\n",
2525 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2526 mo[OID_FRACT] =
2527 Op("fract", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Fract %arg1\n",
2528 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2529 mo[OID_RADIANS] =
2530 Op("radians", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Radians %arg1\n",
2531 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2532 mo[OID_DEGREES] =
2533 Op("degrees", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Degrees %arg1\n",
2534 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2535 mo[OID_SIN] = Op("sin", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Sin %arg1\n",
2536 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2537 mo[OID_COS] = Op("cos", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Cos %arg1\n",
2538 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2539 mo[OID_TAN] = Op("tan", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Tan %arg1\n",
2540 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2541 mo[OID_ASIN] = Op("asin", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Asin %arg1\n",
2542 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2543 mo[OID_ACOS] = Op("acos", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Acos %arg1\n",
2544 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2545 mo[OID_ATAN] = Op("atan", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Atan %arg1\n",
2546 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2547 mo[OID_SINH] = Op("sinh", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Sinh %arg1\n",
2548 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2549 mo[OID_COSH] = Op("cosh", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Cosh %arg1\n",
2550 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2551 mo[OID_TANH] = Op("tanh", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Tanh %arg1\n",
2552 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2553 mo[OID_ASINH] =
2554 Op("asinh", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Asinh %arg1\n",
2555 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2556 mo[OID_ACOSH] =
2557 Op("acosh", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Acosh %arg1\n",
2558 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2559 mo[OID_ATANH] =
2560 Op("atanh", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Atanh %arg1\n",
2561 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2562 mo[OID_EXP] = Op("exp", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Exp %arg1\n",
2563 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2564 mo[OID_LOG] = Op("log", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Log %arg1\n",
2565 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2566 mo[OID_EXP2] = Op("exp2", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Exp2 %arg1\n",
2567 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2568 mo[OID_LOG2] = Op("log2", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Log2 %arg1\n",
2569 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2570 mo[OID_SQRT] = Op("sqrt", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Sqrt %arg1\n",
2571 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2572 mo[OID_INV_SQRT] =
2573 Op("inv_sqrt", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 InverseSqrt %arg1\n",
2574 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2575 mo[OID_MODF] =
2576 Op("modf", FLOAT_ARITHMETIC, "", "", "", "%tmpVarPtr = OpVariable %type_valueType_fptr Function\n", "",
2577 "%result = OpExtInst %type_valueType %std450 Modf %arg1 %tmpVarPtr\n",
2578 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2579 mo[OID_MODF_ST] = Op("modf_st", FLOAT_ARITHMETIC,
2580 "OpMemberDecorate %struct_ff 0 Offset 0\n"
2581 "OpMemberDecorate %struct_ff 1 Offset ${float_width}\n",
2582 "%struct_ff = OpTypeStruct %type_valueType %type_valueType\n"
2583 "%struct_ff_fptr = OpTypePointer Function %struct_ff\n",
2584 "", "%tmpStructPtr = OpVariable %struct_ff_fptr Function\n", "",
2585 "%tmpStruct = OpExtInst %struct_ff %std450 ModfStruct %arg1\n"
2586 " OpStore %tmpStructPtr %tmpStruct\n"
2587 "%tmpLoc = OpAccessChain %type_valueType_fptr %tmpStructPtr %c_i32_0\n"
2588 "%result = OpLoad %type_valueType %tmpLoc\n",
2589 B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2590 mo[OID_FREXP] =
2591 Op("frexp", FLOAT_ARITHMETIC, "", "", "", "%tmpVarPtr = OpVariable %type_i32_fptr Function\n", "",
2592 "%result = OpExtInst %type_valueType %std450 Frexp %arg1 %tmpVarPtr\n",
2593 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2594 mo[OID_FREXP_ST] = Op("frexp_st", FLOAT_ARITHMETIC,
2595 "OpMemberDecorate %struct_fi 0 Offset 0\n"
2596 "OpMemberDecorate %struct_fi 1 Offset ${float_width}\n",
2597 "%struct_fi = OpTypeStruct %type_valueType %type_i32\n"
2598 "%struct_fi_fptr = OpTypePointer Function %struct_fi\n",
2599 "", "%tmpStructPtr = OpVariable %struct_fi_fptr Function\n", "",
2600 "%tmpStruct = OpExtInst %struct_fi %std450 FrexpStruct %arg1\n"
2601 " OpStore %tmpStructPtr %tmpStruct\n"
2602 "%tmpLoc = OpAccessChain %type_valueType_fptr %tmpStructPtr %c_i32_0\n"
2603 "%result = OpLoad %type_valueType %tmpLoc\n",
2604 B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2605 mo[OID_LENGTH] =
2606 Op("length", FLOAT_ARITHMETIC, "%result = OpExtInst %type_valueType %std450 Length %arg1\n",
2607 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2608 mo[OID_NORMALIZE] = Op("normalize", FLOAT_ARITHMETIC,
2609 "%vec1 = OpCompositeConstruct %type_valueType_vec2 %arg1 %c_valueType_2\n"
2610 "%tmpVec = OpExtInst %type_valueType_vec2 %std450 Normalize %vec1\n"
2611 "%result = OpCompositeExtract %type_valueType %tmpVec 0\n",
2612 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2613 mo[OID_REFLECT] =
2614 Op("reflect", FLOAT_ARITHMETIC,
2615 "%vec1 = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2616 "%vecN = OpCompositeConstruct %type_valueType_vec2 %c_valueType_0 %c_valueType_n1\n"
2617 "%tmpVec = OpExtInst %type_valueType_vec2 %std450 Reflect %vec1 %vecN\n"
2618 "%result = OpCompositeExtract %type_valueType %tmpVec 0\n",
2619 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2620 mo[OID_REFRACT] =
2621 Op("refract", FLOAT_ARITHMETIC,
2622 "%vec1 = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2623 "%vecN = OpCompositeConstruct %type_valueType_vec2 %c_valueType_0 %c_valueType_n1\n"
2624 "%tmpVec = OpExtInst %type_valueType_vec2 %std450 Refract %vec1 %vecN %c_valueType_0_5\n"
2625 "%result = OpCompositeExtract %type_valueType %tmpVec 0\n",
2626 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2627 mo[OID_MAT_DET] = Op("mat_det", FLOAT_ARITHMETIC,
2628 "%col = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2629 "%mat = OpCompositeConstruct %type_valueType_mat2x2 %col %col\n"
2630 "%result = OpExtInst %type_valueType %std450 Determinant %mat\n",
2631 B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2632 mo[OID_MAT_INV] =
2633 Op("mat_inv", FLOAT_ARITHMETIC,
2634 "%col1 = OpCompositeConstruct %type_valueType_vec2 %arg1 %c_valueType_1\n"
2635 "%col2 = OpCompositeConstruct %type_valueType_vec2 %c_valueType_1 %c_valueType_1\n"
2636 "%mat = OpCompositeConstruct %type_valueType_mat2x2 %col1 %col2\n"
2637 "%invMat = OpExtInst %type_valueType_mat2x2 %std450 MatrixInverse %mat\n"
2638 "%extCol = OpCompositeExtract %type_valueType_vec2 %invMat 1\n"
2639 "%result = OpCompositeExtract %type_valueType %extCol 1\n",
2640 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2641
2642 // PackHalf2x16 is a special case as it operates on fp32 vec2 and returns unsigned int,
2643 // the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
2644 mo[OID_PH_DENORM] =
2645 Op("ph_denorm", FLOAT_STORAGE_ONLY, "", "",
2646 "%c_fp32_denorm_fp16 = OpConstant %type_f32 6.01e-5\n" // fp32 representation of fp16 denorm value
2647 "%c_ref = OpConstant %type_u32 66061296\n",
2648 "", "",
2649 "%srcVec = OpCompositeConstruct %type_f32_vec2 %c_fp32_denorm_fp16 %c_fp32_denorm_fp16\n"
2650 "%packedInt = OpExtInst %type_u32 %std450 PackHalf2x16 %srcVec\n"
2651 "%boolVal = OpIEqual %type_bool %c_ref %packedInt\n"
2652 "%result = OpSelect %type_f32 %boolVal %c_f32_1 %c_f32_0\n",
2653 B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_COMMANDS_CONST_FP32 |
2654 B_STATEMENT_USAGE_COMMANDS_TYPE_FP32);
2655
2656 // UnpackHalf2x16 is a special case that operates on uint32 and returns two 32-bit floats,
2657 // this function is tested using constants
2658 mo[OID_UPH_DENORM] = Op("uph_denorm", FLOAT_STORAGE_ONLY, "", "",
2659 "%c_u32_2_16_pack = OpConstant %type_u32 66061296\n", // == packHalf2x16(vec2(denorm))
2660 "", "",
2661 "%tmpVec = OpExtInst %type_f32_vec2 %std450 UnpackHalf2x16 %c_u32_2_16_pack\n"
2662 "%result = OpCompositeExtract %type_f32 %tmpVec 0\n",
2663 B_STATEMENT_USAGE_COMMANDS_TYPE_FP32);
2664
2665 // PackDouble2x32 is a special case that operates on two uint32 and returns
2666 // double, this function is tested using constants
2667 mo[OID_PD_DENORM] = Op("pd_denorm", FLOAT_STORAGE_ONLY, "", "",
2668 "%c_p1 = OpConstant %type_u32 0\n"
2669 "%c_p2 = OpConstant %type_u32 262144\n", // == UnpackDouble2x32(denorm)
2670 "", "",
2671 "%srcVec = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
2672 "%result = OpExtInst %type_f64 %std450 PackDouble2x32 %srcVec\n",
2673 B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2674
2675 // UnpackDouble2x32 is a special case as it operates only on FP64 and returns two ints,
2676 // the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
2677 const char *unpackDouble2x32Types = "%type_bool_vec2 = OpTypeVector %type_bool 2\n";
2678 const char *unpackDouble2x32Source =
2679 "%refVec2 = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
2680 "%resVec2 = OpExtInst %type_u32_vec2 %std450 UnpackDouble2x32 %arg1\n"
2681 "%boolVec2 = OpIEqual %type_bool_vec2 %refVec2 %resVec2\n"
2682 "%boolVal = OpAll %type_bool %boolVec2\n"
2683 "%result = OpSelect %type_f64 %boolVal %c_f64_1 %c_f64_0\n";
2684 mo[OID_UPD_DENORM_FLUSH] = Op("upd_denorm", FLOAT_STORAGE_ONLY, "", unpackDouble2x32Types,
2685 "%c_p1 = OpConstant %type_u32 0\n"
2686 "%c_p2 = OpConstant %type_u32 0\n",
2687 "", "", unpackDouble2x32Source,
2688 B_STATEMENT_USAGE_COMMANDS_CONST_FP64 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2689 mo[OID_UPD_DENORM_PRESERVE] = Op("upd_denorm", FLOAT_STORAGE_ONLY, "", unpackDouble2x32Types,
2690 "%c_p1 = OpConstant %type_u32 1008\n"
2691 "%c_p2 = OpConstant %type_u32 0\n",
2692 "", "", unpackDouble2x32Source,
2693 B_STATEMENT_USAGE_COMMANDS_CONST_FP64 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2694
2695 mo[OID_ORTE_ROUND] = Op("orte_round", FLOAT_STORAGE_ONLY, FP32, "OpDecorate %result FPRoundingMode RTE\n", "", "",
2696 "%result = OpFConvert %type_f16 %arg1\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FP16);
2697 mo[OID_ORTZ_ROUND] = Op("ortz_round", FLOAT_STORAGE_ONLY, FP32, "OpDecorate %result FPRoundingMode RTZ\n", "", "",
2698 "%result = OpFConvert %type_f16 %arg1\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FP16);
2699
2700 DE_ASSERT(m_saved_strings.size() == m_num_expected_strings);
2701 }
2702
build(vector<OperationTestCase> & testCases,TypeTestResultsSP typeTestResults,bool argumentsFromInput)2703 void TestCasesBuilder::build(vector<OperationTestCase> &testCases, TypeTestResultsSP typeTestResults,
2704 bool argumentsFromInput)
2705 {
2706 // this method constructs a list of test cases; this list is a bit different
2707 // for every combination of float type, arguments preparation method and tested float control
2708
2709 testCases.reserve(750);
2710
2711 bool isFP16 = typeTestResults->variableType() == FP16;
2712
2713 for (int j = 0; j < 2; j++)
2714 {
2715 // fp16NoStorage tests only supported if testing fp16.
2716 bool fp16NoStorage = (j == 1);
2717 if (fp16NoStorage && !isFP16)
2718 continue;
2719
2720 // Denorm - FlushToZero - binary operations
2721 for (size_t i = 0; i < typeTestResults->binaryOpFTZ.size(); ++i)
2722 {
2723 const BinaryCase &binaryCase = typeTestResults->binaryOpFTZ[i];
2724 OperationId operation = binaryCase.operationId;
2725 testCases.push_back(OTC("denorm_op_var_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_ONE,
2726 binaryCase.opVarResult, fp16NoStorage));
2727 testCases.push_back(OTC("denorm_op_denorm_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_DENORM,
2728 binaryCase.opDenormResult, fp16NoStorage));
2729 testCases.push_back(OTC("denorm_op_inf_flush_to_zero", B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM,
2730 V_INF, binaryCase.opInfResult, fp16NoStorage));
2731 testCases.push_back(OTC("denorm_op_nan_flush_to_zero", B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM,
2732 V_NAN, binaryCase.opNanResult, fp16NoStorage));
2733 }
2734
2735 // Denorm - FlushToZero - unary operations
2736 for (size_t i = 0; i < typeTestResults->unaryOpFTZ.size(); ++i)
2737 {
2738 const UnaryCase &unaryCase = typeTestResults->unaryOpFTZ[i];
2739 OperationId operation = unaryCase.operationId;
2740 testCases.push_back(OTC("op_denorm_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_UNUSED,
2741 unaryCase.result, fp16NoStorage));
2742 }
2743
2744 // Denorm - Preserve - binary operations
2745 for (size_t i = 0; i < typeTestResults->binaryOpDenormPreserve.size(); ++i)
2746 {
2747 const BinaryCase &binaryCase = typeTestResults->binaryOpDenormPreserve[i];
2748 OperationId operation = binaryCase.operationId;
2749 testCases.push_back(OTC("denorm_op_var_preserve", B_DENORM_PRESERVE, operation, V_DENORM, V_ONE,
2750 binaryCase.opVarResult, fp16NoStorage));
2751 testCases.push_back(OTC("denorm_op_denorm_preserve", B_DENORM_PRESERVE, operation, V_DENORM, V_DENORM,
2752 binaryCase.opDenormResult, fp16NoStorage));
2753 testCases.push_back(OTC("denorm_op_inf_preserve", B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM,
2754 V_INF, binaryCase.opInfResult, fp16NoStorage));
2755 testCases.push_back(OTC("denorm_op_nan_preserve", B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM,
2756 V_NAN, binaryCase.opNanResult, fp16NoStorage));
2757 }
2758
2759 // Denorm - Preserve - unary operations
2760 for (size_t i = 0; i < typeTestResults->unaryOpDenormPreserve.size(); ++i)
2761 {
2762 const UnaryCase &unaryCase = typeTestResults->unaryOpDenormPreserve[i];
2763 OperationId operation = unaryCase.operationId;
2764 testCases.push_back(OTC("op_denorm_preserve", B_DENORM_PRESERVE, operation, V_DENORM, V_UNUSED,
2765 unaryCase.result, fp16NoStorage));
2766 }
2767 }
2768
2769 struct ZINCase
2770 {
2771 OperationId operationId;
2772 bool supportedByFP64;
2773 ValueId secondArgument;
2774 ValueId preserveZeroResult;
2775 ValueId preserveSZeroResult;
2776 ValueId preserveInfResult;
2777 ValueId preserveSInfResult;
2778 ValueId preserveNanResult;
2779 };
2780
2781 const ZINCase binaryOpZINPreserve[] = {
2782 // operation fp64 second arg preserve zero preserve szero preserve inf preserve sinf preserve nan
2783 {OID_PHI, true, V_INF, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2784 {OID_SELECT, true, V_ONE, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2785 {OID_ADD, true, V_ZERO, V_ZERO, V_ZERO, V_INF, V_MINUS_INF, V_NAN},
2786 {OID_SUB, true, V_ZERO, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2787 {OID_MUL, true, V_ONE, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2788 };
2789
2790 const ZINCase unaryOpZINPreserve[] = {
2791 // operation fp64 second arg preserve zero preserve szero preserve inf preserve sinf preserve nan
2792 {OID_RETURN_VAL, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2793 {OID_D_EXTRACT, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2794 {OID_D_INSERT, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2795 {OID_SHUFFLE, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2796 {OID_COMPOSITE, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2797 {OID_COMPOSITE_INS, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2798 {OID_COPY, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2799 {OID_TRANSPOSE, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2800 {OID_NEGATE, true, V_UNUSED, V_MINUS_ZERO, V_ZERO, V_MINUS_INF, V_INF, V_NAN},
2801 };
2802
2803 bool isFP64 = typeTestResults->variableType() == FP64;
2804
2805 // Signed Zero Inf Nan - Preserve - binary operations
2806 for (int j = 0; j < 2; j++)
2807 {
2808 // fp16NoStorage tests only supported if testing fp16.
2809 bool fp16NoStorage = (j == 1);
2810 if (fp16NoStorage && !isFP16)
2811 continue;
2812
2813 for (size_t i = 0; i < DE_LENGTH_OF_ARRAY(binaryOpZINPreserve); ++i)
2814 {
2815 const ZINCase &zc = binaryOpZINPreserve[i];
2816 if (isFP64 && !zc.supportedByFP64)
2817 continue;
2818
2819 testCases.push_back(OTC("zero_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_ZERO, zc.secondArgument,
2820 zc.preserveZeroResult, fp16NoStorage));
2821 testCases.push_back(OTC("signed_zero_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_MINUS_ZERO,
2822 zc.secondArgument, zc.preserveSZeroResult, fp16NoStorage));
2823 testCases.push_back(OTC("inf_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_INF, zc.secondArgument,
2824 zc.preserveInfResult, fp16NoStorage));
2825 testCases.push_back(OTC("signed_inf_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_MINUS_INF,
2826 zc.secondArgument, zc.preserveSInfResult, fp16NoStorage));
2827 testCases.push_back(OTC("nan_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_NAN, zc.secondArgument,
2828 zc.preserveNanResult, fp16NoStorage));
2829 }
2830
2831 // Signed Zero Inf Nan - Preserve - unary operations
2832 for (size_t i = 0; i < DE_LENGTH_OF_ARRAY(unaryOpZINPreserve); ++i)
2833 {
2834 const ZINCase &zc = unaryOpZINPreserve[i];
2835 if (isFP64 && !zc.supportedByFP64)
2836 continue;
2837
2838 testCases.push_back(OTC("op_zero_preserve", B_ZIN_PRESERVE, zc.operationId, V_ZERO, V_UNUSED,
2839 zc.preserveZeroResult, fp16NoStorage));
2840 testCases.push_back(OTC("op_signed_zero_preserve", B_ZIN_PRESERVE, zc.operationId, V_MINUS_ZERO, V_UNUSED,
2841 zc.preserveSZeroResult, fp16NoStorage));
2842 testCases.push_back(OTC("op_inf_preserve", B_ZIN_PRESERVE, zc.operationId, V_INF, V_UNUSED,
2843 zc.preserveInfResult, fp16NoStorage));
2844 testCases.push_back(OTC("op_signed_inf_preserve", B_ZIN_PRESERVE, zc.operationId, V_MINUS_INF, V_UNUSED,
2845 zc.preserveSInfResult, fp16NoStorage));
2846 testCases.push_back(OTC("op_nan_preserve", B_ZIN_PRESERVE, zc.operationId, V_NAN, V_UNUSED,
2847 zc.preserveNanResult, fp16NoStorage));
2848 }
2849 }
2850
2851 // comparison operations - tested differently because they return true/false
2852 struct ComparisonCase
2853 {
2854 OperationId operationId;
2855 ValueId denormPreserveResult;
2856 };
2857 const ComparisonCase comparisonCases[] = {// operation denorm
2858 {OID_ORD_EQ, V_ZERO}, {OID_UORD_EQ, V_ZERO}, {OID_ORD_NEQ, V_ONE},
2859 {OID_UORD_NEQ, V_ONE}, {OID_ORD_LS, V_ONE}, {OID_UORD_LS, V_ONE},
2860 {OID_ORD_GT, V_ZERO}, {OID_UORD_GT, V_ZERO}, {OID_ORD_LE, V_ONE},
2861 {OID_UORD_LE, V_ONE}, {OID_ORD_GE, V_ZERO}, {OID_UORD_GE, V_ZERO}};
2862 for (int op = 0; op < DE_LENGTH_OF_ARRAY(comparisonCases); ++op)
2863 {
2864 const ComparisonCase &cc = comparisonCases[op];
2865 testCases.push_back(
2866 OTC("denorm_op_var_preserve", B_DENORM_PRESERVE, cc.operationId, V_DENORM, V_ONE, cc.denormPreserveResult));
2867 if (isFP16)
2868 testCases.push_back(OTC("denorm_op_var_preserve", B_DENORM_PRESERVE, cc.operationId, V_DENORM, V_ONE,
2869 cc.denormPreserveResult, true));
2870 }
2871
2872 if (argumentsFromInput)
2873 {
2874 struct RoundingModeCase
2875 {
2876 OperationId operationId;
2877 ValueId arg1;
2878 ValueId arg2;
2879 ValueId expectedRTEResult;
2880 ValueId expectedRTZResult;
2881 };
2882
2883 const RoundingModeCase roundingCases[] = {
2884 {OID_ADD, V_ADD_ARG_A, V_ADD_ARG_B, V_ADD_RTE_RESULT, V_ADD_RTZ_RESULT},
2885 {OID_SUB, V_SUB_ARG_A, V_SUB_ARG_B, V_SUB_RTE_RESULT, V_SUB_RTZ_RESULT},
2886 {OID_MUL, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT},
2887 {OID_DOT, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT},
2888
2889 // in vect/mat multiplication by scalar operations only first element of result is checked
2890 // so argument and result values prepared for multiplication can be reused for those cases
2891 {OID_VEC_MUL_S, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT},
2892 {OID_MAT_MUL_S, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT},
2893 {OID_OUT_PROD, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT},
2894
2895 // in SPIR-V code we return first element of operation result so for following
2896 // cases argument and result values prepared for dot product can be reused
2897 {OID_VEC_MUL_M, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT},
2898 {OID_MAT_MUL_V, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT},
2899 {OID_MAT_MUL_M, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT},
2900
2901 // conversion operations are added separately - depending on float type width
2902 };
2903
2904 for (int c = 0; c < DE_LENGTH_OF_ARRAY(roundingCases); ++c)
2905 {
2906 const RoundingModeCase &rmc = roundingCases[c];
2907 testCases.push_back(
2908 OTC("rounding_rte_op", B_RTE_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTEResult));
2909 testCases.push_back(
2910 OTC("rounding_rtz_op", B_RTZ_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTZResult));
2911 if (isFP16)
2912 {
2913 testCases.push_back(OTC("rounding_rte_op", B_RTE_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2,
2914 rmc.expectedRTEResult, true));
2915 testCases.push_back(OTC("rounding_rtz_op", B_RTZ_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2,
2916 rmc.expectedRTZResult, true));
2917 }
2918 }
2919 }
2920
2921 // special cases
2922 if (typeTestResults->variableType() == FP16)
2923 {
2924 if (argumentsFromInput)
2925 {
2926 for (int i = 0; i < 2; i++)
2927 {
2928 bool noStorage = (i == 1);
2929
2930 //// Conversions from arguments
2931 // fp32 rte
2932 testCases.push_back(OTC("rounding_rte_conv_from_fp32_up", B_RTE_ROUNDING, OID_CONV_FROM_FP32,
2933 V_CONV_FROM_FP32_TO_FP16_UP_ARG, V_UNUSED,
2934 V_CONV_FROM_FP32_TO_FP16_UP_RTE_RESULT, noStorage));
2935 testCases.push_back(OTC("rounding_rte_conv_from_fp32_down", B_RTE_ROUNDING, OID_CONV_FROM_FP32,
2936 V_CONV_FROM_FP32_TO_FP16_DOWN_ARG, V_UNUSED,
2937 V_CONV_FROM_FP32_TO_FP16_DOWN_RTE_RESULT, noStorage));
2938 testCases.push_back(OTC("rounding_rte_conv_from_fp32_tie_up", B_RTE_ROUNDING, OID_CONV_FROM_FP32,
2939 V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG, V_UNUSED,
2940 V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTE_RESULT, noStorage));
2941 testCases.push_back(OTC("rounding_rte_conv_from_fp32_tie_down", B_RTE_ROUNDING, OID_CONV_FROM_FP32,
2942 V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
2943 V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTE_RESULT, noStorage));
2944
2945 // fp32 rtz
2946 testCases.push_back(OTC("rounding_rtz_conv_from_fp32_up", B_RTZ_ROUNDING, OID_CONV_FROM_FP32,
2947 V_CONV_FROM_FP32_TO_FP16_UP_ARG, V_UNUSED,
2948 V_CONV_FROM_FP32_TO_FP16_UP_RTZ_RESULT, noStorage));
2949 testCases.push_back(OTC("rounding_rtz_conv_from_fp32_down", B_RTZ_ROUNDING, OID_CONV_FROM_FP32,
2950 V_CONV_FROM_FP32_TO_FP16_DOWN_ARG, V_UNUSED,
2951 V_CONV_FROM_FP32_TO_FP16_DOWN_RTZ_RESULT, noStorage));
2952 testCases.push_back(OTC("rounding_rtz_conv_from_fp32_tie_up", B_RTZ_ROUNDING, OID_CONV_FROM_FP32,
2953 V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG, V_UNUSED,
2954 V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTZ_RESULT, noStorage));
2955 testCases.push_back(OTC("rounding_rtz_conv_from_fp32_tie_down", B_RTZ_ROUNDING, OID_CONV_FROM_FP32,
2956 V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
2957 V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTZ_RESULT, noStorage));
2958
2959 // fp64 rte
2960 testCases.push_back(OTC("rounding_rte_conv_from_fp64_up", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
2961 V_CONV_FROM_FP64_TO_FP16_UP_ARG, V_UNUSED,
2962 V_CONV_FROM_FP64_TO_FP16_UP_RTE_RESULT, noStorage));
2963 testCases.push_back(OTC("rounding_rte_conv_from_fp64_down", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
2964 V_CONV_FROM_FP64_TO_FP16_DOWN_ARG, V_UNUSED,
2965 V_CONV_FROM_FP64_TO_FP16_DOWN_RTE_RESULT, noStorage));
2966 testCases.push_back(OTC("rounding_rte_conv_from_fp64_tie_up", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
2967 V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG, V_UNUSED,
2968 V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTE_RESULT, noStorage));
2969 testCases.push_back(OTC("rounding_rte_conv_from_fp64_tie_down", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
2970 V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
2971 V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTE_RESULT, noStorage));
2972
2973 // fp64 rtz
2974 testCases.push_back(OTC("rounding_rtz_conv_from_fp64_up", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
2975 V_CONV_FROM_FP64_TO_FP16_UP_ARG, V_UNUSED,
2976 V_CONV_FROM_FP64_TO_FP16_UP_RTZ_RESULT, noStorage));
2977 testCases.push_back(OTC("rounding_rtz_conv_from_fp64_down", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
2978 V_CONV_FROM_FP64_TO_FP16_DOWN_ARG, V_UNUSED,
2979 V_CONV_FROM_FP64_TO_FP16_DOWN_RTZ_RESULT, noStorage));
2980 testCases.push_back(OTC("rounding_rtz_conv_from_fp64_tie_up", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
2981 V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG, V_UNUSED,
2982 V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTZ_RESULT, noStorage));
2983 testCases.push_back(OTC("rounding_rtz_conv_from_fp64_tie_down", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
2984 V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
2985 V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTZ_RESULT, noStorage));
2986
2987 //// Conversions from specialization constants
2988 // fp32 rte
2989 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32_up", B_RTE_ROUNDING,
2990 OID_SCONST_CONV_FROM_FP32_TO_FP16_UP, V_CONV_FROM_FP32_TO_FP16_UP_ARG, V_UNUSED,
2991 V_CONV_FROM_FP32_TO_FP16_UP_RTE_RESULT, noStorage));
2992 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32_down", B_RTE_ROUNDING,
2993 OID_SCONST_CONV_FROM_FP32_TO_FP16_DOWN, V_CONV_FROM_FP32_TO_FP16_DOWN_ARG,
2994 V_UNUSED, V_CONV_FROM_FP32_TO_FP16_DOWN_RTE_RESULT, noStorage));
2995 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32_tie_up", B_RTE_ROUNDING,
2996 OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_UP, V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG,
2997 V_UNUSED, V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTE_RESULT, noStorage));
2998 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32_tie_down", B_RTE_ROUNDING,
2999 OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_DOWN,
3000 V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
3001 V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTE_RESULT, noStorage));
3002
3003 // fp32 rtz
3004 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32_up", B_RTZ_ROUNDING,
3005 OID_SCONST_CONV_FROM_FP32_TO_FP16_UP, V_CONV_FROM_FP32_TO_FP16_UP_ARG, V_UNUSED,
3006 V_CONV_FROM_FP32_TO_FP16_UP_RTZ_RESULT, noStorage));
3007 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32_down", B_RTZ_ROUNDING,
3008 OID_SCONST_CONV_FROM_FP32_TO_FP16_DOWN, V_CONV_FROM_FP32_TO_FP16_DOWN_ARG,
3009 V_UNUSED, V_CONV_FROM_FP32_TO_FP16_DOWN_RTZ_RESULT, noStorage));
3010 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32_tie_up", B_RTZ_ROUNDING,
3011 OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_UP, V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG,
3012 V_UNUSED, V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTZ_RESULT, noStorage));
3013 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32_tie_down", B_RTZ_ROUNDING,
3014 OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_DOWN,
3015 V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
3016 V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTZ_RESULT, noStorage));
3017
3018 // fp64 rte
3019 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_up", B_RTE_ROUNDING,
3020 OID_SCONST_CONV_FROM_FP64_TO_FP16_UP, V_CONV_FROM_FP64_TO_FP16_UP_ARG, V_UNUSED,
3021 V_CONV_FROM_FP64_TO_FP16_UP_RTE_RESULT, noStorage));
3022 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_down", B_RTE_ROUNDING,
3023 OID_SCONST_CONV_FROM_FP64_TO_FP16_DOWN, V_CONV_FROM_FP64_TO_FP16_DOWN_ARG,
3024 V_UNUSED, V_CONV_FROM_FP64_TO_FP16_DOWN_RTE_RESULT, noStorage));
3025 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_tie_up", B_RTE_ROUNDING,
3026 OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_UP, V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG,
3027 V_UNUSED, V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTE_RESULT, noStorage));
3028 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_tie_down", B_RTE_ROUNDING,
3029 OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_DOWN,
3030 V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
3031 V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTE_RESULT, noStorage));
3032
3033 // fp64 rtz
3034 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_up", B_RTZ_ROUNDING,
3035 OID_SCONST_CONV_FROM_FP64_TO_FP16_UP, V_CONV_FROM_FP64_TO_FP16_UP_ARG, V_UNUSED,
3036 V_CONV_FROM_FP64_TO_FP16_UP_RTZ_RESULT, noStorage));
3037 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_down", B_RTZ_ROUNDING,
3038 OID_SCONST_CONV_FROM_FP64_TO_FP16_DOWN, V_CONV_FROM_FP64_TO_FP16_DOWN_ARG,
3039 V_UNUSED, V_CONV_FROM_FP64_TO_FP16_DOWN_RTZ_RESULT, noStorage));
3040 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_tie_up", B_RTZ_ROUNDING,
3041 OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_UP, V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG,
3042 V_UNUSED, V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTZ_RESULT, noStorage));
3043 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_tie_down", B_RTZ_ROUNDING,
3044 OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_DOWN,
3045 V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
3046 V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTZ_RESULT, noStorage));
3047 }
3048
3049 // verify that VkShaderFloatingPointRoundingModeKHR can be overridden for a given instruction by the FPRoundingMode decoration.
3050 // FPRoundingMode decoration requires VK_KHR_16bit_storage.
3051 testCases.push_back(OTC("rounding_rte_override_from_fp32_up", B_RTE_ROUNDING, OID_ORTZ_ROUND,
3052 V_CONV_FROM_FP32_TO_FP16_UP_ARG, V_UNUSED, V_CONV_FROM_FP32_TO_FP16_UP_RTZ_RESULT));
3053 testCases.push_back(OTC("rounding_rte_override_from_fp32_down", B_RTE_ROUNDING, OID_ORTZ_ROUND,
3054 V_CONV_FROM_FP32_TO_FP16_DOWN_ARG, V_UNUSED,
3055 V_CONV_FROM_FP32_TO_FP16_DOWN_RTZ_RESULT));
3056 testCases.push_back(OTC("rounding_rte_override_from_fp32_tie_up", B_RTE_ROUNDING, OID_ORTZ_ROUND,
3057 V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG, V_UNUSED,
3058 V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTZ_RESULT));
3059 testCases.push_back(OTC("rounding_rte_override_from_fp32_tie_down", B_RTE_ROUNDING, OID_ORTZ_ROUND,
3060 V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
3061 V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTZ_RESULT));
3062 // Missing for FP64 -> FP16
3063 // TODO(https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/4539)
3064
3065 testCases.push_back(OTC("rounding_rtz_override_from_fp32_up", B_RTE_ROUNDING, OID_ORTE_ROUND,
3066 V_CONV_FROM_FP32_TO_FP16_UP_ARG, V_UNUSED, V_CONV_FROM_FP32_TO_FP16_UP_RTE_RESULT));
3067 testCases.push_back(OTC("rounding_rtz_override_from_fp32_down", B_RTE_ROUNDING, OID_ORTE_ROUND,
3068 V_CONV_FROM_FP32_TO_FP16_DOWN_ARG, V_UNUSED,
3069 V_CONV_FROM_FP32_TO_FP16_DOWN_RTE_RESULT));
3070 testCases.push_back(OTC("rounding_rtz_override_from_fp32_tie_up", B_RTE_ROUNDING, OID_ORTE_ROUND,
3071 V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG, V_UNUSED,
3072 V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTE_RESULT));
3073 testCases.push_back(OTC("rounding_rtz_override_from_fp32_tie_down", B_RTE_ROUNDING, OID_ORTE_ROUND,
3074 V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
3075 V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTE_RESULT));
3076 // Missing for FP64 -> FP16
3077 // TODO(https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/4539)
3078 }
3079
3080 createUnaryTestCases(testCases, OID_CONV_FROM_FP32, V_CONV_DENORM_SMALLER, V_ZERO);
3081 createUnaryTestCases(testCases, OID_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
3082 createUnaryTestCases(testCases, OID_CONV_FROM_FP32, V_CONV_DENORM_SMALLER, V_ZERO, true);
3083 createUnaryTestCases(testCases, OID_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO, true);
3084 }
3085 else if (typeTestResults->variableType() == FP32)
3086 {
3087 if (argumentsFromInput)
3088 {
3089 //// Conversions from arguments
3090 // fp64 rte
3091 testCases.push_back(OTC("rounding_rte_conv_from_fp64_up", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
3092 V_CONV_FROM_FP64_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_FP64_TO_FP32_UP_RTE_RESULT));
3093 testCases.push_back(OTC("rounding_rte_conv_from_fp64_down", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
3094 V_CONV_FROM_FP64_TO_FP32_DOWN_ARG, V_UNUSED,
3095 V_CONV_FROM_FP64_TO_FP32_DOWN_RTE_RESULT));
3096 testCases.push_back(OTC("rounding_rte_conv_from_fp64_tie_up", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
3097 V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG, V_UNUSED,
3098 V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTE_RESULT));
3099 testCases.push_back(OTC("rounding_rte_conv_from_fp64_tie_down", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
3100 V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG, V_UNUSED,
3101 V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTE_RESULT));
3102
3103 // fp64 rtz
3104 testCases.push_back(OTC("rounding_rtz_conv_from_fp64_up", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
3105 V_CONV_FROM_FP64_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_FP64_TO_FP32_UP_RTZ_RESULT));
3106 testCases.push_back(OTC("rounding_rtz_conv_from_fp64_down", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
3107 V_CONV_FROM_FP64_TO_FP32_DOWN_ARG, V_UNUSED,
3108 V_CONV_FROM_FP64_TO_FP32_DOWN_RTZ_RESULT));
3109 testCases.push_back(OTC("rounding_rtz_conv_from_fp64_tie_up", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
3110 V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG, V_UNUSED,
3111 V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTZ_RESULT));
3112 testCases.push_back(OTC("rounding_rtz_conv_from_fp64_tie_down", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
3113 V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG, V_UNUSED,
3114 V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTZ_RESULT));
3115
3116 //// Conversions from specialization constants
3117 // fp64 rte
3118 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_up", B_RTE_ROUNDING,
3119 OID_SCONST_CONV_FROM_FP64_TO_FP32_UP, V_CONV_FROM_FP64_TO_FP32_UP_ARG, V_UNUSED,
3120 V_CONV_FROM_FP64_TO_FP32_UP_RTE_RESULT));
3121 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_down", B_RTE_ROUNDING,
3122 OID_SCONST_CONV_FROM_FP64_TO_FP32_DOWN, V_CONV_FROM_FP64_TO_FP32_DOWN_ARG, V_UNUSED,
3123 V_CONV_FROM_FP64_TO_FP32_DOWN_RTE_RESULT));
3124 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_tie_up", B_RTE_ROUNDING,
3125 OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_UP, V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG,
3126 V_UNUSED, V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTE_RESULT));
3127 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_tie_down", B_RTE_ROUNDING,
3128 OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_DOWN, V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG,
3129 V_UNUSED, V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTE_RESULT));
3130
3131 // fp64 rtz
3132 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_up", B_RTZ_ROUNDING,
3133 OID_SCONST_CONV_FROM_FP64_TO_FP32_UP, V_CONV_FROM_FP64_TO_FP32_UP_ARG, V_UNUSED,
3134 V_CONV_FROM_FP64_TO_FP32_UP_RTZ_RESULT));
3135 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_down", B_RTZ_ROUNDING,
3136 OID_SCONST_CONV_FROM_FP64_TO_FP32_DOWN, V_CONV_FROM_FP64_TO_FP32_DOWN_ARG, V_UNUSED,
3137 V_CONV_FROM_FP64_TO_FP32_DOWN_RTZ_RESULT));
3138 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_tie_up", B_RTZ_ROUNDING,
3139 OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_UP, V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG,
3140 V_UNUSED, V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTZ_RESULT));
3141 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_tie_down", B_RTZ_ROUNDING,
3142 OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_DOWN, V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG,
3143 V_UNUSED, V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTZ_RESULT));
3144
3145 // Verify that VkShaderFloatingPointRoundingModeKHR can be overridden for a given instruction by the FPRoundingMode decoration.
3146 // Missing for FP64 -> FP32
3147 // TODO(https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/4539)
3148
3149 // uint32 rtz
3150 testCases.push_back(OTC("rounding_rtz_conv_from_uint32_up", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP32,
3151 V_CONV_FROM_UINT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_UINT32_UP_RTZ_RESULT));
3152 testCases.push_back(OTC("rounding_rtz_conv_from_uint32_tie", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP32,
3153 V_CONV_FROM_UINT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_UINT32_TIE_RTZ_RESULT));
3154 testCases.push_back(OTC("rounding_rtz_conv_from_uint32_down", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP32,
3155 V_CONV_FROM_UINT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_UINT32_DOWN_RTZ_RESULT));
3156
3157 // uint64 rtz
3158 testCases.push_back(OTC("rounding_rtz_conv_from_uint64_up", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3159 V_CONV_FROM_UINT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_UINT64_UP_RTZ_RESULT));
3160 testCases.push_back(OTC("rounding_rtz_conv_from_uint64_tie", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3161 V_CONV_FROM_UINT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_UINT64_TIE_RTZ_RESULT));
3162 testCases.push_back(OTC("rounding_rtz_conv_from_uint64_down", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3163 V_CONV_FROM_UINT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_UINT64_DOWN_RTZ_RESULT));
3164
3165 // uint32 rte
3166 testCases.push_back(OTC("rounding_rte_conv_from_uint32_up", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP32,
3167 V_CONV_FROM_UINT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_UINT32_UP_RTE_RESULT));
3168 testCases.push_back(OTC("rounding_rte_conv_from_uint32_tie", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP32,
3169 V_CONV_FROM_UINT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_UINT32_TIE_RTE_RESULT));
3170 testCases.push_back(OTC("rounding_rte_conv_from_uint32_down", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP32,
3171 V_CONV_FROM_UINT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_UINT32_DOWN_RTE_RESULT));
3172
3173 // uint64 rte
3174 testCases.push_back(OTC("rounding_rte_conv_from_uint64_up", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3175 V_CONV_FROM_UINT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_UINT64_UP_RTE_RESULT));
3176 testCases.push_back(OTC("rounding_rte_conv_from_uint64_tie", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3177 V_CONV_FROM_UINT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_UINT64_TIE_RTE_RESULT));
3178 testCases.push_back(OTC("rounding_rte_conv_from_uint64_down", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3179 V_CONV_FROM_UINT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_UINT64_DOWN_RTE_RESULT));
3180
3181 // int32 rtz
3182 testCases.push_back(OTC("rounding_rtz_conv_from_int32_up", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP32,
3183 V_CONV_FROM_INT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_INT32_UP_RTZ_RESULT));
3184 testCases.push_back(OTC("rounding_rtz_conv_from_int32_tie", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP32,
3185 V_CONV_FROM_INT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_INT32_TIE_RTZ_RESULT));
3186 testCases.push_back(OTC("rounding_rtz_conv_from_int32_down", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP32,
3187 V_CONV_FROM_INT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_INT32_DOWN_RTZ_RESULT));
3188
3189 // int64 rtz
3190 testCases.push_back(OTC("rounding_rtz_conv_from_int64_up", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3191 V_CONV_FROM_INT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_INT64_UP_RTZ_RESULT));
3192 testCases.push_back(OTC("rounding_rtz_conv_from_int64_tie", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3193 V_CONV_FROM_INT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_INT64_TIE_RTZ_RESULT));
3194 testCases.push_back(OTC("rounding_rtz_conv_from_int64_down", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3195 V_CONV_FROM_INT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_INT64_DOWN_RTZ_RESULT));
3196
3197 // int32 rte
3198 testCases.push_back(OTC("rounding_rte_conv_from_int32_up", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP32,
3199 V_CONV_FROM_INT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_INT32_UP_RTE_RESULT));
3200 testCases.push_back(OTC("rounding_rte_conv_from_int32_tie", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP32,
3201 V_CONV_FROM_INT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_INT32_TIE_RTE_RESULT));
3202 testCases.push_back(OTC("rounding_rte_conv_from_int32_down", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP32,
3203 V_CONV_FROM_INT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_INT32_DOWN_RTE_RESULT));
3204
3205 // int64 rte
3206 testCases.push_back(OTC("rounding_rte_conv_from_int64_up", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3207 V_CONV_FROM_INT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_INT64_UP_RTE_RESULT));
3208 testCases.push_back(OTC("rounding_rte_conv_from_int64_tie", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3209 V_CONV_FROM_INT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_INT64_TIE_RTE_RESULT));
3210 testCases.push_back(OTC("rounding_rte_conv_from_int64_down", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3211 V_CONV_FROM_INT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_INT64_DOWN_RTE_RESULT));
3212 }
3213 else
3214 {
3215 // PackHalf2x16 - verification done in SPIR-V
3216 testCases.push_back(
3217 OTC("pack_half_denorm_preserve", B_DENORM_PRESERVE, OID_PH_DENORM, V_UNUSED, V_UNUSED, V_ONE));
3218
3219 // UnpackHalf2x16 - custom arguments defined as constants
3220 testCases.push_back(
3221 OTC("upack_half_denorm_flush_to_zero", B_DENORM_FLUSH, OID_UPH_DENORM, V_UNUSED, V_UNUSED, V_ZERO));
3222 testCases.push_back(OTC("upack_half_denorm_preserve", B_DENORM_PRESERVE, OID_UPH_DENORM, V_UNUSED, V_UNUSED,
3223 V_CONV_DENORM_SMALLER));
3224 }
3225
3226 createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP32);
3227 createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP32, true);
3228 createUnaryTestCases(testCases, OID_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
3229 }
3230 else // FP64
3231 {
3232 if (argumentsFromInput)
3233 {
3234 // uint64 rtz
3235 testCases.push_back(OTC("rounding_rtz_conv_from_uint64_up", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3236 V_CONV_FROM_UINT_TO_FP64_UP_ARG, V_UNUSED, V_CONV_FROM_UINT64_UP_RTZ_RESULT));
3237 testCases.push_back(OTC("rounding_rtz_conv_from_uint64_tie", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3238 V_CONV_FROM_UINT_TO_FP64_TIE_ARG, V_UNUSED, V_CONV_FROM_UINT64_TIE_RTZ_RESULT));
3239 testCases.push_back(OTC("rounding_rtz_conv_from_uint64_down", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3240 V_CONV_FROM_UINT_TO_FP64_DOWN_ARG, V_UNUSED, V_CONV_FROM_UINT64_DOWN_RTZ_RESULT));
3241
3242 // uint64 rte
3243 testCases.push_back(OTC("rounding_rte_conv_from_uint64_up", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3244 V_CONV_FROM_UINT_TO_FP64_UP_ARG, V_UNUSED, V_CONV_FROM_UINT64_UP_RTE_RESULT));
3245 testCases.push_back(OTC("rounding_rte_conv_from_uint64_tie", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3246 V_CONV_FROM_UINT_TO_FP64_TIE_ARG, V_UNUSED, V_CONV_FROM_UINT64_TIE_RTE_RESULT));
3247 testCases.push_back(OTC("rounding_rte_conv_from_uint64_down", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3248 V_CONV_FROM_UINT_TO_FP64_DOWN_ARG, V_UNUSED, V_CONV_FROM_UINT64_DOWN_RTE_RESULT));
3249
3250 // int64 rtz
3251 testCases.push_back(OTC("rounding_rtz_conv_from_int64_up", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3252 V_CONV_FROM_INT_TO_FP64_UP_ARG, V_UNUSED, V_CONV_FROM_INT64_UP_RTZ_RESULT));
3253 testCases.push_back(OTC("rounding_rtz_conv_from_int64_tie", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3254 V_CONV_FROM_INT_TO_FP64_TIE_ARG, V_UNUSED, V_CONV_FROM_INT64_TIE_RTZ_RESULT));
3255 testCases.push_back(OTC("rounding_rtz_conv_from_int64_down", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3256 V_CONV_FROM_INT_TO_FP64_DOWN_ARG, V_UNUSED, V_CONV_FROM_INT64_DOWN_RTZ_RESULT));
3257
3258 // int64 rte
3259 testCases.push_back(OTC("rounding_rte_conv_from_int64_up", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3260 V_CONV_FROM_INT_TO_FP64_UP_ARG, V_UNUSED, V_CONV_FROM_INT64_UP_RTE_RESULT));
3261 testCases.push_back(OTC("rounding_rte_conv_from_int64_tie", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3262 V_CONV_FROM_INT_TO_FP64_TIE_ARG, V_UNUSED, V_CONV_FROM_INT64_TIE_RTE_RESULT));
3263 testCases.push_back(OTC("rounding_rte_conv_from_int64_down", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3264 V_CONV_FROM_INT_TO_FP64_DOWN_ARG, V_UNUSED, V_CONV_FROM_INT64_DOWN_RTE_RESULT));
3265 }
3266 else
3267 {
3268 // PackDouble2x32 - custom arguments defined as constants
3269 testCases.push_back(
3270 OTC("pack_double_denorm_preserve", B_DENORM_PRESERVE, OID_PD_DENORM, V_UNUSED, V_UNUSED, V_DENORM));
3271
3272 // UnpackDouble2x32 - verification done in SPIR-V
3273 testCases.push_back(OTC("upack_double_denorm_flush_to_zero", B_DENORM_FLUSH, OID_UPD_DENORM_FLUSH, V_DENORM,
3274 V_UNUSED, V_ONE));
3275 testCases.push_back(OTC("upack_double_denorm_preserve", B_DENORM_PRESERVE, OID_UPD_DENORM_PRESERVE,
3276 V_DENORM, V_UNUSED, V_ONE));
3277 }
3278
3279 createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP64);
3280 createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP64, true);
3281 createUnaryTestCases(testCases, OID_CONV_FROM_FP32, V_CONV_DENORM_BIGGER, V_ZERO_OR_FP32_DENORM_TO_FP64);
3282 }
3283 }
3284
getOperation(OperationId id) const3285 const Operation &TestCasesBuilder::getOperation(OperationId id) const
3286 {
3287 return m_operations.at(id);
3288 }
3289
createUnaryTestCases(vector<OperationTestCase> & testCases,OperationId operationId,ValueId denormPreserveResult,ValueId denormFTZResult,bool fp16WithoutStorage) const3290 void TestCasesBuilder::createUnaryTestCases(vector<OperationTestCase> &testCases, OperationId operationId,
3291 ValueId denormPreserveResult, ValueId denormFTZResult,
3292 bool fp16WithoutStorage) const
3293 {
3294 // Denorm - Preserve
3295 testCases.push_back(OTC("op_denorm_preserve", B_DENORM_PRESERVE, operationId, V_DENORM, V_UNUSED,
3296 denormPreserveResult, fp16WithoutStorage));
3297
3298 // Denorm - FlushToZero
3299 testCases.push_back(OTC("op_denorm_flush_to_zero", B_DENORM_FLUSH, operationId, V_DENORM, V_UNUSED, denormFTZResult,
3300 fp16WithoutStorage));
3301
3302 // Signed Zero Inf Nan - Preserve
3303 testCases.push_back(
3304 OTC("op_zero_preserve", B_ZIN_PRESERVE, operationId, V_ZERO, V_UNUSED, V_ZERO, fp16WithoutStorage));
3305 testCases.push_back(OTC("op_signed_zero_preserve", B_ZIN_PRESERVE, operationId, V_MINUS_ZERO, V_UNUSED,
3306 V_MINUS_ZERO, fp16WithoutStorage));
3307 testCases.push_back(
3308 OTC("op_inf_preserve", B_ZIN_PRESERVE, operationId, V_INF, V_UNUSED, V_INF, fp16WithoutStorage));
3309 testCases.push_back(
3310 OTC("op_nan_preserve", B_ZIN_PRESERVE, operationId, V_NAN, V_UNUSED, V_NAN, fp16WithoutStorage));
3311 }
3312
3313 template <typename TYPE, typename FLOAT_TYPE>
isZeroOrOtherValue(const TYPE & returnedFloat,ValueId secondAcceptableResult,TestLog & log)3314 bool isZeroOrOtherValue(const TYPE &returnedFloat, ValueId secondAcceptableResult, TestLog &log)
3315 {
3316 if (returnedFloat.isZero() && !returnedFloat.signBit())
3317 return true;
3318
3319 TypeValues<FLOAT_TYPE> typeValues;
3320 typedef typename TYPE::StorageType SType;
3321 typename RawConvert<FLOAT_TYPE, SType>::Value value;
3322 value.fp = typeValues.getValue(secondAcceptableResult);
3323
3324 if (returnedFloat.bits() == value.ui)
3325 return true;
3326
3327 log << TestLog::Message << "Expected 0 or " << toHex(value.ui) << " (" << value.fp << ")" << TestLog::EndMessage;
3328 return false;
3329 }
3330
3331 template <typename TYPE>
isAcosResultCorrect(const TYPE & returnedFloat,TestLog & log)3332 bool isAcosResultCorrect(const TYPE &returnedFloat, TestLog &log)
3333 {
3334 // pi/2 is result of acos(0) which in the specs is defined as equivalent to
3335 // atan2(sqrt(1.0 - x^2), x), where atan2 has 4096 ULP, sqrt is equivalent to
3336 // 1.0 /inversesqrt(), inversesqrt() is 2 ULP and rcp is another 2.5 ULP
3337
3338 double precision = 0;
3339 const double piDiv2 = M_PI_2;
3340 if (returnedFloat.MANTISSA_BITS == 23)
3341 {
3342 FloatFormat fp32Format(-126, 127, 23, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
3343 precision = fp32Format.ulp(piDiv2, 4096.0);
3344 }
3345 else
3346 {
3347 FloatFormat fp16Format(-14, 15, 10, true, tcu::MAYBE);
3348 precision = fp16Format.ulp(piDiv2, 5.0);
3349 }
3350
3351 if (deAbs(returnedFloat.asDouble() - piDiv2) < precision)
3352 return true;
3353
3354 log << TestLog::Message << "Expected result to be in range"
3355 << " (" << piDiv2 - precision << ", " << piDiv2 + precision << "), got " << returnedFloat.asDouble()
3356 << TestLog::EndMessage;
3357 return false;
3358 }
3359
3360 template <typename TYPE>
isCosResultCorrect(const TYPE & returnedFloat,TestLog & log)3361 bool isCosResultCorrect(const TYPE &returnedFloat, TestLog &log)
3362 {
3363 // for cos(x) with x between -pi and pi, the precision error is 2^-11 for fp32 and 2^-7 for fp16.
3364 double precision = returnedFloat.MANTISSA_BITS == 23 ? dePow(2, -11) : dePow(2, -7);
3365 const double expected = 1.0;
3366
3367 if (deAbs(returnedFloat.asDouble() - expected) < precision)
3368 return true;
3369
3370 log << TestLog::Message << "Expected result to be in range"
3371 << " (" << expected - precision << ", " << expected + precision << "), got " << returnedFloat.asDouble()
3372 << TestLog::EndMessage;
3373 return false;
3374 }
3375
// Widens a native floating-point value to double.
// Types that need an explicit conversion routine provide their own specialization.
template <typename FLOAT_TYPE>
double getVariableTypeAsDouble(FLOAT_TYPE param)
{
    const double widened = param;
    return widened;
}
3381 template <>
getVariableTypeAsDouble(deFloat16 param)3382 double getVariableTypeAsDouble(deFloat16 param)
3383 {
3384 return deFloat16To64(param);
3385 }
3386
getPrecisionAt(double value,float ulp,int mantissaBits)3387 double getPrecisionAt(double value, float ulp, int mantissaBits)
3388 {
3389 if (mantissaBits == 23)
3390 {
3391 FloatFormat fp32Format(-126, 127, 23, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
3392 return fp32Format.ulp(value, ulp);
3393 }
3394 else if (mantissaBits == 52)
3395 {
3396 FloatFormat fp32Format(-1022, 1023, 52, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
3397 return fp32Format.ulp(value, ulp);
3398 }
3399 else
3400 {
3401 DE_ASSERT(mantissaBits == 10);
3402 FloatFormat fp16Format(-14, 15, 10, true, tcu::MAYBE);
3403 return fp16Format.ulp(value, ulp);
3404 }
3405 }
3406
3407 template <typename TYPE, typename FLOAT_TYPE, typename REF_FUNCTION>
isLogResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,REF_FUNCTION refFunction,TestLog & log)3408 bool isLogResultCorrect(const TYPE &returnedFloat, FLOAT_TYPE param, REF_FUNCTION refFunction, TestLog &log)
3409 {
3410 if (returnedFloat.isInf() && returnedFloat.signBit())
3411 return true;
3412
3413 const double expected = refFunction(getVariableTypeAsDouble(param));
3414 const double precision = getPrecisionAt(expected, 3.0, returnedFloat.MANTISSA_BITS);
3415
3416 if (deAbs(returnedFloat.asDouble() - expected) < precision)
3417 return true;
3418
3419 log << TestLog::Message << "Expected result to be -INF or in range"
3420 << " (" << expected - precision << ", " << expected + precision << "), got " << returnedFloat.asDouble()
3421 << TestLog::EndMessage;
3422 return false;
3423 }
3424
3425 template <typename TYPE, typename FLOAT_TYPE>
isInverseSqrtResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,TestLog & log)3426 bool isInverseSqrtResultCorrect(const TYPE &returnedFloat, FLOAT_TYPE param, TestLog &log)
3427 {
3428 if (returnedFloat.isInf() && !returnedFloat.signBit())
3429 return true;
3430
3431 const double expected = 1.0 / deSqrt(getVariableTypeAsDouble(param));
3432 const double precision = getPrecisionAt(expected, 2.0, returnedFloat.MANTISSA_BITS);
3433
3434 if (deAbs(returnedFloat.asDouble() - expected) < precision)
3435 return true;
3436
3437 log << TestLog::Message << "Expected result to be INF or in range"
3438 << " (" << expected - precision << ", " << expected + precision << "), got " << returnedFloat.asDouble()
3439 << TestLog::EndMessage;
3440 return false;
3441 }
3442
3443 template <typename TYPE, typename FLOAT_TYPE>
isSqrtResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,TestLog & log)3444 bool isSqrtResultCorrect(const TYPE &returnedFloat, FLOAT_TYPE param, TestLog &log)
3445 {
3446 if (returnedFloat.isZero() && !returnedFloat.signBit())
3447 return true;
3448
3449 const double expected = deSqrt(getVariableTypeAsDouble(param));
3450 const double expectedInverseSqrt = 1.0 / expected;
3451 const double inverseSqrtPrecision = getPrecisionAt(expectedInverseSqrt, 2.0, returnedFloat.MANTISSA_BITS);
3452
3453 double expectedMin =
3454 deMin(1.0 / (expectedInverseSqrt - inverseSqrtPrecision), 1.0 / (expectedInverseSqrt + inverseSqrtPrecision));
3455 double expectedMax =
3456 deMax(1.0 / (expectedInverseSqrt - inverseSqrtPrecision), 1.0 / (expectedInverseSqrt + inverseSqrtPrecision));
3457
3458 expectedMin -= getPrecisionAt(expectedMin, 2.5, returnedFloat.MANTISSA_BITS);
3459 expectedMax += getPrecisionAt(expectedMax, 2.5, returnedFloat.MANTISSA_BITS);
3460
3461 if (returnedFloat.asDouble() >= expectedMin && returnedFloat.asDouble() <= expectedMax)
3462 return true;
3463
3464 log << TestLog::Message << "Expected result to be +0 or in range"
3465 << " (" << expectedMin << ", " << expectedMax << "), got " << returnedFloat.asDouble() << TestLog::EndMessage;
3466 return false;
3467 }
3468
3469 // Function used to compare test result with expected output.
3470 // TYPE can be Float16, Float32 or Float64.
3471 // FLOAT_TYPE can be deFloat16, float, double.
3472 template <typename TYPE, typename FLOAT_TYPE>
compareBytes(vector<uint8_t> & expectedBytes,AllocationSp outputAlloc,TestLog & log)3473 bool compareBytes(vector<uint8_t> &expectedBytes, AllocationSp outputAlloc, TestLog &log)
3474 {
3475 const TYPE *returned = static_cast<const TYPE *>(outputAlloc->getHostPtr());
3476 const TYPE *fValueId = reinterpret_cast<const TYPE *>(&expectedBytes.front());
3477
3478 // all test return single value
3479 // Fp16 nostorage tests get their values from a uint32_t value, but we create the
3480 // buffer with the same size for both cases: 4 bytes.
3481 if (sizeof(TYPE) == 2u)
3482 DE_ASSERT((expectedBytes.size() / sizeof(TYPE)) == 2);
3483 else
3484 DE_ASSERT((expectedBytes.size() / sizeof(TYPE)) == 1);
3485
3486 // during test setup we do not store expected value but id that can be used to
3487 // retrieve actual value - this is done to handle special cases like multiple
3488 // allowed results or epsilon checks for some cases
3489 // note that this is workaround - this should be done by changing
3490 // ComputerShaderCase and GraphicsShaderCase so that additional arguments can
3491 // be passed to this verification callback
3492 typedef typename TYPE::StorageType SType;
3493 SType expectedInt = fValueId[0].bits();
3494 ValueId expectedValueId = static_cast<ValueId>(expectedInt);
3495
3496 // something went wrong, expected value cant be V_UNUSED,
3497 // if this is the case then test shouldn't be created at all
3498 DE_ASSERT(expectedValueId != V_UNUSED);
3499
3500 TYPE returnedFloat = returned[0];
3501
3502 log << TestLog::Message << "Calculated result: " << toHex(returnedFloat.bits()) << " (" << returnedFloat.asFloat()
3503 << ")" << TestLog::EndMessage;
3504
3505 if (expectedValueId == V_NAN)
3506 {
3507 if (returnedFloat.isNaN())
3508 return true;
3509
3510 log << TestLog::Message << "Expected NaN" << TestLog::EndMessage;
3511 return false;
3512 }
3513
3514 if (expectedValueId == V_DENORM)
3515 {
3516 if (returnedFloat.isDenorm())
3517 return true;
3518
3519 log << TestLog::Message << "Expected Denorm" << TestLog::EndMessage;
3520 return false;
3521 }
3522
3523 // handle multiple acceptable results cases
3524 if (expectedValueId == V_ZERO_OR_MINUS_ZERO)
3525 {
3526 if (returnedFloat.isZero())
3527 return true;
3528
3529 log << TestLog::Message << "Expected 0 or -0" << TestLog::EndMessage;
3530 return false;
3531 }
3532 if (expectedValueId == V_ZERO_OR_ONE)
3533 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_ONE, log);
3534 if ((expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP32) || (expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP64))
3535 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_SMALLER, log);
3536 if (expectedValueId == V_ZERO_OR_FP32_DENORM_TO_FP64)
3537 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_BIGGER, log);
3538 if (expectedValueId == V_ZERO_OR_DENORM_TIMES_TWO)
3539 {
3540 // this expected value is only needed for fp16
3541 DE_ASSERT(returnedFloat.EXPONENT_BIAS == 15);
3542 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_DENORM_TIMES_TWO, log);
3543 }
3544 if (expectedValueId == V_MINUS_ONE_OR_CLOSE)
3545 {
3546 // this expected value is only needed for fp16
3547 DE_ASSERT(returnedFloat.EXPONENT_BIAS == 15);
3548 typename TYPE::StorageType returnedValue = returnedFloat.bits();
3549 return (returnedValue == 0xbc00) || (returnedValue == 0xbbff);
3550 }
3551
3552 // handle trigonometric operations precision errors
3553 if (expectedValueId == V_TRIG_ONE)
3554 return isCosResultCorrect<TYPE>(returnedFloat, log);
3555
3556 // handle acos(0) case
3557 if (expectedValueId == V_PI_DIV_2)
3558 return isAcosResultCorrect<TYPE>(returnedFloat, log);
3559
3560 TypeValues<FLOAT_TYPE> typeValues;
3561
3562 if (expectedValueId == V_MINUS_INF_OR_LOG_DENORM)
3563 return isLogResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), deLog, log);
3564
3565 if (expectedValueId == V_MINUS_INF_OR_LOG2_DENORM)
3566 return isLogResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), deLog2, log);
3567
3568 if (expectedValueId == V_ZERO_OR_SQRT_DENORM)
3569 return isSqrtResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), log);
3570
3571 if (expectedValueId == V_INF_OR_INV_SQRT_DENORM)
3572 return isInverseSqrtResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), log);
3573
3574 typename RawConvert<FLOAT_TYPE, SType>::Value value;
3575 value.fp = typeValues.getValue(expectedValueId);
3576
3577 if (returnedFloat.bits() == value.ui)
3578 return true;
3579
3580 log << TestLog::Message << "Expected " << toHex(value.ui) << " (" << value.fp << ")" << TestLog::EndMessage;
3581 return false;
3582 }
3583
3584 template <typename TYPE, typename FLOAT_TYPE>
checkFloats(const vector<Resource> &,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,TestLog & log)3585 bool checkFloats(const vector<Resource> &, const vector<AllocationSp> &outputAllocs,
3586 const vector<Resource> &expectedOutputs, TestLog &log)
3587 {
3588 if (outputAllocs.size() != expectedOutputs.size())
3589 return false;
3590
3591 for (uint32_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
3592 {
3593 vector<uint8_t> expectedBytes;
3594 expectedOutputs[outputNdx].getBytes(expectedBytes);
3595
3596 if (!compareBytes<TYPE, FLOAT_TYPE>(expectedBytes, outputAllocs[outputNdx], log))
3597 return false;
3598 }
3599
3600 return true;
3601 }
3602
checkMixedFloats(const vector<Resource> &,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,TestLog & log)3603 bool checkMixedFloats(const vector<Resource> &, const vector<AllocationSp> &outputAllocs,
3604 const vector<Resource> &expectedOutputs, TestLog &log)
3605 {
3606 // this function validates buffers containing floats of diferent widths, order is not important
3607
3608 if (outputAllocs.size() != expectedOutputs.size())
3609 return false;
3610
3611 // The comparison function depends on the data type stored in the resource.
3612 using compareFun = bool (*)(vector<uint8_t> &expectedBytes, AllocationSp outputAlloc, TestLog &log);
3613 const map<BufferDataType, compareFun> compareMap = {
3614 {BufferDataType::DATA_FP16, compareBytes<Float16, deFloat16>},
3615 {BufferDataType::DATA_FP32, compareBytes<Float32, float>},
3616 {BufferDataType::DATA_FP64, compareBytes<Float64, double>},
3617 };
3618
3619 vector<uint8_t> expectedBytes;
3620 bool allResultsAreCorrect = true;
3621 int resultIndex = static_cast<int>(outputAllocs.size());
3622
3623 while (resultIndex--)
3624 {
3625 expectedOutputs[resultIndex].getBytes(expectedBytes);
3626 BufferDataType type =
3627 static_cast<BufferDataType>(reinterpret_cast<std::uintptr_t>(expectedOutputs[resultIndex].getUserData()));
3628 allResultsAreCorrect &= compareMap.at(type)(expectedBytes, outputAllocs[resultIndex], log);
3629 }
3630
3631 return allResultsAreCorrect;
3632 }
3633
3634 // Base class for ComputeTestGroupBuilder and GrephicstestGroupBuilder classes.
3635 // It contains all functionalities that are used by both child classes.
3636 class TestGroupBuilderBase
3637 {
3638 public:
3639 TestGroupBuilderBase();
3640 virtual ~TestGroupBuilderBase() = default;
3641
3642 virtual void createOperationTests(TestCaseGroup *parentGroup, const char *groupName, VariableType variableType,
3643 bool argumentsFromInput) = 0;
3644
3645 virtual void createSettingsTests(TestCaseGroup *parentGroup) = 0;
3646
3647 protected:
3648 typedef vector<OperationTestCase> TestCaseVect;
3649
3650 // Structure containing all data required to create single operation test.
3651 struct OperationTestCaseInfo
3652 {
3653 VariableType outVariableType;
3654 bool argumentsFromInput;
3655 VkShaderStageFlagBits testedStage;
3656 const Operation &operation;
3657 const OperationTestCase &testCase;
3658 };
3659
3660 // Mode used by SettingsTestCaseInfo to specify what settings do we want to test.
3661 enum SettingsMode
3662 {
3663 SM_ROUNDING = 0,
3664 SM_DENORMS
3665 };
3666
3667 // Enum containing available options. When rounding is tested only SO_RTE and SO_RTZ
3668 // should be used. SO_FLUSH and SO_PRESERVE should be used only for denorm tests.
3669 enum SettingsOption
3670 {
3671 SO_UNUSED = 0,
3672 SO_RTE,
3673 SO_RTZ,
3674 SO_FLUSH,
3675 SO_PRESERVE
3676 };
3677
3678 // Structure containing all data required to create single settings test.
3679 struct SettingsTestCaseInfo
3680 {
3681 const char *name;
3682 SettingsMode testedMode;
3683 VkShaderFloatControlsIndependence independenceSetting;
3684
3685 SettingsOption fp16Option;
3686 SettingsOption fp32Option;
3687 SettingsOption fp64Option;
3688 bool fp16Without16BitStorage;
3689 };
3690
3691 void specializeOperation(const OperationTestCaseInfo &testCaseInfo,
3692 SpecializedOperation &specializedOperation) const;
3693
3694 void getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags, const string inBitWidth,
3695 const string outBitWidth, string &capability,
3696 string &executionMode) const;
3697
3698 void setupFloatControlsProperties(VariableType inVariableType, VariableType outVariableType,
3699 BehaviorFlags behaviorFlags,
3700 vk::VkPhysicalDeviceFloatControlsProperties &props) const;
3701
3702 protected:
3703 struct TypeData
3704 {
3705 TypeValuesSP values;
3706 TypeSnippetsSP snippets;
3707 TypeTestResultsSP testResults;
3708 };
3709
3710 // Type specific parameters are stored in this map.
3711 map<VariableType, TypeData> m_typeData;
3712
3713 // Map converting behaviuor id to OpCapability instruction
3714 typedef map<BehaviorFlagBits, string> BehaviorNameMap;
3715 BehaviorNameMap m_behaviorToName;
3716 };
3717
TestGroupBuilderBase()3718 TestGroupBuilderBase::TestGroupBuilderBase()
3719 {
3720 m_typeData[FP16] = TypeData();
3721 m_typeData[FP16].values = TypeValuesSP(new TypeValues<deFloat16>);
3722 m_typeData[FP16].snippets = TypeSnippetsSP(new TypeSnippets<deFloat16>);
3723 m_typeData[FP16].testResults = TypeTestResultsSP(new TypeTestResults<deFloat16>);
3724 m_typeData[FP32] = TypeData();
3725 m_typeData[FP32].values = TypeValuesSP(new TypeValues<float>);
3726 m_typeData[FP32].snippets = TypeSnippetsSP(new TypeSnippets<float>);
3727 m_typeData[FP32].testResults = TypeTestResultsSP(new TypeTestResults<float>);
3728 m_typeData[FP64] = TypeData();
3729 m_typeData[FP64].values = TypeValuesSP(new TypeValues<double>);
3730 m_typeData[FP64].snippets = TypeSnippetsSP(new TypeSnippets<double>);
3731 m_typeData[FP64].testResults = TypeTestResultsSP(new TypeTestResults<double>);
3732 m_typeData[UINT32] = TypeData();
3733 m_typeData[UINT32].values = TypeValuesSP(new TypeValues<float>);
3734 m_typeData[UINT32].snippets = TypeSnippetsSP(new TypeSnippets<float>(false));
3735 m_typeData[UINT32].testResults = TypeTestResultsSP(new TypeTestResults<float>);
3736 m_typeData[UINT64] = TypeData();
3737 m_typeData[UINT64].values = TypeValuesSP(new TypeValues<double>);
3738 m_typeData[UINT64].snippets = TypeSnippetsSP(new TypeSnippets<double>(false));
3739 m_typeData[UINT64].testResults = TypeTestResultsSP(new TypeTestResults<double>);
3740 m_typeData[INT32] = TypeData();
3741 m_typeData[INT32].values = TypeValuesSP(new TypeValues<float>);
3742 m_typeData[INT32].snippets = TypeSnippetsSP(new TypeSnippets<float>(false, true));
3743 m_typeData[INT32].testResults = TypeTestResultsSP(new TypeTestResults<float>);
3744 m_typeData[INT64] = TypeData();
3745 m_typeData[INT64].values = TypeValuesSP(new TypeValues<double>);
3746 m_typeData[INT64].snippets = TypeSnippetsSP(new TypeSnippets<double>(false, true));
3747 m_typeData[INT64].testResults = TypeTestResultsSP(new TypeTestResults<double>);
3748
3749 m_behaviorToName[B_DENORM_PRESERVE] = "DenormPreserve";
3750 m_behaviorToName[B_DENORM_FLUSH] = "DenormFlushToZero";
3751 m_behaviorToName[B_ZIN_PRESERVE] = "SignedZeroInfNanPreserve";
3752 m_behaviorToName[B_RTE_ROUNDING] = "RoundingModeRTE";
3753 m_behaviorToName[B_RTZ_ROUNDING] = "RoundingModeRTZ";
3754 }
3755
specializeOperation(const OperationTestCaseInfo & testCaseInfo,SpecializedOperation & specializedOperation) const3756 void TestGroupBuilderBase::specializeOperation(const OperationTestCaseInfo &testCaseInfo,
3757 SpecializedOperation &specializedOperation) const
3758 {
3759 const string typeToken = "_valueType";
3760 const string widthToken = "${float_width}";
3761
3762 VariableType outVariableType = testCaseInfo.outVariableType;
3763 const Operation &operation = testCaseInfo.operation;
3764 const TypeSnippetsSP outTypeSnippets = m_typeData.at(outVariableType).snippets;
3765 const bool inputRestricted = operation.isInputTypeRestricted;
3766 VariableType inVariableType = operation.restrictedInputType;
3767
3768 // usually input type is same as output but this is not the case for conversion
3769 // operations; in those cases operation definitions have restricted input type
3770 inVariableType = inputRestricted ? inVariableType : outVariableType;
3771
3772 TypeSnippetsSP inTypeSnippets = m_typeData.at(inVariableType).snippets;
3773
3774 const string inTypePrefix = string("_") + inTypeSnippets->getValueTypeString() + inTypeSnippets->bitWidth;
3775 const string outTypePrefix = string("_") + outTypeSnippets->getValueTypeString() + outTypeSnippets->bitWidth;
3776
3777 std::string byteWidthToken = std::to_string(std::stoi(outTypeSnippets->bitWidth) / 8);
3778
3779 specializedOperation.constants = replace(operation.constants, typeToken, inTypePrefix);
3780 specializedOperation.annotations = replace(operation.annotations, widthToken, byteWidthToken);
3781 specializedOperation.types = replace(operation.types, typeToken, outTypePrefix);
3782 specializedOperation.variables = replace(operation.variables, typeToken, outTypePrefix);
3783 specializedOperation.functions = replace(operation.functions, typeToken, outTypePrefix);
3784 specializedOperation.commands = replace(operation.commands, typeToken, outTypePrefix);
3785
3786 specializedOperation.inVariableType = inVariableType;
3787 specializedOperation.inTypeSnippets = inTypeSnippets;
3788 specializedOperation.outTypeSnippets = outTypeSnippets;
3789 specializedOperation.argumentsUsesFloatConstant = 0;
3790
3791 if (operation.isSpecConstant)
3792 return;
3793
3794 // select way arguments are prepared
3795 if (testCaseInfo.argumentsFromInput)
3796 {
3797 // read arguments from input SSBO in main function
3798 specializedOperation.arguments = inTypeSnippets->argumentsFromInputSnippet;
3799
3800 if (inVariableType == FP16 && testCaseInfo.testCase.fp16Without16BitStorage)
3801 specializedOperation.arguments = inTypeSnippets->argumentsFromInputFp16Snippet;
3802 }
3803 else
3804 {
3805 // generate proper values in main function
3806 const string arg1 = "%arg1 = ";
3807 const string arg2 = "%arg2 = ";
3808
3809 const ValueId *inputArguments = testCaseInfo.testCase.input;
3810 if (inputArguments[0] != V_UNUSED)
3811 {
3812 specializedOperation.arguments = arg1 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[0]);
3813 specializedOperation.argumentsUsesFloatConstant |= B_STATEMENT_USAGE_ARGS_CONST_FLOAT;
3814 }
3815 if (inputArguments[1] != V_UNUSED)
3816 {
3817 specializedOperation.arguments += arg2 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[1]);
3818 specializedOperation.argumentsUsesFloatConstant |= B_STATEMENT_USAGE_ARGS_CONST_FLOAT;
3819 }
3820 }
3821 }
3822
getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,const string inBitWidth,const string outBitWidth,string & capability,string & executionMode) const3823 void TestGroupBuilderBase::getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags, const string inBitWidth,
3824 const string outBitWidth, string &capability,
3825 string &executionMode) const
3826 {
3827 // iterate over all behaviours and request those that are needed
3828 BehaviorNameMap::const_iterator it = m_behaviorToName.begin();
3829 while (it != m_behaviorToName.end())
3830 {
3831 BehaviorFlagBits behaviorId = it->first;
3832 string behaviorName = it->second;
3833
3834 if (behaviorFlags & behaviorId)
3835 {
3836 capability += "OpCapability " + behaviorName + "\n";
3837
3838 // rounding mode should be obeyed for destination type
3839 bool rounding = (behaviorId == B_RTE_ROUNDING) || (behaviorId == B_RTZ_ROUNDING);
3840 executionMode +=
3841 "OpExecutionMode %main " + behaviorName + " " + (rounding ? outBitWidth : inBitWidth) + "\n";
3842 }
3843
3844 ++it;
3845 }
3846
3847 DE_ASSERT(!capability.empty() && !executionMode.empty());
3848 }
3849
setupFloatControlsProperties(VariableType inVariableType,VariableType outVariableType,BehaviorFlags behaviorFlags,vk::VkPhysicalDeviceFloatControlsProperties & props) const3850 void TestGroupBuilderBase::setupFloatControlsProperties(VariableType inVariableType, VariableType outVariableType,
3851 BehaviorFlags behaviorFlags,
3852 vk::VkPhysicalDeviceFloatControlsProperties &props) const
3853 {
3854 // rounding mode should obey the destination type
3855 bool rteRounding = (behaviorFlags & B_RTE_ROUNDING) != 0;
3856 bool rtzRounding = (behaviorFlags & B_RTZ_ROUNDING) != 0;
3857 if (rteRounding || rtzRounding)
3858 {
3859 switch (outVariableType)
3860 {
3861 case FP16:
3862 props.shaderRoundingModeRTEFloat16 = rteRounding;
3863 props.shaderRoundingModeRTZFloat16 = rtzRounding;
3864 return;
3865 case FP32:
3866 props.shaderRoundingModeRTEFloat32 = rteRounding;
3867 props.shaderRoundingModeRTZFloat32 = rtzRounding;
3868 return;
3869 case FP64:
3870 props.shaderRoundingModeRTEFloat64 = rteRounding;
3871 props.shaderRoundingModeRTZFloat64 = rtzRounding;
3872 return;
3873 case UINT32:
3874 case INT32:
3875 case UINT64:
3876 case INT64:
3877 return;
3878 }
3879 }
3880
3881 switch (inVariableType)
3882 {
3883 case FP16:
3884 props.shaderDenormPreserveFloat16 = behaviorFlags & B_DENORM_PRESERVE;
3885 props.shaderDenormFlushToZeroFloat16 = behaviorFlags & B_DENORM_FLUSH;
3886 props.shaderSignedZeroInfNanPreserveFloat16 = behaviorFlags & B_ZIN_PRESERVE;
3887 return;
3888 case FP32:
3889 props.shaderDenormPreserveFloat32 = behaviorFlags & B_DENORM_PRESERVE;
3890 props.shaderDenormFlushToZeroFloat32 = behaviorFlags & B_DENORM_FLUSH;
3891 props.shaderSignedZeroInfNanPreserveFloat32 = behaviorFlags & B_ZIN_PRESERVE;
3892 return;
3893 case FP64:
3894 props.shaderDenormPreserveFloat64 = behaviorFlags & B_DENORM_PRESERVE;
3895 props.shaderDenormFlushToZeroFloat64 = behaviorFlags & B_DENORM_FLUSH;
3896 props.shaderSignedZeroInfNanPreserveFloat64 = behaviorFlags & B_ZIN_PRESERVE;
3897 return;
3898 case UINT32:
3899 case INT32:
3900 case UINT64:
3901 case INT64:
3902 return;
3903 }
3904 }
3905
3906 // Test case not related to SPIR-V but executed with compute tests. It checks if specified
3907 // features are set to the same value when specific independence settings are used.
verifyIndependenceSettings(Context & context)3908 tcu::TestStatus verifyIndependenceSettings(Context &context)
3909 {
3910 if (!context.isDeviceFunctionalitySupported("VK_KHR_shader_float_controls"))
3911 TCU_THROW(NotSupportedError, "VK_KHR_shader_float_controls not supported");
3912
3913 vk::VkPhysicalDeviceFloatControlsProperties fcProperties;
3914 fcProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES;
3915 fcProperties.pNext = DE_NULL;
3916
3917 vk::VkPhysicalDeviceProperties2 deviceProperties;
3918 deviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
3919 deviceProperties.pNext = &fcProperties;
3920
3921 auto fail = [](const string &featureGroup)
3922 { return tcu::TestStatus::fail(featureGroup + " features should be set to the same value"); };
3923
3924 const VkPhysicalDevice physicalDevice = context.getPhysicalDevice();
3925 const vk::InstanceInterface &instanceInterface = context.getInstanceInterface();
3926 instanceInterface.getPhysicalDeviceProperties2(physicalDevice, &deviceProperties);
3927
3928 if (fcProperties.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE)
3929 {
3930 vk::VkBool32 fp16rte = fcProperties.shaderRoundingModeRTEFloat16;
3931 vk::VkBool32 fp32rte = fcProperties.shaderRoundingModeRTEFloat32;
3932 vk::VkBool32 fp64rte = fcProperties.shaderRoundingModeRTEFloat64;
3933 if ((fp16rte != fp32rte) || (fp32rte != fp64rte))
3934 return fail("shaderRoundingModeRTEFloat*");
3935
3936 vk::VkBool32 fp16rtz = fcProperties.shaderRoundingModeRTZFloat16;
3937 vk::VkBool32 fp32rtz = fcProperties.shaderRoundingModeRTZFloat32;
3938 vk::VkBool32 fp64rtz = fcProperties.shaderRoundingModeRTZFloat64;
3939 if ((fp16rtz != fp32rtz) || (fp32rtz != fp64rtz))
3940 return fail("shaderRoundingModeRTZFloat*");
3941 }
3942 else if (fcProperties.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY)
3943 {
3944 vk::VkBool32 fp16rte = fcProperties.shaderRoundingModeRTEFloat16;
3945 vk::VkBool32 fp64rte = fcProperties.shaderRoundingModeRTEFloat64;
3946 if ((fp16rte != fp64rte))
3947 return fail("shaderRoundingModeRTEFloat16 and 64");
3948
3949 vk::VkBool32 fp16rtz = fcProperties.shaderRoundingModeRTZFloat16;
3950 vk::VkBool32 fp64rtz = fcProperties.shaderRoundingModeRTZFloat64;
3951 if ((fp16rtz != fp64rtz))
3952 return fail("shaderRoundingModeRTZFloat16 and 64");
3953 }
3954
3955 if (fcProperties.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE)
3956 {
3957 vk::VkBool32 fp16flush = fcProperties.shaderDenormFlushToZeroFloat16;
3958 vk::VkBool32 fp32flush = fcProperties.shaderDenormFlushToZeroFloat32;
3959 vk::VkBool32 fp64flush = fcProperties.shaderDenormFlushToZeroFloat64;
3960 if ((fp16flush != fp32flush) || (fp32flush != fp64flush))
3961 return fail("shaderDenormFlushToZeroFloat*");
3962
3963 vk::VkBool32 fp16preserve = fcProperties.shaderDenormPreserveFloat16;
3964 vk::VkBool32 fp32preserve = fcProperties.shaderDenormPreserveFloat32;
3965 vk::VkBool32 fp64preserve = fcProperties.shaderDenormPreserveFloat64;
3966 if ((fp16preserve != fp32preserve) || (fp32preserve != fp64preserve))
3967 return fail("shaderDenormPreserveFloat*");
3968 }
3969 else if (fcProperties.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY)
3970 {
3971 vk::VkBool32 fp16flush = fcProperties.shaderDenormFlushToZeroFloat16;
3972 vk::VkBool32 fp64flush = fcProperties.shaderDenormFlushToZeroFloat64;
3973 if ((fp16flush != fp64flush))
3974 return fail("shaderDenormFlushToZeroFloat16 and 64");
3975
3976 vk::VkBool32 fp16preserve = fcProperties.shaderDenormPreserveFloat16;
3977 vk::VkBool32 fp64preserve = fcProperties.shaderDenormPreserveFloat64;
3978 if ((fp16preserve != fp64preserve))
3979 return fail("shaderDenormPreserveFloat16 and 64");
3980 }
3981
3982 return tcu::TestStatus::pass("Pass");
3983 }
3984
// ComputeTestGroupBuilder contains the logic that creates compute shaders
// for all test cases. Like most tests in spirv-assembly, it uses functionality
// implemented in vktSpvAsmComputeShaderTestUtil.cpp.
class ComputeTestGroupBuilder : public TestGroupBuilderBase
{
public:
    // Initializes the operation test case builder and sets the SPIR-V assembly
    // text of both shader templates held by this builder.
    void init();

    // Adds a child group named groupName to parentGroup and fills it with one
    // compute shader case per operation test case built for variableType;
    // cases whose expected output is V_UNUSED are skipped.
    void createOperationTests(TestCaseGroup *parentGroup, const char *groupName, VariableType variableType,
                              bool argumentsFromInput) override;

    // Adds the "independence_settings" child group to parentGroup: one compute
    // shader case per entry of a fixed table plus a function case that runs
    // verifyIndependenceSettings.
    void createSettingsTests(TestCaseGroup *parentGroup) override;

protected:
    // Both overloads fill csSpec for a single test case described by testCaseInfo.
    void fillShaderSpec(const OperationTestCaseInfo &testCaseInfo, ComputeShaderSpec &csSpec) const;
    void fillShaderSpec(const SettingsTestCaseInfo &testCaseInfo, ComputeShaderSpec &csSpec) const;

private:
    // assembly template specialized for every operation test case
    StringTemplate m_operationShaderTemplate;
    // assembly template with the placeholders used by settings tests; set in init()
    StringTemplate m_settingsShaderTemplate;
    // builds the operation test cases and provides the operation for an operation id
    TestCasesBuilder m_operationTestCaseBuilder;
};
4007
init()4008 void ComputeTestGroupBuilder::init()
4009 {
4010 m_operationTestCaseBuilder.init();
4011
4012 // generic compute shader template with common code for all
4013 // float types and all possible operations listed in OperationId enum
4014 m_operationShaderTemplate.setString("OpCapability Shader\n"
4015 "${capabilities}"
4016
4017 "OpExtension \"SPV_KHR_float_controls\"\n"
4018 "${extensions}"
4019
4020 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
4021 "OpMemoryModel Logical GLSL450\n"
4022 "OpEntryPoint GLCompute %main \"main\" %id\n"
4023 "OpExecutionMode %main LocalSize 1 1 1\n"
4024 "${execution_mode}"
4025
4026 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4027
4028 // some tests require additional annotations
4029 "${annotations}"
4030
4031 "%type_void = OpTypeVoid\n"
4032 "%type_voidf = OpTypeFunction %type_void\n"
4033 "%type_bool = OpTypeBool\n"
4034 "%type_u32 = OpTypeInt 32 0\n"
4035 "%type_i32 = OpTypeInt 32 1\n"
4036 "%type_i32_fptr = OpTypePointer Function %type_i32\n"
4037 "%type_u32_vec2 = OpTypeVector %type_u32 2\n"
4038 "%type_u32_vec3 = OpTypeVector %type_u32 3\n"
4039 "%type_u32_vec3_ptr = OpTypePointer Input %type_u32_vec3\n"
4040
4041 "%c_i32_0 = OpConstant %type_i32 0\n"
4042 "%c_i32_1 = OpConstant %type_i32 1\n"
4043 "%c_i32_2 = OpConstant %type_i32 2\n"
4044 "%c_u32_1 = OpConstant %type_u32 1\n"
4045
4046 // if input float type has different width then output then
4047 // both types are defined here along with all types derived from
4048 // them that are commonly used by tests; some tests also define
4049 // their own types (those that are needed just by this single test)
4050 "${types}"
4051
4052 // SSBO definitions
4053 "${io_definitions}"
4054
4055 "%id = OpVariable %type_u32_vec3_ptr Input\n"
4056
4057 // set of default constants per float type is placed here,
4058 // operation tests can also define additional constants.
4059 "${constants}"
4060
4061 // O_RETURN_VAL defines function here and becouse
4062 // of that this token needs to be directly before main function
4063 "${functions}"
4064
4065 "%main = OpFunction %type_void None %type_voidf\n"
4066 "%label = OpLabel\n"
4067
4068 "${variables}"
4069
4070 // depending on test case arguments are either read from input ssbo
4071 // or generated in spir-v code - in later case shader input is not used
4072 "${arguments}"
4073
4074 // perform test commands
4075 "${commands}"
4076
4077 // save result to SSBO
4078 "${save_result}"
4079
4080 "OpReturn\n"
4081 "OpFunctionEnd\n");
4082
4083 m_settingsShaderTemplate.setString("OpCapability Shader\n"
4084 "${capabilities}"
4085
4086 "OpExtension \"SPV_KHR_float_controls\"\n"
4087 "${extensions}"
4088
4089 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
4090 "OpMemoryModel Logical GLSL450\n"
4091 "OpEntryPoint GLCompute %main \"main\" %id\n"
4092 "OpExecutionMode %main LocalSize 1 1 1\n"
4093 "${execution_modes}"
4094
4095 // annotations
4096 "OpDecorate %SSBO_in BufferBlock\n"
4097 "OpDecorate %ssbo_in DescriptorSet 0\n"
4098 "OpDecorate %ssbo_in Binding 0\n"
4099 "OpDecorate %ssbo_in NonWritable\n"
4100 "${io_annotations}"
4101
4102 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4103
4104 // types
4105 "%type_void = OpTypeVoid\n"
4106 "%type_voidf = OpTypeFunction %type_void\n"
4107 "%type_u32 = OpTypeInt 32 0\n"
4108 "%type_i32 = OpTypeInt 32 1\n"
4109 "%type_i32_fptr = OpTypePointer Function %type_i32\n"
4110 "%type_u32_vec3 = OpTypeVector %type_u32 3\n"
4111 "%type_u32_vec3_ptr = OpTypePointer Input %type_u32_vec3\n"
4112
4113 "%c_i32_0 = OpConstant %type_i32 0\n"
4114 "%c_i32_1 = OpConstant %type_i32 1\n"
4115 "%c_i32_2 = OpConstant %type_i32 2\n"
4116
4117 "${types}"
4118
4119 // in SSBO definition
4120 "%SSBO_in = OpTypeStruct ${in_struct}\n"
4121 "%up_SSBO_in = OpTypePointer Uniform %SSBO_in\n"
4122 "%ssbo_in = OpVariable %up_SSBO_in Uniform\n"
4123
4124 // out SSBO definitions
4125 "${out_definitions}"
4126
4127 "%id = OpVariable %type_u32_vec3_ptr Input\n"
4128 "%main = OpFunction %type_void None %type_voidf\n"
4129 "%label = OpLabel\n"
4130
4131 "${commands}"
4132
4133 "${save_result}"
4134
4135 "OpReturn\n"
4136 "OpFunctionEnd\n");
4137 }
4138
createOperationTests(TestCaseGroup * parentGroup,const char * groupName,VariableType variableType,bool argumentsFromInput)4139 void ComputeTestGroupBuilder::createOperationTests(TestCaseGroup *parentGroup, const char *groupName,
4140 VariableType variableType, bool argumentsFromInput)
4141 {
4142 TestContext &testCtx = parentGroup->getTestContext();
4143 TestCaseGroup *group = new TestCaseGroup(testCtx, groupName);
4144 parentGroup->addChild(group);
4145
4146 TestCaseVect testCases;
4147 m_operationTestCaseBuilder.build(testCases, m_typeData[variableType].testResults, argumentsFromInput);
4148
4149 for (auto &testCase : testCases)
4150 {
4151 // skip cases with undefined output
4152 if (testCase.expectedOutput == V_UNUSED)
4153 continue;
4154
4155 OperationTestCaseInfo testCaseInfo = {variableType, argumentsFromInput, VK_SHADER_STAGE_COMPUTE_BIT,
4156 m_operationTestCaseBuilder.getOperation(testCase.operationId), testCase};
4157
4158 ComputeShaderSpec csSpec;
4159
4160 fillShaderSpec(testCaseInfo, csSpec);
4161
4162 string testName = replace(testCase.baseName, "op", testCaseInfo.operation.name);
4163 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), csSpec));
4164 }
4165 }
4166
createSettingsTests(TestCaseGroup * parentGroup)4167 void ComputeTestGroupBuilder::createSettingsTests(TestCaseGroup *parentGroup)
4168 {
4169 TestContext &testCtx = parentGroup->getTestContext();
4170 TestCaseGroup *group = new TestCaseGroup(testCtx, "independence_settings");
4171 parentGroup->addChild(group);
4172
4173 using SFCI = VkShaderFloatControlsIndependence;
4174 const SFCI independence32 = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
4175 const SFCI independenceAll = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
4176
4177 vector<SettingsTestCaseInfo> testCases = {
4178 // name mode independenceSetting fp16Option fp32Option fp64Option fp16Without16bitstorage
4179
4180 // test rounding modes when only two float widths are available
4181 {"rounding_ind_all_fp16_rte_fp32_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_UNUSED, false},
4182 {"rounding_ind_all_fp16_rtz_fp32_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_UNUSED, false},
4183 {"rounding_ind_32_fp16_rte_fp32_rtz", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_UNUSED, false},
4184 {"rounding_ind_32_fp16_rtz_fp32_rte", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_UNUSED, false},
4185 {"rounding_ind_all_fp16_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_UNUSED, SO_RTZ, false},
4186 {"rounding_ind_all_fp16_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_UNUSED, SO_RTE, false},
4187 {"rounding_ind_all_fp32_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_UNUSED, SO_RTE, SO_RTZ, false},
4188 {"rounding_ind_all_fp32_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_UNUSED, SO_RTZ, SO_RTE, false},
4189 {"rounding_ind_32_fp32_rte_fp64_rtz", SM_ROUNDING, independence32, SO_UNUSED, SO_RTE, SO_RTZ, false},
4190 {"rounding_ind_32_fp32_rtz_fp64_rte", SM_ROUNDING, independence32, SO_UNUSED, SO_RTZ, SO_RTE, false},
4191
4192 // test rounding modes when three widths are available
4193 {"rounding_ind_all_fp16_rtz_fp32_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTZ, false},
4194 {"rounding_ind_32_fp16_rtz_fp32_rte_fp64_rtz", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_RTZ, false},
4195 {"rounding_ind_all_fp16_rte_fp32_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTE, false},
4196 {"rounding_ind_32_fp16_rte_fp32_rtz_fp64_rte", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_RTE, false},
4197 {"rounding_ind_all_fp16_rtz_fp32_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTZ, SO_RTE, false},
4198 {"rounding_ind_all_fp16_rtz_fp32_rte_fp64_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTE, false},
4199 {"rounding_ind_all_fp16_rte_fp32_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_RTE, SO_RTZ, false},
4200 {"rounding_ind_all_fp16_rte_fp32_rtz_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTZ, false},
4201
4202 // test denorm settings when only two float widths are available
4203 {"denorm_ind_all_fp16_flush_fp32_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE, SO_UNUSED,
4204 false},
4205 {"denorm_ind_all_fp16_preserve_fp32_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH, SO_UNUSED,
4206 false},
4207 {"denorm_ind_32_fp16_flush_fp32_preserve", SM_DENORMS, independence32, SO_FLUSH, SO_PRESERVE, SO_UNUSED, false},
4208 {"denorm_ind_32_fp16_preserve_fp32_flush", SM_DENORMS, independence32, SO_PRESERVE, SO_FLUSH, SO_UNUSED, false},
4209 {"denorm_ind_all_fp16_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_UNUSED, SO_PRESERVE,
4210 false},
4211 {"denorm_ind_all_fp16_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_UNUSED, SO_FLUSH,
4212 false},
4213 {"denorm_ind_all_fp32_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_UNUSED, SO_FLUSH, SO_PRESERVE,
4214 false},
4215 {"denorm_ind_all_fp32_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_UNUSED, SO_PRESERVE, SO_FLUSH,
4216 false},
4217 {"denorm_ind_32_fp32_flush_fp64_preserve", SM_DENORMS, independence32, SO_UNUSED, SO_FLUSH, SO_PRESERVE, false},
4218 {"denorm_ind_32_fp32_preserve_fp64_flush", SM_DENORMS, independence32, SO_UNUSED, SO_PRESERVE, SO_FLUSH, false},
4219
4220 // test denorm settings when three widths are available
4221 {"denorm_ind_all_fp16_preserve_fp32_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH,
4222 SO_PRESERVE, false},
4223 {"denorm_ind_32_fp16_preserve_fp32_flush_fp64_preserve", SM_DENORMS, independence32, SO_PRESERVE, SO_FLUSH,
4224 SO_PRESERVE, false},
4225 {"denorm_ind_all_fp16_flush_fp32_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE,
4226 SO_FLUSH, false},
4227 {"denorm_ind_32_fp16_flush_fp32_preserve_fp64_flush", SM_DENORMS, independence32, SO_FLUSH, SO_PRESERVE,
4228 SO_FLUSH, false},
4229 {"denorm_ind_all_fp16_preserve_fp32_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_PRESERVE,
4230 SO_FLUSH, false},
4231 {"denorm_ind_all_fp16_preserve_fp32_flush_fp64_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH,
4232 SO_FLUSH, false},
4233 {"denorm_ind_all_fp16_flush_fp32_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_FLUSH,
4234 SO_PRESERVE, false},
4235 {"denorm_ind_all_fp16_flush_fp32_preserve_fp64_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE,
4236 SO_PRESERVE, false},
4237
4238 // Same fp16 tests but without requiring VK_KHR_16bit_storage
4239 // test rounding modes when only two float widths are available
4240 {"rounding_ind_all_fp16_rte_fp32_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_UNUSED, true},
4241 {"rounding_ind_all_fp16_rtz_fp32_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_UNUSED, true},
4242 {"rounding_ind_32_fp16_rte_fp32_rtz_nostorage", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_UNUSED, true},
4243 {"rounding_ind_32_fp16_rtz_fp32_rte_nostorage", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_UNUSED, true},
4244 {"rounding_ind_all_fp16_rte_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_UNUSED, SO_RTZ, true},
4245 {"rounding_ind_all_fp16_rtz_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_UNUSED, SO_RTE, true},
4246
4247 // test rounding modes when three widths are available
4248 {"rounding_ind_all_fp16_rtz_fp32_rte_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTZ,
4249 true},
4250 {"rounding_ind_32_fp16_rtz_fp32_rte_fp64_rtz_nostorage", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_RTZ,
4251 true},
4252 {"rounding_ind_all_fp16_rte_fp32_rtz_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTE,
4253 true},
4254 {"rounding_ind_32_fp16_rte_fp32_rtz_fp64_rte_nostorage", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_RTE,
4255 true},
4256 {"rounding_ind_all_fp16_rtz_fp32_rtz_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTZ, SO_RTE,
4257 true},
4258 {"rounding_ind_all_fp16_rtz_fp32_rte_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTE,
4259 true},
4260 {"rounding_ind_all_fp16_rte_fp32_rte_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTE, SO_RTZ,
4261 true},
4262 {"rounding_ind_all_fp16_rte_fp32_rtz_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTZ,
4263 true},
4264
4265 // test denorm settings when only two float widths are available
4266 {"denorm_ind_all_fp16_flush_fp32_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE,
4267 SO_UNUSED, true},
4268 {"denorm_ind_all_fp16_preserve_fp32_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH,
4269 SO_UNUSED, true},
4270 {"denorm_ind_32_fp16_flush_fp32_preserve_nostorage", SM_DENORMS, independence32, SO_FLUSH, SO_PRESERVE,
4271 SO_UNUSED, true},
4272 {"denorm_ind_32_fp16_preserve_fp32_flush_nostorage", SM_DENORMS, independence32, SO_PRESERVE, SO_FLUSH,
4273 SO_UNUSED, true},
4274 {"denorm_ind_all_fp16_flush_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH, SO_UNUSED,
4275 SO_PRESERVE, true},
4276 {"denorm_ind_all_fp16_preserve_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE, SO_UNUSED,
4277 SO_FLUSH, true},
4278
4279 // test denorm settings when three widths are available
4280 {"denorm_ind_all_fp16_preserve_fp32_flush_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE,
4281 SO_FLUSH, SO_PRESERVE, true},
4282 {"denorm_ind_32_fp16_preserve_fp32_flush_fp64_preserve_nostorage", SM_DENORMS, independence32, SO_PRESERVE,
4283 SO_FLUSH, SO_PRESERVE, true},
4284 {"denorm_ind_all_fp16_flush_fp32_preserve_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_FLUSH,
4285 SO_PRESERVE, SO_FLUSH, true},
4286 {"denorm_ind_32_fp16_flush_fp32_preserve_fp64_flush_nostorage", SM_DENORMS, independence32, SO_FLUSH,
4287 SO_PRESERVE, SO_FLUSH, true},
4288 {"denorm_ind_all_fp16_preserve_fp32_preserve_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE,
4289 SO_PRESERVE, SO_FLUSH, true},
4290 {"denorm_ind_all_fp16_preserve_fp32_flush_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE,
4291 SO_FLUSH, SO_FLUSH, true},
4292 {"denorm_ind_all_fp16_flush_fp32_flush_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH,
4293 SO_FLUSH, SO_PRESERVE, true},
4294 {"denorm_ind_all_fp16_flush_fp32_preserve_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH,
4295 SO_PRESERVE, SO_PRESERVE, true},
4296 };
4297
4298 for (const auto &testCase : testCases)
4299 {
4300 ComputeShaderSpec csSpec;
4301 fillShaderSpec(testCase, csSpec);
4302 group->addChild(new SpvAsmComputeShaderCase(testCtx, testCase.name, csSpec));
4303 }
4304
4305 addFunctionCase(group, "independence_settings", verifyIndependenceSettings);
4306 }
4307
// Fills csSpec for a single operation test case: specializes
// m_operationShaderTemplate into the final assembly, constructs the input and
// output buffers, selects the result verification function and records the
// extension, features and float controls properties the test requests.
//
// testCaseInfo - output variable type, operation and test case to build for
// csSpec       - shader spec that receives the assembly, buffers and requirements
void ComputeTestGroupBuilder::fillShaderSpec(const OperationTestCaseInfo &testCaseInfo, ComputeShaderSpec &csSpec) const
{
    // LUT storing functions used to verify test results; it is indexed with the
    // output VariableType and has entries for FP16, FP32 and FP64 only
    // NOTE(review): presumably outVariableType is always a float type here - confirm against callers
    const VerifyIOFunc checkFloatsLUT[] = {checkFloats<Float16, deFloat16>, checkFloats<Float32, float>,
                                           checkFloats<Float64, double>};

    const Operation &testOperation = testCaseInfo.operation;
    const OperationTestCase &testCase = testCaseInfo.testCase;
    VariableType outVariableType = testCaseInfo.outVariableType;

    // specialized operation data provides the input type together with the
    // per-operation snippets (annotations, types, constants, arguments, commands...)
    SpecializedOperation specOpData;
    specializeOperation(testCaseInfo, specOpData);

    TypeSnippetsSP inTypeSnippets = specOpData.inTypeSnippets;
    TypeSnippetsSP outTypeSnippets = specOpData.outTypeSnippets;
    VariableType inVariableType = specOpData.inVariableType;

    // fp16 input/output that has to work without the 16bit storage snippets
    bool outFp16WithoutStorage = (outVariableType == FP16) && testCase.fp16Without16BitStorage;
    bool inFp16WithoutStorage = (inVariableType == FP16) && testCase.fp16Without16BitStorage;

    // The feature is required if OpCapability StorageUniform16 is used in the shader.
    bool requiresUniformAndStorage16BitBufferAccess = false;

    // UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
    // internally operates on fp16 and this type should be used by float controls
    VariableType inVariableTypeForCaps = inVariableType;
    string inFloatWidthForCaps = inTypeSnippets->bitWidth;
    if (testCase.operationId == OID_UPH_DENORM)
    {
        inVariableTypeForCaps = FP16;
        inFloatWidthForCaps = "16";
    }

    // capability and execution mode strings matching the tested behavior flags
    string behaviorCapability;
    string behaviorExecutionMode;
    getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags, inFloatWidthForCaps, outTypeSnippets->bitWidth,
                                          behaviorCapability, behaviorExecutionMode);

    // start from the snippets of the output type; input type snippets are
    // appended further down when the operation restricts its input type
    string capabilities = behaviorCapability + outTypeSnippets->capabilities;
    string extensions = outTypeSnippets->extensions;
    string annotations = inTypeSnippets->inputAnnotationsSnippet + outTypeSnippets->outputAnnotationsSnippet +
                         outTypeSnippets->typeAnnotationsSnippet;
    string types = outTypeSnippets->typeDefinitionsSnippet;
    string constants = outTypeSnippets->constantsDefinitionsSnippet;
    string ioDefinitions = "";

    // Getting rid of the 16bit_storage dependency implies replacing lots of snippets.
    {
        if (inFp16WithoutStorage)
        {
            ioDefinitions = inTypeSnippets->inputDefinitionsFp16Snippet;
        }
        else
        {
            ioDefinitions = inTypeSnippets->inputDefinitionsSnippet;
        }

        if (outFp16WithoutStorage)
        {
            // extensions and capabilities are replaced here, the other snippets are extended
            extensions = outTypeSnippets->extensionsFp16Without16BitStorage;
            capabilities = behaviorCapability + outTypeSnippets->capabilitiesFp16Without16BitStorage;
            types += outTypeSnippets->typeDefinitionsFp16Snippet;
            annotations += outTypeSnippets->typeAnnotationsFp16Snippet;
            ioDefinitions += outTypeSnippets->outputDefinitionsFp16Snippet;
        }
        else
        {
            ioDefinitions += outTypeSnippets->outputDefinitionsSnippet;

            // regular fp16 output definitions are in use
            requiresUniformAndStorage16BitBufferAccess |= (outVariableType == FP16);
        }
    }

    bool outFp16TypeUsage = outTypeSnippets->loadStoreRequiresShaderFloat16;
    bool inFp16TypeUsage = false;

    // operations with a restricted input type also need the snippets of that input type
    if (testOperation.isInputTypeRestricted)
    {
        annotations += inTypeSnippets->typeAnnotationsSnippet;
        types += inTypeSnippets->typeDefinitionsSnippet;
        constants += inTypeSnippets->constantsDefinitionsSnippet;

        if (inFp16WithoutStorage)
        {
            annotations += inTypeSnippets->typeAnnotationsFp16Snippet;
            types += inTypeSnippets->typeDefinitionsFp16Snippet;
            capabilities += inTypeSnippets->capabilitiesFp16Without16BitStorage;
            extensions += inTypeSnippets->extensionsFp16Without16BitStorage;
        }
        else
        {
            capabilities += inTypeSnippets->capabilities;
            extensions += inTypeSnippets->extensions;

            // regular fp16 input snippets are in use
            requiresUniformAndStorage16BitBufferAccess |= (inVariableType == FP16);
        }

        inFp16TypeUsage = inTypeSnippets->loadStoreRequiresShaderFloat16;
    }

    // values for the template placeholders; "constants" and "capabilities"
    // are added below once their final content is known
    map<string, string> specializations;
    specializations["extensions"] = extensions;
    specializations["execution_mode"] = behaviorExecutionMode;
    specializations["annotations"] = annotations + specOpData.annotations;
    specializations["types"] = types + specOpData.types;
    specializations["io_definitions"] = ioDefinitions;
    specializations["variables"] = specOpData.variables;
    specializations["functions"] = specOpData.functions;
    specializations["save_result"] =
        (outFp16WithoutStorage ? outTypeSnippets->storeResultsFp16Snippet : outTypeSnippets->storeResultsSnippet);
    specializations["arguments"] = specOpData.arguments;
    specializations["commands"] = specOpData.commands;

    // Build constants. They are only needed sometimes.
    // Float constants count as used when either the argument statements or the
    // operation's command statements carry one of the float constant usage flags.
    const FloatStatementUsageFlags argsAnyFloatConstMask =
        B_STATEMENT_USAGE_ARGS_CONST_FLOAT | B_STATEMENT_USAGE_ARGS_CONST_FP16 | B_STATEMENT_USAGE_ARGS_CONST_FP32 |
        B_STATEMENT_USAGE_ARGS_CONST_FP64;
    const bool argsUseFPConstants = (specOpData.argumentsUsesFloatConstant & argsAnyFloatConstMask) != 0;
    const FloatStatementUsageFlags commandsAnyFloatConstMask =
        B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_CONST_FP16 |
        B_STATEMENT_USAGE_COMMANDS_CONST_FP32 | B_STATEMENT_USAGE_COMMANDS_CONST_FP64;
    const bool commandsUseFPConstants = (testCaseInfo.operation.statementUsageFlags & commandsAnyFloatConstMask) != 0;
    const bool needConstants = argsUseFPConstants || commandsUseFPConstants;
    const FloatStatementUsageFlags constsFloatTypeMask =
        B_STATEMENT_USAGE_CONSTS_TYPE_FLOAT | B_STATEMENT_USAGE_CONSTS_TYPE_FP16;
    const bool constsUsesFP16Type = (testCaseInfo.operation.statementUsageFlags & constsFloatTypeMask) != 0;
    const bool loadStoreRequiresShaderFloat16 = inFp16TypeUsage || outFp16TypeUsage;
    const bool usesFP16Constants = constsUsesFP16Type || (needConstants && loadStoreRequiresShaderFloat16);

    // default constants are emitted only when needed (or for fp16 output without
    // storage); constants specific to the operation are always appended
    specializations["constants"] = "";
    if (needConstants || outFp16WithoutStorage)
    {
        specializations["constants"] = constants;
    }
    specializations["constants"] += specOpData.constants;

    // check which format features are needed
    bool float16FeatureRequired = (outVariableType == FP16) || (inVariableType == FP16);
    bool float64FeatureRequired = (outVariableType == FP64) || (inVariableType == FP64);
    bool int64FeatureRequired = ((outVariableType == UINT64) || (outVariableType == INT64)) ||
                                ((inVariableType == UINT64) || (inVariableType == INT64));

    // Determine required capabilities.
    // Float16 is appended for fp16 arithmetic unless one of the no-storage
    // paths was taken above, and whenever fp16 constants are used.
    bool float16CapabilityAlreadyAdded = inFp16WithoutStorage || outFp16WithoutStorage;
    if ((testOperation.floatUsage == FLOAT_ARITHMETIC && float16FeatureRequired && !float16CapabilityAlreadyAdded) ||
        usesFP16Constants)
    {
        capabilities += "OpCapability Float16\n";
    }
    specializations["capabilities"] = capabilities;

    // specialize shader
    const string shaderCode = m_operationShaderTemplate.specialize(specializations);

    // construct input and output buffers of proper types
    TypeValuesSP inTypeValues = m_typeData.at(inVariableType).values;
    TypeValuesSP outTypeValues = m_typeData.at(outVariableType).values;
    BufferSp inBufferSp = inTypeValues->constructInputBuffer(testCase.input);
    BufferSp outBufferSp = outTypeValues->constructOutputBuffer(testCase.expectedOutput);
    csSpec.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
    csSpec.outputs.push_back(Resource(outBufferSp));

    // set the assembly, the workgroup count and the function that verifies the output
    csSpec.assembly = shaderCode;
    csSpec.numWorkGroups = IVec3(1, 1, 1);
    csSpec.verifyIO = checkFloatsLUT[outVariableType];

    csSpec.extensions.push_back("VK_KHR_shader_float_controls");

    // request the features that match the types and capabilities used by the shader
    csSpec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = float64FeatureRequired;
    csSpec.requestedVulkanFeatures.coreFeatures.shaderInt64 = int64FeatureRequired;
    csSpec.requestedVulkanFeatures.ext16BitStorage.uniformAndStorageBuffer16BitAccess =
        float16FeatureRequired && requiresUniformAndStorage16BitBufferAccess;
    // shaderFloat16 is requested when a no-storage fp16 path was taken, when fp16
    // constants are used, or for fp16 arithmetic combined with 16bit buffer access
    csSpec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 =
        float16CapabilityAlreadyAdded || usesFP16Constants ||
        (float16FeatureRequired && requiresUniformAndStorage16BitBufferAccess &&
         testOperation.floatUsage == FLOAT_ARITHMETIC);

    setupFloatControlsProperties(
        inVariableTypeForCaps, // usually same as inVariableType - different only for UnpackHalf2x16
        outVariableType, testCase.behaviorFlags, csSpec.requestedVulkanFeatures.floatControlsProperties);
}
4490
fillShaderSpec(const SettingsTestCaseInfo & testCaseInfo,ComputeShaderSpec & csSpec) const4491 void ComputeTestGroupBuilder::fillShaderSpec(const SettingsTestCaseInfo &testCaseInfo, ComputeShaderSpec &csSpec) const
4492 {
4493 string capabilities;
4494 string fp16behaviorName;
4495 string fp32behaviorName;
4496 string fp64behaviorName;
4497
4498 ValueId addArgs[2];
4499 ValueId fp16resultValue;
4500 ValueId fp32resultValue;
4501 ValueId fp64resultValue;
4502
4503 vk::VkPhysicalDeviceFloatControlsProperties &floatControls = csSpec.requestedVulkanFeatures.floatControlsProperties;
4504 bool fp16Required = testCaseInfo.fp16Option != SO_UNUSED;
4505 bool fp32Required = testCaseInfo.fp32Option != SO_UNUSED;
4506 bool fp64Required = testCaseInfo.fp64Option != SO_UNUSED;
4507
4508 if (testCaseInfo.testedMode == SM_ROUNDING)
4509 {
4510 // make sure that only rounding options are used
4511 DE_ASSERT((testCaseInfo.fp16Option != SO_FLUSH) && (testCaseInfo.fp16Option != SO_PRESERVE) &&
4512 (testCaseInfo.fp32Option != SO_FLUSH) && (testCaseInfo.fp32Option != SO_PRESERVE) &&
4513 (testCaseInfo.fp64Option != SO_FLUSH) && (testCaseInfo.fp64Option != SO_PRESERVE));
4514
4515 bool fp16RteRounding = testCaseInfo.fp16Option == SO_RTE;
4516 bool fp32RteRounding = testCaseInfo.fp32Option == SO_RTE;
4517 bool fp64RteRounding = testCaseInfo.fp64Option == SO_RTE;
4518
4519 const string &rte = m_behaviorToName.at(B_RTE_ROUNDING);
4520 const string &rtz = m_behaviorToName.at(B_RTZ_ROUNDING);
4521
4522 fp16behaviorName = fp16RteRounding ? rte : rtz;
4523 fp32behaviorName = fp32RteRounding ? rte : rtz;
4524 fp64behaviorName = fp64RteRounding ? rte : rtz;
4525
4526 addArgs[0] = V_ADD_ARG_A;
4527 addArgs[1] = V_ADD_ARG_B;
4528 fp16resultValue = fp16RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
4529 fp32resultValue = fp32RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
4530 fp64resultValue = fp64RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
4531
4532 capabilities = "OpCapability " + rte +
4533 "\n"
4534 "OpCapability " +
4535 rtz + "\n";
4536
4537 floatControls.roundingModeIndependence = testCaseInfo.independenceSetting;
4538 floatControls.denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;
4539 floatControls.shaderRoundingModeRTEFloat16 = fp16RteRounding;
4540 floatControls.shaderRoundingModeRTZFloat16 = fp16Required && !fp16RteRounding;
4541 floatControls.shaderRoundingModeRTEFloat32 = fp32RteRounding;
4542 floatControls.shaderRoundingModeRTZFloat32 = fp32Required && !fp32RteRounding;
4543 floatControls.shaderRoundingModeRTEFloat64 = fp64RteRounding;
4544 floatControls.shaderRoundingModeRTZFloat64 = fp64Required && !fp64RteRounding;
4545 }
4546 else // SM_DENORMS
4547 {
4548 // make sure that only denorm options are used
4549 DE_ASSERT((testCaseInfo.fp16Option != SO_RTE) && (testCaseInfo.fp16Option != SO_RTZ) &&
4550 (testCaseInfo.fp32Option != SO_RTE) && (testCaseInfo.fp32Option != SO_RTZ) &&
4551 (testCaseInfo.fp64Option != SO_RTE) && (testCaseInfo.fp64Option != SO_RTZ));
4552
4553 bool fp16DenormPreserve = testCaseInfo.fp16Option == SO_PRESERVE;
4554 bool fp32DenormPreserve = testCaseInfo.fp32Option == SO_PRESERVE;
4555 bool fp64DenormPreserve = testCaseInfo.fp64Option == SO_PRESERVE;
4556
4557 const string &preserve = m_behaviorToName.at(B_DENORM_PRESERVE);
4558 const string &flush = m_behaviorToName.at(B_DENORM_FLUSH);
4559
4560 fp16behaviorName = fp16DenormPreserve ? preserve : flush;
4561 fp32behaviorName = fp32DenormPreserve ? preserve : flush;
4562 fp64behaviorName = fp64DenormPreserve ? preserve : flush;
4563
4564 addArgs[0] = V_DENORM;
4565 addArgs[1] = V_DENORM;
4566 fp16resultValue = fp16DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO_OR_DENORM_TIMES_TWO;
4567 fp32resultValue = fp32DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO;
4568 fp64resultValue = fp64DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO;
4569
4570 capabilities = "OpCapability " + preserve +
4571 "\n"
4572 "OpCapability " +
4573 flush + "\n";
4574
4575 floatControls.denormBehaviorIndependence = testCaseInfo.independenceSetting;
4576 floatControls.roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;
4577 floatControls.shaderDenormPreserveFloat16 = fp16DenormPreserve;
4578 floatControls.shaderDenormFlushToZeroFloat16 = fp16Required && !fp16DenormPreserve;
4579 floatControls.shaderDenormPreserveFloat32 = fp32DenormPreserve;
4580 floatControls.shaderDenormFlushToZeroFloat32 = fp32Required && !fp32DenormPreserve;
4581 floatControls.shaderDenormPreserveFloat64 = fp64DenormPreserve;
4582 floatControls.shaderDenormFlushToZeroFloat64 = fp64Required && !fp64DenormPreserve;
4583 }
4584
4585 const auto &fp64Data = m_typeData.at(FP64);
4586 const auto &fp32Data = m_typeData.at(FP32);
4587 const auto &fp16Data = m_typeData.at(FP16);
4588
4589 uint32_t attributeIndex = 0;
4590 uint32_t attributeOffset = 0;
4591 string attribute;
4592 string extensions = "";
4593 string executionModes = "";
4594 string ioAnnotations = "";
4595 string types = "";
4596 string inStruct = "";
4597 string outDefinitions = "";
4598 string commands = "";
4599 string saveResult = "";
4600
4601 // construct single input buffer containing arguments for all float widths
4602 // (maxPerStageDescriptorStorageBuffers can be min 4 and we need 3 for outputs)
4603 uint32_t inputOffset = 0;
4604 std::vector<uint8_t> inputData((fp64Required * sizeof(double) + sizeof(float) + fp16Required * sizeof(deFloat16)) *
4605 2);
4606
4607 // to follow storage buffer layout rules we store data in ssbo in order 64 -> 16
4608 if (fp64Required)
4609 {
4610 capabilities += fp64Data.snippets->capabilities;
4611 executionModes += "OpExecutionMode %main " + fp64behaviorName + " 64\n";
4612 attribute = to_string(attributeIndex);
4613 ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) + "\n" +
4614 fp64Data.snippets->multiOutputAnnotationsSnippet + "OpDecorate %ssbo_f64_out Binding " +
4615 to_string(attributeIndex + 1) + "\n";
4616 types += fp64Data.snippets->minTypeDefinitionsSnippet;
4617 inStruct += " %type_f64_arr_2";
4618 outDefinitions += fp64Data.snippets->multiOutputDefinitionsSnippet;
4619 commands += replace(fp64Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
4620 "%result64 = OpFAdd %type_f64 %arg1_f64 %arg2_f64\n";
4621 saveResult += fp64Data.snippets->multiStoreResultsSnippet;
4622 attributeOffset += 2 * static_cast<uint32_t>(sizeof(double));
4623 attributeIndex++;
4624
4625 fp64Data.values->fillInputData(addArgs, inputData, inputOffset);
4626
4627 // construct separate buffers for outputs to make validation easier
4628 BufferSp fp64OutBufferSp = fp64Data.values->constructOutputBuffer(fp64resultValue);
4629 csSpec.outputs.push_back(Resource(fp64OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
4630 reinterpret_cast<void *>(BufferDataType::DATA_FP64)));
4631
4632 csSpec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
4633 }
4634 if (fp32Required)
4635 {
4636 executionModes += "OpExecutionMode %main " + fp32behaviorName + " 32\n";
4637 attribute = to_string(attributeIndex);
4638 ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) + "\n" +
4639 fp32Data.snippets->multiOutputAnnotationsSnippet + "OpDecorate %ssbo_f32_out Binding " +
4640 to_string(attributeIndex + 1) + "\n";
4641 types += fp32Data.snippets->minTypeDefinitionsSnippet;
4642 inStruct += " %type_f32_arr_2";
4643 outDefinitions += fp32Data.snippets->multiOutputDefinitionsSnippet;
4644 commands += replace(fp32Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
4645 "%result32 = OpFAdd %type_f32 %arg1_f32 %arg2_f32\n";
4646 saveResult += fp32Data.snippets->multiStoreResultsSnippet;
4647 attributeOffset += 2 * static_cast<uint32_t>(sizeof(float));
4648 attributeIndex++;
4649
4650 fp32Data.values->fillInputData(addArgs, inputData, inputOffset);
4651
4652 BufferSp fp32OutBufferSp = fp32Data.values->constructOutputBuffer(fp32resultValue);
4653 csSpec.outputs.push_back(Resource(fp32OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
4654 reinterpret_cast<void *>(BufferDataType::DATA_FP32)));
4655 }
4656 if (fp16Required)
4657 {
4658 if (testCaseInfo.fp16Without16BitStorage)
4659 {
4660 capabilities += fp16Data.snippets->capabilitiesFp16Without16BitStorage;
4661 extensions += fp16Data.snippets->extensionsFp16Without16BitStorage;
4662 executionModes += "OpExecutionMode %main " + fp16behaviorName + " 16\n";
4663 attribute = to_string(attributeIndex);
4664 ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) + "\n" +
4665 fp16Data.snippets->multiOutputAnnotationsFp16Snippet +
4666 "OpDecorate %ssbo_u32_out Binding " + to_string(attributeIndex + 1) + "\n";
4667 types += fp16Data.snippets->minTypeDefinitionsSnippet + fp16Data.snippets->typeDefinitionsFp16Snippet +
4668 "%type_f16_vec2 = OpTypeVector %type_f16 2\n";
4669 inStruct += " %type_u32_arr_1";
4670 outDefinitions += fp16Data.snippets->multiOutputDefinitionsFp16Snippet;
4671 commands += replace(fp16Data.snippets->multiArgumentsFromInputFp16Snippet, "${attr}", attribute) +
4672 "%result16 = OpFAdd %type_f16 %arg1_f16 %arg2_f16\n";
4673 saveResult += fp16Data.snippets->multiStoreResultsFp16Snippet;
4674
4675 csSpec.extensions.push_back("VK_KHR_shader_float16_int8");
4676 csSpec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
4677 }
4678 else
4679 {
4680 capabilities += fp16Data.snippets->capabilities + "OpCapability Float16\n";
4681 extensions += fp16Data.snippets->extensions;
4682 executionModes += "OpExecutionMode %main " + fp16behaviorName + " 16\n";
4683 attribute = to_string(attributeIndex);
4684 ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) + "\n" +
4685 fp16Data.snippets->multiOutputAnnotationsSnippet + "OpDecorate %ssbo_f16_out Binding " +
4686 to_string(attributeIndex + 1) + "\n";
4687 types += fp16Data.snippets->minTypeDefinitionsSnippet;
4688 inStruct += " %type_f16_arr_2";
4689 outDefinitions += fp16Data.snippets->multiOutputDefinitionsSnippet;
4690 commands += replace(fp16Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
4691 "%result16 = OpFAdd %type_f16 %arg1_f16 %arg2_f16\n";
4692 saveResult += fp16Data.snippets->multiStoreResultsSnippet;
4693
4694 csSpec.extensions.push_back("VK_KHR_16bit_storage");
4695 csSpec.requestedVulkanFeatures.ext16BitStorage.uniformAndStorageBuffer16BitAccess = true;
4696 }
4697
4698 fp16Data.values->fillInputData(addArgs, inputData, inputOffset);
4699
4700 BufferSp fp16OutBufferSp = fp16Data.values->constructOutputBuffer(fp16resultValue);
4701 csSpec.outputs.push_back(Resource(fp16OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
4702 reinterpret_cast<void *>(BufferDataType::DATA_FP16)));
4703 }
4704
4705 BufferSp inBufferSp(new Buffer<uint8_t>(inputData));
4706 csSpec.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4707
4708 map<string, string> specializations = {
4709 {"capabilities", capabilities}, {"extensions", extensions}, {"execution_modes", executionModes},
4710 {"io_annotations", ioAnnotations}, {"types", types}, {"in_struct", inStruct},
4711 {"out_definitions", outDefinitions}, {"commands", commands}, {"save_result", saveResult}};
4712
4713 // specialize shader
4714 const string shaderCode = m_settingsShaderTemplate.specialize(specializations);
4715
4716 csSpec.assembly = shaderCode;
4717 csSpec.numWorkGroups = IVec3(1, 1, 1);
4718 csSpec.verifyIO = checkMixedFloats;
4719 csSpec.extensions.push_back("VK_KHR_shader_float_controls");
4720 }
4721
getGraphicsShaderCode(vk::SourceCollections & dst,InstanceContext context)4722 void getGraphicsShaderCode(vk::SourceCollections &dst, InstanceContext context)
4723 {
4724 // this function is used only by GraphicsTestGroupBuilder but it couldn't
4725 // be implemented as a method because of how addFunctionCaseWithPrograms
4726 // was implemented
4727
4728 SpirvVersion targetSpirvVersion = context.resources.spirvVersion;
4729 const uint32_t vulkanVersion = dst.usedVulkanVersion;
4730
4731 static const string vertexTemplate =
4732 "OpCapability Shader\n"
4733 "${vert_capabilities}"
4734
4735 "OpExtension \"SPV_KHR_float_controls\"\n"
4736 "${vert_extensions}"
4737
4738 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
4739 "OpMemoryModel Logical GLSL450\n"
4740 "OpEntryPoint Vertex %main \"main\" %BP_stream %BP_position %BP_color %BP_gl_VertexIndex %BP_gl_InstanceIndex "
4741 "%BP_vertex_color %BP_vertex_result \n"
4742 "${vert_execution_mode}"
4743
4744 "OpMemberDecorate %BP_gl_PerVertex 0 BuiltIn Position\n"
4745 "OpMemberDecorate %BP_gl_PerVertex 1 BuiltIn PointSize\n"
4746 "OpMemberDecorate %BP_gl_PerVertex 2 BuiltIn ClipDistance\n"
4747 "OpMemberDecorate %BP_gl_PerVertex 3 BuiltIn CullDistance\n"
4748 "OpDecorate %BP_gl_PerVertex Block\n"
4749 "OpDecorate %BP_position Location 0\n"
4750 "OpDecorate %BP_color Location 1\n"
4751 "OpDecorate %BP_vertex_color Location 1\n"
4752 "OpDecorate %BP_vertex_result Location 2\n"
4753 "OpDecorate %BP_vertex_result Flat\n"
4754 "OpDecorate %BP_gl_VertexIndex BuiltIn VertexIndex\n"
4755 "OpDecorate %BP_gl_InstanceIndex BuiltIn InstanceIndex\n"
4756
4757 // some tests require additional annotations
4758 "${vert_annotations}"
4759
4760 // types required by most of tests
4761 "%type_void = OpTypeVoid\n"
4762 "%type_voidf = OpTypeFunction %type_void\n"
4763 "%type_bool = OpTypeBool\n"
4764 "%type_i32 = OpTypeInt 32 1\n"
4765 "%type_u32 = OpTypeInt 32 0\n"
4766 "%type_u32_vec2 = OpTypeVector %type_u32 2\n"
4767 "%type_i32_iptr = OpTypePointer Input %type_i32\n"
4768 "%type_i32_optr = OpTypePointer Output %type_i32\n"
4769 "%type_i32_fptr = OpTypePointer Function %type_i32\n"
4770
4771 // constants required by most of tests
4772 "%c_i32_0 = OpConstant %type_i32 0\n"
4773 "%c_i32_1 = OpConstant %type_i32 1\n"
4774 "%c_i32_2 = OpConstant %type_i32 2\n"
4775 "%c_u32_1 = OpConstant %type_u32 1\n"
4776
4777 // if input float type has different width then output then
4778 // both types are defined here along with all types derived from
4779 // them that are commonly used by tests; some tests also define
4780 // their own types (those that are needed just by this single test)
4781 "${vert_types}"
4782
4783 // SSBO is not universally supported for storing
4784 // data in vertex stages - it is onle read here
4785 "${vert_io_definitions}"
4786
4787 "%BP_gl_PerVertex = OpTypeStruct %type_f32_vec4 %type_f32 %type_f32_arr_1 %type_f32_arr_1\n"
4788 "%BP_gl_PerVertex_optr = OpTypePointer Output %BP_gl_PerVertex\n"
4789 "%BP_stream = OpVariable %BP_gl_PerVertex_optr Output\n"
4790 "%BP_position = OpVariable %type_f32_vec4_iptr Input\n"
4791 "%BP_color = OpVariable %type_f32_vec4_iptr Input\n"
4792 "%BP_gl_VertexIndex = OpVariable %type_i32_iptr Input\n"
4793 "%BP_gl_InstanceIndex = OpVariable %type_i32_iptr Input\n"
4794 "%BP_vertex_color = OpVariable %type_f32_vec4_optr Output\n"
4795
4796 // set of default constants per float type is placed here,
4797 // operation tests can also define additional constants.
4798 "${vert_constants}"
4799
4800 // O_RETURN_VAL defines function here and because
4801 // of that this token needs to be directly before main function.
4802 "${vert_functions}"
4803
4804 "%main = OpFunction %type_void None %type_voidf\n"
4805 "%label = OpLabel\n"
4806
4807 "${vert_variables}"
4808
4809 "%position = OpLoad %type_f32_vec4 %BP_position\n"
4810 "%gl_pos = OpAccessChain %type_f32_vec4_optr %BP_stream %c_i32_0\n"
4811 "OpStore %gl_pos %position\n"
4812 "%color = OpLoad %type_f32_vec4 %BP_color\n"
4813 "OpStore %BP_vertex_color %color\n"
4814
4815 // this token is filled only when vertex stage is tested;
4816 // depending on test case arguments are either read from input ssbo
4817 // or generated in spir-v code - in later case ssbo is not used
4818 "${vert_arguments}"
4819
4820 // when vertex shader is tested then test operations are performed
4821 // here and passed to fragment stage; if fragment stage ts tested
4822 // then ${comands} and ${vert_process_result} are rplaced with nop
4823 "${vert_commands}"
4824
4825 "${vert_process_result}"
4826
4827 "OpReturn\n"
4828 "OpFunctionEnd\n";
4829
4830 static const string fragmentTemplate =
4831 "OpCapability Shader\n"
4832 "${frag_capabilities}"
4833
4834 "OpExtension \"SPV_KHR_float_controls\"\n"
4835 "${frag_extensions}"
4836
4837 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
4838 "OpMemoryModel Logical GLSL450\n"
4839 "OpEntryPoint Fragment %main \"main\" %BP_vertex_color %BP_vertex_result %BP_fragColor %BP_gl_FragCoord \n"
4840 "OpExecutionMode %main OriginUpperLeft\n"
4841 "${frag_execution_mode}"
4842
4843 "OpDecorate %BP_fragColor Location 0\n"
4844 "OpDecorate %BP_vertex_color Location 1\n"
4845 "OpDecorate %BP_vertex_result Location 2\n"
4846 "OpDecorate %BP_vertex_result Flat\n"
4847 "OpDecorate %BP_gl_FragCoord BuiltIn FragCoord\n"
4848
4849 // some tests require additional annotations
4850 "${frag_annotations}"
4851
4852 // types required by most of tests
4853 "%type_void = OpTypeVoid\n"
4854 "%type_voidf = OpTypeFunction %type_void\n"
4855 "%type_bool = OpTypeBool\n"
4856 "%type_i32 = OpTypeInt 32 1\n"
4857 "%type_u32 = OpTypeInt 32 0\n"
4858 "%type_u32_vec2 = OpTypeVector %type_u32 2\n"
4859 "%type_i32_iptr = OpTypePointer Input %type_i32\n"
4860 "%type_i32_optr = OpTypePointer Output %type_i32\n"
4861 "%type_i32_fptr = OpTypePointer Function %type_i32\n"
4862
4863 // constants required by most of tests
4864 "%c_i32_0 = OpConstant %type_i32 0\n"
4865 "%c_i32_1 = OpConstant %type_i32 1\n"
4866 "%c_i32_2 = OpConstant %type_i32 2\n"
4867 "%c_u32_1 = OpConstant %type_u32 1\n"
4868
4869 // if input float type has different width then output then
4870 // both types are defined here along with all types derived from
4871 // them that are commonly used by tests; some tests also define
4872 // their own types (those that are needed just by this single test)
4873 "${frag_types}"
4874
4875 "%BP_gl_FragCoord = OpVariable %type_f32_vec4_iptr Input\n"
4876 "%BP_vertex_color = OpVariable %type_f32_vec4_iptr Input\n"
4877 "%BP_fragColor = OpVariable %type_f32_vec4_optr Output\n"
4878
4879 // SSBO definitions
4880 "${frag_io_definitions}"
4881
4882 // set of default constants per float type is placed here,
4883 // operation tests can also define additional constants.
4884 "${frag_constants}"
4885
4886 // O_RETURN_VAL defines function here and because
4887 // of that this token needs to be directly before main function.
4888 "${frag_functions}"
4889
4890 "%main = OpFunction %type_void None %type_voidf\n"
4891 "%label = OpLabel\n"
4892
4893 "${frag_variables}"
4894
4895 // just pass vertex color - rendered image is not important in our case
4896 "%vertex_color = OpLoad %type_f32_vec4 %BP_vertex_color\n"
4897 "OpStore %BP_fragColor %vertex_color\n"
4898
4899 // this token is filled only when fragment stage is tested;
4900 // depending on test case arguments are either read from input ssbo or
4901 // generated in spir-v code - in later case ssbo is used only for output
4902 "${frag_arguments}"
4903
4904 // when fragment shader is tested then test operations are performed
4905 // here and saved to ssbo; if vertex stage was tested then its
4906 // result is just saved to ssbo here
4907 "${frag_commands}"
4908 "${frag_process_result}"
4909
4910 "OpReturn\n"
4911 "OpFunctionEnd\n";
4912
4913 dst.spirvAsmSources.add("vert", DE_NULL) << StringTemplate(vertexTemplate).specialize(context.testCodeFragments)
4914 << SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4915 dst.spirvAsmSources.add("frag", DE_NULL) << StringTemplate(fragmentTemplate).specialize(context.testCodeFragments)
4916 << SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4917 }
4918
// GraphicsTestGroupBuilder iterates over all test cases and creates tests for both
// vertex and fragment stages. As in most spirv-assembly tests, tests here are also
// executed using functionality defined in vktSpvAsmGraphicsShaderTestUtil.cpp, but
// because one of the requirements during development was that SSBO won't be used in
// the vertex stage we couldn't use the createTestForStage functions - we need a custom
// version for both vertex and fragment shaders at the same time. This was required
// as we needed to pass the result from the vertex stage to the fragment stage where it
// could be saved to the ssbo. To achieve that, InstanceContext is created manually in
// the createInstanceContext method.
class GraphicsTestGroupBuilder : public TestGroupBuilderBase
{
public:
    // Initializes the owned test case builder (m_testCaseBuilder).
    void init();

    // Adds a child group named groupName to parentGroup and populates it with
    // "_vert" and "_frag" variants of the operation test cases for variableType.
    void createOperationTests(TestCaseGroup *parentGroup, const char *groupName, VariableType variableType,
                              bool argumentsFromInput) override;
    // Intentionally adds nothing - settings are tested only in the compute stage.
    void createSettingsTests(TestCaseGroup *parentGroup) override;

protected:
    // Builds the InstanceContext describing a single operation test case.
    InstanceContext createInstanceContext(const OperationTestCaseInfo &testCaseInfo) const;

private:
    // Source of the operation test cases and of the Operation descriptions they refer to.
    TestCasesBuilder m_testCaseBuilder;
};
4943
// Initializes the test case builder that createOperationTests later queries
// for test cases and operations.
void GraphicsTestGroupBuilder::init()
{
    m_testCaseBuilder.init();
}
4948
createOperationTests(TestCaseGroup * parentGroup,const char * groupName,VariableType variableType,bool argumentsFromInput)4949 void GraphicsTestGroupBuilder::createOperationTests(TestCaseGroup *parentGroup, const char *groupName,
4950 VariableType variableType, bool argumentsFromInput)
4951 {
4952 TestContext &testCtx = parentGroup->getTestContext();
4953 TestCaseGroup *group = new TestCaseGroup(testCtx, groupName);
4954 parentGroup->addChild(group);
4955
4956 // create test cases for vertex stage
4957 TestCaseVect testCases;
4958 m_testCaseBuilder.build(testCases, m_typeData[variableType].testResults, argumentsFromInput);
4959
4960 for (auto &testCase : testCases)
4961 {
4962 // skip cases with undefined output
4963 if (testCase.expectedOutput == V_UNUSED)
4964 continue;
4965
4966 // FPRoundingMode decoration can be applied only to conversion instruction that is used as the object
4967 // argument of an OpStore storing through a pointer to a 16-bit floating-point object in Uniform, or
4968 // PushConstant, or Input, or Output Storage Classes. SSBO writes are not commonly supported
4969 // in VS so this test case needs to be skiped for vertex stage.
4970 if ((testCase.operationId == OID_ORTZ_ROUND) || (testCase.operationId == OID_ORTE_ROUND))
4971 continue;
4972
4973 OperationTestCaseInfo testCaseInfo = {variableType, argumentsFromInput, VK_SHADER_STAGE_VERTEX_BIT,
4974 m_testCaseBuilder.getOperation(testCase.operationId), testCase};
4975
4976 InstanceContext ctxVertex = createInstanceContext(testCaseInfo);
4977 string testName = replace(testCase.baseName, "op", testCaseInfo.operation.name);
4978
4979 addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_vert", getGraphicsShaderCode,
4980 runAndVerifyDefaultPipeline, ctxVertex);
4981 }
4982
4983 // create test cases for fragment stage
4984 testCases.clear();
4985 m_testCaseBuilder.build(testCases, m_typeData[variableType].testResults, argumentsFromInput);
4986
4987 for (auto &testCase : testCases)
4988 {
4989 // skip cases with undefined output
4990 if (testCase.expectedOutput == V_UNUSED)
4991 continue;
4992
4993 OperationTestCaseInfo testCaseInfo = {variableType, argumentsFromInput, VK_SHADER_STAGE_FRAGMENT_BIT,
4994 m_testCaseBuilder.getOperation(testCase.operationId), testCase};
4995
4996 InstanceContext ctxFragment = createInstanceContext(testCaseInfo);
4997 string testName = replace(testCase.baseName, "op", testCaseInfo.operation.name);
4998
4999 addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_frag", getGraphicsShaderCode,
5000 runAndVerifyDefaultPipeline, ctxFragment);
5001 }
5002 }
5003
// Deliberately creates no tests: the WG decided that testing settings only
// for the compute stage is sufficient, so nothing is added to parentGroup.
void GraphicsTestGroupBuilder::createSettingsTests(TestCaseGroup *parentGroup)
{
    // parentGroup is intentionally unused
    DE_UNREF(parentGroup);

    // WG decided that testing settings only for compute stage is sufficient
}
5010
createInstanceContext(const OperationTestCaseInfo & testCaseInfo) const5011 InstanceContext GraphicsTestGroupBuilder::createInstanceContext(const OperationTestCaseInfo &testCaseInfo) const
5012 {
5013 // LUT storing functions used to verify test results
5014 const VerifyIOFunc checkFloatsLUT[] = {checkFloats<Float16, deFloat16>, checkFloats<Float32, float>,
5015 checkFloats<Float64, double>};
5016
5017 // 32-bit float types are always needed for standard operations on color
5018 // if tested operation does not require fp32 for either input or output
5019 // then this minimal type definitions must be appended to types section
5020 const string f32TypeMinimalRequired = "%type_f32 = OpTypeFloat 32\n"
5021 "%type_f32_arr_1 = OpTypeArray %type_f32 %c_i32_1\n"
5022 "%type_f32_iptr = OpTypePointer Input %type_f32\n"
5023 "%type_f32_optr = OpTypePointer Output %type_f32\n"
5024 "%type_f32_vec4 = OpTypeVector %type_f32 4\n"
5025 "%type_f32_vec4_iptr = OpTypePointer Input %type_f32_vec4\n"
5026 "%type_f32_vec4_optr = OpTypePointer Output %type_f32_vec4\n";
5027
5028 const Operation &testOperation = testCaseInfo.operation;
5029 const OperationTestCase &testCase = testCaseInfo.testCase;
5030 VariableType outVariableType = testCaseInfo.outVariableType;
5031 VkShaderStageFlagBits testedStage = testCaseInfo.testedStage;
5032
5033 DE_ASSERT((testedStage == VK_SHADER_STAGE_VERTEX_BIT) || (testedStage == VK_SHADER_STAGE_FRAGMENT_BIT));
5034
5035 SpecializedOperation specOpData;
5036 specializeOperation(testCaseInfo, specOpData);
5037
5038 TypeSnippetsSP inTypeSnippets = specOpData.inTypeSnippets;
5039 TypeSnippetsSP outTypeSnippets = specOpData.outTypeSnippets;
5040 VariableType inVariableType = specOpData.inVariableType;
5041
5042 bool outFp16WithoutStorage = (outVariableType == FP16) && testCase.fp16Without16BitStorage;
5043 bool inFp16WithoutStorage = (inVariableType == FP16) && testCase.fp16Without16BitStorage;
5044
5045 // The feature is required if OpCapability StorageUniform16 is used in the shader.
5046 bool requiresUniformAndStorage16BitBufferAccess = false;
5047
5048 // There may be several reasons why we need the shaderFloat16 Vulkan feature.
5049 bool needsShaderFloat16 = inFp16WithoutStorage || outFp16WithoutStorage;
5050 // There are some weird cases where we need the constants, but would otherwise drop them.
5051 bool needsSpecialConstants = false;
5052
5053 // UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
5054 // internaly operates on fp16 and this type should be used by float controls
5055 VariableType inVariableTypeForCaps = inVariableType;
5056 string inFloatWidthForCaps = inTypeSnippets->bitWidth;
5057 if (testCase.operationId == OID_UPH_DENORM)
5058 {
5059 inVariableTypeForCaps = FP16;
5060 inFloatWidthForCaps = "16";
5061 }
5062
5063 string behaviorCapability;
5064 string behaviorExecutionMode;
5065 getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags, inFloatWidthForCaps, outTypeSnippets->bitWidth,
5066 behaviorCapability, behaviorExecutionMode);
5067
5068 // check which format features are needed
5069 bool float16FeatureRequired = (inVariableType == FP16) || (outVariableType == FP16);
5070 bool float64FeatureRequired = (inVariableType == FP64) || (outVariableType == FP64);
5071 bool int64FeatureRequired = ((inVariableType == UINT64) || (inVariableType == INT64)) ||
5072 ((outVariableType == UINT64) || (outVariableType == INT64));
5073
5074 string vertExecutionMode;
5075 string fragExecutionMode;
5076 string vertCapabilities;
5077 string fragCapabilities;
5078 string vertExtensions;
5079 string fragExtensions;
5080 string vertAnnotations;
5081 string fragAnnotations;
5082 string vertTypes;
5083 string fragTypes;
5084 string vertConstants;
5085 string fragConstants;
5086 string vertFunctions;
5087 string fragFunctions;
5088 string vertIODefinitions;
5089 string fragIODefinitions;
5090 string vertArguments;
5091 string fragArguments;
5092 string vertVariables;
5093 string fragVariables;
5094 string vertCommands;
5095 string fragCommands;
5096 string vertProcessResult;
5097 string fragProcessResult;
5098
5099 // check if operation should be executed in vertex stage
5100 if (testedStage == VK_SHADER_STAGE_VERTEX_BIT)
5101 {
5102 vertAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet;
5103 fragAnnotations = outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
5104 vertFunctions = specOpData.functions;
5105
5106 // check if input type is different from tested type (conversion operations)
5107 if (testOperation.isInputTypeRestricted)
5108 {
5109 vertCapabilities = behaviorCapability + inTypeSnippets->capabilities + outTypeSnippets->capabilities;
5110 fragCapabilities = outTypeSnippets->capabilities;
5111 vertExtensions = inTypeSnippets->extensions + outTypeSnippets->extensions;
5112 fragExtensions = outTypeSnippets->extensions;
5113 vertTypes = inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet +
5114 outTypeSnippets->varyingsTypesSnippet;
5115 if (inFp16WithoutStorage)
5116 vertTypes += inTypeSnippets->typeDefinitionsFp16Snippet;
5117
5118 fragTypes = outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
5119 vertConstants = inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
5120 fragConstants = outTypeSnippets->constantsDefinitionsSnippet;
5121
5122 requiresUniformAndStorage16BitBufferAccess |= (inVariableType == FP16);
5123 }
5124 else
5125 {
5126 // input and output types are the same (majority of operations)
5127
5128 vertCapabilities = behaviorCapability + outTypeSnippets->capabilities;
5129 fragCapabilities = vertCapabilities;
5130 vertExtensions = outTypeSnippets->extensions;
5131 fragExtensions = vertExtensions;
5132 vertTypes = outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
5133 fragTypes = vertTypes;
5134 vertConstants = outTypeSnippets->constantsDefinitionsSnippet;
5135 fragConstants = outTypeSnippets->constantsDefinitionsSnippet;
5136 }
5137
5138 requiresUniformAndStorage16BitBufferAccess |= (outVariableType == FP16);
5139
5140 if (outVariableType != FP32)
5141 {
5142 fragTypes += f32TypeMinimalRequired;
5143 if (inVariableType != FP32)
5144 vertTypes += f32TypeMinimalRequired;
5145 }
5146
5147 vertAnnotations += specOpData.annotations;
5148 vertTypes += specOpData.types;
5149 vertConstants += specOpData.constants;
5150
5151 vertExecutionMode = behaviorExecutionMode;
5152 fragExecutionMode = "";
5153 vertIODefinitions = inTypeSnippets->inputDefinitionsSnippet + outTypeSnippets->outputVaryingsSnippet;
5154 fragIODefinitions = outTypeSnippets->inputVaryingsSnippet + outTypeSnippets->outputDefinitionsSnippet;
5155 vertArguments = specOpData.arguments;
5156 fragArguments = "";
5157 vertVariables = specOpData.variables;
5158 fragVariables = "";
5159 vertCommands = specOpData.commands;
5160 fragCommands = "";
5161 vertProcessResult = outTypeSnippets->storeVertexResultSnippet;
5162 fragProcessResult = outTypeSnippets->loadVertexResultSnippet + outTypeSnippets->storeResultsSnippet;
5163
5164 if (inFp16WithoutStorage)
5165 {
5166 vertAnnotations += inTypeSnippets->typeAnnotationsFp16Snippet;
5167 vertIODefinitions = inTypeSnippets->inputDefinitionsFp16Snippet + outTypeSnippets->outputVaryingsSnippet;
5168 }
5169
5170 if (outFp16WithoutStorage)
5171 {
5172 vertTypes += outTypeSnippets->typeDefinitionsFp16Snippet;
5173 fragTypes += outTypeSnippets->typeDefinitionsFp16Snippet;
5174 fragAnnotations += outTypeSnippets->typeAnnotationsFp16Snippet;
5175 fragIODefinitions = outTypeSnippets->inputVaryingsSnippet + outTypeSnippets->outputDefinitionsFp16Snippet;
5176 fragProcessResult = outTypeSnippets->loadVertexResultSnippet + outTypeSnippets->storeResultsFp16Snippet;
5177 }
5178
5179 needsShaderFloat16 |= outTypeSnippets->loadStoreRequiresShaderFloat16;
5180 }
5181 else // perform test in fragment stage - vertex stage is empty
5182 {
5183 fragFunctions = specOpData.functions;
5184 // check if input type is different from tested type
5185 if (testOperation.isInputTypeRestricted)
5186 {
5187 fragAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
5188 outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
5189 fragCapabilities = behaviorCapability +
5190 (inFp16WithoutStorage ? inTypeSnippets->capabilitiesFp16Without16BitStorage :
5191 inTypeSnippets->capabilities) +
5192 (outFp16WithoutStorage ? outTypeSnippets->capabilitiesFp16Without16BitStorage :
5193 outTypeSnippets->capabilities);
5194 fragExtensions = (inFp16WithoutStorage ? inTypeSnippets->extensionsFp16Without16BitStorage :
5195 inTypeSnippets->extensions) +
5196 (outFp16WithoutStorage ? outTypeSnippets->extensionsFp16Without16BitStorage :
5197 outTypeSnippets->extensions);
5198 fragTypes = inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet;
5199 fragConstants = inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
5200 ;
5201 requiresUniformAndStorage16BitBufferAccess |=
5202 ((inVariableType == FP16) && (testCase.fp16Without16BitStorage == false));
5203 }
5204 else
5205 {
5206 // input and output types are the same
5207
5208 fragAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
5209 outTypeSnippets->outputAnnotationsSnippet;
5210 fragCapabilities =
5211 behaviorCapability + (outFp16WithoutStorage ? outTypeSnippets->capabilitiesFp16Without16BitStorage :
5212 outTypeSnippets->capabilities);
5213 fragExtensions = (outFp16WithoutStorage ? outTypeSnippets->extensionsFp16Without16BitStorage :
5214 outTypeSnippets->extensions);
5215 fragTypes = outTypeSnippets->typeDefinitionsSnippet;
5216 fragConstants = outTypeSnippets->constantsDefinitionsSnippet;
5217 }
5218
5219 requiresUniformAndStorage16BitBufferAccess |=
5220 ((outVariableType == FP16) && (testCase.fp16Without16BitStorage == false));
5221
5222 // varying is not used but it needs to be specified so lets use type_i32 for it
5223 string unusedVertVarying = "%BP_vertex_result = OpVariable %type_i32_optr Output\n";
5224 string unusedFragVarying = "%BP_vertex_result = OpVariable %type_i32_iptr Input\n";
5225
5226 vertCapabilities = "";
5227 vertExtensions = "";
5228 vertAnnotations = "OpDecorate %type_f32_arr_1 ArrayStride 4\n";
5229 vertTypes = f32TypeMinimalRequired;
5230 vertConstants = "";
5231
5232 if ((outVariableType != FP32) && (inVariableType != FP32))
5233 fragTypes += f32TypeMinimalRequired;
5234
5235 fragAnnotations += specOpData.annotations;
5236 fragTypes += specOpData.types;
5237 fragConstants += specOpData.constants;
5238
5239 vertExecutionMode = "";
5240 fragExecutionMode = behaviorExecutionMode;
5241 vertIODefinitions = unusedVertVarying;
5242 fragIODefinitions = unusedFragVarying;
5243
5244 vertArguments = "";
5245 fragArguments = specOpData.arguments;
5246 vertVariables = "";
5247 fragVariables = specOpData.variables;
5248 vertCommands = "";
5249 fragCommands = specOpData.commands;
5250 vertProcessResult = "";
5251 fragProcessResult = outTypeSnippets->storeResultsSnippet;
5252
5253 if (inFp16WithoutStorage)
5254 {
5255 fragAnnotations += inTypeSnippets->typeAnnotationsFp16Snippet;
5256 if (testOperation.isInputTypeRestricted)
5257 {
5258 fragTypes += inTypeSnippets->typeDefinitionsFp16Snippet;
5259 }
5260 fragIODefinitions += inTypeSnippets->inputDefinitionsFp16Snippet;
5261 }
5262 else
5263 {
5264 fragIODefinitions += inTypeSnippets->inputDefinitionsSnippet;
5265 }
5266
5267 if (outFp16WithoutStorage)
5268 {
5269 if (testOperation.isInputTypeRestricted)
5270 {
5271 fragAnnotations += outTypeSnippets->typeAnnotationsFp16Snippet;
5272 }
5273 fragTypes += outTypeSnippets->typeDefinitionsFp16Snippet;
5274 fragIODefinitions += outTypeSnippets->outputDefinitionsFp16Snippet;
5275 fragProcessResult = outTypeSnippets->storeResultsFp16Snippet;
5276 }
5277 else
5278 {
5279 fragIODefinitions += outTypeSnippets->outputDefinitionsSnippet;
5280 }
5281
5282 if (!testCaseInfo.argumentsFromInput)
5283 {
5284 switch (testCaseInfo.testCase.operationId)
5285 {
5286 case OID_CONV_FROM_FP32:
5287 case OID_CONV_FROM_FP64:
5288 needsSpecialConstants = true;
5289 break;
5290 default:
5291 break;
5292 }
5293 }
5294 }
5295
5296 // Another reason we need shaderFloat16 is the executable instructions uses fp16
5297 // in a way not supported by the 16bit storage extension.
5298 needsShaderFloat16 |= float16FeatureRequired && testOperation.floatUsage == FLOAT_ARITHMETIC;
5299
5300 // Constants are only needed sometimes. Drop them in the fp16 case if the code doesn't need
5301 // them, and if we don't otherwise need shaderFloat16.
5302 bool needsFP16Constants = needsShaderFloat16 || needsSpecialConstants || outFp16WithoutStorage;
5303
5304 if (!needsFP16Constants && float16FeatureRequired)
5305 {
5306 // Check various code fragments
5307 const FloatStatementUsageFlags commandsFloatConstMask =
5308 B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_CONST_FP16;
5309 const bool commandsUsesFloatConstant =
5310 (testCaseInfo.operation.statementUsageFlags & commandsFloatConstMask) != 0;
5311 const FloatStatementUsageFlags argumentsFloatConstMask =
5312 B_STATEMENT_USAGE_ARGS_CONST_FLOAT | B_STATEMENT_USAGE_ARGS_CONST_FP16;
5313 const bool argumentsUsesFloatConstant = (specOpData.argumentsUsesFloatConstant & argumentsFloatConstMask) != 0;
5314 bool hasFP16ConstsInCommandsOrArguments = commandsUsesFloatConstant || argumentsUsesFloatConstant;
5315
5316 needsFP16Constants |= hasFP16ConstsInCommandsOrArguments;
5317
5318 if (!needsFP16Constants)
5319 {
5320 vertConstants = "";
5321 fragConstants = "";
5322 }
5323 }
5324 needsShaderFloat16 |= needsFP16Constants;
5325
5326 if (needsShaderFloat16)
5327 {
5328 vertCapabilities += "OpCapability Float16\n";
5329 fragCapabilities += "OpCapability Float16\n";
5330 }
5331
5332 map<string, string> specializations;
5333 specializations["vert_capabilities"] = vertCapabilities;
5334 specializations["vert_extensions"] = vertExtensions;
5335 specializations["vert_execution_mode"] = vertExecutionMode;
5336 specializations["vert_annotations"] = vertAnnotations;
5337 specializations["vert_types"] = vertTypes;
5338 specializations["vert_constants"] = vertConstants;
5339 specializations["vert_io_definitions"] = vertIODefinitions;
5340 specializations["vert_arguments"] = vertArguments;
5341 specializations["vert_variables"] = vertVariables;
5342 specializations["vert_functions"] = vertFunctions;
5343 specializations["vert_commands"] = vertCommands;
5344 specializations["vert_process_result"] = vertProcessResult;
5345 specializations["frag_capabilities"] = fragCapabilities;
5346 specializations["frag_extensions"] = fragExtensions;
5347 specializations["frag_execution_mode"] = fragExecutionMode;
5348 specializations["frag_annotations"] = fragAnnotations;
5349 specializations["frag_types"] = fragTypes;
5350 specializations["frag_constants"] = fragConstants;
5351 specializations["frag_functions"] = fragFunctions;
5352 specializations["frag_io_definitions"] = fragIODefinitions;
5353 specializations["frag_arguments"] = fragArguments;
5354 specializations["frag_variables"] = fragVariables;
5355 specializations["frag_commands"] = fragCommands;
5356 specializations["frag_process_result"] = fragProcessResult;
5357
5358 // colors are not used by the test - input is passed via uniform buffer
5359 RGBA defaultColors[4] = {RGBA::white(), RGBA::red(), RGBA::green(), RGBA::blue()};
5360
5361 // construct input and output buffers of proper types
5362 TypeValuesSP inTypeValues = m_typeData.at(inVariableType).values;
5363 TypeValuesSP outTypeValues = m_typeData.at(outVariableType).values;
5364 BufferSp inBufferSp = inTypeValues->constructInputBuffer(testCase.input);
5365 BufferSp outBufferSp = outTypeValues->constructOutputBuffer(testCase.expectedOutput);
5366
5367 vkt::SpirVAssembly::GraphicsResources resources;
5368 resources.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5369 resources.outputs.push_back(Resource(outBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5370 resources.verifyIO = checkFloatsLUT[outVariableType];
5371
5372 StageToSpecConstantMap noSpecConstants;
5373 PushConstants noPushConstants;
5374 GraphicsInterfaces noInterfaces;
5375
5376 VulkanFeatures vulkanFeatures;
5377 setupFloatControlsProperties(
5378 inVariableTypeForCaps, // usualy same as inFloatType - different only for UnpackHalf2x16
5379 outVariableType, testCase.behaviorFlags, vulkanFeatures.floatControlsProperties);
5380 vulkanFeatures.coreFeatures.fragmentStoresAndAtomics = true;
5381 vulkanFeatures.coreFeatures.shaderFloat64 = float64FeatureRequired;
5382 vulkanFeatures.coreFeatures.shaderInt64 = int64FeatureRequired;
5383 vulkanFeatures.extFloat16Int8.shaderFloat16 = needsShaderFloat16;
5384 vulkanFeatures.ext16BitStorage.uniformAndStorageBuffer16BitAccess =
5385 float16FeatureRequired && requiresUniformAndStorage16BitBufferAccess;
5386
5387 vector<string> extensions;
5388 extensions.push_back("VK_KHR_shader_float_controls");
5389
5390 InstanceContext ctx(defaultColors, defaultColors, specializations, noSpecConstants, noPushConstants, resources,
5391 noInterfaces, extensions, vulkanFeatures, testedStage);
5392
5393 ctx.moduleMap["vert"].push_back(std::make_pair("main", VK_SHADER_STAGE_VERTEX_BIT));
5394 ctx.moduleMap["frag"].push_back(std::make_pair("main", VK_SHADER_STAGE_FRAGMENT_BIT));
5395
5396 ctx.requiredStages = static_cast<VkShaderStageFlagBits>(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
5397 ctx.failResult = QP_TEST_RESULT_FAIL;
5398 ctx.failMessageTemplate = "Output doesn't match with expected";
5399
5400 return ctx;
5401 }
5402
5403 } // namespace
5404
createFloatControlsTestGroup(TestContext & testCtx,TestGroupBuilderBase * groupBuilder)5405 tcu::TestCaseGroup *createFloatControlsTestGroup(TestContext &testCtx, TestGroupBuilderBase *groupBuilder)
5406 {
5407 de::MovePtr<TestCaseGroup> group(new TestCaseGroup(testCtx, "float_controls"));
5408
5409 struct TestGroup
5410 {
5411 VariableType variableType;
5412 const char *groupName;
5413 };
5414 TestGroup testGroups[] = {
5415 {FP16, "fp16"},
5416 {FP32, "fp32"},
5417 {FP64, "fp64"},
5418 };
5419
5420 for (int i = 0; i < DE_LENGTH_OF_ARRAY(testGroups); ++i)
5421 {
5422 const TestGroup &testGroup = testGroups[i];
5423 TestCaseGroup *typeGroup = new TestCaseGroup(testCtx, testGroup.groupName);
5424 group->addChild(typeGroup);
5425
5426 groupBuilder->createOperationTests(typeGroup, "input_args", testGroup.variableType, true);
5427 groupBuilder->createOperationTests(typeGroup, "generated_args", testGroup.variableType, false);
5428 }
5429
5430 groupBuilder->createSettingsTests(group.get());
5431
5432 return group.release();
5433 }
5434
createFloatControlsComputeGroup(TestContext & testCtx)5435 tcu::TestCaseGroup *createFloatControlsComputeGroup(TestContext &testCtx)
5436 {
5437 ComputeTestGroupBuilder computeTestGroupBuilder;
5438 computeTestGroupBuilder.init();
5439
5440 return createFloatControlsTestGroup(testCtx, &computeTestGroupBuilder);
5441 }
5442
createFloatControlsGraphicsGroup(TestContext & testCtx)5443 tcu::TestCaseGroup *createFloatControlsGraphicsGroup(TestContext &testCtx)
5444 {
5445 GraphicsTestGroupBuilder graphicsTestGroupBuilder;
5446 graphicsTestGroupBuilder.init();
5447
5448 return createFloatControlsTestGroup(testCtx, &graphicsTestGroupBuilder);
5449 }
5450
5451 } // namespace SpirVAssembly
5452 } // namespace vkt
5453