1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2018 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief VK_KHR_shader_float_controls tests.
22  *//*--------------------------------------------------------------------*/
23 
24 #define _USE_MATH_DEFINES
25 
26 #include "vktSpvAsmFloatControlsTests.hpp"
27 #include "vktSpvAsmComputeShaderCase.hpp"
28 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
29 #include "vktTestGroupUtil.hpp"
30 #include "tcuFloat.hpp"
31 #include "tcuFloatFormat.hpp"
32 #include "tcuStringTemplate.hpp"
33 #include "deUniquePtr.hpp"
34 #include "deFloat16.h"
35 #include "vkQueryUtil.hpp"
36 #include "vkRefUtil.hpp"
37 #include <cstring>
38 #include <vector>
39 #include <limits>
40 #include <cstdint>
41 #include <fenv.h>
42 #include <cstdint>
43 #include <cmath>
44 
45 namespace vkt
46 {
47 namespace SpirVAssembly
48 {
49 
50 namespace
51 {
52 
53 using namespace std;
54 using namespace tcu;
55 
// Scalar variable types the tests distinguish: the three float widths
// followed by the unsigned and signed 32/64-bit integer types.
enum VariableType
{
    FP16   = 0,
    FP32   = 1,
    FP64   = 2,
    UINT32 = 3,
    UINT64 = 4,
    INT32  = 5,
    INT64  = 6
};
66 
// Float width tag for buffer data; DATA_UNKNOWN is the zero value.
// NOTE(review): the users of this enum are outside the visible chunk.
enum class BufferDataType
{
    DATA_UNKNOWN = 0,
    DATA_FP16,
    DATA_FP32,
    DATA_FP64,
};
74 
// How a float type is used by a test.
enum FloatUsage
{
    // For a 16bit float type this is the kind of use that is supported by
    // VK_KHR_16bit_storage.
    FLOAT_STORAGE_ONLY = 0,
    // The float type is used in ways that go beyond VK_KHR_16bit_storage.
    FLOAT_ARITHMETIC = 1
};
83 
// One bit per (statement group, float flavour) pair. The groups are ARGS_CONST,
// TYPES_TYPE, CONSTS_TYPE, COMMANDS_CONST and COMMANDS_TYPE; each group has a
// generic FLOAT bit followed by FP16, FP32 and FP64 bits.
// NOTE(review): the code that sets and tests these bits is outside this chunk.
enum FloatStatementUsageBits
{
    B_STATEMENT_USAGE_ARGS_CONST_FLOAT     = 0x00000001,
    B_STATEMENT_USAGE_ARGS_CONST_FP16      = 0x00000002,
    B_STATEMENT_USAGE_ARGS_CONST_FP32      = 0x00000004,
    B_STATEMENT_USAGE_ARGS_CONST_FP64      = 0x00000008,
    B_STATEMENT_USAGE_TYPES_TYPE_FLOAT     = 0x00000010,
    B_STATEMENT_USAGE_TYPES_TYPE_FP16      = 0x00000020,
    B_STATEMENT_USAGE_TYPES_TYPE_FP32      = 0x00000040,
    B_STATEMENT_USAGE_TYPES_TYPE_FP64      = 0x00000080,
    B_STATEMENT_USAGE_CONSTS_TYPE_FLOAT    = 0x00000100,
    B_STATEMENT_USAGE_CONSTS_TYPE_FP16     = 0x00000200,
    B_STATEMENT_USAGE_CONSTS_TYPE_FP32     = 0x00000400,
    B_STATEMENT_USAGE_CONSTS_TYPE_FP64     = 0x00000800,
    B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT = 0x00001000,
    B_STATEMENT_USAGE_COMMANDS_CONST_FP16  = 0x00002000,
    B_STATEMENT_USAGE_COMMANDS_CONST_FP32  = 0x00004000,
    B_STATEMENT_USAGE_COMMANDS_CONST_FP64  = 0x00008000,
    B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT  = 0x00010000,
    B_STATEMENT_USAGE_COMMANDS_TYPE_FP16   = 0x00020000,
    B_STATEMENT_USAGE_COMMANDS_TYPE_FP32   = 0x00040000,
    B_STATEMENT_USAGE_COMMANDS_TYPE_FP64   = 0x00080000,
};

// Integer type wide enough to hold any combination of FloatStatementUsageBits.
using FloatStatementUsageFlags = uint32_t;
109 
// Float behaviors that it is possible to test. Each bit stands for one
// float-controls mode, named in the trailing comment.
enum BehaviorFlagBits
{
    B_DENORM_PRESERVE = (1 << 0), // DenormPreserve
    B_DENORM_FLUSH    = (1 << 1), // DenormFlushToZero
    B_ZIN_PRESERVE    = (1 << 2), // SignedZeroInfNanPreserve
    B_RTE_ROUNDING    = (1 << 3), // RoundingModeRTE
    B_RTZ_ROUNDING    = (1 << 4)  // RoundingModeRTZ
};

// Integer type wide enough to hold any combination of BehaviorFlagBits.
using BehaviorFlags = uint32_t;
121 
// Codes for all float values used in tests as arguments and operation results.
// Referring to values by code lets the same test description be reused with
// values of different float types, which keeps the test implementation simple.
enum ValueId
{
    // Common values used as both arguments and results.
    V_UNUSED = 0, // marks arguments that are not used in an operation,
                  // or results of test cases that should be skipped
    V_MINUS_INF,
    V_MINUS_ONE,  // -1.0
    V_MINUS_ZERO, // -0.0
    V_ZERO,       //  0.0
    V_HALF,       //  0.5
    V_ONE,        //  1.0
    V_INF,
    V_DENORM,
    V_NAN,

    // Arguments for rounding mode tests - used only when arguments are passed
    // from input.
    V_ADD_ARG_A,
    V_ADD_ARG_B,
    V_SUB_ARG_A,
    V_SUB_ARG_B,
    V_MUL_ARG_A,
    V_MUL_ARG_B,
    V_DOT_ARG_A,
    V_DOT_ARG_B,

    // Arguments of float-to-float conversion operations - used only when
    // arguments are passed from input.
    // Subcases are:
    //    ...UP:       rounds away from zero, e.g. trailing bits are 101..
    //    ...DOWN:     rounds toward zero, e.g. trailing bits are 011..
    //    ...TIE_UP:   tie that rounds up to even, e.g. preserved bit is 1, trailing are 10*
    //    ...TIE_DOWN: tie that rounds down to even, e.g. preserved bit is 0, trailing are 10*
    V_CONV_FROM_FP32_TO_FP16_UP_ARG,
    V_CONV_FROM_FP32_TO_FP16_DOWN_ARG,
    V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG,
    V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG,
    V_CONV_FROM_FP64_TO_FP16_UP_ARG,
    V_CONV_FROM_FP64_TO_FP16_DOWN_ARG,
    V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG,
    V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG,
    V_CONV_FROM_FP64_TO_FP32_UP_ARG,
    V_CONV_FROM_FP64_TO_FP32_DOWN_ARG,
    V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG,
    V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG,

    // Arguments of integer conversion rounding. Not all values can be
    // represented by all integer sizes; only those that can are used for
    // testing.
    // Subcases are:
    //    ...UP:   rounds away from zero, e.g. the integer's value is closer to the higher float value
    //    ...DOWN: rounds towards zero, e.g. the integer's value is closer to the lower float value
    //    ...TIE:  the integer's value is equidistant to the lower and higher float value
    // 16 bit values can only use width-conversions -> No rounding testing
    V_CONV_FROM_UINT_TO_FP32_UP_ARG,
    V_CONV_FROM_UINT_TO_FP32_DOWN_ARG,
    V_CONV_FROM_UINT_TO_FP32_TIE_ARG,
    V_CONV_FROM_UINT_TO_FP64_UP_ARG,
    V_CONV_FROM_UINT_TO_FP64_DOWN_ARG,
    V_CONV_FROM_UINT_TO_FP64_TIE_ARG,

    // Same as UINT but will only test with negative values.
    V_CONV_FROM_INT_TO_FP32_UP_ARG,
    V_CONV_FROM_INT_TO_FP32_DOWN_ARG,
    V_CONV_FROM_INT_TO_FP32_TIE_ARG,
    V_CONV_FROM_INT_TO_FP64_UP_ARG,
    V_CONV_FROM_INT_TO_FP64_DOWN_ARG,
    V_CONV_FROM_INT_TO_FP64_TIE_ARG,

    // Results of the rounding mode tests, one RTZ and one RTE variant each.
    V_ADD_RTZ_RESULT,
    V_ADD_RTE_RESULT,
    V_SUB_RTZ_RESULT,
    V_SUB_RTE_RESULT,
    V_MUL_RTZ_RESULT,
    V_MUL_RTE_RESULT,
    V_DOT_RTZ_RESULT,
    V_DOT_RTE_RESULT,

    // Uncommon results of some operations - corner cases.

    // fp16 addition of non-flushed denorm with itself (or equivalent
    // dot-product or vector-matrix multiply)
    V_ZERO_OR_DENORM_TIMES_TWO,
    V_MINUS_ONE_OR_CLOSE, // value used only for fp16 subtraction result of preserved denorm and one
    V_PI_DIV_2,
    V_ZERO_OR_MINUS_ZERO,          // both +0 and -0 are accepted
    V_ZERO_OR_ONE,                 // both +0 and 1 are accepted
    V_ZERO_OR_FP16_DENORM_TO_FP32, // both 0 and fp32 representation of fp16 denorm are accepted
    V_ZERO_OR_FP16_DENORM_TO_FP64,
    V_ZERO_OR_FP32_DENORM_TO_FP64,
    V_DENORM_TIMES_TWO,
    V_DEGREES_DENORM,
    V_TRIG_ONE, // 1.0 trigonometric operations, including precision margin
    V_MINUS_INF_OR_LOG_DENORM,
    V_MINUS_INF_OR_LOG2_DENORM,
    V_ZERO_OR_SQRT_DENORM,
    V_INF_OR_INV_SQRT_DENORM,

    // Results of float-to-float conversion operations: RTZ
    V_CONV_FROM_FP32_TO_FP16_UP_RTZ_RESULT,
    V_CONV_FROM_FP32_TO_FP16_DOWN_RTZ_RESULT,
    V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTZ_RESULT,
    V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP16_UP_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP16_DOWN_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP32_UP_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP32_DOWN_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTZ_RESULT,
    V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTZ_RESULT,
    // Results of float-to-float conversion operations: RTE
    V_CONV_FROM_FP32_TO_FP16_UP_RTE_RESULT,
    V_CONV_FROM_FP32_TO_FP16_DOWN_RTE_RESULT,
    V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTE_RESULT,
    V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP16_UP_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP16_DOWN_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP32_UP_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP32_DOWN_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTE_RESULT,
    V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTE_RESULT,

    // Results of unsigned integer conversion operations: RTZ
    // 16 bit values can only use width-conversions -> No rounding testing
    V_CONV_FROM_UINT32_UP_RTZ_RESULT,
    V_CONV_FROM_UINT32_DOWN_RTZ_RESULT,
    V_CONV_FROM_UINT32_TIE_RTZ_RESULT,
    V_CONV_FROM_UINT64_UP_RTZ_RESULT,
    V_CONV_FROM_UINT64_DOWN_RTZ_RESULT,
    V_CONV_FROM_UINT64_TIE_RTZ_RESULT,
    // Results of unsigned integer conversion operations: RTE
    // 16 bit values can only use width-conversions -> No rounding testing
    V_CONV_FROM_UINT32_UP_RTE_RESULT,
    V_CONV_FROM_UINT32_DOWN_RTE_RESULT,
    V_CONV_FROM_UINT32_TIE_RTE_RESULT,
    V_CONV_FROM_UINT64_UP_RTE_RESULT,
    V_CONV_FROM_UINT64_DOWN_RTE_RESULT,
    V_CONV_FROM_UINT64_TIE_RTE_RESULT,

    // Same as UINT but will only test with negative values.
    // Results of signed integer conversion operations: RTZ
    V_CONV_FROM_INT32_UP_RTZ_RESULT,
    V_CONV_FROM_INT32_DOWN_RTZ_RESULT,
    V_CONV_FROM_INT32_TIE_RTZ_RESULT,
    V_CONV_FROM_INT64_UP_RTZ_RESULT,
    V_CONV_FROM_INT64_DOWN_RTZ_RESULT,
    V_CONV_FROM_INT64_TIE_RTZ_RESULT,
    // Results of signed integer conversion operations: RTE
    V_CONV_FROM_INT32_UP_RTE_RESULT,
    V_CONV_FROM_INT32_DOWN_RTE_RESULT,
    V_CONV_FROM_INT32_TIE_RTE_RESULT,
    V_CONV_FROM_INT64_UP_RTE_RESULT,
    V_CONV_FROM_INT64_DOWN_RTE_RESULT,
    V_CONV_FROM_INT64_TIE_RTE_RESULT,

    V_CONV_DENORM_SMALLER, // used e.g. when converting fp16 denorm to fp32
    V_CONV_DENORM_BIGGER,
};
279 
// All tested operations. Operations are defined in a generic way so that they
// can be used to generate tests operating on arguments with different values
// of the specified float type.
enum OperationId
{
    // SPIR-V unary operations
    OID_NEGATE = 0,
    OID_COMPOSITE,
    OID_COMPOSITE_INS,
    OID_COPY,
    OID_D_EXTRACT,
    OID_D_INSERT,
    OID_SHUFFLE,
    OID_TRANSPOSE,
    OID_CONV_FROM_UINT_TO_FP32,
    OID_CONV_FROM_UINT_TO_FP64,
    OID_CONV_FROM_INT_TO_FP32,
    OID_CONV_FROM_INT_TO_FP64,
    // No SCONST_CONV_FROM_UINT since it requires Kernel Capability and Vulkan does not expose it
    OID_CONV_FROM_FP16,
    OID_CONV_FROM_FP32,
    OID_CONV_FROM_FP64,
    OID_SCONST_CONV_FROM_FP32_TO_FP16_UP,       // Round::UP case
    OID_SCONST_CONV_FROM_FP32_TO_FP16_DOWN,     // Round::DOWN case
    OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_UP,   // Round::TIE_UP case
    OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_DOWN, // Round::TIE_DOWN case
    OID_SCONST_CONV_FROM_FP64_TO_FP32_UP,
    OID_SCONST_CONV_FROM_FP64_TO_FP32_DOWN,
    OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_UP,
    OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_DOWN,
    OID_SCONST_CONV_FROM_FP64_TO_FP16_UP,
    OID_SCONST_CONV_FROM_FP64_TO_FP16_DOWN,
    OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_UP,
    OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_DOWN,
    OID_RETURN_VAL,

    // SPIR-V binary operations
    OID_ADD,
    OID_SUB,
    OID_MUL,
    OID_DIV,
    OID_REM,
    OID_MOD,
    OID_PHI,
    OID_SELECT,
    OID_DOT,
    OID_VEC_MUL_S,
    OID_VEC_MUL_M,
    OID_MAT_MUL_S,
    OID_MAT_MUL_V,
    OID_MAT_MUL_M,
    OID_OUT_PROD,
    OID_ORD_EQ,
    OID_UORD_EQ,
    OID_ORD_NEQ,
    OID_UORD_NEQ,
    OID_ORD_LS,
    OID_UORD_LS,
    OID_ORD_GT,
    OID_UORD_GT,
    OID_ORD_LE,
    OID_UORD_LE,
    OID_ORD_GE,
    OID_UORD_GE,

    // GLSL unary operations
    OID_ROUND,
    OID_ROUND_EV,
    OID_TRUNC,
    OID_ABS,
    OID_SIGN,
    OID_FLOOR,
    OID_CEIL,
    OID_FRACT,
    OID_RADIANS,
    OID_DEGREES,
    OID_SIN,
    OID_COS,
    OID_TAN,
    OID_ASIN,
    OID_ACOS,
    OID_ATAN,
    OID_SINH,
    OID_COSH,
    OID_TANH,
    OID_ASINH,
    OID_ACOSH,
    OID_ATANH,
    OID_EXP,
    OID_LOG,
    OID_EXP2,
    OID_LOG2,
    OID_SQRT,
    OID_INV_SQRT,
    OID_MODF,
    OID_MODF_ST,
    OID_FREXP,
    OID_FREXP_ST,
    OID_LENGTH,
    OID_NORMALIZE,
    OID_REFLECT,
    OID_REFRACT,
    OID_MAT_DET,
    OID_MAT_INV,
    OID_PH_DENORM, // PackHalf2x16
    OID_UPH_DENORM,
    OID_PD_DENORM, // PackDouble2x32
    OID_UPD_DENORM_FLUSH,
    OID_UPD_DENORM_PRESERVE,

    // GLSL binary operations
    OID_ATAN2,
    OID_POW,
    OID_MIX,
    OID_FMA,
    OID_MIN,
    OID_MAX,
    OID_CLAMP,
    OID_STEP,
    OID_SSTEP,
    OID_DIST,
    OID_CROSS,
    OID_FACE_FWD,
    OID_NMIN,
    OID_NMAX,
    OID_NCLAMP,

    OID_ORTE_ROUND,
    OID_ORTZ_ROUND
};
410 
411 // Structures storing data required to test DenormPreserve and DenormFlushToZero modes.
412 // Operations are separated into binary and unary lists because binary operations can be tested with
413 // two attributes and thus denorms can be tested in combination with value, denorm, inf and nan.
414 // Unary operations are only tested with denorms.
415 struct BinaryCase
416 {
417     OperationId operationId;
418     ValueId opVarResult;
419     ValueId opDenormResult;
420     ValueId opInfResult;
421     ValueId opNanResult;
422 };
423 struct UnaryCase
424 {
425     OperationId operationId;
426     ValueId result;
427 };
428 
// Returns a copy of str in which every occurrence of `from` has been replaced
// with `to`. Occurrences are located left to right and do not overlap; text
// inserted by a replacement is never rescanned, so `to` may itself contain
// `from`. An empty `from` matches nothing and str is returned unchanged.
std::string replace(std::string str, const std::string &from, const std::string &to)
{
    // to keep spir-v code clean and easier to read parts of it are processed
    // with this method instead of StringTemplate; main usage of this method is the
    // replacement of "float_" with "f16_", "f32_" or "f64_" depending on test case

    // std::string::find() reports a match for an empty needle at every
    // position, so without this guard the loop below would never terminate.
    if (from.empty())
        return str;

    for (size_t pos = str.find(from); pos != std::string::npos; pos = str.find(from, pos + to.length()))
        str.replace(pos, from.length(), to);

    return str;
}
444 
// Bit-level conversion helper: its nested union overlays a float type and an
// integer type so a value can be stored through one member and read through
// the other.
template <typename FLOAT_TYPE, typename UINT_TYPE>
struct RawConvert
{
    union Value
    {
        FLOAT_TYPE fp; // float view of the storage
        UINT_TYPE ui;  // integer view of the same storage
    };
};
455 
// Trait mapping a float type to an unsigned integer type able to store it.
// The primary template yields a 16-bit integer; float and double have their
// own specializations.
template <typename FLOAT_TYPE>
struct GetCoresponding
{
    using uint_type = uint16_t;
};

// float -> 32-bit integer
template <>
struct GetCoresponding<float>
{
    using uint_type = uint32_t;
};

// double -> 64-bit integer
template <>
struct GetCoresponding<double>
{
    using uint_type = uint64_t;
};
472 
473 // All values used for arguments and operation results are stored in single map.
474 // Each float type (fp16, fp32, fp64) has its own map that is used during
475 // test setup and during verification. TypeValuesBase is interface to that map.
476 class TypeValuesBase
477 {
478 public:
479     TypeValuesBase();
480     virtual ~TypeValuesBase() = default;
481 
482     virtual BufferSp constructInputBuffer(const ValueId *twoArguments) const                                     = 0;
483     virtual BufferSp constructOutputBuffer(ValueId result) const                                                 = 0;
484     virtual void fillInputData(const ValueId *twoArguments, vector<uint8_t> &bufferData, uint32_t &offset) const = 0;
485 };
486 
TypeValuesBase()487 TypeValuesBase::TypeValuesBase()
488 {
489 }
490 
491 typedef de::SharedPtr<TypeValuesBase> TypeValuesSP;
492 
493 template <typename FLOAT_TYPE>
494 class TypeValues : public TypeValuesBase
495 {
496 public:
497     TypeValues();
498 
499     BufferSp constructInputBuffer(const ValueId *twoArguments) const override;
500     BufferSp constructOutputBuffer(ValueId result) const override;
501     void fillInputData(const ValueId *twoArguments, vector<uint8_t> &bufferData, uint32_t &offset) const override;
502 
503     FLOAT_TYPE getValue(ValueId id) const;
504 
505     template <typename UINT_TYPE>
506     FLOAT_TYPE exactByteEquivalent(UINT_TYPE byteValue) const;
507 
508 private:
509     typedef map<ValueId, FLOAT_TYPE> ValueMap;
510     ValueMap m_valueIdToVariableType;
511 };
512 
513 template <typename FLOAT_TYPE>
constructInputBuffer(const ValueId * twoArguments) const514 BufferSp TypeValues<FLOAT_TYPE>::constructInputBuffer(const ValueId *twoArguments) const
515 {
516     std::vector<FLOAT_TYPE> inputData(2);
517     inputData[0] = m_valueIdToVariableType.at(twoArguments[0]);
518     inputData[1] = m_valueIdToVariableType.at(twoArguments[1]);
519     return BufferSp(new Buffer<FLOAT_TYPE>(inputData));
520 }
521 
522 template <typename FLOAT_TYPE>
constructOutputBuffer(ValueId result) const523 BufferSp TypeValues<FLOAT_TYPE>::constructOutputBuffer(ValueId result) const
524 {
525     // note: we are not doing maping here, ValueId is directly saved in
526     // float type in order to be able to retireve it during verification
527 
528     typedef typename GetCoresponding<FLOAT_TYPE>::uint_type uint_t;
529     uint_t value = static_cast<uint_t>(result);
530 
531     // For FP16 we increase the buffer size to hold an unsigned integer, as
532     // we can be in the no 16bit_storage case.
533     const uint_t outputSize = sizeof(FLOAT_TYPE) == 2u ? 2u : 1u;
534     std::vector<FLOAT_TYPE> outputData(outputSize, exactByteEquivalent<uint_t>(value));
535     return BufferSp(new Buffer<FLOAT_TYPE>(outputData));
536 }
537 
538 template <typename FLOAT_TYPE>
fillInputData(const ValueId * twoArguments,vector<uint8_t> & bufferData,uint32_t & offset) const539 void TypeValues<FLOAT_TYPE>::fillInputData(const ValueId *twoArguments, vector<uint8_t> &bufferData,
540                                            uint32_t &offset) const
541 {
542     uint32_t typeSize = sizeof(FLOAT_TYPE);
543 
544     FLOAT_TYPE argA = getValue(twoArguments[0]);
545     deMemcpy(&bufferData[offset], &argA, typeSize);
546     offset += typeSize;
547 
548     FLOAT_TYPE argB = getValue(twoArguments[1]);
549     deMemcpy(&bufferData[offset], &argB, typeSize);
550     offset += typeSize;
551 }
552 
553 template <typename FLOAT_TYPE>
getValue(ValueId id) const554 FLOAT_TYPE TypeValues<FLOAT_TYPE>::getValue(ValueId id) const
555 {
556     return m_valueIdToVariableType.at(id);
557 }
558 
559 template <typename FLOAT_TYPE>
560 template <typename UINT_TYPE>
exactByteEquivalent(UINT_TYPE byteValue) const561 FLOAT_TYPE TypeValues<FLOAT_TYPE>::exactByteEquivalent(UINT_TYPE byteValue) const
562 {
563     typename RawConvert<FLOAT_TYPE, UINT_TYPE>::Value value;
564     value.ui = byteValue;
565     return value.fp;
566 }
567 
// Rounding modes only matter for floating point conversions that narrow,
// i.e. go from more mantissa bits to fewer.
//
// Which of the four rounding cases applies depends on the least significant
// mantissa bit that is retained and on the mantissa bits that are eliminated:
//
// Least significant  | Eliminated bit     |  Produces which
// retained bit       | string             |  Rounding Case
// -------------------|--------------------|-----------------
//   don't care       | 0y, y is anything  |  DOWN: Round toward zero
//   don't care       | 1y, y is non-zero  |  UP: Round away from zero
//   0                | 1y, y is zero      |  TIE_DOWN: Round toward zero
//   1                | 1y, y is zero      |  TIE_UP: Round away from zero
enum class Round
{
    DOWN,     // eliminated bits start with 0
    UP,       // eliminated bits are 1 followed by a non-zero tail
    TIE_DOWN, // exact tie, retained bit is 0
    TIE_UP    // exact tie, retained bit is 1
};
590 
591 template <typename FROM_FLOAT_TYPE, typename TO_FLOAT_TYPE>
592 struct conversionDetail
593 {
594     typedef typename FROM_FLOAT_TYPE::StorageType FromInt;
595     typedef typename TO_FLOAT_TYPE::StorageType ToInt;
596 
597     // How many bits will be removed from the mantissa by the conversion?
598     static const int excessWidth = FROM_FLOAT_TYPE::MANTISSA_BITS - TO_FLOAT_TYPE::MANTISSA_BITS;
599 
600     // 'tie' contains the bits for the "1y, y is 0" case in RoundCase table.
601     // All the positions in tie32 will be thrown away, but help determine
602     // the rounding direction.
603     static const FromInt tie         = ((FromInt)1) << (excessWidth - 1);
604     static const FromInt down        = tie - 1;          // bits to trigger down case
605     static const FromInt up          = tie + 1;          // bits to trigger up case
606     static const FromInt tieDown     = tie;              // bits to trigger tie-down case
607     static const FromInt tieUp       = (tie << 1) | tie; // bits to trigger tie-up case
608     static const int exampleSign     = 1;                // Could be -1
609     static const int exampleExponent = TO_FLOAT_TYPE::EXPONENT_BIAS;
610 
611     // Not all platforms will support 16 or 64 bit values. We need to detect those cases
612     // and make the tests pass through since we cannot validate them.
hasExcessBitsvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail613     static bool hasExcessBits(void)
614     {
615         return 0 < excessWidth;
616     }
617 
618     // Returns arbitrary but nontrivial bits for the mantissa of the conversion
619     // result. This has TO_FLOAT_TYPE::MANTISSA_BITS. The bottom bit must be
620     // zero so it can be filled in later.
exampleMSBBitsvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail621     static ToInt exampleMSBBits(void)
622     {
623         switch (int(TO_FLOAT_TYPE::MANTISSA_BITS))
624         {
625         case 10: // Float16
626             // The Mantissa has 10 explicitly represented bits, and 1 bit
627             // that is normally hidden, but required here.
628             // The upper 9 are arbitrary, and the bottom bit is 0, to be filled
629             // in later.
630             return static_cast<ToInt>((1 << 10) | 0x39a);
631         case 23: // Float32
632             // The Mantissa has 23 explicitly represented bits, and 1 bit
633             // that is normally hidden, but required here.
634             // The upper 22 are arbitrary, and the bottom bit is 0, to be filled
635             // in later.
636             return static_cast<ToInt>((1 << 23) | 0x3a5a5a);
637         }
638         DE_ASSERT(false && "Expected Float16 or Float32");
639         return 0;
640     }
641 
inputMantissavkt::SpirVAssembly::__anon1f0d25030111::conversionDetail642     static FromInt inputMantissa(Round r)
643     {
644         const FromInt base = static_cast<FromInt>(exampleMSBBits()) << excessWidth;
645         switch (r)
646         {
647         case Round::DOWN:
648             return base | down;
649         case Round::UP:
650             return base | up;
651         case Round::TIE_DOWN:
652             return base | tieDown;
653         case Round::TIE_UP:
654             return base | tieUp;
655         }
656         DE_ASSERT(false);
657         return 0; // Unreachable
658     }
659 
outputMantissavkt::SpirVAssembly::__anon1f0d25030111::conversionDetail660     static ToInt outputMantissa(FromInt mantissa, Round r)
661     {
662         const ToInt base = static_cast<ToInt>(mantissa >> excessWidth);
663         switch (r)
664         {
665         case Round::DOWN:
666         case Round::TIE_DOWN:
667             return base;
668         case Round::UP:
669         case Round::TIE_UP:
670             return static_cast<ToInt>(base + 1);
671         }
672         DE_ASSERT(false);
673         return 0; // Unreachable
674     }
675 
676     // Returns the value for the sample input, for an intended rounding outcome.
fromvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail677     static FROM_FLOAT_TYPE from(Round r)
678     {
679         return FROM_FLOAT_TYPE::construct(exampleSign, exampleExponent, inputMantissa(r));
680     }
681 
682     // Returns the value of from(r) in string form as a sequence of 32 bit words.
fromStrvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail683     static std::string fromStr(Round r)
684     {
685         const FromInt value = from(r).bits();
686         switch (sizeof(FromInt))
687         {
688         case 8:
689             // Return low word first, high word second
690             return to_string(value & 0xFFFFFFFFu) + " " + to_string(value >> 16 >> 16);
691         case 4:
692             return to_string(value);
693         }
694         DE_ASSERT(false);
695         return "";
696     }
697 
698     // Return the float value expected for a RTZ conversion.
resultRTZvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail699     static TO_FLOAT_TYPE resultRTZ(Round r)
700     {
701         // Reconstruct the original input, then round toward zero.
702         const ToInt mantissa = outputMantissa(inputMantissa(r), Round::DOWN);
703         return TO_FLOAT_TYPE::construct(exampleSign, exampleExponent, mantissa);
704     }
705     // Return the bits for the float value expected for a RTZ conversion.
resultRTZBitsvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail706     static ToInt resultRTZBits(Round r)
707     {
708         return resultRTZ(r).bits();
709     }
710     // Return the float value expected for a RTE conversion.
resultRTEvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail711     static TO_FLOAT_TYPE resultRTE(Round r)
712     {
713         // Reconstruct the original input, then round as specified.
714         const ToInt mantissa = outputMantissa(inputMantissa(r), r);
715         return TO_FLOAT_TYPE::construct(exampleSign, exampleExponent, mantissa);
716     }
717     // Return the bits for the float value expected for a RTE conversion.
resultRTEBitsvkt::SpirVAssembly::__anon1f0d25030111::conversionDetail718     static ToInt resultRTEBits(Round r)
719     {
720         return resultRTE(r).bits();
721     }
722 };
723 
724 template <>
TypeValues()725 TypeValues<deFloat16>::TypeValues() : TypeValuesBase()
726 {
727     // NOTE: when updating entries in m_valueIdToVariableType make sure to
728     // update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
729     ValueMap &vm     = m_valueIdToVariableType;
730     vm[V_UNUSED]     = deFloat32To16(0.0f);
731     vm[V_MINUS_INF]  = 0xfc00;
732     vm[V_MINUS_ONE]  = deFloat32To16(-1.0f);
733     vm[V_MINUS_ZERO] = 0x8000;
734     vm[V_ZERO]       = 0x0000;
735     vm[V_HALF]       = deFloat32To16(0.5f);
736     vm[V_ONE]        = deFloat32To16(1.0f);
737     vm[V_INF]        = 0x7c00;
738     vm[V_DENORM]     = 0x03f0; // this value should be the same as the result of denormBase - epsilon
739     vm[V_NAN]        = 0x7cf0;
740 
741     vm[V_PI_DIV_2]         = deFloat32To16((float)M_PI_2);
742     vm[V_DENORM_TIMES_TWO] = 0x07e0;
743     vm[V_DEGREES_DENORM]   = 0x1b0c;
744 
745     vm[V_ADD_ARG_A] = 0x3c03;
746     vm[V_ADD_ARG_B] = vm[V_ONE];
747     vm[V_SUB_ARG_A] = vm[V_ADD_ARG_A];
748     vm[V_SUB_ARG_B] = 0x4203;
749     vm[V_MUL_ARG_A] = vm[V_ADD_ARG_A];
750     vm[V_MUL_ARG_B] = 0x1900;
751     vm[V_DOT_ARG_A] = vm[V_ADD_ARG_A];
752     vm[V_DOT_ARG_B] = vm[V_MUL_ARG_B];
753 
754     // Float16 is not the source type for a narrowing conversion, so these
755     // entries are unused.
756     vm[V_CONV_FROM_FP32_TO_FP16_UP_ARG]       = vm[V_UNUSED];
757     vm[V_CONV_FROM_FP32_TO_FP16_DOWN_ARG]     = vm[V_UNUSED];
758     vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG]   = vm[V_UNUSED];
759     vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG] = vm[V_UNUSED];
760     vm[V_CONV_FROM_FP64_TO_FP16_UP_ARG]       = vm[V_UNUSED];
761     vm[V_CONV_FROM_FP64_TO_FP16_DOWN_ARG]     = vm[V_UNUSED];
762     vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG]   = vm[V_UNUSED];
763     vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG] = vm[V_UNUSED];
764     vm[V_CONV_FROM_FP64_TO_FP32_UP_ARG]       = vm[V_UNUSED];
765     vm[V_CONV_FROM_FP64_TO_FP32_DOWN_ARG]     = vm[V_UNUSED];
766     vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG]   = vm[V_UNUSED];
767     vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG] = vm[V_UNUSED];
768 
769     // 16 values can only be used for width-conversions
770     vm[V_CONV_FROM_UINT_TO_FP32_UP_ARG]   = vm[V_UNUSED];
771     vm[V_CONV_FROM_UINT_TO_FP32_DOWN_ARG] = vm[V_UNUSED];
772     vm[V_CONV_FROM_UINT_TO_FP32_TIE_ARG]  = vm[V_UNUSED];
773     vm[V_CONV_FROM_UINT_TO_FP64_UP_ARG]   = vm[V_UNUSED];
774     vm[V_CONV_FROM_UINT_TO_FP64_DOWN_ARG] = vm[V_UNUSED];
775     vm[V_CONV_FROM_UINT_TO_FP64_TIE_ARG]  = vm[V_UNUSED];
776 
777     vm[V_CONV_FROM_INT_TO_FP32_UP_ARG]   = vm[V_UNUSED];
778     vm[V_CONV_FROM_INT_TO_FP32_DOWN_ARG] = vm[V_UNUSED];
779     vm[V_CONV_FROM_INT_TO_FP32_TIE_ARG]  = vm[V_UNUSED];
780     vm[V_CONV_FROM_INT_TO_FP64_UP_ARG]   = vm[V_UNUSED];
781     vm[V_CONV_FROM_INT_TO_FP64_DOWN_ARG] = vm[V_UNUSED];
782     vm[V_CONV_FROM_INT_TO_FP64_TIE_ARG]  = vm[V_UNUSED];
783 
784     vm[V_ADD_RTZ_RESULT] = 0x4001; // deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rtz)
785     vm[V_SUB_RTZ_RESULT] = 0xc001; // deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rtz)
786     vm[V_MUL_RTZ_RESULT] = 0x1903; // deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rtz)
787     vm[V_DOT_RTZ_RESULT] = 0x1d03;
788 
789     vm[V_ADD_RTE_RESULT] = 0x4002; // deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rte)
790     vm[V_SUB_RTE_RESULT] = 0xc002; // deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rte)
791     vm[V_MUL_RTE_RESULT] = 0x1904; // deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rte)
792     vm[V_DOT_RTE_RESULT] = 0x1d04;
793 
794     typedef conversionDetail<Float32, Float16> from32;
795     typedef conversionDetail<Float64, Float16> from64;
796     vm[V_CONV_FROM_FP32_TO_FP16_UP_RTZ_RESULT] =
797         from32::hasExcessBits() ? from32::resultRTZBits(Round::UP) : vm[V_UNUSED];
798     vm[V_CONV_FROM_FP32_TO_FP16_DOWN_RTZ_RESULT] =
799         from32::hasExcessBits() ? from32::resultRTZBits(Round::DOWN) : vm[V_UNUSED];
800     vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTZ_RESULT] =
801         from32::hasExcessBits() ? from32::resultRTZBits(Round::TIE_UP) : vm[V_UNUSED];
802     vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTZ_RESULT] =
803         from32::hasExcessBits() ? from32::resultRTZBits(Round::TIE_DOWN) : vm[V_UNUSED];
804     vm[V_CONV_FROM_FP64_TO_FP16_UP_RTZ_RESULT] =
805         from64::hasExcessBits() ? from64::resultRTZBits(Round::UP) : vm[V_UNUSED];
806     vm[V_CONV_FROM_FP64_TO_FP16_DOWN_RTZ_RESULT] =
807         from64::hasExcessBits() ? from64::resultRTZBits(Round::DOWN) : vm[V_UNUSED];
808     vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTZ_RESULT] =
809         from64::hasExcessBits() ? from64::resultRTZBits(Round::TIE_UP) : vm[V_UNUSED];
810     vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTZ_RESULT] =
811         from64::hasExcessBits() ? from64::resultRTZBits(Round::TIE_DOWN) : vm[V_UNUSED];
812     vm[V_CONV_FROM_FP64_TO_FP32_UP_RTZ_RESULT]       = vm[V_UNUSED];
813     vm[V_CONV_FROM_FP64_TO_FP32_DOWN_RTZ_RESULT]     = vm[V_UNUSED];
814     vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTZ_RESULT]   = vm[V_UNUSED];
815     vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTZ_RESULT] = vm[V_UNUSED];
816 
817     vm[V_CONV_FROM_FP32_TO_FP16_UP_RTE_RESULT] =
818         from32::hasExcessBits() ? from32::resultRTEBits(Round::UP) : vm[V_UNUSED];
819     vm[V_CONV_FROM_FP32_TO_FP16_DOWN_RTE_RESULT] =
820         from32::hasExcessBits() ? from32::resultRTEBits(Round::DOWN) : vm[V_UNUSED];
821     vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTE_RESULT] =
822         from32::hasExcessBits() ? from32::resultRTEBits(Round::TIE_UP) : vm[V_UNUSED];
823     vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTE_RESULT] =
824         from32::hasExcessBits() ? from32::resultRTEBits(Round::TIE_DOWN) : vm[V_UNUSED];
825     vm[V_CONV_FROM_FP64_TO_FP16_UP_RTE_RESULT] =
826         from64::hasExcessBits() ? from64::resultRTEBits(Round::UP) : vm[V_UNUSED];
827     vm[V_CONV_FROM_FP64_TO_FP16_DOWN_RTE_RESULT] =
828         from64::hasExcessBits() ? from64::resultRTEBits(Round::DOWN) : vm[V_UNUSED];
829     vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTE_RESULT] =
830         from64::hasExcessBits() ? from64::resultRTEBits(Round::TIE_UP) : vm[V_UNUSED];
831     vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTE_RESULT] =
832         from64::hasExcessBits() ? from64::resultRTEBits(Round::TIE_DOWN) : vm[V_UNUSED];
833     vm[V_CONV_FROM_FP64_TO_FP32_UP_RTE_RESULT]       = vm[V_UNUSED];
834     vm[V_CONV_FROM_FP64_TO_FP32_DOWN_RTE_RESULT]     = vm[V_UNUSED];
835     vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTE_RESULT]   = vm[V_UNUSED];
836     vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTE_RESULT] = vm[V_UNUSED];
837 
838     // 16 values can only be used for width-conversions
839     vm[V_CONV_FROM_UINT32_UP_RTZ_RESULT]   = vm[V_UNUSED];
840     vm[V_CONV_FROM_UINT32_DOWN_RTZ_RESULT] = vm[V_UNUSED];
841     vm[V_CONV_FROM_UINT32_TIE_RTZ_RESULT]  = vm[V_UNUSED];
842     vm[V_CONV_FROM_UINT64_UP_RTZ_RESULT]   = vm[V_UNUSED];
843     vm[V_CONV_FROM_UINT64_DOWN_RTZ_RESULT] = vm[V_UNUSED];
844     vm[V_CONV_FROM_UINT64_TIE_RTZ_RESULT]  = vm[V_UNUSED];
845 
846     vm[V_CONV_FROM_UINT32_UP_RTE_RESULT]   = vm[V_UNUSED];
847     vm[V_CONV_FROM_UINT32_DOWN_RTE_RESULT] = vm[V_UNUSED];
848     vm[V_CONV_FROM_UINT32_TIE_RTE_RESULT]  = vm[V_UNUSED];
849     vm[V_CONV_FROM_UINT64_UP_RTE_RESULT]   = vm[V_UNUSED];
850     vm[V_CONV_FROM_UINT64_DOWN_RTE_RESULT] = vm[V_UNUSED];
851     vm[V_CONV_FROM_UINT64_TIE_RTE_RESULT]  = vm[V_UNUSED];
852 
853     vm[V_CONV_FROM_INT32_UP_RTZ_RESULT]   = vm[V_UNUSED];
854     vm[V_CONV_FROM_INT32_DOWN_RTZ_RESULT] = vm[V_UNUSED];
855     vm[V_CONV_FROM_INT32_TIE_RTZ_RESULT]  = vm[V_UNUSED];
856     vm[V_CONV_FROM_INT64_UP_RTZ_RESULT]   = vm[V_UNUSED];
857     vm[V_CONV_FROM_INT64_DOWN_RTZ_RESULT] = vm[V_UNUSED];
858     vm[V_CONV_FROM_INT64_TIE_RTZ_RESULT]  = vm[V_UNUSED];
859 
860     vm[V_CONV_FROM_INT32_UP_RTE_RESULT]   = vm[V_UNUSED];
861     vm[V_CONV_FROM_INT32_DOWN_RTE_RESULT] = vm[V_UNUSED];
862     vm[V_CONV_FROM_INT32_TIE_RTE_RESULT]  = vm[V_UNUSED];
863     vm[V_CONV_FROM_INT64_UP_RTE_RESULT]   = vm[V_UNUSED];
864     vm[V_CONV_FROM_INT64_DOWN_RTE_RESULT] = vm[V_UNUSED];
865     vm[V_CONV_FROM_INT64_TIE_RTE_RESULT]  = vm[V_UNUSED];
866 
867     // there is no precision to store fp32 denorm nor fp64 denorm
868     vm[V_CONV_DENORM_SMALLER] = vm[V_ZERO];
869     vm[V_CONV_DENORM_BIGGER]  = vm[V_ZERO];
870 }
871 
872 template <>
TypeValues()873 TypeValues<float>::TypeValues() : TypeValuesBase()
874 {
875     // NOTE: when updating entries in m_valueIdToVariableType make sure to
876     // update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
877     ValueMap &vm     = m_valueIdToVariableType;
878     vm[V_UNUSED]     = 0.0f;
879     vm[V_MINUS_INF]  = -std::numeric_limits<float>::infinity();
880     vm[V_MINUS_ONE]  = -1.0f;
881     vm[V_MINUS_ZERO] = -0.0f;
882     vm[V_ZERO]       = 0.0f;
883     vm[V_HALF]       = 0.5f;
884     vm[V_ONE]        = 1.0f;
885     vm[V_INF]        = std::numeric_limits<float>::infinity();
886     vm[V_DENORM]     = static_cast<float>(1.413e-42); // 0x000003f0
887     vm[V_NAN]        = std::numeric_limits<float>::quiet_NaN();
888 
889     vm[V_PI_DIV_2]         = static_cast<float>(M_PI_2);
890     vm[V_DENORM_TIMES_TWO] = vm[V_DENORM] + vm[V_DENORM];
891     vm[V_DEGREES_DENORM]   = deFloatDegrees(vm[V_DENORM]);
892 
893     float e         = std::numeric_limits<float>::epsilon();
894     vm[V_ADD_ARG_A] = 1.0f + 3 * e;
895     vm[V_ADD_ARG_B] = 1.0f;
896     vm[V_SUB_ARG_A] = vm[V_ADD_ARG_A];
897     vm[V_SUB_ARG_B] = 3.0f + 6 * e;
898     vm[V_MUL_ARG_A] = vm[V_ADD_ARG_A];
899     vm[V_MUL_ARG_B] = 5 * e;
900     vm[V_DOT_ARG_A] = vm[V_ADD_ARG_A];
901     vm[V_DOT_ARG_B] = 5 * e;
902 
903     // Float32 is the source of a narrowing conversionsto Float16.
904     typedef conversionDetail<Float32, Float16> from32;
905     vm[V_CONV_FROM_FP32_TO_FP16_UP_ARG] = from32::hasExcessBits() ? from32::from(Round::UP).asFloat() : vm[V_UNUSED];
906     vm[V_CONV_FROM_FP32_TO_FP16_DOWN_ARG] =
907         from32::hasExcessBits() ? from32::from(Round::DOWN).asFloat() : vm[V_UNUSED];
908     vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG] =
909         from32::hasExcessBits() ? from32::from(Round::TIE_UP).asFloat() : vm[V_UNUSED];
910     vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG] =
911         from32::hasExcessBits() ? from32::from(Round::TIE_DOWN).asFloat() : vm[V_UNUSED];
912     vm[V_CONV_FROM_FP64_TO_FP16_UP_ARG]       = vm[V_UNUSED];
913     vm[V_CONV_FROM_FP64_TO_FP16_DOWN_ARG]     = vm[V_UNUSED];
914     vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG]   = vm[V_UNUSED];
915     vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG] = vm[V_UNUSED];
916     vm[V_CONV_FROM_FP64_TO_FP32_UP_ARG]       = vm[V_UNUSED];
917     vm[V_CONV_FROM_FP64_TO_FP32_DOWN_ARG]     = vm[V_UNUSED];
918     vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG]   = vm[V_UNUSED];
919     vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG] = vm[V_UNUSED];
920 
921     vm[V_CONV_FROM_UINT_TO_FP32_UP_ARG]   = exactByteEquivalent(0x02000003); // 33554435
922     vm[V_CONV_FROM_UINT_TO_FP32_DOWN_ARG] = exactByteEquivalent(0x02000001); // 33554433
923     vm[V_CONV_FROM_UINT_TO_FP32_TIE_ARG]  = exactByteEquivalent(0x02000002); // 33554434
924     vm[V_CONV_FROM_UINT_TO_FP64_UP_ARG]   = vm[V_UNUSED];
925     vm[V_CONV_FROM_UINT_TO_FP64_DOWN_ARG] = vm[V_UNUSED];
926     vm[V_CONV_FROM_UINT_TO_FP64_TIE_ARG]  = vm[V_UNUSED];
927 
928     vm[V_CONV_FROM_INT_TO_FP32_UP_ARG]   = exactByteEquivalent(0xfdfffffd); // -33554435
929     vm[V_CONV_FROM_INT_TO_FP32_DOWN_ARG] = exactByteEquivalent(0xfdffffff); // -33554433
930     vm[V_CONV_FROM_INT_TO_FP32_TIE_ARG]  = exactByteEquivalent(0xfdfffffe); // -33554434
931     vm[V_CONV_FROM_INT_TO_FP64_UP_ARG]   = vm[V_UNUSED];
932     vm[V_CONV_FROM_INT_TO_FP64_DOWN_ARG] = vm[V_UNUSED];
933     vm[V_CONV_FROM_INT_TO_FP64_TIE_ARG]  = vm[V_UNUSED];
934 
935     int prevRound = fegetround();
936     fesetround(FE_TOWARDZERO);
937     vm[V_ADD_RTZ_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
938     vm[V_SUB_RTZ_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
939     vm[V_MUL_RTZ_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
940     vm[V_DOT_RTZ_RESULT] = vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
941 
942     fesetround(FE_TONEAREST);
943     vm[V_ADD_RTE_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
944     vm[V_SUB_RTE_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
945     vm[V_MUL_RTE_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
946     vm[V_DOT_RTE_RESULT] = vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
947     fesetround(prevRound);
948 
949     typedef conversionDetail<Float64, Float32> from64;
950     vm[V_CONV_FROM_FP32_TO_FP16_UP_RTZ_RESULT]       = vm[V_UNUSED];
951     vm[V_CONV_FROM_FP32_TO_FP16_DOWN_RTZ_RESULT]     = vm[V_UNUSED];
952     vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTZ_RESULT]   = vm[V_UNUSED];
953     vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTZ_RESULT] = vm[V_UNUSED];
954     vm[V_CONV_FROM_FP64_TO_FP16_UP_RTZ_RESULT]       = vm[V_UNUSED];
955     vm[V_CONV_FROM_FP64_TO_FP16_DOWN_RTZ_RESULT]     = vm[V_UNUSED];
956     vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTZ_RESULT]   = vm[V_UNUSED];
957     vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTZ_RESULT] = vm[V_UNUSED];
958     vm[V_CONV_FROM_FP64_TO_FP32_UP_RTZ_RESULT] =
959         from64::hasExcessBits() ? from64::resultRTZ(Round::UP).asFloat() : vm[V_UNUSED];
960     vm[V_CONV_FROM_FP64_TO_FP32_DOWN_RTZ_RESULT] =
961         from64::hasExcessBits() ? from64::resultRTZ(Round::DOWN).asFloat() : vm[V_UNUSED];
962     vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTZ_RESULT] =
963         from64::hasExcessBits() ? from64::resultRTZ(Round::TIE_UP).asFloat() : vm[V_UNUSED];
964     vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTZ_RESULT] =
965         from64::hasExcessBits() ? from64::resultRTZ(Round::TIE_DOWN).asFloat() : vm[V_UNUSED];
966 
967     vm[V_CONV_FROM_FP32_TO_FP16_UP_RTE_RESULT]       = vm[V_UNUSED];
968     vm[V_CONV_FROM_FP32_TO_FP16_DOWN_RTE_RESULT]     = vm[V_UNUSED];
969     vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTE_RESULT]   = vm[V_UNUSED];
970     vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTE_RESULT] = vm[V_UNUSED];
971     vm[V_CONV_FROM_FP64_TO_FP16_UP_RTE_RESULT]       = vm[V_UNUSED];
972     vm[V_CONV_FROM_FP64_TO_FP16_DOWN_RTE_RESULT]     = vm[V_UNUSED];
973     vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTE_RESULT]   = vm[V_UNUSED];
974     vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTE_RESULT] = vm[V_UNUSED];
975     vm[V_CONV_FROM_FP64_TO_FP32_UP_RTE_RESULT] =
976         from64::hasExcessBits() ? from64::resultRTE(Round::UP).asFloat() : vm[V_UNUSED];
977     vm[V_CONV_FROM_FP64_TO_FP32_DOWN_RTE_RESULT] =
978         from64::hasExcessBits() ? from64::resultRTE(Round::DOWN).asFloat() : vm[V_UNUSED];
979     vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTE_RESULT] =
980         from64::hasExcessBits() ? from64::resultRTE(Round::TIE_UP).asFloat() : vm[V_UNUSED];
981     vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTE_RESULT] =
982         from64::hasExcessBits() ? from64::resultRTE(Round::TIE_DOWN).asFloat() : vm[V_UNUSED];
983 
984     vm[V_CONV_FROM_UINT32_UP_RTZ_RESULT]   = exactByteEquivalent(0x4c000000); // 33554432.0
985     vm[V_CONV_FROM_UINT32_DOWN_RTZ_RESULT] = exactByteEquivalent(0x4c000000); // 33554432.0
986     vm[V_CONV_FROM_UINT32_TIE_RTZ_RESULT]  = exactByteEquivalent(0x4c000000); // 33554432.0
987     vm[V_CONV_FROM_UINT64_UP_RTZ_RESULT]   = exactByteEquivalent(0x4c000000); // 33554432.0
988     vm[V_CONV_FROM_UINT64_DOWN_RTZ_RESULT] = exactByteEquivalent(0x4c000000); // 33554432.0
989     vm[V_CONV_FROM_UINT64_TIE_RTZ_RESULT]  = exactByteEquivalent(0x4c000000); // 33554432.0
990 
991     vm[V_CONV_FROM_UINT32_UP_RTE_RESULT]   = exactByteEquivalent(0x4c000001); // 33554434.0
992     vm[V_CONV_FROM_UINT32_DOWN_RTE_RESULT] = exactByteEquivalent(0x4c000000); // 33554432.0
993     vm[V_CONV_FROM_UINT32_TIE_RTE_RESULT]  = exactByteEquivalent(0x4c000000); // 33554432.0
994     vm[V_CONV_FROM_UINT64_UP_RTE_RESULT]   = exactByteEquivalent(0x4c000001); // 33554434.0
995     vm[V_CONV_FROM_UINT64_DOWN_RTE_RESULT] = exactByteEquivalent(0x4c000000); // 33554432.0
996     vm[V_CONV_FROM_UINT64_TIE_RTE_RESULT]  = exactByteEquivalent(0x4c000000); // 33554432.0
997 
998     vm[V_CONV_FROM_INT32_UP_RTZ_RESULT]   = exactByteEquivalent(0xcc000000); // -33554432.0
999     vm[V_CONV_FROM_INT32_DOWN_RTZ_RESULT] = exactByteEquivalent(0xcc000000); // -33554432.0
1000     vm[V_CONV_FROM_INT32_TIE_RTZ_RESULT]  = exactByteEquivalent(0xcc000000); // -33554432.0
1001     vm[V_CONV_FROM_INT64_UP_RTZ_RESULT]   = exactByteEquivalent(0xcc000000); // -33554432.0
1002     vm[V_CONV_FROM_INT64_DOWN_RTZ_RESULT] = exactByteEquivalent(0xcc000000); // -33554432.0
1003     vm[V_CONV_FROM_INT64_TIE_RTZ_RESULT]  = exactByteEquivalent(0xcc000000); // -33554432.0
1004 
1005     vm[V_CONV_FROM_INT32_UP_RTE_RESULT]   = exactByteEquivalent(0xcc000001); // -33554434.0
1006     vm[V_CONV_FROM_INT32_DOWN_RTE_RESULT] = exactByteEquivalent(0xcc000000); // -33554432.0
1007     vm[V_CONV_FROM_INT32_TIE_RTE_RESULT]  = exactByteEquivalent(0xcc000000); // -33554432.0
1008     vm[V_CONV_FROM_INT64_UP_RTE_RESULT]   = exactByteEquivalent(0xcc000001); // -33554434.0
1009     vm[V_CONV_FROM_INT64_DOWN_RTE_RESULT] = exactByteEquivalent(0xcc000000); // -33554432.0
1010     vm[V_CONV_FROM_INT64_TIE_RTE_RESULT]  = exactByteEquivalent(0xcc000000); // -33554432.0
1011 
1012     // there is no precision to store fp64 denorm
1013     vm[V_CONV_DENORM_SMALLER] = exactByteEquivalent<uint32_t>(0x387c0000); // fp16 denorm
1014     vm[V_CONV_DENORM_BIGGER]  = vm[V_ZERO];
1015 }
1016 
1017 template <>
TypeValues()1018 TypeValues<double>::TypeValues() : TypeValuesBase()
1019 {
1020     // NOTE: when updating entries in m_valueIdToVariableType make sure to
1021     // update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
1022     ValueMap &vm     = m_valueIdToVariableType;
1023     vm[V_UNUSED]     = 0.0;
1024     vm[V_MINUS_INF]  = -std::numeric_limits<double>::infinity();
1025     vm[V_MINUS_ONE]  = -1.0;
1026     vm[V_MINUS_ZERO] = -0.0;
1027     vm[V_ZERO]       = 0.0;
1028     vm[V_HALF]       = 0.5;
1029     vm[V_ONE]        = 1.0;
1030     vm[V_INF]        = std::numeric_limits<double>::infinity();
1031     vm[V_DENORM]     = 4.98e-321; // 0x00000000000003F0
1032     vm[V_NAN]        = std::numeric_limits<double>::quiet_NaN();
1033 
1034     vm[V_PI_DIV_2]         = M_PI_2;
1035     vm[V_DENORM_TIMES_TWO] = vm[V_DENORM] + vm[V_DENORM];
1036     vm[V_DEGREES_DENORM]   = vm[V_UNUSED];
1037 
1038     double e        = std::numeric_limits<double>::epsilon();
1039     vm[V_ADD_ARG_A] = 1.0 + 3 * e;
1040     vm[V_ADD_ARG_B] = 1.0;
1041     vm[V_SUB_ARG_A] = vm[V_ADD_ARG_A];
1042     vm[V_SUB_ARG_B] = 3.0 + 6 * e;
1043     vm[V_MUL_ARG_A] = vm[V_ADD_ARG_A];
1044     vm[V_MUL_ARG_B] = 5 * e;
1045     vm[V_DOT_ARG_A] = vm[V_ADD_ARG_A];
1046     vm[V_DOT_ARG_B] = 5 * e;
1047 
1048     // Float64 is the source of narrowing conversions to Float32 and Float16.
1049     typedef conversionDetail<Float64, Float16> to16;
1050     typedef conversionDetail<Float64, Float32> to32;
1051     vm[V_CONV_FROM_FP32_TO_FP16_UP_ARG]       = vm[V_UNUSED];
1052     vm[V_CONV_FROM_FP32_TO_FP16_DOWN_ARG]     = vm[V_UNUSED];
1053     vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG]   = vm[V_UNUSED];
1054     vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG] = vm[V_UNUSED];
1055     vm[V_CONV_FROM_FP64_TO_FP16_UP_ARG]       = to16::hasExcessBits() ? to16::from(Round::UP).asDouble() : vm[V_UNUSED];
1056     vm[V_CONV_FROM_FP64_TO_FP16_DOWN_ARG] = to16::hasExcessBits() ? to16::from(Round::DOWN).asDouble() : vm[V_UNUSED];
1057     vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG] =
1058         to16::hasExcessBits() ? to16::from(Round::TIE_UP).asDouble() : vm[V_UNUSED];
1059     vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG] =
1060         to16::hasExcessBits() ? to16::from(Round::TIE_DOWN).asDouble() : vm[V_UNUSED];
1061     vm[V_CONV_FROM_FP64_TO_FP32_UP_ARG]   = to32::hasExcessBits() ? to32::from(Round::UP).asDouble() : vm[V_UNUSED];
1062     vm[V_CONV_FROM_FP64_TO_FP32_DOWN_ARG] = to32::hasExcessBits() ? to32::from(Round::DOWN).asDouble() : vm[V_UNUSED];
1063     vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG] =
1064         to32::hasExcessBits() ? to32::from(Round::TIE_UP).asDouble() : vm[V_UNUSED];
1065     vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG] =
1066         to32::hasExcessBits() ? to32::from(Round::TIE_DOWN).asDouble() : vm[V_UNUSED];
1067 
1068     vm[V_CONV_FROM_UINT_TO_FP32_UP_ARG]   = exactByteEquivalent(static_cast<uint64_t>(0x0000000002000003)); // 33554435
1069     vm[V_CONV_FROM_UINT_TO_FP32_DOWN_ARG] = exactByteEquivalent(static_cast<uint64_t>(0x0000000002000001)); // 33554433
1070     vm[V_CONV_FROM_UINT_TO_FP32_TIE_ARG]  = exactByteEquivalent(static_cast<uint64_t>(0x0000000002000002)); // 33554434
1071     vm[V_CONV_FROM_UINT_TO_FP64_UP_ARG] =
1072         exactByteEquivalent(static_cast<uint64_t>(0x0040000000000003)); // 18014398509481987
1073     vm[V_CONV_FROM_UINT_TO_FP64_DOWN_ARG] =
1074         exactByteEquivalent(static_cast<uint64_t>(0x0040000000000001)); // 18014398509481985
1075     vm[V_CONV_FROM_UINT_TO_FP64_TIE_ARG] =
1076         exactByteEquivalent(static_cast<uint64_t>(0x0040000000000002)); // 18014398509481986
1077 
1078     vm[V_CONV_FROM_INT_TO_FP32_UP_ARG]   = exactByteEquivalent(static_cast<uint64_t>(0xfffffffffdfffffd)); // -33554435
1079     vm[V_CONV_FROM_INT_TO_FP32_DOWN_ARG] = exactByteEquivalent(static_cast<uint64_t>(0xfffffffffdffffff)); // -33554433
1080     vm[V_CONV_FROM_INT_TO_FP32_TIE_ARG]  = exactByteEquivalent(static_cast<uint64_t>(0xfffffffffdfffffe)); // -33554434
1081     vm[V_CONV_FROM_INT_TO_FP64_UP_ARG] =
1082         exactByteEquivalent(static_cast<uint64_t>(0xffbffffffffffffd)); // -18014398509481987
1083     vm[V_CONV_FROM_INT_TO_FP64_DOWN_ARG] =
1084         exactByteEquivalent(static_cast<uint64_t>(0xffbfffffffffffff)); // -18014398509481985
1085     vm[V_CONV_FROM_INT_TO_FP64_TIE_ARG] =
1086         exactByteEquivalent(static_cast<uint64_t>(0xffbffffffffffffe)); // -18014398509481986
1087 
1088     int prevRound = fegetround();
1089     fesetround(FE_TOWARDZERO);
1090     vm[V_ADD_RTZ_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
1091     vm[V_SUB_RTZ_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
1092     vm[V_MUL_RTZ_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
1093     vm[V_DOT_RTZ_RESULT] = vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
1094 
1095     fesetround(FE_TONEAREST);
1096     vm[V_ADD_RTE_RESULT] = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
1097     vm[V_SUB_RTE_RESULT] = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
1098     vm[V_MUL_RTE_RESULT] = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
1099     vm[V_DOT_RTE_RESULT] = vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
1100     fesetround(prevRound);
1101 
1102     // Float64 is not the destination of any narrowing conversions.
1103     vm[V_CONV_FROM_FP32_TO_FP16_UP_RTZ_RESULT]       = vm[V_UNUSED];
1104     vm[V_CONV_FROM_FP32_TO_FP16_DOWN_RTZ_RESULT]     = vm[V_UNUSED];
1105     vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTZ_RESULT]   = vm[V_UNUSED];
1106     vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTZ_RESULT] = vm[V_UNUSED];
1107     vm[V_CONV_FROM_FP64_TO_FP16_UP_RTZ_RESULT]       = vm[V_UNUSED];
1108     vm[V_CONV_FROM_FP64_TO_FP16_DOWN_RTZ_RESULT]     = vm[V_UNUSED];
1109     vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTZ_RESULT]   = vm[V_UNUSED];
1110     vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTZ_RESULT] = vm[V_UNUSED];
1111     vm[V_CONV_FROM_FP64_TO_FP32_UP_RTZ_RESULT]       = vm[V_UNUSED];
1112     vm[V_CONV_FROM_FP64_TO_FP32_DOWN_RTZ_RESULT]     = vm[V_UNUSED];
1113     vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTZ_RESULT]   = vm[V_UNUSED];
1114     vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTZ_RESULT] = vm[V_UNUSED];
1115 
1116     vm[V_CONV_FROM_FP32_TO_FP16_UP_RTE_RESULT]       = vm[V_UNUSED];
1117     vm[V_CONV_FROM_FP32_TO_FP16_DOWN_RTE_RESULT]     = vm[V_UNUSED];
1118     vm[V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTE_RESULT]   = vm[V_UNUSED];
1119     vm[V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTE_RESULT] = vm[V_UNUSED];
1120     vm[V_CONV_FROM_FP64_TO_FP16_UP_RTE_RESULT]       = vm[V_UNUSED];
1121     vm[V_CONV_FROM_FP64_TO_FP16_DOWN_RTE_RESULT]     = vm[V_UNUSED];
1122     vm[V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTE_RESULT]   = vm[V_UNUSED];
1123     vm[V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTE_RESULT] = vm[V_UNUSED];
1124     vm[V_CONV_FROM_FP64_TO_FP32_UP_RTE_RESULT]       = vm[V_UNUSED];
1125     vm[V_CONV_FROM_FP64_TO_FP32_DOWN_RTE_RESULT]     = vm[V_UNUSED];
1126     vm[V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTE_RESULT]   = vm[V_UNUSED];
1127     vm[V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTE_RESULT] = vm[V_UNUSED];
1128 
1129     vm[V_CONV_FROM_UINT32_UP_RTZ_RESULT]   = vm[V_UNUSED];
1130     vm[V_CONV_FROM_UINT32_DOWN_RTZ_RESULT] = vm[V_UNUSED];
1131     vm[V_CONV_FROM_UINT32_TIE_RTZ_RESULT]  = vm[V_UNUSED];
1132     vm[V_CONV_FROM_UINT64_UP_RTZ_RESULT] =
1133         exactByteEquivalent(static_cast<uint64_t>(0x4350000000000000)); // 18014398509481984.0
1134     vm[V_CONV_FROM_UINT64_DOWN_RTZ_RESULT] =
1135         exactByteEquivalent(static_cast<uint64_t>(0x4350000000000000)); // 18014398509481984.0
1136     vm[V_CONV_FROM_UINT64_TIE_RTZ_RESULT] =
1137         exactByteEquivalent(static_cast<uint64_t>(0x4350000000000000)); // 18014398509481984.0
1138 
1139     vm[V_CONV_FROM_UINT32_UP_RTE_RESULT]   = vm[V_UNUSED];
1140     vm[V_CONV_FROM_UINT32_DOWN_RTE_RESULT] = vm[V_UNUSED];
1141     vm[V_CONV_FROM_UINT32_TIE_RTE_RESULT]  = vm[V_UNUSED];
1142     vm[V_CONV_FROM_UINT64_UP_RTE_RESULT] =
1143         exactByteEquivalent(static_cast<uint64_t>(0x4350000000000001)); // 18014398509481988.0
1144     vm[V_CONV_FROM_UINT64_DOWN_RTE_RESULT] =
1145         exactByteEquivalent(static_cast<uint64_t>(0x4350000000000000)); // 18014398509481984.0
1146     vm[V_CONV_FROM_UINT64_TIE_RTE_RESULT] =
1147         exactByteEquivalent(static_cast<uint64_t>(0x4350000000000000)); // 18014398509481984.0
1148 
1149     vm[V_CONV_FROM_INT32_UP_RTZ_RESULT]   = vm[V_UNUSED];
1150     vm[V_CONV_FROM_INT32_DOWN_RTZ_RESULT] = vm[V_UNUSED];
1151     vm[V_CONV_FROM_INT32_TIE_RTZ_RESULT]  = vm[V_UNUSED];
1152     vm[V_CONV_FROM_INT64_UP_RTZ_RESULT] =
1153         exactByteEquivalent(static_cast<uint64_t>(0xc350000000000000)); // -18014398509481984.0
1154     vm[V_CONV_FROM_INT64_DOWN_RTZ_RESULT] =
1155         exactByteEquivalent(static_cast<uint64_t>(0xc350000000000000)); // -18014398509481984.0
1156     vm[V_CONV_FROM_INT64_TIE_RTZ_RESULT] =
1157         exactByteEquivalent(static_cast<uint64_t>(0xc350000000000000)); // -18014398509481984.0
1158 
1159     vm[V_CONV_FROM_INT32_UP_RTE_RESULT]   = vm[V_UNUSED];
1160     vm[V_CONV_FROM_INT32_DOWN_RTE_RESULT] = vm[V_UNUSED];
1161     vm[V_CONV_FROM_INT32_TIE_RTE_RESULT]  = vm[V_UNUSED];
1162     vm[V_CONV_FROM_INT64_UP_RTE_RESULT] =
1163         exactByteEquivalent(static_cast<uint64_t>(0xc350000000000001)); // -18014398509481988.0
1164     vm[V_CONV_FROM_INT64_DOWN_RTE_RESULT] =
1165         exactByteEquivalent(static_cast<uint64_t>(0xc350000000000000)); // -18014398509481984.0
1166     vm[V_CONV_FROM_INT64_TIE_RTE_RESULT] =
1167         exactByteEquivalent(static_cast<uint64_t>(0xc350000000000000)); // -18014398509481984.0
1168 
1169     vm[V_CONV_DENORM_SMALLER] = exactByteEquivalent<uint64_t>(0x3f0f800000000000); // 0x03f0 is fp16 denorm
1170     vm[V_CONV_DENORM_BIGGER]  = exactByteEquivalent<uint64_t>(0x373f800000000000); // 0x000003f0 is fp32 denorm
1171 }
1172 
// Each type (fp16, fp32, fp64, uint16, uint32, uint64, int16, int32, int64)
// has its own set of SPIR-V snippets, extracted into a separate template
// specialization. Those snippets are used to compose the final test shaders.
// With this approach the parameterization is done just once per type and is
// reused by many tests.
1178 class TypeSnippetsBase
1179 {
1180 public:
TypeSnippetsBase(bool floatType,bool signedInteger)1181     TypeSnippetsBase(bool floatType, bool signedInteger) : isFloatType(floatType), isSignedInteger(signedInteger)
1182     {
1183     }
1184 
1185     virtual ~TypeSnippetsBase() = default;
1186 
getValueTypeString() const1187     const char *getValueTypeString() const
1188     {
1189         return isFloatType ? "f" : (isSignedInteger ? "i" : "u");
1190     }
1191 
1192 protected:
1193     void updateSpirvSnippets();
1194 
1195 public: // Type specific data:
1196     // Number of bits consumed by float type
1197     string bitWidth;
1198 
1199     // Minimum positive normal
1200     string epsilon;
1201 
1202     // denormBase is a normal value (found empirically) used to generate denorm value.
1203     // Denorm is generated by substracting epsilon from denormBase.
1204     // denormBase is not a denorm - it is used to create denorm.
1205     // This value is needed when operations are tested with arguments that were
1206     // generated in the code. Generated denorm should be the same as denorm
1207     // used when arguments are passed via input (m_valueIdToVariableType[V_DENORM]).
1208     // This is required as result of some operations depends on actual denorm value
1209     // e.g. OpRadians(0x0001) is 0 but OpRadians(0x03f0) is denorm.
1210     string denormBase;
1211 
1212     string capabilities;
1213     string extensions;
1214     string capabilitiesFp16Without16BitStorage;
1215     string extensionsFp16Without16BitStorage;
1216     string arrayStride;
1217 
1218     bool loadStoreRequiresShaderFloat16;
1219     bool isFloatType;
1220     bool isSignedInteger;
1221 
1222 public: // Type specific spir-v snippets:
1223     // Common annotations
1224     string typeAnnotationsSnippet;
1225 
1226     // Definitions of all types commonly used by operation tests
1227     string typeDefinitionsSnippet;
1228 
1229     // Definitions of all types commonly used by settings tests
1230     string minTypeDefinitionsSnippet;
1231 
1232     // Definitions of all constants commonly used by tests
1233     string constantsDefinitionsSnippet;
1234 
1235     // Map that stores instructions that generate arguments of specified value.
1236     // Every test that uses generated inputod will select up to two items from this map
1237     typedef map<ValueId, string> SnippetMap;
1238     SnippetMap valueIdToSnippetArgMap;
1239 
1240     // Spir-v snippets that read argument from SSBO
1241     string argumentsFromInputSnippet;
1242     string multiArgumentsFromInputSnippet;
1243 
1244     // SSBO with stage input/output definitions
1245     string inputAnnotationsSnippet;
1246     string inputDefinitionsSnippet;
1247     string outputAnnotationsSnippet;
1248     string multiOutputAnnotationsSnippet;
1249     string outputDefinitionsSnippet;
1250     string multiOutputDefinitionsSnippet;
1251 
1252     // Varying is required to pass result from vertex stage to fragment stage,
1253     // one of requirements was to not use SSBO writes in vertex stage so we
1254     // need to do that in fragment stage; we also cant pass operation result
1255     // directly because of interpolation, to avoid it we do a bitcast to uint
1256     string varyingsTypesSnippet;
1257     string inputVaryingsSnippet;
1258     string outputVaryingsSnippet;
1259     string storeVertexResultSnippet;
1260     string loadVertexResultSnippet;
1261 
1262     string storeResultsSnippet;
1263     string multiStoreResultsSnippet;
1264 
1265     string argumentsFromInputFp16Snippet;
1266     string storeResultsFp16Snippet;
1267     string multiArgumentsFromInputFp16Snippet;
1268     string multiOutputAnnotationsFp16Snippet;
1269     string multiStoreResultsFp16Snippet;
1270     string multiOutputDefinitionsFp16Snippet;
1271     string inputDefinitionsFp16Snippet;
1272     string outputDefinitionsFp16Snippet;
1273     string typeAnnotationsFp16Snippet;
1274     string typeDefinitionsFp16Snippet;
1275 };
1276 
updateSpirvSnippets()1277 void TypeSnippetsBase::updateSpirvSnippets()
1278 {
1279     // annotations to types that are commonly used by tests
1280     const string typeAnnotationsTemplate = "OpDecorate %type_valueType_arr_1 ArrayStride " + arrayStride +
1281                                            "\n"
1282                                            "OpDecorate %type_valueType_arr_2 ArrayStride " +
1283                                            arrayStride + "\n";
1284 
1285     // definition off all types that are commonly used by tests
1286     const string floatTypeDefinition = "%type_valueType             = OpTypeFloat " + bitWidth +
1287                                        "\n"
1288                                        "%type_valueType_uptr        = OpTypePointer Uniform %type_valueType\n"
1289                                        "%type_valueType_fptr        = OpTypePointer Function %type_valueType\n"
1290                                        "%type_valueType_vec2        = OpTypeVector %type_valueType 2\n"
1291                                        "%type_valueType_vec3        = OpTypeVector %type_valueType 3\n"
1292                                        "%type_valueType_vec4        = OpTypeVector %type_valueType 4\n"
1293                                        "%type_valueType_vec4_iptr   = OpTypePointer Input %type_valueType_vec4\n"
1294                                        "%type_valueType_vec4_optr   = OpTypePointer Output %type_valueType_vec4\n"
1295                                        "%type_valueType_mat2x2      = OpTypeMatrix %type_valueType_vec2 2\n"
1296                                        "%type_valueType_arr_1       = OpTypeArray %type_valueType %c_i32_1\n"
1297                                        "%type_valueType_arr_2       = OpTypeArray %type_valueType %c_i32_2\n";
1298     const string uintTypeDefinition =
1299         (bitWidth == "32" ? "" : // 32 bit values are already defined
1300              "%type_valueType             = OpTypeInt " + bitWidth + " " + (isSignedInteger ? "1" : "0") + "\n") +
1301         "%type_valueType_uptr        = OpTypePointer Uniform %type_valueType\n" +
1302         (bitWidth == "32" ? "" : // 32 bit values are already defined
1303                             "%type_valueType_fptr        = OpTypePointer Function %type_valueType\n"
1304                             "%type_valueType_vec2        = OpTypeVector %type_valueType 2\n"
1305                             "%type_valueType_vec3        = OpTypeVector %type_valueType 3\n") +
1306         "%type_valueType_vec4        = OpTypeVector %type_valueType 4\n"
1307         "%type_valueType_vec4_iptr   = OpTypePointer Input %type_valueType_vec4\n"
1308         "%type_valueType_vec4_optr   = OpTypePointer Output %type_valueType_vec4\n"
1309         "%type_valueType_arr_1       = OpTypeArray %type_valueType %c_i32_1\n"
1310         "%type_valueType_arr_2       = OpTypeArray %type_valueType %c_i32_2\n";
1311 
1312     const string typeDefinitionsTemplate = isFloatType ? floatTypeDefinition : uintTypeDefinition;
1313 
1314     // minimal type definition set that is used by settings tests
1315     const string minTypeDefinitionsTemplate = "%type_valueType             = OpTypeFloat " + bitWidth +
1316                                               "\n"
1317                                               "%type_valueType_uptr        = OpTypePointer Uniform %type_valueType\n"
1318                                               "%type_valueType_arr_2       = OpTypeArray %type_valueType %c_i32_2\n";
1319 
1320     // definition off all constants that are used by tests
1321     const string constantsDefinitionsTemplate = "%c_valueType_n1             = OpConstant %type_valueType -1\n"
1322                                                 "%c_valueType_0              = OpConstant %type_valueType 0.0\n"
1323                                                 "%c_valueType_0_5            = OpConstant %type_valueType 0.5\n"
1324                                                 "%c_valueType_1              = OpConstant %type_valueType 1\n"
1325                                                 "%c_valueType_2              = OpConstant %type_valueType 2\n"
1326                                                 "%c_valueType_3              = OpConstant %type_valueType 3\n"
1327                                                 "%c_valueType_4              = OpConstant %type_valueType 4\n"
1328                                                 "%c_valueType_5              = OpConstant %type_valueType 5\n"
1329                                                 "%c_valueType_6              = OpConstant %type_valueType 6\n"
1330                                                 "%c_valueType_eps            = OpConstant %type_valueType " +
1331                                                 epsilon +
1332                                                 "\n"
1333                                                 "%c_valueType_denorm_base    = OpConstant %type_valueType " +
1334                                                 denormBase + "\n";
1335 
    // when arguments are read from SSBO this snippet is placed in the main function
1337     const string argumentsFromInputTemplate =
1338         "%arg1loc                = OpAccessChain %type_valueType_uptr %ssbo_in %c_i32_0 %c_i32_0\n"
1339         "%arg1                   = OpLoad %type_valueType %arg1loc\n"
1340         "%arg2loc                = OpAccessChain %type_valueType_uptr %ssbo_in %c_i32_0 %c_i32_1\n"
1341         "%arg2                   = OpLoad %type_valueType %arg2loc\n";
1342 
1343     const string multiArgumentsFromInputTemplate =
1344         "%arg1_valueType_loc         = OpAccessChain %type_valueType_uptr %ssbo_in %c_i32_${attr} %c_i32_0\n"
1345         "%arg2_valueType_loc         = OpAccessChain %type_valueType_uptr %ssbo_in %c_i32_${attr} %c_i32_1\n"
1346         "%arg1_valueType             = OpLoad %type_valueType %arg1_valueType_loc\n"
1347         "%arg2_valueType             = OpLoad %type_valueType %arg2_valueType_loc\n";
1348 
1349     // when tested shader stage reads from SSBO it has to have this snippet
1350     inputAnnotationsSnippet = "OpMemberDecorate %SSBO_in 0 Offset 0\n"
1351                               "OpDecorate %SSBO_in BufferBlock\n"
1352                               "OpDecorate %ssbo_in DescriptorSet 0\n"
1353                               "OpDecorate %ssbo_in Binding 0\n"
1354                               "OpDecorate %ssbo_in NonWritable\n";
1355 
1356     const string inputDefinitionsTemplate = "%SSBO_in              = OpTypeStruct %type_valueType_arr_2\n"
1357                                             "%up_SSBO_in           = OpTypePointer Uniform %SSBO_in\n"
1358                                             "%ssbo_in              = OpVariable %up_SSBO_in Uniform\n";
1359 
1360     outputAnnotationsSnippet = "OpMemberDecorate %SSBO_out 0 Offset 0\n"
1361                                "OpDecorate %SSBO_out BufferBlock\n"
1362                                "OpDecorate %ssbo_out DescriptorSet 0\n"
1363                                "OpDecorate %ssbo_out Binding 1\n";
1364 
1365     const string multiOutputAnnotationsTemplate = "OpMemberDecorate %SSBO_valueType_out 0 Offset 0\n"
1366                                                   "OpDecorate %type_valueType_arr_2 ArrayStride " +
1367                                                   arrayStride +
1368                                                   "\n"
1369                                                   "OpDecorate %SSBO_valueType_out BufferBlock\n"
1370                                                   "OpDecorate %ssbo_valueType_out DescriptorSet 0\n";
1371 
1372     const string outputDefinitionsTemplate = "%SSBO_out             = OpTypeStruct %type_valueType_arr_1\n"
1373                                              "%up_SSBO_out          = OpTypePointer Uniform %SSBO_out\n"
1374                                              "%ssbo_out             = OpVariable %up_SSBO_out Uniform\n";
1375 
1376     const string multiOutputDefinitionsTemplate =
1377         "%SSBO_valueType_out         = OpTypeStruct %type_valueType\n"
1378         "%up_SSBO_valueType_out      = OpTypePointer Uniform %SSBO_valueType_out\n"
1379         "%ssbo_valueType_out         = OpVariable %up_SSBO_valueType_out Uniform\n";
1380 
1381     // this snippet is used by compute and fragment stage but not by vertex stage
1382     const string storeResultsTemplate =
1383         "%outloc               = OpAccessChain %type_valueType_uptr %ssbo_out %c_i32_0 %c_i32_0\n"
1384         "OpStore %outloc %result\n";
1385 
1386     const string multiStoreResultsTemplate =
1387         "%outloc" + bitWidth +
1388         "             = OpAccessChain %type_valueType_uptr %ssbo_valueType_out %c_i32_0\n"
1389         "                        OpStore %outloc" +
1390         bitWidth + " %result" + bitWidth + "\n";
1391 
1392     const string typeToken = "_valueType";
1393     const string typeName  = string("_") + getValueTypeString() + bitWidth;
1394 
1395     typeAnnotationsSnippet         = replace(typeAnnotationsTemplate, typeToken, typeName);
1396     typeDefinitionsSnippet         = replace(typeDefinitionsTemplate, typeToken, typeName);
1397     minTypeDefinitionsSnippet      = replace(minTypeDefinitionsTemplate, typeToken, typeName);
1398     constantsDefinitionsSnippet    = isFloatType ? replace(constantsDefinitionsTemplate, typeToken, typeName) :
1399                                                    ""; // Not needed for int conversion tests
1400     argumentsFromInputSnippet      = replace(argumentsFromInputTemplate, typeToken, typeName);
1401     multiArgumentsFromInputSnippet = replace(multiArgumentsFromInputTemplate, typeToken, typeName);
1402     inputDefinitionsSnippet        = replace(inputDefinitionsTemplate, typeToken, typeName);
1403     multiOutputAnnotationsSnippet  = replace(multiOutputAnnotationsTemplate, typeToken, typeName);
1404     outputDefinitionsSnippet       = replace(outputDefinitionsTemplate, typeToken, typeName);
1405     multiOutputDefinitionsSnippet  = replace(multiOutputDefinitionsTemplate, typeToken, typeName);
1406     storeResultsSnippet            = replace(storeResultsTemplate, typeToken, typeName);
1407     multiStoreResultsSnippet       = replace(multiStoreResultsTemplate, typeToken, typeName);
1408 
1409     argumentsFromInputFp16Snippet      = "";
1410     storeResultsFp16Snippet            = "";
1411     multiArgumentsFromInputFp16Snippet = "";
1412     multiOutputAnnotationsFp16Snippet  = "";
1413     multiStoreResultsFp16Snippet       = "";
1414     multiOutputDefinitionsFp16Snippet  = "";
1415     inputDefinitionsFp16Snippet        = "";
1416     typeAnnotationsFp16Snippet         = "";
1417     outputDefinitionsFp16Snippet       = "";
1418     typeDefinitionsFp16Snippet         = "";
1419 
1420     if (bitWidth.compare("16") == 0)
1421     {
1422         typeDefinitionsFp16Snippet = "%type_u32_uptr       = OpTypePointer Uniform %type_u32\n"
1423                                      "%type_u32_arr_1      = OpTypeArray %type_u32 %c_i32_1\n";
1424 
1425         typeAnnotationsFp16Snippet  = "OpDecorate %type_u32_arr_1 ArrayStride 4\n";
1426         const string inputToken     = "_f16_arr_2";
1427         const string inputName      = "_u32_arr_1";
1428         inputDefinitionsFp16Snippet = replace(inputDefinitionsSnippet, inputToken, inputName);
1429 
1430         argumentsFromInputFp16Snippet = "%argloc            = OpAccessChain %type_u32_uptr %ssbo_in %c_i32_0 %c_i32_0\n"
1431                                         "%inval             = OpLoad %type_u32 %argloc\n"
1432                                         "%arg               = OpBitcast %type_f16_vec2 %inval\n"
1433                                         "%arg1              = OpCompositeExtract %type_f16 %arg 0\n"
1434                                         "%arg2              = OpCompositeExtract %type_f16 %arg 1\n";
1435 
1436         const string outputToken     = "_f16_arr_1";
1437         const string outputName      = "_u32_arr_1";
1438         outputDefinitionsFp16Snippet = replace(outputDefinitionsSnippet, outputToken, outputName);
1439 
1440         storeResultsFp16Snippet = "%result_f16_vec2   = OpCompositeConstruct %type_f16_vec2 %result %c_f16_0\n"
1441                                   "%result_u32 = OpBitcast %type_u32 %result_f16_vec2\n"
1442                                   "%outloc            = OpAccessChain %type_u32_uptr %ssbo_out %c_i32_0 %c_i32_0\n"
1443                                   "OpStore %outloc %result_u32\n";
1444 
1445         multiArgumentsFromInputFp16Snippet =
1446             "%arg_u32_loc         = OpAccessChain %type_u32_uptr %ssbo_in %c_i32_${attr} %c_i32_0\n"
1447             "%arg_u32             = OpLoad %type_u32 %arg_u32_loc\n"
1448             "%arg_f16_vec2        = OpBitcast %type_f16_vec2 %arg_u32\n"
1449             "%arg1_f16            = OpCompositeExtract %type_f16 %arg_f16_vec2 0\n"
1450             "%arg2_f16            = OpCompositeExtract %type_f16 %arg_f16_vec2 1\n";
1451 
1452         multiOutputAnnotationsFp16Snippet = "OpMemberDecorate %SSBO_u32_out 0 Offset 0\n"
1453                                             "OpDecorate %type_u32_arr_1 ArrayStride 4\n"
1454                                             "OpDecorate %SSBO_u32_out BufferBlock\n"
1455                                             "OpDecorate %ssbo_u32_out DescriptorSet 0\n";
1456 
1457         multiStoreResultsFp16Snippet = "%outloc_u32            = OpAccessChain %type_u32_uptr %ssbo_u32_out %c_i32_0\n"
1458                                        "%result16_vec2 = OpCompositeConstruct %type_f16_vec2 %result16 %c_f16_0\n"
1459                                        "%result_u32            = OpBitcast %type_u32 %result16_vec2\n"
1460                                        "                        OpStore %outloc_u32 %result_u32\n";
1461 
1462         multiOutputDefinitionsFp16Snippet = "%c_f16_0              = OpConstant %type_f16 0.0\n"
1463                                             "%SSBO_u32_out         = OpTypeStruct %type_u32\n"
1464                                             "%up_SSBO_u32_out      = OpTypePointer Uniform %SSBO_u32_out\n"
1465                                             "%ssbo_u32_out         = OpVariable %up_SSBO_u32_out Uniform\n";
1466     }
1467 
    // NOTE: only values used as _generated_ arguments in test operations
    // need to be in this map; arguments that are only used by tests
    // that grab arguments from input do not need to be in this map
1471     // NOTE: when updating entries in valueIdToSnippetArgMap make
1472     // sure to update also m_valueIdToVariableType for all valueType width
1473     SnippetMap &sm   = valueIdToSnippetArgMap;
1474     sm[V_UNUSED]     = "OpFSub %type_valueType %c_valueType_0 %c_valueType_0\n";
1475     sm[V_MINUS_INF]  = "OpFDiv %type_valueType %c_valueType_n1 %c_valueType_0\n";
1476     sm[V_MINUS_ONE]  = "OpFAdd %type_valueType %c_valueType_n1 %c_valueType_0\n";
1477     sm[V_MINUS_ZERO] = "OpFMul %type_valueType %c_valueType_n1 %c_valueType_0\n";
1478     sm[V_ZERO]       = "OpFMul %type_valueType %c_valueType_0 %c_valueType_0\n";
1479     sm[V_HALF]       = "OpFAdd %type_valueType %c_valueType_0_5 %c_valueType_0\n";
1480     sm[V_ONE]        = "OpFAdd %type_valueType %c_valueType_1 %c_valueType_0\n";
1481     sm[V_INF]        = "OpFDiv %type_valueType %c_valueType_1 %c_valueType_0\n"; // x / 0 == Inf
1482     sm[V_DENORM]     = "OpFSub %type_valueType %c_valueType_denorm_base %c_valueType_eps\n";
1483     sm[V_NAN]        = "OpFDiv %type_valueType %c_valueType_0 %c_valueType_0\n"; // 0 / 0 == Nan
1484 
1485     map<ValueId, string>::iterator it;
1486     for (it = sm.begin(); it != sm.end(); it++)
1487         sm[it->first] = replace(it->second, typeToken, typeName);
1488 }
1489 
1490 typedef de::SharedPtr<TypeSnippetsBase> TypeSnippetsSP;
1491 
1492 template <typename FLOAT_TYPE>
1493 class TypeSnippets : public TypeSnippetsBase
1494 {
1495 public:
1496     TypeSnippets(bool floatType = true, bool signedInteger = false);
1497 };
1498 
1499 template <>
TypeSnippets(bool floatType,bool signedInteger)1500 TypeSnippets<deFloat16>::TypeSnippets(bool floatType, bool signedInteger) : TypeSnippetsBase(floatType, signedInteger)
1501 {
1502     bitWidth = "16";
1503     epsilon  = "6.104e-5"; // 2^-14 = 0x0400
1504 
1505     // 1.2113e-4 is 0x07f0 which after substracting epsilon will give 0x03f0 (same as vm[V_DENORM])
1506     // NOTE: constants in SPIR-V cant be specified as exact fp16 - there is conversion from double to fp16
1507     denormBase = "1.2113e-4";
1508 
1509     capabilities = "OpCapability StorageUniform16\n";
1510     extensions   = "OpExtension \"SPV_KHR_16bit_storage\"\n";
1511 
1512     capabilitiesFp16Without16BitStorage = "OpCapability Float16\n";
1513     extensionsFp16Without16BitStorage   = "";
1514 
1515     arrayStride = "2";
1516 
1517     varyingsTypesSnippet     = "%type_u32_iptr        = OpTypePointer Input %type_u32\n"
1518                                "%type_u32_optr        = OpTypePointer Output %type_u32\n";
1519     inputVaryingsSnippet     = "%BP_vertex_result    = OpVariable %type_u32_iptr Input\n";
1520     outputVaryingsSnippet    = "%BP_vertex_result    = OpVariable %type_u32_optr Output\n";
1521     storeVertexResultSnippet = "%tmp_vec2            = OpCompositeConstruct %type_f16_vec2 %result %c_f16_0\n"
1522                                "%packed_result       = OpBitcast %type_u32 %tmp_vec2\n"
1523                                "OpStore %BP_vertex_result %packed_result\n";
1524     loadVertexResultSnippet  = "%packed_result       = OpLoad %type_u32 %BP_vertex_result\n"
1525                                "%tmp_vec2            = OpBitcast %type_f16_vec2 %packed_result\n"
1526                                "%result              = OpCompositeExtract %type_f16 %tmp_vec2 0\n";
1527 
1528     loadStoreRequiresShaderFloat16 = true;
1529 
1530     updateSpirvSnippets();
1531 }
1532 
1533 template <>
TypeSnippets(bool floatType,bool signedInteger)1534 TypeSnippets<float>::TypeSnippets(bool floatType, bool signedInteger) : TypeSnippetsBase(floatType, signedInteger)
1535 {
1536     bitWidth                            = "32";
1537     epsilon                             = "1.175494351e-38";
1538     denormBase                          = "1.1756356e-38";
1539     capabilities                        = "";
1540     extensions                          = "";
1541     capabilitiesFp16Without16BitStorage = "";
1542     extensionsFp16Without16BitStorage   = "";
1543     arrayStride                         = "4";
1544 
1545     varyingsTypesSnippet     = "%type_u32_iptr        = OpTypePointer Input %type_u32\n"
1546                                "%type_u32_optr        = OpTypePointer Output %type_u32\n";
1547     inputVaryingsSnippet     = "%BP_vertex_result    = OpVariable %type_u32_iptr Input\n";
1548     outputVaryingsSnippet    = "%BP_vertex_result    = OpVariable %type_u32_optr Output\n";
1549     storeVertexResultSnippet = "%packed_result       = OpBitcast %type_u32 %result\n"
1550                                "OpStore %BP_vertex_result %packed_result\n";
1551     loadVertexResultSnippet  = "%packed_result       = OpLoad %type_u32 %BP_vertex_result\n"
1552                                "%result              = OpBitcast %type_f32 %packed_result\n";
1553 
1554     loadStoreRequiresShaderFloat16 = false;
1555 
1556     updateSpirvSnippets();
1557 }
1558 
1559 template <>
TypeSnippets(bool floatType,bool signedInteger)1560 TypeSnippets<double>::TypeSnippets(bool floatType, bool signedInteger) : TypeSnippetsBase(floatType, signedInteger)
1561 {
1562     const string float64Capability      = "OpCapability Float64\n";
1563     const string int64Capability        = "OpCapability Int64\n";
1564     bitWidth                            = "64";
1565     epsilon                             = "2.2250738585072014e-308"; // 0x0010000000000000
1566     denormBase                          = "2.2250738585076994e-308"; // 0x00100000000003F0
1567     capabilities                        = floatType ? float64Capability : int64Capability;
1568     extensions                          = "";
1569     capabilitiesFp16Without16BitStorage = "";
1570     extensionsFp16Without16BitStorage   = "";
1571     arrayStride                         = "8";
1572 
1573     varyingsTypesSnippet     = "%type_u32_vec2_iptr   = OpTypePointer Input %type_u32_vec2\n"
1574                                "%type_u32_vec2_optr   = OpTypePointer Output %type_u32_vec2\n";
1575     inputVaryingsSnippet     = "%BP_vertex_result     = OpVariable %type_u32_vec2_iptr Input\n";
1576     outputVaryingsSnippet    = "%BP_vertex_result     = OpVariable %type_u32_vec2_optr Output\n";
1577     storeVertexResultSnippet = "%packed_result        = OpBitcast %type_u32_vec2 %result\n"
1578                                "OpStore %BP_vertex_result %packed_result\n";
1579     loadVertexResultSnippet  = "%packed_result        = OpLoad %type_u32_vec2 %BP_vertex_result\n"
1580                                "%result               = OpBitcast %type_f64 %packed_result\n";
1581 
1582     loadStoreRequiresShaderFloat16 = false;
1583 
1584     updateSpirvSnippets();
1585 }
1586 
1587 class TypeTestResultsBase
1588 {
1589 public:
~TypeTestResultsBase()1590     virtual ~TypeTestResultsBase()
1591     {
1592     }
1593     VariableType variableType() const;
1594 
1595 protected:
1596     VariableType m_variableType;
1597 
1598 public:
1599     // Vectors containing test data for float controls
1600     vector<BinaryCase> binaryOpFTZ;
1601     vector<UnaryCase> unaryOpFTZ;
1602     vector<BinaryCase> binaryOpDenormPreserve;
1603     vector<UnaryCase> unaryOpDenormPreserve;
1604 };
1605 
variableType() const1606 VariableType TypeTestResultsBase::variableType() const
1607 {
1608     return m_variableType;
1609 }
1610 
1611 typedef de::SharedPtr<TypeTestResultsBase> TypeTestResultsSP;
1612 
1613 template <typename FLOAT_TYPE>
1614 class TypeTestResults : public TypeTestResultsBase
1615 {
1616 public:
1617     TypeTestResults();
1618 };
1619 
1620 template <>
TypeTestResults()1621 TypeTestResults<deFloat16>::TypeTestResults()
1622 {
1623     m_variableType = FP16;
1624 
1625     // note: there are many FTZ test cases that can produce diferent result depending
1626     // on input denorm being flushed or not; because of that FTZ tests can be limited
1627     // to those that return denorm as those are the ones affected by tested extension
1628     const BinaryCase binaryOpFTZArr[] = {
1629         //operation            den op one        den op den        den op inf        den op nan
1630         {OID_ADD, V_ONE, V_ZERO_OR_DENORM_TIMES_TWO, V_INF, V_UNUSED},
1631         {OID_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_UNUSED},
1632         {OID_MUL, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1633         {OID_DIV, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED},
1634         {OID_REM, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED},
1635         {OID_MOD, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED},
1636         {OID_VEC_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1637         {OID_VEC_MUL_M, V_ZERO_OR_DENORM_TIMES_TWO, V_ZERO, V_UNUSED, V_UNUSED},
1638         {OID_MAT_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1639         {OID_MAT_MUL_V, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1640         {OID_MAT_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1641         {OID_OUT_PROD, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1642         {OID_DOT, V_ZERO_OR_DENORM_TIMES_TWO, V_ZERO, V_UNUSED, V_UNUSED},
1643         {OID_ATAN2, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED},
1644         {OID_POW, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED},
1645         {OID_MIX, V_HALF, V_ZERO, V_INF, V_UNUSED},
1646         {OID_MIN, V_ZERO, V_ZERO, V_ZERO, V_UNUSED},
1647         {OID_MAX, V_ONE, V_ZERO, V_INF, V_UNUSED},
1648         {OID_CLAMP, V_ONE, V_ZERO, V_INF, V_UNUSED},
1649         {OID_STEP, V_ONE, V_ONE, V_ONE, V_UNUSED},
1650         {OID_SSTEP, V_HALF, V_ONE, V_ZERO, V_UNUSED},
1651         {OID_FMA, V_HALF, V_HALF, V_UNUSED, V_UNUSED},
1652         {OID_FACE_FWD, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE},
1653         {OID_NMIN, V_ZERO, V_ZERO, V_ZERO, V_ZERO},
1654         {OID_NMAX, V_ONE, V_ZERO, V_INF, V_ZERO},
1655         {OID_NCLAMP, V_ONE, V_ZERO, V_INF, V_ZERO},
1656         {OID_DIST, V_ONE, V_ZERO, V_INF, V_UNUSED},
1657         {OID_CROSS, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1658     };
1659 
1660     const UnaryCase unaryOpFTZArr[] = {
1661         //operation            op den
1662         {OID_NEGATE, V_MINUS_ZERO},
1663         {OID_ROUND, V_ZERO},
1664         {OID_ROUND_EV, V_ZERO},
1665         {OID_TRUNC, V_ZERO},
1666         {OID_ABS, V_ZERO},
1667         {OID_FLOOR, V_ZERO},
1668         {OID_CEIL, V_ZERO_OR_ONE},
1669         {OID_FRACT, V_ZERO},
1670         {OID_RADIANS, V_ZERO},
1671         {OID_DEGREES, V_ZERO},
1672         {OID_SIN, V_ZERO},
1673         {OID_COS, V_TRIG_ONE},
1674         {OID_TAN, V_ZERO},
1675         {OID_ASIN, V_ZERO},
1676         {OID_ACOS, V_PI_DIV_2},
1677         {OID_ATAN, V_ZERO},
1678         {OID_SINH, V_ZERO},
1679         {OID_COSH, V_ONE},
1680         {OID_TANH, V_ZERO},
1681         {OID_ASINH, V_ZERO},
1682         {OID_ACOSH, V_UNUSED},
1683         {OID_ATANH, V_ZERO},
1684         {OID_EXP, V_ONE},
1685         {OID_LOG, V_MINUS_INF_OR_LOG_DENORM},
1686         {OID_EXP2, V_ONE},
1687         {OID_LOG2, V_MINUS_INF_OR_LOG2_DENORM},
1688         {OID_SQRT, V_ZERO_OR_SQRT_DENORM},
1689         {OID_INV_SQRT, V_INF_OR_INV_SQRT_DENORM},
1690         {OID_MAT_DET, V_ZERO},
1691         {OID_MAT_INV, V_ZERO_OR_MINUS_ZERO},
1692         {OID_MODF, V_ZERO},
1693         {OID_MODF_ST, V_ZERO},
1694         {OID_NORMALIZE, V_ZERO},
1695         {OID_REFLECT, V_ZERO},
1696         {OID_REFRACT, V_ZERO},
1697         {OID_LENGTH, V_ZERO},
1698     };
1699 
1700     const BinaryCase binaryOpDenormPreserveArr[] = {
1701         //operation            den op one                den op den                den op inf        den op nan
1702         {OID_PHI, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1703         {OID_SELECT, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1704         {OID_ADD, V_ONE, V_DENORM_TIMES_TWO, V_INF, V_NAN},
1705         {OID_SUB, V_MINUS_ONE_OR_CLOSE, V_ZERO, V_MINUS_INF, V_NAN},
1706         {OID_MUL, V_DENORM, V_ZERO, V_INF, V_NAN},
1707         {OID_VEC_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN},
1708         {OID_VEC_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1709         {OID_MAT_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN},
1710         {OID_MAT_MUL_V, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1711         {OID_MAT_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1712         {OID_OUT_PROD, V_DENORM, V_ZERO, V_INF, V_NAN},
1713         {OID_DOT, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1714         {OID_MIX, V_HALF, V_DENORM, V_INF, V_NAN},
1715         {OID_FMA, V_HALF, V_HALF, V_INF, V_NAN},
1716         {OID_MIN, V_DENORM, V_DENORM, V_DENORM, V_UNUSED},
1717         {OID_MAX, V_ONE, V_DENORM, V_INF, V_UNUSED},
1718         {OID_CLAMP, V_ONE, V_DENORM, V_INF, V_UNUSED},
1719         {OID_NMIN, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1720         {OID_NMAX, V_ONE, V_DENORM, V_INF, V_DENORM},
1721         {OID_NCLAMP, V_ONE, V_DENORM, V_INF, V_DENORM},
1722     };
1723 
1724     const UnaryCase unaryOpDenormPreserveArr[] = {
1725         //operation                op den
1726         {OID_RETURN_VAL, V_DENORM},
1727         {OID_D_EXTRACT, V_DENORM},
1728         {OID_D_INSERT, V_DENORM},
1729         {OID_SHUFFLE, V_DENORM},
1730         {OID_COMPOSITE, V_DENORM},
1731         {OID_COMPOSITE_INS, V_DENORM},
1732         {OID_COPY, V_DENORM},
1733         {OID_TRANSPOSE, V_DENORM},
1734         {OID_NEGATE, V_DENORM},
1735         {OID_ABS, V_DENORM},
1736         {OID_SIGN, V_ONE},
1737         {OID_RADIANS, V_DENORM},
1738         {OID_DEGREES, V_DEGREES_DENORM},
1739     };
1740 
1741     binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr, binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1742     unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr, unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1743     binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1744                                   binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1745     unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1746                                  unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1747 }
1748 
1749 template <>
TypeTestResults()1750 TypeTestResults<float>::TypeTestResults()
1751 {
1752     m_variableType = FP32;
1753 
1754     const BinaryCase binaryOpFTZArr[] = {
1755         //operation            den op one        den op den        den op inf        den op nan
1756         {OID_ADD, V_ONE, V_ZERO, V_INF, V_UNUSED},
1757         {OID_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_UNUSED},
1758         {OID_MUL, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1759         {OID_DIV, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED},
1760         {OID_REM, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED},
1761         {OID_MOD, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED},
1762         {OID_VEC_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1763         {OID_VEC_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1764         {OID_MAT_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1765         {OID_MAT_MUL_V, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1766         {OID_MAT_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1767         {OID_OUT_PROD, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1768         {OID_DOT, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1769         {OID_ATAN2, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED},
1770         {OID_POW, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED},
1771         {OID_MIX, V_HALF, V_ZERO, V_INF, V_UNUSED},
1772         {OID_MIN, V_ZERO, V_ZERO, V_ZERO, V_UNUSED},
1773         {OID_MAX, V_ONE, V_ZERO, V_INF, V_UNUSED},
1774         {OID_CLAMP, V_ONE, V_ZERO, V_INF, V_UNUSED},
1775         {OID_STEP, V_ONE, V_ONE, V_ONE, V_UNUSED},
1776         {OID_SSTEP, V_HALF, V_ONE, V_ZERO, V_UNUSED},
1777         {OID_FMA, V_HALF, V_HALF, V_UNUSED, V_UNUSED},
1778         {OID_FACE_FWD, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE},
1779         {OID_NMIN, V_ZERO, V_ZERO, V_ZERO, V_ZERO},
1780         {OID_NMAX, V_ONE, V_ZERO, V_INF, V_ZERO},
1781         {OID_NCLAMP, V_ONE, V_ZERO, V_INF, V_ZERO},
1782         {OID_DIST, V_ONE, V_ZERO, V_INF, V_UNUSED},
1783         {OID_CROSS, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1784     };
1785 
1786     const UnaryCase unaryOpFTZArr[] = {
1787         //operation            op den
1788         {OID_NEGATE, V_MINUS_ZERO},
1789         {OID_ROUND, V_ZERO},
1790         {OID_ROUND_EV, V_ZERO},
1791         {OID_TRUNC, V_ZERO},
1792         {OID_ABS, V_ZERO},
1793         {OID_FLOOR, V_ZERO},
1794         {OID_CEIL, V_ZERO_OR_ONE},
1795         {OID_FRACT, V_ZERO},
1796         {OID_RADIANS, V_ZERO},
1797         {OID_DEGREES, V_ZERO},
1798         {OID_SIN, V_ZERO},
1799         {OID_COS, V_TRIG_ONE},
1800         {OID_TAN, V_ZERO},
1801         {OID_ASIN, V_ZERO},
1802         {OID_ACOS, V_PI_DIV_2},
1803         {OID_ATAN, V_ZERO},
1804         {OID_SINH, V_ZERO},
1805         {OID_COSH, V_ONE},
1806         {OID_TANH, V_ZERO},
1807         {OID_ASINH, V_ZERO},
1808         {OID_ACOSH, V_UNUSED},
1809         {OID_ATANH, V_ZERO},
1810         {OID_EXP, V_ONE},
1811         {OID_LOG, V_MINUS_INF_OR_LOG_DENORM},
1812         {OID_EXP2, V_ONE},
1813         {OID_LOG2, V_MINUS_INF_OR_LOG2_DENORM},
1814         {OID_SQRT, V_ZERO_OR_SQRT_DENORM},
1815         {OID_INV_SQRT, V_INF_OR_INV_SQRT_DENORM},
1816         {OID_MAT_DET, V_ZERO},
1817         {OID_MAT_INV, V_ZERO_OR_MINUS_ZERO},
1818         {OID_MODF, V_ZERO},
1819         {OID_MODF_ST, V_ZERO},
1820         {OID_NORMALIZE, V_ZERO},
1821         {OID_REFLECT, V_ZERO},
1822         {OID_REFRACT, V_ZERO},
1823         {OID_LENGTH, V_ZERO},
1824     };
1825 
1826     const BinaryCase binaryOpDenormPreserveArr[] = {
1827         //operation            den op one            den op den                den op inf        den op nan
1828         {OID_PHI, V_DENORM, V_DENORM, V_DENORM, V_DENORM},  {OID_SELECT, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1829         {OID_ADD, V_ONE, V_DENORM_TIMES_TWO, V_INF, V_NAN}, {OID_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_NAN},
1830         {OID_MUL, V_DENORM, V_ZERO, V_INF, V_NAN},          {OID_VEC_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN},
1831         {OID_VEC_MUL_M, V_DENORM, V_ZERO, V_INF, V_NAN},    {OID_MAT_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN},
1832         {OID_MAT_MUL_V, V_DENORM, V_ZERO, V_INF, V_NAN},    {OID_MAT_MUL_M, V_DENORM, V_ZERO, V_INF, V_NAN},
1833         {OID_OUT_PROD, V_DENORM, V_ZERO, V_INF, V_NAN},     {OID_DOT, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1834         {OID_MIX, V_HALF, V_DENORM, V_INF, V_NAN},          {OID_FMA, V_HALF, V_HALF, V_INF, V_NAN},
1835         {OID_MIN, V_DENORM, V_DENORM, V_DENORM, V_UNUSED},  {OID_MAX, V_ONE, V_DENORM, V_INF, V_UNUSED},
1836         {OID_CLAMP, V_ONE, V_DENORM, V_INF, V_UNUSED},      {OID_NMIN, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1837         {OID_NMAX, V_ONE, V_DENORM, V_INF, V_DENORM},       {OID_NCLAMP, V_ONE, V_DENORM, V_INF, V_DENORM},
1838     };
1839 
1840     const UnaryCase unaryOpDenormPreserveArr[] = {
1841         //operation                op den
1842         {OID_RETURN_VAL, V_DENORM},
1843         {OID_D_EXTRACT, V_DENORM},
1844         {OID_D_INSERT, V_DENORM},
1845         {OID_SHUFFLE, V_DENORM},
1846         {OID_COMPOSITE, V_DENORM},
1847         {OID_COMPOSITE_INS, V_DENORM},
1848         {OID_COPY, V_DENORM},
1849         {OID_TRANSPOSE, V_DENORM},
1850         {OID_NEGATE, V_DENORM},
1851         {OID_ABS, V_DENORM},
1852         {OID_SIGN, V_ONE},
1853         {OID_RADIANS, V_DENORM},
1854         {OID_DEGREES, V_DEGREES_DENORM},
1855     };
1856 
1857     binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr, binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1858     unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr, unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1859     binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1860                                   binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1861     unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1862                                  unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1863 }
1864 
1865 template <>
TypeTestResults()1866 TypeTestResults<double>::TypeTestResults()
1867 {
1868     m_variableType = FP64;
1869 
    // fp64 is supported by fewer operations than fp16 and fp32,
    // e.g. Radians and Degrees functions are not supported
1872     const BinaryCase binaryOpFTZArr[] = {
1873         //operation            den op one        den op den        den op inf        den op nan
1874         {OID_ADD, V_ONE, V_ZERO, V_INF, V_UNUSED},
1875         {OID_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_UNUSED},
1876         {OID_MUL, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1877         {OID_DIV, V_ZERO, V_UNUSED, V_ZERO, V_UNUSED},
1878         {OID_REM, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED},
1879         {OID_MOD, V_ZERO, V_UNUSED, V_UNUSED, V_UNUSED},
1880         {OID_VEC_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1881         {OID_VEC_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1882         {OID_MAT_MUL_S, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1883         {OID_MAT_MUL_V, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1884         {OID_MAT_MUL_M, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1885         {OID_OUT_PROD, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1886         {OID_DOT, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1887         {OID_MIX, V_HALF, V_ZERO, V_INF, V_UNUSED},
1888         {OID_MIN, V_ZERO, V_ZERO, V_ZERO, V_UNUSED},
1889         {OID_MAX, V_ONE, V_ZERO, V_INF, V_UNUSED},
1890         {OID_CLAMP, V_ONE, V_ZERO, V_INF, V_UNUSED},
1891         {OID_STEP, V_ONE, V_ONE, V_ONE, V_UNUSED},
1892         {OID_SSTEP, V_HALF, V_ONE, V_ZERO, V_UNUSED},
1893         {OID_FMA, V_HALF, V_HALF, V_UNUSED, V_UNUSED},
1894         {OID_FACE_FWD, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE, V_MINUS_ONE},
1895         {OID_NMIN, V_ZERO, V_ZERO, V_ZERO, V_ZERO},
1896         {OID_NMAX, V_ONE, V_ZERO, V_INF, V_ZERO},
1897         {OID_NCLAMP, V_ONE, V_ZERO, V_INF, V_ZERO},
1898         {OID_DIST, V_ONE, V_ZERO, V_INF, V_UNUSED},
1899         {OID_CROSS, V_ZERO, V_ZERO, V_UNUSED, V_UNUSED},
1900     };
1901 
1902     const UnaryCase unaryOpFTZArr[] = {
1903         //operation            op den
1904         {OID_NEGATE, V_MINUS_ZERO},
1905         {OID_ROUND, V_ZERO},
1906         {OID_ROUND_EV, V_ZERO},
1907         {OID_TRUNC, V_ZERO},
1908         {OID_ABS, V_ZERO},
1909         {OID_FLOOR, V_ZERO},
1910         {OID_CEIL, V_ZERO_OR_ONE},
1911         {OID_FRACT, V_ZERO},
1912         {OID_SQRT, V_ZERO_OR_SQRT_DENORM},
1913         {OID_INV_SQRT, V_INF_OR_INV_SQRT_DENORM},
1914         {OID_MAT_DET, V_ZERO},
1915         {OID_MAT_INV, V_ZERO_OR_MINUS_ZERO},
1916         {OID_MODF, V_ZERO},
1917         {OID_MODF_ST, V_ZERO},
1918         {OID_NORMALIZE, V_ZERO},
1919         {OID_REFLECT, V_ZERO},
1920         {OID_LENGTH, V_ZERO},
1921     };
1922 
1923     const BinaryCase binaryOpDenormPreserveArr[] = {
1924         //operation            den op one            den op den                den op inf        den op nan
1925         {OID_PHI, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1926         {OID_SELECT, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1927         {OID_ADD, V_ONE, V_DENORM_TIMES_TWO, V_INF, V_NAN},
1928         {OID_SUB, V_MINUS_ONE, V_ZERO, V_MINUS_INF, V_NAN},
1929         {OID_MUL, V_DENORM, V_ZERO, V_INF, V_NAN},
1930         {OID_VEC_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN},
1931         {OID_VEC_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1932         {OID_MAT_MUL_S, V_DENORM, V_ZERO, V_INF, V_NAN},
1933         {OID_MAT_MUL_V, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1934         {OID_MAT_MUL_M, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1935         {OID_OUT_PROD, V_DENORM, V_ZERO, V_INF, V_NAN},
1936         {OID_DOT, V_DENORM_TIMES_TWO, V_ZERO, V_INF, V_NAN},
1937         {OID_MIX, V_HALF, V_DENORM, V_INF, V_NAN},
1938         {OID_FMA, V_HALF, V_HALF, V_INF, V_NAN},
1939         {OID_MIN, V_DENORM, V_DENORM, V_DENORM, V_UNUSED},
1940         {OID_MAX, V_ONE, V_DENORM, V_INF, V_UNUSED},
1941         {OID_CLAMP, V_ONE, V_DENORM, V_INF, V_UNUSED},
1942         {OID_NMIN, V_DENORM, V_DENORM, V_DENORM, V_DENORM},
1943         {OID_NMAX, V_ONE, V_DENORM, V_INF, V_DENORM},
1944         {OID_NCLAMP, V_ONE, V_DENORM, V_INF, V_DENORM},
1945     };
1946 
1947     const UnaryCase unaryOpDenormPreserveArr[] = {
1948         //operation                op den
1949         {OID_RETURN_VAL, V_DENORM}, {OID_D_EXTRACT, V_DENORM},     {OID_D_INSERT, V_DENORM}, {OID_SHUFFLE, V_DENORM},
1950         {OID_COMPOSITE, V_DENORM},  {OID_COMPOSITE_INS, V_DENORM}, {OID_COPY, V_DENORM},     {OID_TRANSPOSE, V_DENORM},
1951         {OID_NEGATE, V_DENORM},     {OID_ABS, V_DENORM},           {OID_SIGN, V_ONE},
1952     };
1953 
1954     binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr, binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1955     unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr, unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1956     binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1957                                   binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1958     unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1959                                  unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1960 }
1961 
1962 // Operation structure holds data needed to test specified SPIR-V operation. This class contains
1963 // additional annotations, additional types and aditional constants that should be properly included
1964 // in SPIR-V code. Commands attribute in this structure contains code that performs tested operation
1965 // on given arguments, in some cases verification is also performed there.
1966 // All snipets stroed in this structure are generic and can be specialized for fp16, fp32 or fp64,
1967 // thanks to that this data can be shared by many OperationTestCase instances (testing diferent
1968 // float behaviors on diferent float widths).
1969 struct Operation
1970 {
1971     // operation name is included in test case name
1972     const char *name;
1973 
1974     // How extensively is the floating point type used?
1975     FloatUsage floatUsage;
1976 
1977     // operation specific spir-v snippets that will be
1978     // placed in proper places in final test shader
1979     const char *annotations;
1980     const char *types;
1981     const char *constants;
1982     const char *variables;
1983     const char *functions;
1984     const char *commands;
1985 
1986     // conversion operations operate on one float type and produce float
1987     // type with different bit width; restrictedInputType is used only when
1988     // isInputTypeRestricted is set to true and it restricts usage of this
1989     // operation to specified input type
1990     bool isInputTypeRestricted;
1991     VariableType restrictedInputType;
1992 
1993     // arguments for OpSpecConstant need to be specified also as constant
1994     bool isSpecConstant;
1995 
1996     // set if c_float* constant is used in operation
1997     FloatStatementUsageFlags statementUsageFlags;
1998 
Operationvkt::SpirVAssembly::__anon1f0d25030111::Operation1999     Operation()
2000     {
2001     }
2002 
2003     // Minimal constructor - used by most of operations
Operationvkt::SpirVAssembly::__anon1f0d25030111::Operation2004     Operation(const char *_name, FloatUsage _floatUsage, const char *_commands,
2005               const FloatStatementUsageFlags _statementUsageFlags = 0)
2006         : name(_name)
2007         , floatUsage(_floatUsage)
2008         , annotations("")
2009         , types("")
2010         , constants("")
2011         , variables("")
2012         , functions("")
2013         , commands(_commands)
2014         , isInputTypeRestricted(false)
2015         , restrictedInputType(FP16) // not used as isInputTypeRestricted is false
2016         , isSpecConstant(false)
2017         , statementUsageFlags(_statementUsageFlags)
2018     {
2019     }
2020 
2021     // Conversion operations constructor (used also by conversions done in SpecConstantOp)
Operationvkt::SpirVAssembly::__anon1f0d25030111::Operation2022     Operation(const char *_name, FloatUsage _floatUsage, bool specConstant, VariableType _inputType,
2023               const char *_constants, const char *_commands, const FloatStatementUsageFlags _statementUsageFlags = 0)
2024         : name(_name)
2025         , floatUsage(_floatUsage)
2026         , annotations("")
2027         , types("")
2028         , constants(_constants)
2029         , variables("")
2030         , functions("")
2031         , commands(_commands)
2032         , isInputTypeRestricted(true)
2033         , restrictedInputType(_inputType)
2034         , isSpecConstant(specConstant)
2035         , statementUsageFlags(_statementUsageFlags)
2036     {
2037     }
2038 
2039     // Full constructor - used by few operations, that are more complex to test
Operationvkt::SpirVAssembly::__anon1f0d25030111::Operation2040     Operation(const char *_name, FloatUsage _floatUsage, const char *_annotations, const char *_types,
2041               const char *_constants, const char *_variables, const char *_functions, const char *_commands,
2042               const FloatStatementUsageFlags _statementUsageFlags = 0)
2043         : name(_name)
2044         , floatUsage(_floatUsage)
2045         , annotations(_annotations)
2046         , types(_types)
2047         , constants(_constants)
2048         , variables(_variables)
2049         , functions(_functions)
2050         , commands(_commands)
2051         , isInputTypeRestricted(false)
2052         , restrictedInputType(FP16) // not used as isInputTypeRestricted is false
2053         , isSpecConstant(false)
2054         , statementUsageFlags(_statementUsageFlags)
2055     {
2056     }
2057 
2058     // Full constructor - used by rounding override cases
Operationvkt::SpirVAssembly::__anon1f0d25030111::Operation2059     Operation(const char *_name, FloatUsage _floatUsage, VariableType _inputType, const char *_annotations,
2060               const char *_types, const char *_constants, const char *_commands,
2061               const FloatStatementUsageFlags _statementUsageFlags = 0)
2062         : name(_name)
2063         , floatUsage(_floatUsage)
2064         , annotations(_annotations)
2065         , types(_types)
2066         , constants(_constants)
2067         , variables("")
2068         , functions("")
2069         , commands(_commands)
2070         , isInputTypeRestricted(true)
2071         , restrictedInputType(_inputType)
2072         , isSpecConstant(false)
2073         , statementUsageFlags(_statementUsageFlags)
2074     {
2075     }
2076 };
2077 
2078 // Class storing input that will be passed to operation and expected
2079 // output that should be generated for specified behaviour.
2080 class OperationTestCase
2081 {
2082 public:
OperationTestCase()2083     OperationTestCase()
2084     {
2085     }
2086 
OperationTestCase(const char * _baseName,BehaviorFlags _behaviorFlags,OperationId _operationId,ValueId _input1,ValueId _input2,ValueId _expectedOutput,bool _fp16Without16BitStorage=false)2087     OperationTestCase(const char *_baseName, BehaviorFlags _behaviorFlags, OperationId _operationId, ValueId _input1,
2088                       ValueId _input2, ValueId _expectedOutput, bool _fp16Without16BitStorage = false)
2089         : behaviorFlags(_behaviorFlags)
2090         , operationId(_operationId)
2091         , expectedOutput(_expectedOutput)
2092         , fp16Without16BitStorage(_fp16Without16BitStorage)
2093     {
2094         baseName = _baseName;
2095         if (fp16Without16BitStorage)
2096             baseName += "_nostorage";
2097         input[0] = _input1;
2098         input[1] = _input2;
2099     }
2100 
2101 public:
2102     string baseName;
2103     BehaviorFlags behaviorFlags;
2104     OperationId operationId;
2105     ValueId input[2];
2106     ValueId expectedOutput;
2107     bool fp16Without16BitStorage;
2108 };
2109 
// Helper structure used to store specialized operation
// data. This data is ready to be used during shader assembly.
//
// Most string members share their names with the snippet members of
// Operation (constants, annotations, types, variables, functions, commands);
// presumably they hold those snippets after specialization for a concrete
// type - confirm against the code that fills this structure.
struct SpecializedOperation
{
    string constants;
    string annotations;
    string types;
    // NOTE(review): has no counterpart in Operation; presumably the code that
    // prepares the operation arguments - confirm against the producer.
    string arguments;
    string variables;
    string functions;
    string commands;

    // NOTE(review): judging by the names, the variable type of the operation
    // input and the type snippets for input and output - confirm.
    VariableType inVariableType;
    TypeSnippetsSP inTypeSnippets;
    TypeSnippetsSP outTypeSnippets;
    // NOTE(review): presumably flags telling where the arguments snippet uses
    // float constants - confirm against the producer.
    FloatStatementUsageFlags argumentsUsesFloatConstant;
};
2127 
2128 // Class responsible for constructing list of test cases for specified
2129 // float type and specified way of preparation of arguments.
2130 // Arguments can be either read from input SSBO or generated via math
2131 // operations in spir-v code.
2132 class TestCasesBuilder
2133 {
2134 public:
2135     void init();
2136     void build(vector<OperationTestCase> &testCases, TypeTestResultsSP typeTestResults, bool argumentsFromInput);
2137     const Operation &getOperation(OperationId id) const;
2138 
2139 private:
2140     void createUnaryTestCases(vector<OperationTestCase> &testCases, OperationId operationId,
2141                               ValueId denormPreserveResult, ValueId denormFTZResult,
2142                               bool fp16WithoutStorage = false) const;
2143 
2144 private:
2145     // Operations are shared betwean test cases so they are
2146     // passed to them as pointers to data stored in TestCasesBuilder.
2147     typedef OperationTestCase OTC;
2148     typedef Operation Op;
2149     map<int, Op> m_operations;
2150     // SPIR-V assembly snippets that are used in m_operations
2151     vector<std::string> m_saved_strings;
2152 
2153     // We expect 12 strings: 3 kinds of narrowing conversions, with
2154     // 4 cases each.
2155     const size_t m_num_expected_strings = 12;
2156     // Saves the given string in m_strings, and returns a pointer to its data.
save(std::string str)2157     const char *save(std::string str)
2158     {
2159         m_saved_strings.emplace_back(std::move(str));
2160         return m_saved_strings.back().data();
2161     }
2162 };
2163 
init()2164 void TestCasesBuilder::init()
2165 {
2166     map<int, Op> &mo = m_operations;
2167     m_saved_strings.reserve(m_num_expected_strings);
2168 
    // predefine operations repeatedly used in tests; note that "_valueType"
    // in every operation command will be replaced with either "_f16",
    // "_f32", "_f64", "_ui16", "_ui32", "_ui64", "_i16", "_i32", "_i64".
    // StringTemplate is not used here because it would make code less
    // readable. m_operations contains generic operation definitions that
    // can be used for all float types.
2175 
2176     mo[OID_NEGATE]    = Op("negate", FLOAT_ARITHMETIC, "%result             = OpFNegate %type_valueType %arg1\n",
2177                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2178     mo[OID_COMPOSITE] = Op("composite", FLOAT_ARITHMETIC,
2179                            "%vec1               = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2180                            "%result             = OpCompositeExtract %type_valueType %vec1 0\n",
2181                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2182     mo[OID_COMPOSITE_INS] =
2183         Op("comp_ins", FLOAT_ARITHMETIC,
2184            "%vec1               = OpCompositeConstruct %type_valueType_vec2 %c_valueType_0 %c_valueType_0\n"
2185            "%vec2               = OpCompositeInsert %type_valueType_vec2 %arg1 %vec1 0\n"
2186            "%result             = OpCompositeExtract %type_valueType %vec2 0\n",
2187            B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2188     mo[OID_COPY]      = Op("copy", FLOAT_STORAGE_ONLY, "%result             = OpCopyObject %type_valueType %arg1\n",
2189                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2190     mo[OID_D_EXTRACT] = Op("extract", FLOAT_ARITHMETIC,
2191                            "%vec1               = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2192                            "%result             = OpVectorExtractDynamic %type_valueType %vec1 %c_i32_0\n",
2193                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2194     mo[OID_D_INSERT] =
2195         Op("insert", FLOAT_ARITHMETIC,
2196            "%tmpVec             = OpCompositeConstruct %type_valueType_vec2 %c_valueType_2 %c_valueType_2\n"
2197            "%vec1               = OpVectorInsertDynamic %type_valueType_vec2 %tmpVec %arg1 %c_i32_0\n"
2198            "%result             = OpCompositeExtract %type_valueType %vec1 0\n",
2199            B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2200     mo[OID_SHUFFLE] = Op(
2201         "shuffle", FLOAT_ARITHMETIC,
2202         "%tmpVec1            = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2203         "%tmpVec2            = OpCompositeConstruct %type_valueType_vec2 %c_valueType_2 "
2204         "%c_valueType_2\n" // NOTE: its impossible to test shuffle with denorms flushed
2205         "%vec1               = OpVectorShuffle %type_valueType_vec2 %tmpVec1 %tmpVec2 0 2\n" //       to zero as this will be done by earlier operation
2206         "%result             = OpCompositeExtract %type_valueType %vec1 0\n", //       (this also applies to few other operations)
2207         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2208     mo[OID_TRANSPOSE]  = Op("transpose", FLOAT_ARITHMETIC,
2209                             "%col                = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2210                              "%mat                = OpCompositeConstruct %type_valueType_mat2x2 %col %col\n"
2211                              "%tmat               = OpTranspose %type_valueType_mat2x2 %mat\n"
2212                              "%tcol               = OpCompositeExtract %type_valueType_vec2 %tmat 0\n"
2213                              "%result             = OpCompositeExtract %type_valueType %tcol 0\n",
2214                             B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2215     mo[OID_RETURN_VAL] = Op("ret_val", FLOAT_ARITHMETIC, "",
2216                             "%type_test_fun      = OpTypeFunction %type_valueType %type_valueType\n", "", "",
2217                             "%test_fun = OpFunction %type_valueType None %type_test_fun\n"
2218                             "%param = OpFunctionParameter %type_valueType\n"
2219                             "%entry = OpLabel\n"
2220                             "OpReturnValue %param\n"
2221                             "OpFunctionEnd\n",
2222                             "%result             = OpFunctionCall %type_valueType %test_fun %arg1\n",
2223                             B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2224 
2225     // conversion operations that are meant to be used only for single output type (defined by the second number in name)
2226     const char *convertSource = "%result             = OpFConvert %type_valueType %arg1\n";
2227     mo[OID_CONV_FROM_FP16] =
2228         Op("conv_from_fp16", FLOAT_STORAGE_ONLY, false, FP16, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2229     mo[OID_CONV_FROM_FP32] =
2230         Op("conv_from_fp32", FLOAT_STORAGE_ONLY, false, FP32, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2231     mo[OID_CONV_FROM_FP64] =
2232         Op("conv_from_fp64", FLOAT_STORAGE_ONLY, false, FP64, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2233 
2234     const char *convertFromUintSource = "%result             = OpConvertUToF %type_valueType %arg1\n";
2235     mo[OID_CONV_FROM_UINT_TO_FP32]    = Op("conv_uint_to_fp32", FLOAT_STORAGE_ONLY, false, UINT32, "",
2236                                            convertFromUintSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2237     mo[OID_CONV_FROM_UINT_TO_FP64]    = Op("conv_uint_to_fp64", FLOAT_STORAGE_ONLY, false, UINT64, "",
2238                                            convertFromUintSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2239     const char *convertFromIntSource  = "%result             = OpConvertSToF %type_valueType %arg1\n";
2240     mo[OID_CONV_FROM_INT_TO_FP32] = Op("conv_uint_to_fp32", FLOAT_STORAGE_ONLY, false, INT32, "", convertFromIntSource,
2241                                        B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2242     mo[OID_CONV_FROM_INT_TO_FP64] = Op("conv_uint_to_fp64", FLOAT_STORAGE_ONLY, false, INT64, "", convertFromIntSource,
2243                                        B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2244 
    // From all operands supported by OpSpecConstantOp we can only test FConvert opcode with literals as everything
    // else requires Kernel capability (OpenCL); values of literals used in SPIR-V code must be equivalent to
    // the values V_CONV_FROM_....  Use the feature of the SPIR-V assembler where you use ! to inject raw integer
    // words into the SPIR-V binary.
2249 
2250     // fp32 -> fp16 with cases UP, DOWN, TIE_UP, TIE_DOWN
2251     typedef conversionDetail<Float32, Float16> conv32to16;
2252     mo[OID_SCONST_CONV_FROM_FP32_TO_FP16_UP] =
2253         Op("sconst_conv_from_fp32_up", FLOAT_ARITHMETIC, true, FP32,
2254            save("%c_arg              = OpConstant %type_f32 !" + conv32to16::fromStr(Round::UP) +
2255                 "\n"
2256                 "%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2257            "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP32);
2258     mo[OID_SCONST_CONV_FROM_FP32_TO_FP16_DOWN] =
2259         Op("sconst_conv_from_fp32_down", FLOAT_ARITHMETIC, true, FP32,
2260            save("%c_arg              = OpConstant %type_f32 !" + conv32to16::fromStr(Round::DOWN) +
2261                 "\n"
2262                 "%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2263            "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP32);
2264     mo[OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_UP] =
2265         Op("sconst_conv_from_fp32_tie_up", FLOAT_ARITHMETIC, true, FP32,
2266            save("%c_arg              = OpConstant %type_f32 !" + conv32to16::fromStr(Round::TIE_UP) +
2267                 "\n"
2268                 "%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2269            "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP32);
2270     mo[OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_DOWN] =
2271         Op("sconst_conv_from_fp32_tie_down", FLOAT_ARITHMETIC, true, FP32,
2272            save("%c_arg              = OpConstant %type_f32 !" + conv32to16::fromStr(Round::TIE_DOWN) +
2273                 "\n"
2274                 "%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2275            "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP32);
2276 
2277     // fp64 -> fp32 with cases UP, DOWN, TIE_UP, TIE_DOWN
2278     // To inject a 64 bit value, inject 2 32-bit words.
2279     typedef conversionDetail<Float64, Float32> conv64to32;
2280     mo[OID_SCONST_CONV_FROM_FP64_TO_FP32_UP] =
2281         Op("sconst_conv_from_fp64_up", FLOAT_ARITHMETIC, true, FP64,
2282            save("%c_arg              = OpConstant %type_f64 !" + conv64to32::fromStr(Round::UP) +
2283                 "\n"
2284                 "%result             = OpSpecConstantOp %type_f32 FConvert %c_arg\n"),
2285            "", B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2286     mo[OID_SCONST_CONV_FROM_FP64_TO_FP32_DOWN] =
2287         Op("sconst_conv_from_fp64_down", FLOAT_ARITHMETIC, true, FP64,
2288            save("%c_arg              = OpConstant %type_f64 !" + conv64to32::fromStr(Round::DOWN) +
2289                 "\n"
2290                 "%result             = OpSpecConstantOp %type_f32 FConvert %c_arg\n"),
2291            "", B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2292     mo[OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_UP] =
2293         Op("sconst_conv_from_fp64_tie_up", FLOAT_ARITHMETIC, true, FP64,
2294            save("%c_arg              = OpConstant %type_f64 !" + conv64to32::fromStr(Round::TIE_UP) +
2295                 "\n"
2296                 "%result             = OpSpecConstantOp %type_f32 FConvert %c_arg\n"),
2297            "", B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2298     mo[OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_DOWN] =
2299         Op("sconst_conv_from_fp64_tie_down", FLOAT_ARITHMETIC, true, FP64,
2300            save("%c_arg              = OpConstant %type_f64 !" + conv64to32::fromStr(Round::TIE_DOWN) +
2301                 "\n"
2302                 "%result             = OpSpecConstantOp %type_f32 FConvert %c_arg\n"),
2303            "", B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2304 
2305     // fp64 -> fp16 with cases UP, DOWN, TIE_UP, TIE_DOWN
2306     typedef conversionDetail<Float64, Float16> conv64to16;
2307     mo[OID_SCONST_CONV_FROM_FP64_TO_FP16_UP] =
2308         Op("sconst_conv_from_fp64_up", FLOAT_ARITHMETIC, true, FP64,
2309            save("%c_arg              = OpConstant %type_f64 !" + conv64to16::fromStr(Round::UP) +
2310                 "\n"
2311                 "%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2312            "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2313     mo[OID_SCONST_CONV_FROM_FP64_TO_FP16_DOWN] =
2314         Op("sconst_conv_from_fp64_down", FLOAT_ARITHMETIC, true, FP64,
2315            save("%c_arg              = OpConstant %type_f64 !" + conv64to16::fromStr(Round::DOWN) +
2316                 "\n"
2317                 "%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2318            "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2319     mo[OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_UP] =
2320         Op("sconst_conv_from_fp64_tie_up", FLOAT_ARITHMETIC, true, FP64,
2321            save("%c_arg              = OpConstant %type_f64 !" + conv64to16::fromStr(Round::TIE_UP) +
2322                 "\n"
2323                 "%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2324            "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2325     mo[OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_DOWN] =
2326         Op("sconst_conv_from_fp64_tie_down", FLOAT_ARITHMETIC, true, FP64,
2327            save("%c_arg              = OpConstant %type_f64 !" + conv64to16::fromStr(Round::TIE_DOWN) +
2328                 "\n"
2329                 "%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n"),
2330            "", B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
2331 
2332     mo[OID_ADD]       = Op("add", FLOAT_ARITHMETIC, "%result             = OpFAdd %type_valueType %arg1 %arg2\n",
2333                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2334     mo[OID_SUB]       = Op("sub", FLOAT_ARITHMETIC, "%result             = OpFSub %type_valueType %arg1 %arg2\n",
2335                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2336     mo[OID_MUL]       = Op("mul", FLOAT_ARITHMETIC, "%result             = OpFMul %type_valueType %arg1 %arg2\n",
2337                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2338     mo[OID_DIV]       = Op("div", FLOAT_ARITHMETIC, "%result             = OpFDiv %type_valueType %arg1 %arg2\n",
2339                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2340     mo[OID_REM]       = Op("rem", FLOAT_ARITHMETIC, "%result             = OpFRem %type_valueType %arg1 %arg2\n",
2341                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2342     mo[OID_MOD]       = Op("mod", FLOAT_ARITHMETIC, "%result             = OpFMod %type_valueType %arg1 %arg2\n",
2343                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2344     mo[OID_PHI]       = Op("phi", FLOAT_ARITHMETIC,
2345                            "%comp               = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
2346                                  "                      OpSelectionMerge %comp_merge None\n"
2347                                  "                      OpBranchConditional %comp %true_branch %false_branch\n"
2348                                  "%true_branch        = OpLabel\n"
2349                                  "                      OpBranch %comp_merge\n"
2350                                  "%false_branch       = OpLabel\n"
2351                                  "                      OpBranch %comp_merge\n"
2352                                  "%comp_merge         = OpLabel\n"
2353                                  "%result             = OpPhi %type_valueType %arg2 %true_branch %arg1 %false_branch\n",
2354                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2355     mo[OID_SELECT]    = Op("select", FLOAT_ARITHMETIC,
2356                            "%always_true        = OpFOrdGreaterThan %type_bool %c_valueType_1 %c_valueType_0\n"
2357                               "%result             = OpSelect %type_valueType %always_true %arg1 %arg2\n",
2358                            B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2359     mo[OID_DOT]       = Op("dot", FLOAT_ARITHMETIC,
2360                            "%vec1               = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2361                                  "%vec2               = OpCompositeConstruct %type_valueType_vec2 %arg2 %arg2\n"
2362                                  "%result             = OpDot %type_valueType %vec1 %vec2\n",
2363                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2364     mo[OID_VEC_MUL_S] = Op("vmuls", FLOAT_ARITHMETIC,
2365                            "%vec                = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2366                            "%tmpVec             = OpVectorTimesScalar %type_valueType_vec2 %vec %arg2\n"
2367                            "%result             = OpCompositeExtract %type_valueType %tmpVec 0\n",
2368                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2369     mo[OID_VEC_MUL_M] = Op("vmulm", FLOAT_ARITHMETIC,
2370                            "%col                = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2371                            "%mat                = OpCompositeConstruct %type_valueType_mat2x2 %col %col\n"
2372                            "%vec                = OpCompositeConstruct %type_valueType_vec2 %arg2 %arg2\n"
2373                            "%tmpVec             = OpVectorTimesMatrix %type_valueType_vec2 %vec %mat\n"
2374                            "%result             = OpCompositeExtract %type_valueType %tmpVec 0\n",
2375                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2376     mo[OID_MAT_MUL_S] = Op("mmuls", FLOAT_ARITHMETIC,
2377                            "%col                = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2378                            "%mat                = OpCompositeConstruct %type_valueType_mat2x2 %col %col\n"
2379                            "%mulMat             = OpMatrixTimesScalar %type_valueType_mat2x2 %mat %arg2\n"
2380                            "%extCol             = OpCompositeExtract %type_valueType_vec2 %mulMat 0\n"
2381                            "%result             = OpCompositeExtract %type_valueType %extCol 0\n",
2382                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2383     mo[OID_MAT_MUL_V] = Op("mmulv", FLOAT_ARITHMETIC,
2384                            "%col                = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2385                            "%mat                = OpCompositeConstruct %type_valueType_mat2x2 %col %col\n"
2386                            "%vec                = OpCompositeConstruct %type_valueType_vec2 %arg2 %arg2\n"
2387                            "%mulVec             = OpMatrixTimesVector %type_valueType_vec2 %mat %vec\n"
2388                            "%result             = OpCompositeExtract %type_valueType %mulVec 0\n",
2389                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2390     mo[OID_MAT_MUL_M] = Op("mmulm", FLOAT_ARITHMETIC,
2391                            "%col1               = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2392                            "%mat1               = OpCompositeConstruct %type_valueType_mat2x2 %col1 %col1\n"
2393                            "%col2               = OpCompositeConstruct %type_valueType_vec2 %arg2 %arg2\n"
2394                            "%mat2               = OpCompositeConstruct %type_valueType_mat2x2 %col2 %col2\n"
2395                            "%mulMat             = OpMatrixTimesMatrix %type_valueType_mat2x2 %mat1 %mat2\n"
2396                            "%extCol             = OpCompositeExtract %type_valueType_vec2 %mulMat 0\n"
2397                            "%result             = OpCompositeExtract %type_valueType %extCol 0\n",
2398                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2399     mo[OID_OUT_PROD]  = Op("out_prod", FLOAT_ARITHMETIC,
2400                            "%vec1               = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2401                             "%vec2               = OpCompositeConstruct %type_valueType_vec2 %arg2 %arg2\n"
2402                             "%mulMat             = OpOuterProduct %type_valueType_mat2x2 %vec1 %vec2\n"
2403                             "%extCol             = OpCompositeExtract %type_valueType_vec2 %mulMat 0\n"
2404                             "%result             = OpCompositeExtract %type_valueType %extCol 0\n",
2405                            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2406 
2407     // comparison operations
2408     mo[OID_ORD_EQ]   = Op("ord_eq", FLOAT_ARITHMETIC,
2409                           "%boolVal           = OpFOrdEqual %type_bool %arg1 %arg2\n"
2410                             "%result            = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2411                           B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2412     mo[OID_UORD_EQ]  = Op("uord_eq", FLOAT_ARITHMETIC,
2413                           "%boolVal           = OpFUnordEqual %type_bool %arg1 %arg2\n"
2414                            "%result            = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2415                           B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2416     mo[OID_ORD_NEQ]  = Op("ord_neq", FLOAT_ARITHMETIC,
2417                           "%boolVal           = OpFOrdNotEqual %type_bool %arg1 %arg2\n"
2418                            "%result            = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2419                           B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2420     mo[OID_UORD_NEQ] = Op("uord_neq", FLOAT_ARITHMETIC,
2421                           "%boolVal           = OpFUnordNotEqual %type_bool %arg1 %arg2\n"
2422                           "%result            = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2423                           B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2424     mo[OID_ORD_LS]   = Op("ord_ls", FLOAT_ARITHMETIC,
2425                           "%boolVal           = OpFOrdLessThan %type_bool %arg1 %arg2\n"
2426                             "%result            = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2427                           B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2428     mo[OID_UORD_LS]  = Op("uord_ls", FLOAT_ARITHMETIC,
2429                           "%boolVal           = OpFUnordLessThan %type_bool %arg1 %arg2\n"
2430                            "%result            = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2431                           B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2432     mo[OID_ORD_GT]   = Op("ord_gt", FLOAT_ARITHMETIC,
2433                           "%boolVal           = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
2434                             "%result            = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2435                           B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2436     mo[OID_UORD_GT]  = Op("uord_gt", FLOAT_ARITHMETIC,
2437                           "%boolVal           = OpFUnordGreaterThan %type_bool %arg1 %arg2\n"
2438                            "%result            = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2439                           B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2440     mo[OID_ORD_LE]   = Op("ord_le", FLOAT_ARITHMETIC,
2441                           "%boolVal           = OpFOrdLessThanEqual %type_bool %arg1 %arg2\n"
2442                             "%result            = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2443                           B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2444     mo[OID_UORD_LE]  = Op("uord_le", FLOAT_ARITHMETIC,
2445                           "%boolVal           = OpFUnordLessThanEqual %type_bool %arg1 %arg2\n"
2446                            "%result            = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2447                           B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2448     mo[OID_ORD_GE]   = Op("ord_ge", FLOAT_ARITHMETIC,
2449                           "%boolVal           = OpFOrdGreaterThanEqual %type_bool %arg1 %arg2\n"
2450                             "%result            = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2451                           B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2452     mo[OID_UORD_GE]  = Op("uord_ge", FLOAT_ARITHMETIC,
2453                           "%boolVal           = OpFUnordGreaterThanEqual %type_bool %arg1 %arg2\n"
2454                            "%result            = OpSelect %type_valueType %boolVal %c_valueType_1 %c_valueType_0\n",
2455                           B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2456 
2457     mo[OID_ATAN2] =
2458         Op("atan2", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Atan2 %arg1 %arg2\n",
2459            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2460     mo[OID_POW] =
2461         Op("pow", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Pow %arg1 %arg2\n",
2462            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2463     mo[OID_MIX] = Op("mix", FLOAT_ARITHMETIC,
2464                      "%result             = OpExtInst %type_valueType %std450 FMix %arg1 %arg2 %c_valueType_0_5\n",
2465                      B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2466     mo[OID_FMA] = Op("fma", FLOAT_ARITHMETIC,
2467                      "%result             = OpExtInst %type_valueType %std450 Fma %arg1 %arg2 %c_valueType_0_5\n",
2468                      B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2469     mo[OID_MIN] =
2470         Op("min", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 FMin %arg1 %arg2\n",
2471            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2472     mo[OID_MAX] =
2473         Op("max", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 FMax %arg1 %arg2\n",
2474            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2475     mo[OID_CLAMP] = Op("clamp", FLOAT_ARITHMETIC,
2476                        "%result             = OpExtInst %type_valueType %std450 FClamp %arg1 %arg2 %arg2\n",
2477                        B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2478     mo[OID_STEP] =
2479         Op("step", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Step %arg1 %arg2\n",
2480            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2481     mo[OID_SSTEP] =
2482         Op("sstep", FLOAT_ARITHMETIC,
2483            "%result             = OpExtInst %type_valueType %std450 SmoothStep %arg1 %arg2 %c_valueType_0_5\n",
2484            B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2485     mo[OID_DIST]  = Op("distance", FLOAT_ARITHMETIC,
2486                        "%result             = OpExtInst %type_valueType %std450 Distance %arg1 %arg2\n",
2487                        B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2488     mo[OID_CROSS] = Op("cross", FLOAT_ARITHMETIC,
2489                        "%vec1               = OpCompositeConstruct %type_valueType_vec3 %arg1 %arg1 %arg1\n"
2490                        "%vec2               = OpCompositeConstruct %type_valueType_vec3 %arg2 %arg2 %arg2\n"
2491                        "%tmpVec             = OpExtInst %type_valueType_vec3 %std450 Cross %vec1 %vec2\n"
2492                        "%result             = OpCompositeExtract %type_valueType %tmpVec 0\n",
2493                        B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2494     mo[OID_FACE_FWD] =
2495         Op("face_fwd", FLOAT_ARITHMETIC,
2496            "%result             = OpExtInst %type_valueType %std450 FaceForward %c_valueType_1 %arg1 %arg2\n",
2497            B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2498     mo[OID_NMIN] =
2499         Op("nmin", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 NMin %arg1 %arg2\n",
2500            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2501     mo[OID_NMAX] =
2502         Op("nmax", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 NMax %arg1 %arg2\n",
2503            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2504     mo[OID_NCLAMP] = Op("nclamp", FLOAT_ARITHMETIC,
2505                         "%result             = OpExtInst %type_valueType %std450 NClamp %arg2 %arg1 %arg2\n",
2506                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2507 
2508     mo[OID_ROUND] =
2509         Op("round", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Round %arg1\n",
2510            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2511     mo[OID_ROUND_EV] =
2512         Op("round_ev", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 RoundEven %arg1\n",
2513            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2514     mo[OID_TRUNC] =
2515         Op("trunc", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Trunc %arg1\n",
2516            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2517     mo[OID_ABS]  = Op("abs", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 FAbs %arg1\n",
2518                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2519     mo[OID_SIGN] = Op("sign", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 FSign %arg1\n",
2520                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2521     mo[OID_FLOOR] =
2522         Op("floor", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Floor %arg1\n",
2523            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2524     mo[OID_CEIL] = Op("ceil", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Ceil %arg1\n",
2525                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2526     mo[OID_FRACT] =
2527         Op("fract", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Fract %arg1\n",
2528            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2529     mo[OID_RADIANS] =
2530         Op("radians", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Radians %arg1\n",
2531            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2532     mo[OID_DEGREES] =
2533         Op("degrees", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Degrees %arg1\n",
2534            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2535     mo[OID_SIN]  = Op("sin", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Sin %arg1\n",
2536                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2537     mo[OID_COS]  = Op("cos", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Cos %arg1\n",
2538                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2539     mo[OID_TAN]  = Op("tan", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Tan %arg1\n",
2540                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2541     mo[OID_ASIN] = Op("asin", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Asin %arg1\n",
2542                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2543     mo[OID_ACOS] = Op("acos", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Acos %arg1\n",
2544                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2545     mo[OID_ATAN] = Op("atan", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Atan %arg1\n",
2546                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2547     mo[OID_SINH] = Op("sinh", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Sinh %arg1\n",
2548                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2549     mo[OID_COSH] = Op("cosh", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Cosh %arg1\n",
2550                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2551     mo[OID_TANH] = Op("tanh", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Tanh %arg1\n",
2552                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2553     mo[OID_ASINH] =
2554         Op("asinh", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Asinh %arg1\n",
2555            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2556     mo[OID_ACOSH] =
2557         Op("acosh", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Acosh %arg1\n",
2558            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2559     mo[OID_ATANH] =
2560         Op("atanh", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Atanh %arg1\n",
2561            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2562     mo[OID_EXP]  = Op("exp", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Exp %arg1\n",
2563                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2564     mo[OID_LOG]  = Op("log", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Log %arg1\n",
2565                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2566     mo[OID_EXP2] = Op("exp2", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Exp2 %arg1\n",
2567                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2568     mo[OID_LOG2] = Op("log2", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Log2 %arg1\n",
2569                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2570     mo[OID_SQRT] = Op("sqrt", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Sqrt %arg1\n",
2571                       B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2572     mo[OID_INV_SQRT] =
2573         Op("inv_sqrt", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 InverseSqrt %arg1\n",
2574            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2575     mo[OID_MODF] =
2576         Op("modf", FLOAT_ARITHMETIC, "", "", "", "%tmpVarPtr          = OpVariable %type_valueType_fptr Function\n", "",
2577            "%result             = OpExtInst %type_valueType %std450 Modf %arg1 %tmpVarPtr\n",
2578            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2579     mo[OID_MODF_ST] = Op("modf_st", FLOAT_ARITHMETIC,
2580                          "OpMemberDecorate %struct_ff 0 Offset 0\n"
2581                          "OpMemberDecorate %struct_ff 1 Offset ${float_width}\n",
2582                          "%struct_ff          = OpTypeStruct %type_valueType %type_valueType\n"
2583                          "%struct_ff_fptr     = OpTypePointer Function %struct_ff\n",
2584                          "", "%tmpStructPtr       = OpVariable %struct_ff_fptr Function\n", "",
2585                          "%tmpStruct          = OpExtInst %struct_ff %std450 ModfStruct %arg1\n"
2586                          "                      OpStore %tmpStructPtr %tmpStruct\n"
2587                          "%tmpLoc             = OpAccessChain %type_valueType_fptr %tmpStructPtr %c_i32_0\n"
2588                          "%result             = OpLoad %type_valueType %tmpLoc\n",
2589                          B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2590     mo[OID_FREXP] =
2591         Op("frexp", FLOAT_ARITHMETIC, "", "", "", "%tmpVarPtr          = OpVariable %type_i32_fptr Function\n", "",
2592            "%result             = OpExtInst %type_valueType %std450 Frexp %arg1 %tmpVarPtr\n",
2593            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2594     mo[OID_FREXP_ST] = Op("frexp_st", FLOAT_ARITHMETIC,
2595                           "OpMemberDecorate %struct_fi 0 Offset 0\n"
2596                           "OpMemberDecorate %struct_fi 1 Offset ${float_width}\n",
2597                           "%struct_fi          = OpTypeStruct %type_valueType %type_i32\n"
2598                           "%struct_fi_fptr     = OpTypePointer Function %struct_fi\n",
2599                           "", "%tmpStructPtr       = OpVariable %struct_fi_fptr Function\n", "",
2600                           "%tmpStruct          = OpExtInst %struct_fi %std450 FrexpStruct %arg1\n"
2601                           "                      OpStore %tmpStructPtr %tmpStruct\n"
2602                           "%tmpLoc             = OpAccessChain %type_valueType_fptr %tmpStructPtr %c_i32_0\n"
2603                           "%result             = OpLoad %type_valueType %tmpLoc\n",
2604                           B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2605     mo[OID_LENGTH] =
2606         Op("length", FLOAT_ARITHMETIC, "%result             = OpExtInst %type_valueType %std450 Length %arg1\n",
2607            B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2608     mo[OID_NORMALIZE] = Op("normalize", FLOAT_ARITHMETIC,
2609                            "%vec1               = OpCompositeConstruct %type_valueType_vec2 %arg1 %c_valueType_2\n"
2610                            "%tmpVec             = OpExtInst %type_valueType_vec2 %std450 Normalize %vec1\n"
2611                            "%result             = OpCompositeExtract %type_valueType %tmpVec 0\n",
2612                            B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2613     mo[OID_REFLECT] =
2614         Op("reflect", FLOAT_ARITHMETIC,
2615            "%vec1               = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2616            "%vecN               = OpCompositeConstruct %type_valueType_vec2 %c_valueType_0 %c_valueType_n1\n"
2617            "%tmpVec             = OpExtInst %type_valueType_vec2 %std450 Reflect %vec1 %vecN\n"
2618            "%result             = OpCompositeExtract %type_valueType %tmpVec 0\n",
2619            B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2620     mo[OID_REFRACT] =
2621         Op("refract", FLOAT_ARITHMETIC,
2622            "%vec1               = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2623            "%vecN               = OpCompositeConstruct %type_valueType_vec2 %c_valueType_0 %c_valueType_n1\n"
2624            "%tmpVec             = OpExtInst %type_valueType_vec2 %std450 Refract %vec1 %vecN %c_valueType_0_5\n"
2625            "%result             = OpCompositeExtract %type_valueType %tmpVec 0\n",
2626            B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2627     mo[OID_MAT_DET] = Op("mat_det", FLOAT_ARITHMETIC,
2628                          "%col                = OpCompositeConstruct %type_valueType_vec2 %arg1 %arg1\n"
2629                          "%mat                = OpCompositeConstruct %type_valueType_mat2x2 %col %col\n"
2630                          "%result             = OpExtInst %type_valueType %std450 Determinant %mat\n",
2631                          B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2632     mo[OID_MAT_INV] =
2633         Op("mat_inv", FLOAT_ARITHMETIC,
2634            "%col1               = OpCompositeConstruct %type_valueType_vec2 %arg1 %c_valueType_1\n"
2635            "%col2               = OpCompositeConstruct %type_valueType_vec2 %c_valueType_1 %c_valueType_1\n"
2636            "%mat                = OpCompositeConstruct %type_valueType_mat2x2 %col1 %col2\n"
2637            "%invMat             = OpExtInst %type_valueType_mat2x2 %std450 MatrixInverse %mat\n"
2638            "%extCol             = OpCompositeExtract %type_valueType_vec2 %invMat 1\n"
2639            "%result             = OpCompositeExtract %type_valueType %extCol 1\n",
2640            B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2641 
2642     // PackHalf2x16 is a special case as it operates on fp32 vec2 and returns unsigned int,
2643     // the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
2644     mo[OID_PH_DENORM] =
2645         Op("ph_denorm", FLOAT_STORAGE_ONLY, "", "",
2646            "%c_fp32_denorm_fp16 = OpConstant %type_f32 6.01e-5\n" // fp32 representation of fp16 denorm value
2647            "%c_ref              = OpConstant %type_u32 66061296\n",
2648            "", "",
2649            "%srcVec             = OpCompositeConstruct %type_f32_vec2 %c_fp32_denorm_fp16 %c_fp32_denorm_fp16\n"
2650            "%packedInt          = OpExtInst %type_u32 %std450 PackHalf2x16 %srcVec\n"
2651            "%boolVal            = OpIEqual %type_bool %c_ref %packedInt\n"
2652            "%result             = OpSelect %type_f32 %boolVal %c_f32_1 %c_f32_0\n",
2653            B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_COMMANDS_CONST_FP32 |
2654                B_STATEMENT_USAGE_COMMANDS_TYPE_FP32);
2655 
2656     // UnpackHalf2x16 is a special case that operates on uint32 and returns two 32-bit floats,
2657     // this function is tested using constants
2658     mo[OID_UPH_DENORM] = Op("uph_denorm", FLOAT_STORAGE_ONLY, "", "",
2659                             "%c_u32_2_16_pack    = OpConstant %type_u32 66061296\n", // == packHalf2x16(vec2(denorm))
2660                             "", "",
2661                             "%tmpVec             = OpExtInst %type_f32_vec2 %std450 UnpackHalf2x16 %c_u32_2_16_pack\n"
2662                             "%result             = OpCompositeExtract %type_f32 %tmpVec 0\n",
2663                             B_STATEMENT_USAGE_COMMANDS_TYPE_FP32);
2664 
2665     // PackDouble2x32 is a special case that operates on two uint32 and returns
2666     // double, this function is tested using constants
2667     mo[OID_PD_DENORM] = Op("pd_denorm", FLOAT_STORAGE_ONLY, "", "",
2668                            "%c_p1               = OpConstant %type_u32 0\n"
2669                            "%c_p2               = OpConstant %type_u32 262144\n", // == UnpackDouble2x32(denorm)
2670                            "", "",
2671                            "%srcVec             = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
2672                            "%result             = OpExtInst %type_f64 %std450 PackDouble2x32 %srcVec\n",
2673                            B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2674 
2675     // UnpackDouble2x32 is a special case as it operates only on FP64 and returns two ints,
2676     // the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
2677     const char *unpackDouble2x32Types = "%type_bool_vec2     = OpTypeVector %type_bool 2\n";
2678     const char *unpackDouble2x32Source =
2679         "%refVec2            = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
2680         "%resVec2            = OpExtInst %type_u32_vec2 %std450 UnpackDouble2x32 %arg1\n"
2681         "%boolVec2           = OpIEqual %type_bool_vec2 %refVec2 %resVec2\n"
2682         "%boolVal            = OpAll %type_bool %boolVec2\n"
2683         "%result             = OpSelect %type_f64 %boolVal %c_f64_1 %c_f64_0\n";
2684     mo[OID_UPD_DENORM_FLUSH]    = Op("upd_denorm", FLOAT_STORAGE_ONLY, "", unpackDouble2x32Types,
2685                                      "%c_p1               = OpConstant %type_u32 0\n"
2686                                         "%c_p2               = OpConstant %type_u32 0\n",
2687                                      "", "", unpackDouble2x32Source,
2688                                      B_STATEMENT_USAGE_COMMANDS_CONST_FP64 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2689     mo[OID_UPD_DENORM_PRESERVE] = Op("upd_denorm", FLOAT_STORAGE_ONLY, "", unpackDouble2x32Types,
2690                                      "%c_p1               = OpConstant %type_u32 1008\n"
2691                                      "%c_p2               = OpConstant %type_u32 0\n",
2692                                      "", "", unpackDouble2x32Source,
2693                                      B_STATEMENT_USAGE_COMMANDS_CONST_FP64 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2694 
2695     mo[OID_ORTE_ROUND] = Op("orte_round", FLOAT_STORAGE_ONLY, FP32, "OpDecorate %result FPRoundingMode RTE\n", "", "",
2696                             "%result             = OpFConvert %type_f16 %arg1\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FP16);
2697     mo[OID_ORTZ_ROUND] = Op("ortz_round", FLOAT_STORAGE_ONLY, FP32, "OpDecorate %result FPRoundingMode RTZ\n", "", "",
2698                             "%result             = OpFConvert %type_f16 %arg1\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FP16);
2699 
2700     DE_ASSERT(m_saved_strings.size() == m_num_expected_strings);
2701 }
2702 
build(vector<OperationTestCase> & testCases,TypeTestResultsSP typeTestResults,bool argumentsFromInput)2703 void TestCasesBuilder::build(vector<OperationTestCase> &testCases, TypeTestResultsSP typeTestResults,
2704                              bool argumentsFromInput)
2705 {
2706     // this method constructs a list of test cases; this list is a bit different
2707     // for every combination of float type, arguments preparation method and tested float control
2708 
2709     testCases.reserve(750);
2710 
2711     bool isFP16 = typeTestResults->variableType() == FP16;
2712 
2713     for (int j = 0; j < 2; j++)
2714     {
2715         // fp16NoStorage tests only supported if testing fp16.
2716         bool fp16NoStorage = (j == 1);
2717         if (fp16NoStorage && !isFP16)
2718             continue;
2719 
2720         // Denorm - FlushToZero - binary operations
2721         for (size_t i = 0; i < typeTestResults->binaryOpFTZ.size(); ++i)
2722         {
2723             const BinaryCase &binaryCase = typeTestResults->binaryOpFTZ[i];
2724             OperationId operation        = binaryCase.operationId;
2725             testCases.push_back(OTC("denorm_op_var_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_ONE,
2726                                     binaryCase.opVarResult, fp16NoStorage));
2727             testCases.push_back(OTC("denorm_op_denorm_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_DENORM,
2728                                     binaryCase.opDenormResult, fp16NoStorage));
2729             testCases.push_back(OTC("denorm_op_inf_flush_to_zero", B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM,
2730                                     V_INF, binaryCase.opInfResult, fp16NoStorage));
2731             testCases.push_back(OTC("denorm_op_nan_flush_to_zero", B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM,
2732                                     V_NAN, binaryCase.opNanResult, fp16NoStorage));
2733         }
2734 
2735         // Denorm - FlushToZero - unary operations
2736         for (size_t i = 0; i < typeTestResults->unaryOpFTZ.size(); ++i)
2737         {
2738             const UnaryCase &unaryCase = typeTestResults->unaryOpFTZ[i];
2739             OperationId operation      = unaryCase.operationId;
2740             testCases.push_back(OTC("op_denorm_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_UNUSED,
2741                                     unaryCase.result, fp16NoStorage));
2742         }
2743 
2744         // Denorm - Preserve - binary operations
2745         for (size_t i = 0; i < typeTestResults->binaryOpDenormPreserve.size(); ++i)
2746         {
2747             const BinaryCase &binaryCase = typeTestResults->binaryOpDenormPreserve[i];
2748             OperationId operation        = binaryCase.operationId;
2749             testCases.push_back(OTC("denorm_op_var_preserve", B_DENORM_PRESERVE, operation, V_DENORM, V_ONE,
2750                                     binaryCase.opVarResult, fp16NoStorage));
2751             testCases.push_back(OTC("denorm_op_denorm_preserve", B_DENORM_PRESERVE, operation, V_DENORM, V_DENORM,
2752                                     binaryCase.opDenormResult, fp16NoStorage));
2753             testCases.push_back(OTC("denorm_op_inf_preserve", B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM,
2754                                     V_INF, binaryCase.opInfResult, fp16NoStorage));
2755             testCases.push_back(OTC("denorm_op_nan_preserve", B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM,
2756                                     V_NAN, binaryCase.opNanResult, fp16NoStorage));
2757         }
2758 
2759         // Denorm - Preserve - unary operations
2760         for (size_t i = 0; i < typeTestResults->unaryOpDenormPreserve.size(); ++i)
2761         {
2762             const UnaryCase &unaryCase = typeTestResults->unaryOpDenormPreserve[i];
2763             OperationId operation      = unaryCase.operationId;
2764             testCases.push_back(OTC("op_denorm_preserve", B_DENORM_PRESERVE, operation, V_DENORM, V_UNUSED,
2765                                     unaryCase.result, fp16NoStorage));
2766         }
2767     }
2768 
2769     struct ZINCase // one table row describing a signed-zero/inf/nan preserve case for a single operation
2770     {
2771         OperationId operationId;     // operation the generated test cases exercise
2772         bool supportedByFP64;        // rows with false are skipped when the tested variable type is FP64
2773         ValueId secondArgument;      // second input used by the binary-operation loop; the unary loop passes V_UNUSED itself
2774         ValueId preserveZeroResult;  // result value handed to OTC when the first input is V_ZERO
2775         ValueId preserveSZeroResult; // result value handed to OTC when the first input is V_MINUS_ZERO
2776         ValueId preserveInfResult;   // result value handed to OTC when the first input is V_INF
2777         ValueId preserveSInfResult;  // result value handed to OTC when the first input is V_MINUS_INF
2778         ValueId preserveNanResult;   // result value handed to OTC when the first input is V_NAN
2779     };
2780 
2781     const ZINCase binaryOpZINPreserve[] = {
2782         // columns follow ZINCase field order: operation, fp64 support, second argument, then the result for a first argument of zero, signed zero, inf, signed inf and nan
2783         {OID_PHI, true, V_INF, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2784         {OID_SELECT, true, V_ONE, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2785         {OID_ADD, true, V_ZERO, V_ZERO, V_ZERO, V_INF, V_MINUS_INF, V_NAN}, // only row where a signed-zero input is listed with a V_ZERO result
2786         {OID_SUB, true, V_ZERO, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2787         {OID_MUL, true, V_ONE, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2788     };
2789 
2790     const ZINCase unaryOpZINPreserve[] = {
2791         // columns follow ZINCase field order: operation, fp64 support, second argument (V_UNUSED in every row), then the result for an input of zero, signed zero, inf, signed inf and nan
2792         {OID_RETURN_VAL, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2793         {OID_D_EXTRACT, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2794         {OID_D_INSERT, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2795         {OID_SHUFFLE, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2796         {OID_COMPOSITE, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2797         {OID_COMPOSITE_INS, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2798         {OID_COPY, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2799         {OID_TRANSPOSE, true, V_UNUSED, V_ZERO, V_MINUS_ZERO, V_INF, V_MINUS_INF, V_NAN},
2800         {OID_NEGATE, true, V_UNUSED, V_MINUS_ZERO, V_ZERO, V_MINUS_INF, V_INF, V_NAN}, // signs are swapped relative to the input for zero and inf; nan stays nan
2801     };
2802 
2803     bool isFP64 = typeTestResults->variableType() == FP64;
2804 
2805     // Signed Zero Inf Nan - Preserve - binary operations
2806     for (int j = 0; j < 2; j++)
2807     {
2808         // fp16NoStorage tests only supported if testing fp16.
2809         bool fp16NoStorage = (j == 1);
2810         if (fp16NoStorage && !isFP16)
2811             continue;
2812 
2813         for (size_t i = 0; i < DE_LENGTH_OF_ARRAY(binaryOpZINPreserve); ++i)
2814         {
2815             const ZINCase &zc = binaryOpZINPreserve[i];
2816             if (isFP64 && !zc.supportedByFP64)
2817                 continue;
2818 
2819             testCases.push_back(OTC("zero_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_ZERO, zc.secondArgument,
2820                                     zc.preserveZeroResult, fp16NoStorage));
2821             testCases.push_back(OTC("signed_zero_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_MINUS_ZERO,
2822                                     zc.secondArgument, zc.preserveSZeroResult, fp16NoStorage));
2823             testCases.push_back(OTC("inf_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_INF, zc.secondArgument,
2824                                     zc.preserveInfResult, fp16NoStorage));
2825             testCases.push_back(OTC("signed_inf_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_MINUS_INF,
2826                                     zc.secondArgument, zc.preserveSInfResult, fp16NoStorage));
2827             testCases.push_back(OTC("nan_op_var_preserve", B_ZIN_PRESERVE, zc.operationId, V_NAN, zc.secondArgument,
2828                                     zc.preserveNanResult, fp16NoStorage));
2829         }
2830 
2831         // Signed Zero Inf Nan - Preserve - unary operations
2832         for (size_t i = 0; i < DE_LENGTH_OF_ARRAY(unaryOpZINPreserve); ++i)
2833         {
2834             const ZINCase &zc = unaryOpZINPreserve[i];
2835             if (isFP64 && !zc.supportedByFP64)
2836                 continue;
2837 
2838             testCases.push_back(OTC("op_zero_preserve", B_ZIN_PRESERVE, zc.operationId, V_ZERO, V_UNUSED,
2839                                     zc.preserveZeroResult, fp16NoStorage));
2840             testCases.push_back(OTC("op_signed_zero_preserve", B_ZIN_PRESERVE, zc.operationId, V_MINUS_ZERO, V_UNUSED,
2841                                     zc.preserveSZeroResult, fp16NoStorage));
2842             testCases.push_back(OTC("op_inf_preserve", B_ZIN_PRESERVE, zc.operationId, V_INF, V_UNUSED,
2843                                     zc.preserveInfResult, fp16NoStorage));
2844             testCases.push_back(OTC("op_signed_inf_preserve", B_ZIN_PRESERVE, zc.operationId, V_MINUS_INF, V_UNUSED,
2845                                     zc.preserveSInfResult, fp16NoStorage));
2846             testCases.push_back(OTC("op_nan_preserve", B_ZIN_PRESERVE, zc.operationId, V_NAN, V_UNUSED,
2847                                     zc.preserveNanResult, fp16NoStorage));
2848         }
2849     }
2850 
2851     // comparison operations - tested differently because they return true/false
2852     struct ComparisonCase
2853     {
2854         OperationId operationId;
2855         ValueId denormPreserveResult;
2856     };
2857     const ComparisonCase comparisonCases[] = {// operation    denorm
2858                                               {OID_ORD_EQ, V_ZERO},  {OID_UORD_EQ, V_ZERO}, {OID_ORD_NEQ, V_ONE},
2859                                               {OID_UORD_NEQ, V_ONE}, {OID_ORD_LS, V_ONE},   {OID_UORD_LS, V_ONE},
2860                                               {OID_ORD_GT, V_ZERO},  {OID_UORD_GT, V_ZERO}, {OID_ORD_LE, V_ONE},
2861                                               {OID_UORD_LE, V_ONE},  {OID_ORD_GE, V_ZERO},  {OID_UORD_GE, V_ZERO}};
2862     for (int op = 0; op < DE_LENGTH_OF_ARRAY(comparisonCases); ++op)
2863     {
2864         const ComparisonCase &cc = comparisonCases[op];
2865         testCases.push_back(
2866             OTC("denorm_op_var_preserve", B_DENORM_PRESERVE, cc.operationId, V_DENORM, V_ONE, cc.denormPreserveResult));
2867         if (isFP16)
2868             testCases.push_back(OTC("denorm_op_var_preserve", B_DENORM_PRESERVE, cc.operationId, V_DENORM, V_ONE,
2869                                     cc.denormPreserveResult, true));
2870     }
2871 
2872     if (argumentsFromInput)
2873     {
2874         struct RoundingModeCase
2875         {
2876             OperationId operationId;
2877             ValueId arg1;
2878             ValueId arg2;
2879             ValueId expectedRTEResult;
2880             ValueId expectedRTZResult;
2881         };
2882 
2883         const RoundingModeCase roundingCases[] = {
2884             {OID_ADD, V_ADD_ARG_A, V_ADD_ARG_B, V_ADD_RTE_RESULT, V_ADD_RTZ_RESULT},
2885             {OID_SUB, V_SUB_ARG_A, V_SUB_ARG_B, V_SUB_RTE_RESULT, V_SUB_RTZ_RESULT},
2886             {OID_MUL, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT},
2887             {OID_DOT, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT},
2888 
2889             // in vect/mat multiplication by scalar operations only first element of result is checked
2890             // so argument and result values prepared for multiplication can be reused for those cases
2891             {OID_VEC_MUL_S, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT},
2892             {OID_MAT_MUL_S, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT},
2893             {OID_OUT_PROD, V_MUL_ARG_A, V_MUL_ARG_B, V_MUL_RTE_RESULT, V_MUL_RTZ_RESULT},
2894 
2895             // in SPIR-V code we return first element of operation result so for following
2896             // cases argument and result values prepared for dot product can be reused
2897             {OID_VEC_MUL_M, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT},
2898             {OID_MAT_MUL_V, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT},
2899             {OID_MAT_MUL_M, V_DOT_ARG_A, V_DOT_ARG_B, V_DOT_RTE_RESULT, V_DOT_RTZ_RESULT},
2900 
2901             // conversion operations are added separately - depending on float type width
2902         };
2903 
2904         for (int c = 0; c < DE_LENGTH_OF_ARRAY(roundingCases); ++c)
2905         {
2906             const RoundingModeCase &rmc = roundingCases[c];
2907             testCases.push_back(
2908                 OTC("rounding_rte_op", B_RTE_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTEResult));
2909             testCases.push_back(
2910                 OTC("rounding_rtz_op", B_RTZ_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTZResult));
2911             if (isFP16)
2912             {
2913                 testCases.push_back(OTC("rounding_rte_op", B_RTE_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2,
2914                                         rmc.expectedRTEResult, true));
2915                 testCases.push_back(OTC("rounding_rtz_op", B_RTZ_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2,
2916                                         rmc.expectedRTZResult, true));
2917             }
2918         }
2919     }
2920 
2921     // special cases
2922     if (typeTestResults->variableType() == FP16)
2923     {
2924         if (argumentsFromInput)
2925         {
2926             for (int i = 0; i < 2; i++)
2927             {
2928                 bool noStorage = (i == 1);
2929 
2930                 //// Conversions from arguments
2931                 // fp32 rte
2932                 testCases.push_back(OTC("rounding_rte_conv_from_fp32_up", B_RTE_ROUNDING, OID_CONV_FROM_FP32,
2933                                         V_CONV_FROM_FP32_TO_FP16_UP_ARG, V_UNUSED,
2934                                         V_CONV_FROM_FP32_TO_FP16_UP_RTE_RESULT, noStorage));
2935                 testCases.push_back(OTC("rounding_rte_conv_from_fp32_down", B_RTE_ROUNDING, OID_CONV_FROM_FP32,
2936                                         V_CONV_FROM_FP32_TO_FP16_DOWN_ARG, V_UNUSED,
2937                                         V_CONV_FROM_FP32_TO_FP16_DOWN_RTE_RESULT, noStorage));
2938                 testCases.push_back(OTC("rounding_rte_conv_from_fp32_tie_up", B_RTE_ROUNDING, OID_CONV_FROM_FP32,
2939                                         V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG, V_UNUSED,
2940                                         V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTE_RESULT, noStorage));
2941                 testCases.push_back(OTC("rounding_rte_conv_from_fp32_tie_down", B_RTE_ROUNDING, OID_CONV_FROM_FP32,
2942                                         V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
2943                                         V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTE_RESULT, noStorage));
2944 
2945                 // fp32 rtz
2946                 testCases.push_back(OTC("rounding_rtz_conv_from_fp32_up", B_RTZ_ROUNDING, OID_CONV_FROM_FP32,
2947                                         V_CONV_FROM_FP32_TO_FP16_UP_ARG, V_UNUSED,
2948                                         V_CONV_FROM_FP32_TO_FP16_UP_RTZ_RESULT, noStorage));
2949                 testCases.push_back(OTC("rounding_rtz_conv_from_fp32_down", B_RTZ_ROUNDING, OID_CONV_FROM_FP32,
2950                                         V_CONV_FROM_FP32_TO_FP16_DOWN_ARG, V_UNUSED,
2951                                         V_CONV_FROM_FP32_TO_FP16_DOWN_RTZ_RESULT, noStorage));
2952                 testCases.push_back(OTC("rounding_rtz_conv_from_fp32_tie_up", B_RTZ_ROUNDING, OID_CONV_FROM_FP32,
2953                                         V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG, V_UNUSED,
2954                                         V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTZ_RESULT, noStorage));
2955                 testCases.push_back(OTC("rounding_rtz_conv_from_fp32_tie_down", B_RTZ_ROUNDING, OID_CONV_FROM_FP32,
2956                                         V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
2957                                         V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTZ_RESULT, noStorage));
2958 
2959                 // fp64 rte
2960                 testCases.push_back(OTC("rounding_rte_conv_from_fp64_up", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
2961                                         V_CONV_FROM_FP64_TO_FP16_UP_ARG, V_UNUSED,
2962                                         V_CONV_FROM_FP64_TO_FP16_UP_RTE_RESULT, noStorage));
2963                 testCases.push_back(OTC("rounding_rte_conv_from_fp64_down", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
2964                                         V_CONV_FROM_FP64_TO_FP16_DOWN_ARG, V_UNUSED,
2965                                         V_CONV_FROM_FP64_TO_FP16_DOWN_RTE_RESULT, noStorage));
2966                 testCases.push_back(OTC("rounding_rte_conv_from_fp64_tie_up", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
2967                                         V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG, V_UNUSED,
2968                                         V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTE_RESULT, noStorage));
2969                 testCases.push_back(OTC("rounding_rte_conv_from_fp64_tie_down", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
2970                                         V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
2971                                         V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTE_RESULT, noStorage));
2972 
2973                 // fp64 rtz
2974                 testCases.push_back(OTC("rounding_rtz_conv_from_fp64_up", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
2975                                         V_CONV_FROM_FP64_TO_FP16_UP_ARG, V_UNUSED,
2976                                         V_CONV_FROM_FP64_TO_FP16_UP_RTZ_RESULT, noStorage));
2977                 testCases.push_back(OTC("rounding_rtz_conv_from_fp64_down", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
2978                                         V_CONV_FROM_FP64_TO_FP16_DOWN_ARG, V_UNUSED,
2979                                         V_CONV_FROM_FP64_TO_FP16_DOWN_RTZ_RESULT, noStorage));
2980                 testCases.push_back(OTC("rounding_rtz_conv_from_fp64_tie_up", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
2981                                         V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG, V_UNUSED,
2982                                         V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTZ_RESULT, noStorage));
2983                 testCases.push_back(OTC("rounding_rtz_conv_from_fp64_tie_down", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
2984                                         V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
2985                                         V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTZ_RESULT, noStorage));
2986 
2987                 //// Conversions from specialization constants
2988                 // fp32 rte
2989                 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32_up", B_RTE_ROUNDING,
2990                                         OID_SCONST_CONV_FROM_FP32_TO_FP16_UP, V_CONV_FROM_FP32_TO_FP16_UP_ARG, V_UNUSED,
2991                                         V_CONV_FROM_FP32_TO_FP16_UP_RTE_RESULT, noStorage));
2992                 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32_down", B_RTE_ROUNDING,
2993                                         OID_SCONST_CONV_FROM_FP32_TO_FP16_DOWN, V_CONV_FROM_FP32_TO_FP16_DOWN_ARG,
2994                                         V_UNUSED, V_CONV_FROM_FP32_TO_FP16_DOWN_RTE_RESULT, noStorage));
2995                 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32_tie_up", B_RTE_ROUNDING,
2996                                         OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_UP, V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG,
2997                                         V_UNUSED, V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTE_RESULT, noStorage));
2998                 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32_tie_down", B_RTE_ROUNDING,
2999                                         OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_DOWN,
3000                                         V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
3001                                         V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTE_RESULT, noStorage));
3002 
3003                 // fp32 rtz
3004                 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32_up", B_RTZ_ROUNDING,
3005                                         OID_SCONST_CONV_FROM_FP32_TO_FP16_UP, V_CONV_FROM_FP32_TO_FP16_UP_ARG, V_UNUSED,
3006                                         V_CONV_FROM_FP32_TO_FP16_UP_RTZ_RESULT, noStorage));
3007                 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32_down", B_RTZ_ROUNDING,
3008                                         OID_SCONST_CONV_FROM_FP32_TO_FP16_DOWN, V_CONV_FROM_FP32_TO_FP16_DOWN_ARG,
3009                                         V_UNUSED, V_CONV_FROM_FP32_TO_FP16_DOWN_RTZ_RESULT, noStorage));
3010                 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32_tie_up", B_RTZ_ROUNDING,
3011                                         OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_UP, V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG,
3012                                         V_UNUSED, V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTZ_RESULT, noStorage));
3013                 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32_tie_down", B_RTZ_ROUNDING,
3014                                         OID_SCONST_CONV_FROM_FP32_TO_FP16_TIE_DOWN,
3015                                         V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
3016                                         V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTZ_RESULT, noStorage));
3017 
3018                 // fp64 rte
3019                 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_up", B_RTE_ROUNDING,
3020                                         OID_SCONST_CONV_FROM_FP64_TO_FP16_UP, V_CONV_FROM_FP64_TO_FP16_UP_ARG, V_UNUSED,
3021                                         V_CONV_FROM_FP64_TO_FP16_UP_RTE_RESULT, noStorage));
3022                 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_down", B_RTE_ROUNDING,
3023                                         OID_SCONST_CONV_FROM_FP64_TO_FP16_DOWN, V_CONV_FROM_FP64_TO_FP16_DOWN_ARG,
3024                                         V_UNUSED, V_CONV_FROM_FP64_TO_FP16_DOWN_RTE_RESULT, noStorage));
3025                 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_tie_up", B_RTE_ROUNDING,
3026                                         OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_UP, V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG,
3027                                         V_UNUSED, V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTE_RESULT, noStorage));
3028                 testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_tie_down", B_RTE_ROUNDING,
3029                                         OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_DOWN,
3030                                         V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
3031                                         V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTE_RESULT, noStorage));
3032 
3033                 // fp64 rtz
3034                 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_up", B_RTZ_ROUNDING,
3035                                         OID_SCONST_CONV_FROM_FP64_TO_FP16_UP, V_CONV_FROM_FP64_TO_FP16_UP_ARG, V_UNUSED,
3036                                         V_CONV_FROM_FP64_TO_FP16_UP_RTZ_RESULT, noStorage));
3037                 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_down", B_RTZ_ROUNDING,
3038                                         OID_SCONST_CONV_FROM_FP64_TO_FP16_DOWN, V_CONV_FROM_FP64_TO_FP16_DOWN_ARG,
3039                                         V_UNUSED, V_CONV_FROM_FP64_TO_FP16_DOWN_RTZ_RESULT, noStorage));
3040                 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_tie_up", B_RTZ_ROUNDING,
3041                                         OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_UP, V_CONV_FROM_FP64_TO_FP16_TIE_UP_ARG,
3042                                         V_UNUSED, V_CONV_FROM_FP64_TO_FP16_TIE_UP_RTZ_RESULT, noStorage));
3043                 testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_tie_down", B_RTZ_ROUNDING,
3044                                         OID_SCONST_CONV_FROM_FP64_TO_FP16_TIE_DOWN,
3045                                         V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
3046                                         V_CONV_FROM_FP64_TO_FP16_TIE_DOWN_RTZ_RESULT, noStorage));
3047             }
3048 
3049             // verify that VkShaderFloatingPointRoundingModeKHR can be overridden for a given instruction by the FPRoundingMode decoration.
3050             // FPRoundingMode decoration requires VK_KHR_16bit_storage.
3051             testCases.push_back(OTC("rounding_rte_override_from_fp32_up", B_RTE_ROUNDING, OID_ORTZ_ROUND,
3052                                     V_CONV_FROM_FP32_TO_FP16_UP_ARG, V_UNUSED, V_CONV_FROM_FP32_TO_FP16_UP_RTZ_RESULT));
3053             testCases.push_back(OTC("rounding_rte_override_from_fp32_down", B_RTE_ROUNDING, OID_ORTZ_ROUND,
3054                                     V_CONV_FROM_FP32_TO_FP16_DOWN_ARG, V_UNUSED,
3055                                     V_CONV_FROM_FP32_TO_FP16_DOWN_RTZ_RESULT));
3056             testCases.push_back(OTC("rounding_rte_override_from_fp32_tie_up", B_RTE_ROUNDING, OID_ORTZ_ROUND,
3057                                     V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG, V_UNUSED,
3058                                     V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTZ_RESULT));
3059             testCases.push_back(OTC("rounding_rte_override_from_fp32_tie_down", B_RTE_ROUNDING, OID_ORTZ_ROUND,
3060                                     V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
3061                                     V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTZ_RESULT));
3062             // Missing for FP64 -> FP16
3063             // TODO(https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/4539)
3064 
3065             testCases.push_back(OTC("rounding_rtz_override_from_fp32_up", B_RTE_ROUNDING, OID_ORTE_ROUND,
3066                                     V_CONV_FROM_FP32_TO_FP16_UP_ARG, V_UNUSED, V_CONV_FROM_FP32_TO_FP16_UP_RTE_RESULT));
3067             testCases.push_back(OTC("rounding_rtz_override_from_fp32_down", B_RTE_ROUNDING, OID_ORTE_ROUND,
3068                                     V_CONV_FROM_FP32_TO_FP16_DOWN_ARG, V_UNUSED,
3069                                     V_CONV_FROM_FP32_TO_FP16_DOWN_RTE_RESULT));
3070             testCases.push_back(OTC("rounding_rtz_override_from_fp32_tie_up", B_RTE_ROUNDING, OID_ORTE_ROUND,
3071                                     V_CONV_FROM_FP32_TO_FP16_TIE_UP_ARG, V_UNUSED,
3072                                     V_CONV_FROM_FP32_TO_FP16_TIE_UP_RTE_RESULT));
3073             testCases.push_back(OTC("rounding_rtz_override_from_fp32_tie_down", B_RTE_ROUNDING, OID_ORTE_ROUND,
3074                                     V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_ARG, V_UNUSED,
3075                                     V_CONV_FROM_FP32_TO_FP16_TIE_DOWN_RTE_RESULT));
3076             // Missing for FP64 -> FP16
3077             // TODO(https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/4539)
3078         }
3079 
3080         createUnaryTestCases(testCases, OID_CONV_FROM_FP32, V_CONV_DENORM_SMALLER, V_ZERO);
3081         createUnaryTestCases(testCases, OID_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
3082         createUnaryTestCases(testCases, OID_CONV_FROM_FP32, V_CONV_DENORM_SMALLER, V_ZERO, true);
3083         createUnaryTestCases(testCases, OID_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO, true);
3084     }
3085     else if (typeTestResults->variableType() == FP32)
3086     {
3087         if (argumentsFromInput)
3088         {
3089             //// Conversions from arguments
3090             // fp64 rte
3091             testCases.push_back(OTC("rounding_rte_conv_from_fp64_up", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
3092                                     V_CONV_FROM_FP64_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_FP64_TO_FP32_UP_RTE_RESULT));
3093             testCases.push_back(OTC("rounding_rte_conv_from_fp64_down", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
3094                                     V_CONV_FROM_FP64_TO_FP32_DOWN_ARG, V_UNUSED,
3095                                     V_CONV_FROM_FP64_TO_FP32_DOWN_RTE_RESULT));
3096             testCases.push_back(OTC("rounding_rte_conv_from_fp64_tie_up", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
3097                                     V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG, V_UNUSED,
3098                                     V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTE_RESULT));
3099             testCases.push_back(OTC("rounding_rte_conv_from_fp64_tie_down", B_RTE_ROUNDING, OID_CONV_FROM_FP64,
3100                                     V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG, V_UNUSED,
3101                                     V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTE_RESULT));
3102 
3103             // fp64 rtz
3104             testCases.push_back(OTC("rounding_rtz_conv_from_fp64_up", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
3105                                     V_CONV_FROM_FP64_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_FP64_TO_FP32_UP_RTZ_RESULT));
3106             testCases.push_back(OTC("rounding_rtz_conv_from_fp64_down", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
3107                                     V_CONV_FROM_FP64_TO_FP32_DOWN_ARG, V_UNUSED,
3108                                     V_CONV_FROM_FP64_TO_FP32_DOWN_RTZ_RESULT));
3109             testCases.push_back(OTC("rounding_rtz_conv_from_fp64_tie_up", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
3110                                     V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG, V_UNUSED,
3111                                     V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTZ_RESULT));
3112             testCases.push_back(OTC("rounding_rtz_conv_from_fp64_tie_down", B_RTZ_ROUNDING, OID_CONV_FROM_FP64,
3113                                     V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG, V_UNUSED,
3114                                     V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTZ_RESULT));
3115 
3116             //// Conversions from specialization constants
3117             // fp64 rte
3118             testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_up", B_RTE_ROUNDING,
3119                                     OID_SCONST_CONV_FROM_FP64_TO_FP32_UP, V_CONV_FROM_FP64_TO_FP32_UP_ARG, V_UNUSED,
3120                                     V_CONV_FROM_FP64_TO_FP32_UP_RTE_RESULT));
3121             testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_down", B_RTE_ROUNDING,
3122                                     OID_SCONST_CONV_FROM_FP64_TO_FP32_DOWN, V_CONV_FROM_FP64_TO_FP32_DOWN_ARG, V_UNUSED,
3123                                     V_CONV_FROM_FP64_TO_FP32_DOWN_RTE_RESULT));
3124             testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_tie_up", B_RTE_ROUNDING,
3125                                     OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_UP, V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG,
3126                                     V_UNUSED, V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTE_RESULT));
3127             testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_tie_down", B_RTE_ROUNDING,
3128                                     OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_DOWN, V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG,
3129                                     V_UNUSED, V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTE_RESULT));
3130 
3131             // fp64 rtz
3132             testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_up", B_RTZ_ROUNDING,
3133                                     OID_SCONST_CONV_FROM_FP64_TO_FP32_UP, V_CONV_FROM_FP64_TO_FP32_UP_ARG, V_UNUSED,
3134                                     V_CONV_FROM_FP64_TO_FP32_UP_RTZ_RESULT));
3135             testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_down", B_RTZ_ROUNDING,
3136                                     OID_SCONST_CONV_FROM_FP64_TO_FP32_DOWN, V_CONV_FROM_FP64_TO_FP32_DOWN_ARG, V_UNUSED,
3137                                     V_CONV_FROM_FP64_TO_FP32_DOWN_RTZ_RESULT));
3138             testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_tie_up", B_RTZ_ROUNDING,
3139                                     OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_UP, V_CONV_FROM_FP64_TO_FP32_TIE_UP_ARG,
3140                                     V_UNUSED, V_CONV_FROM_FP64_TO_FP32_TIE_UP_RTZ_RESULT));
3141             testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_tie_down", B_RTZ_ROUNDING,
3142                                     OID_SCONST_CONV_FROM_FP64_TO_FP32_TIE_DOWN, V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_ARG,
3143                                     V_UNUSED, V_CONV_FROM_FP64_TO_FP32_TIE_DOWN_RTZ_RESULT));
3144 
3145             // Verify that VkShaderFloatingPointRoundingModeKHR can be overridden for a given instruction by the FPRoundingMode decoration.
3146             // Missing for FP64 -> FP32
3147             // TODO(https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/4539)
3148 
3149             // uint32 rtz
3150             testCases.push_back(OTC("rounding_rtz_conv_from_uint32_up", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP32,
3151                                     V_CONV_FROM_UINT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_UINT32_UP_RTZ_RESULT));
3152             testCases.push_back(OTC("rounding_rtz_conv_from_uint32_tie", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP32,
3153                                     V_CONV_FROM_UINT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_UINT32_TIE_RTZ_RESULT));
3154             testCases.push_back(OTC("rounding_rtz_conv_from_uint32_down", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP32,
3155                                     V_CONV_FROM_UINT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_UINT32_DOWN_RTZ_RESULT));
3156 
3157             // uint64 rtz
3158             testCases.push_back(OTC("rounding_rtz_conv_from_uint64_up", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3159                                     V_CONV_FROM_UINT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_UINT64_UP_RTZ_RESULT));
3160             testCases.push_back(OTC("rounding_rtz_conv_from_uint64_tie", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3161                                     V_CONV_FROM_UINT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_UINT64_TIE_RTZ_RESULT));
3162             testCases.push_back(OTC("rounding_rtz_conv_from_uint64_down", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3163                                     V_CONV_FROM_UINT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_UINT64_DOWN_RTZ_RESULT));
3164 
3165             // uint32 rte
3166             testCases.push_back(OTC("rounding_rte_conv_from_uint32_up", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP32,
3167                                     V_CONV_FROM_UINT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_UINT32_UP_RTE_RESULT));
3168             testCases.push_back(OTC("rounding_rte_conv_from_uint32_tie", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP32,
3169                                     V_CONV_FROM_UINT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_UINT32_TIE_RTE_RESULT));
3170             testCases.push_back(OTC("rounding_rte_conv_from_uint32_down", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP32,
3171                                     V_CONV_FROM_UINT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_UINT32_DOWN_RTE_RESULT));
3172 
3173             // uint64 rte
3174             testCases.push_back(OTC("rounding_rte_conv_from_uint64_up", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3175                                     V_CONV_FROM_UINT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_UINT64_UP_RTE_RESULT));
3176             testCases.push_back(OTC("rounding_rte_conv_from_uint64_tie", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3177                                     V_CONV_FROM_UINT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_UINT64_TIE_RTE_RESULT));
3178             testCases.push_back(OTC("rounding_rte_conv_from_uint64_down", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3179                                     V_CONV_FROM_UINT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_UINT64_DOWN_RTE_RESULT));
3180 
3181             // int32 rtz
3182             testCases.push_back(OTC("rounding_rtz_conv_from_int32_up", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP32,
3183                                     V_CONV_FROM_INT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_INT32_UP_RTZ_RESULT));
3184             testCases.push_back(OTC("rounding_rtz_conv_from_int32_tie", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP32,
3185                                     V_CONV_FROM_INT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_INT32_TIE_RTZ_RESULT));
3186             testCases.push_back(OTC("rounding_rtz_conv_from_int32_down", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP32,
3187                                     V_CONV_FROM_INT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_INT32_DOWN_RTZ_RESULT));
3188 
3189             // int64 rtz
3190             testCases.push_back(OTC("rounding_rtz_conv_from_int64_up", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3191                                     V_CONV_FROM_INT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_INT64_UP_RTZ_RESULT));
3192             testCases.push_back(OTC("rounding_rtz_conv_from_int64_tie", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3193                                     V_CONV_FROM_INT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_INT64_TIE_RTZ_RESULT));
3194             testCases.push_back(OTC("rounding_rtz_conv_from_int64_down", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3195                                     V_CONV_FROM_INT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_INT64_DOWN_RTZ_RESULT));
3196 
3197             // int32 rte
3198             testCases.push_back(OTC("rounding_rte_conv_from_int32_up", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP32,
3199                                     V_CONV_FROM_INT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_INT32_UP_RTE_RESULT));
3200             testCases.push_back(OTC("rounding_rte_conv_from_int32_tie", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP32,
3201                                     V_CONV_FROM_INT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_INT32_TIE_RTE_RESULT));
3202             testCases.push_back(OTC("rounding_rte_conv_from_int32_down", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP32,
3203                                     V_CONV_FROM_INT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_INT32_DOWN_RTE_RESULT));
3204 
3205             // int64 rte
3206             testCases.push_back(OTC("rounding_rte_conv_from_int64_up", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3207                                     V_CONV_FROM_INT_TO_FP32_UP_ARG, V_UNUSED, V_CONV_FROM_INT64_UP_RTE_RESULT));
3208             testCases.push_back(OTC("rounding_rte_conv_from_int64_tie", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3209                                     V_CONV_FROM_INT_TO_FP32_TIE_ARG, V_UNUSED, V_CONV_FROM_INT64_TIE_RTE_RESULT));
3210             testCases.push_back(OTC("rounding_rte_conv_from_int64_down", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3211                                     V_CONV_FROM_INT_TO_FP32_DOWN_ARG, V_UNUSED, V_CONV_FROM_INT64_DOWN_RTE_RESULT));
3212         }
3213         else
3214         {
3215             // PackHalf2x16 - verification done in SPIR-V
3216             testCases.push_back(
3217                 OTC("pack_half_denorm_preserve", B_DENORM_PRESERVE, OID_PH_DENORM, V_UNUSED, V_UNUSED, V_ONE));
3218 
3219             // UnpackHalf2x16 - custom arguments defined as constants
3220             testCases.push_back(
3221                 OTC("upack_half_denorm_flush_to_zero", B_DENORM_FLUSH, OID_UPH_DENORM, V_UNUSED, V_UNUSED, V_ZERO));
3222             testCases.push_back(OTC("upack_half_denorm_preserve", B_DENORM_PRESERVE, OID_UPH_DENORM, V_UNUSED, V_UNUSED,
3223                                     V_CONV_DENORM_SMALLER));
3224         }
3225 
3226         createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP32);
3227         createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP32, true);
3228         createUnaryTestCases(testCases, OID_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
3229     }
3230     else // FP64
3231     {
3232         if (argumentsFromInput)
3233         {
3234             // uint64 rtz
3235             testCases.push_back(OTC("rounding_rtz_conv_from_uint64_up", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3236                                     V_CONV_FROM_UINT_TO_FP64_UP_ARG, V_UNUSED, V_CONV_FROM_UINT64_UP_RTZ_RESULT));
3237             testCases.push_back(OTC("rounding_rtz_conv_from_uint64_tie", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3238                                     V_CONV_FROM_UINT_TO_FP64_TIE_ARG, V_UNUSED, V_CONV_FROM_UINT64_TIE_RTZ_RESULT));
3239             testCases.push_back(OTC("rounding_rtz_conv_from_uint64_down", B_RTZ_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3240                                     V_CONV_FROM_UINT_TO_FP64_DOWN_ARG, V_UNUSED, V_CONV_FROM_UINT64_DOWN_RTZ_RESULT));
3241 
3242             // uint64 rte
3243             testCases.push_back(OTC("rounding_rte_conv_from_uint64_up", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3244                                     V_CONV_FROM_UINT_TO_FP64_UP_ARG, V_UNUSED, V_CONV_FROM_UINT64_UP_RTE_RESULT));
3245             testCases.push_back(OTC("rounding_rte_conv_from_uint64_tie", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3246                                     V_CONV_FROM_UINT_TO_FP64_TIE_ARG, V_UNUSED, V_CONV_FROM_UINT64_TIE_RTE_RESULT));
3247             testCases.push_back(OTC("rounding_rte_conv_from_uint64_down", B_RTE_ROUNDING, OID_CONV_FROM_UINT_TO_FP64,
3248                                     V_CONV_FROM_UINT_TO_FP64_DOWN_ARG, V_UNUSED, V_CONV_FROM_UINT64_DOWN_RTE_RESULT));
3249 
3250             // int64 rtz
3251             testCases.push_back(OTC("rounding_rtz_conv_from_int64_up", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3252                                     V_CONV_FROM_INT_TO_FP64_UP_ARG, V_UNUSED, V_CONV_FROM_INT64_UP_RTZ_RESULT));
3253             testCases.push_back(OTC("rounding_rtz_conv_from_int64_tie", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3254                                     V_CONV_FROM_INT_TO_FP64_TIE_ARG, V_UNUSED, V_CONV_FROM_INT64_TIE_RTZ_RESULT));
3255             testCases.push_back(OTC("rounding_rtz_conv_from_int64_down", B_RTZ_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3256                                     V_CONV_FROM_INT_TO_FP64_DOWN_ARG, V_UNUSED, V_CONV_FROM_INT64_DOWN_RTZ_RESULT));
3257 
3258             // int64 rte
3259             testCases.push_back(OTC("rounding_rte_conv_from_int64_up", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3260                                     V_CONV_FROM_INT_TO_FP64_UP_ARG, V_UNUSED, V_CONV_FROM_INT64_UP_RTE_RESULT));
3261             testCases.push_back(OTC("rounding_rte_conv_from_int64_tie", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3262                                     V_CONV_FROM_INT_TO_FP64_TIE_ARG, V_UNUSED, V_CONV_FROM_INT64_TIE_RTE_RESULT));
3263             testCases.push_back(OTC("rounding_rte_conv_from_int64_down", B_RTE_ROUNDING, OID_CONV_FROM_INT_TO_FP64,
3264                                     V_CONV_FROM_INT_TO_FP64_DOWN_ARG, V_UNUSED, V_CONV_FROM_INT64_DOWN_RTE_RESULT));
3265         }
3266         else
3267         {
3268             // PackDouble2x32 - custom arguments defined as constants
3269             testCases.push_back(
3270                 OTC("pack_double_denorm_preserve", B_DENORM_PRESERVE, OID_PD_DENORM, V_UNUSED, V_UNUSED, V_DENORM));
3271 
3272             // UnpackDouble2x32 - verification done in SPIR-V
3273             testCases.push_back(OTC("upack_double_denorm_flush_to_zero", B_DENORM_FLUSH, OID_UPD_DENORM_FLUSH, V_DENORM,
3274                                     V_UNUSED, V_ONE));
3275             testCases.push_back(OTC("upack_double_denorm_preserve", B_DENORM_PRESERVE, OID_UPD_DENORM_PRESERVE,
3276                                     V_DENORM, V_UNUSED, V_ONE));
3277         }
3278 
3279         createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP64);
3280         createUnaryTestCases(testCases, OID_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP64, true);
3281         createUnaryTestCases(testCases, OID_CONV_FROM_FP32, V_CONV_DENORM_BIGGER, V_ZERO_OR_FP32_DENORM_TO_FP64);
3282     }
3283 }
3284 
getOperation(OperationId id) const3285 const Operation &TestCasesBuilder::getOperation(OperationId id) const
3286 {
3287     return m_operations.at(id);
3288 }
3289 
createUnaryTestCases(vector<OperationTestCase> & testCases,OperationId operationId,ValueId denormPreserveResult,ValueId denormFTZResult,bool fp16WithoutStorage) const3290 void TestCasesBuilder::createUnaryTestCases(vector<OperationTestCase> &testCases, OperationId operationId,
3291                                             ValueId denormPreserveResult, ValueId denormFTZResult,
3292                                             bool fp16WithoutStorage) const
3293 {
3294     // Denorm - Preserve
3295     testCases.push_back(OTC("op_denorm_preserve", B_DENORM_PRESERVE, operationId, V_DENORM, V_UNUSED,
3296                             denormPreserveResult, fp16WithoutStorage));
3297 
3298     // Denorm - FlushToZero
3299     testCases.push_back(OTC("op_denorm_flush_to_zero", B_DENORM_FLUSH, operationId, V_DENORM, V_UNUSED, denormFTZResult,
3300                             fp16WithoutStorage));
3301 
3302     // Signed Zero Inf Nan - Preserve
3303     testCases.push_back(
3304         OTC("op_zero_preserve", B_ZIN_PRESERVE, operationId, V_ZERO, V_UNUSED, V_ZERO, fp16WithoutStorage));
3305     testCases.push_back(OTC("op_signed_zero_preserve", B_ZIN_PRESERVE, operationId, V_MINUS_ZERO, V_UNUSED,
3306                             V_MINUS_ZERO, fp16WithoutStorage));
3307     testCases.push_back(
3308         OTC("op_inf_preserve", B_ZIN_PRESERVE, operationId, V_INF, V_UNUSED, V_INF, fp16WithoutStorage));
3309     testCases.push_back(
3310         OTC("op_nan_preserve", B_ZIN_PRESERVE, operationId, V_NAN, V_UNUSED, V_NAN, fp16WithoutStorage));
3311 }
3312 
3313 template <typename TYPE, typename FLOAT_TYPE>
isZeroOrOtherValue(const TYPE & returnedFloat,ValueId secondAcceptableResult,TestLog & log)3314 bool isZeroOrOtherValue(const TYPE &returnedFloat, ValueId secondAcceptableResult, TestLog &log)
3315 {
3316     if (returnedFloat.isZero() && !returnedFloat.signBit())
3317         return true;
3318 
3319     TypeValues<FLOAT_TYPE> typeValues;
3320     typedef typename TYPE::StorageType SType;
3321     typename RawConvert<FLOAT_TYPE, SType>::Value value;
3322     value.fp = typeValues.getValue(secondAcceptableResult);
3323 
3324     if (returnedFloat.bits() == value.ui)
3325         return true;
3326 
3327     log << TestLog::Message << "Expected 0 or " << toHex(value.ui) << " (" << value.fp << ")" << TestLog::EndMessage;
3328     return false;
3329 }
3330 
3331 template <typename TYPE>
isAcosResultCorrect(const TYPE & returnedFloat,TestLog & log)3332 bool isAcosResultCorrect(const TYPE &returnedFloat, TestLog &log)
3333 {
3334     // pi/2 is result of acos(0) which in the specs is defined as equivalent to
3335     // atan2(sqrt(1.0 - x^2), x), where atan2 has 4096 ULP, sqrt is equivalent to
3336     // 1.0 /inversesqrt(), inversesqrt() is 2 ULP and rcp is another 2.5 ULP
3337 
3338     double precision    = 0;
3339     const double piDiv2 = M_PI_2;
3340     if (returnedFloat.MANTISSA_BITS == 23)
3341     {
3342         FloatFormat fp32Format(-126, 127, 23, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
3343         precision = fp32Format.ulp(piDiv2, 4096.0);
3344     }
3345     else
3346     {
3347         FloatFormat fp16Format(-14, 15, 10, true, tcu::MAYBE);
3348         precision = fp16Format.ulp(piDiv2, 5.0);
3349     }
3350 
3351     if (deAbs(returnedFloat.asDouble() - piDiv2) < precision)
3352         return true;
3353 
3354     log << TestLog::Message << "Expected result to be in range"
3355         << " (" << piDiv2 - precision << ", " << piDiv2 + precision << "), got " << returnedFloat.asDouble()
3356         << TestLog::EndMessage;
3357     return false;
3358 }
3359 
3360 template <typename TYPE>
isCosResultCorrect(const TYPE & returnedFloat,TestLog & log)3361 bool isCosResultCorrect(const TYPE &returnedFloat, TestLog &log)
3362 {
3363     // for cos(x) with x between -pi and pi, the precision error is 2^-11 for fp32 and 2^-7 for fp16.
3364     double precision      = returnedFloat.MANTISSA_BITS == 23 ? dePow(2, -11) : dePow(2, -7);
3365     const double expected = 1.0;
3366 
3367     if (deAbs(returnedFloat.asDouble() - expected) < precision)
3368         return true;
3369 
3370     log << TestLog::Message << "Expected result to be in range"
3371         << " (" << expected - precision << ", " << expected + precision << "), got " << returnedFloat.asDouble()
3372         << TestLog::EndMessage;
3373     return false;
3374 }
3375 
// Generic case: the parameter is converted to double by the language's own
// conversion rules.
template <typename FLOAT_TYPE>
double getVariableTypeAsDouble(FLOAT_TYPE param)
{
    const double asDouble = param;
    return asDouble;
}
3381 template <>
getVariableTypeAsDouble(deFloat16 param)3382 double getVariableTypeAsDouble(deFloat16 param)
3383 {
3384     return deFloat16To64(param);
3385 }
3386 
getPrecisionAt(double value,float ulp,int mantissaBits)3387 double getPrecisionAt(double value, float ulp, int mantissaBits)
3388 {
3389     if (mantissaBits == 23)
3390     {
3391         FloatFormat fp32Format(-126, 127, 23, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
3392         return fp32Format.ulp(value, ulp);
3393     }
3394     else if (mantissaBits == 52)
3395     {
3396         FloatFormat fp32Format(-1022, 1023, 52, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
3397         return fp32Format.ulp(value, ulp);
3398     }
3399     else
3400     {
3401         DE_ASSERT(mantissaBits == 10);
3402         FloatFormat fp16Format(-14, 15, 10, true, tcu::MAYBE);
3403         return fp16Format.ulp(value, ulp);
3404     }
3405 }
3406 
3407 template <typename TYPE, typename FLOAT_TYPE, typename REF_FUNCTION>
isLogResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,REF_FUNCTION refFunction,TestLog & log)3408 bool isLogResultCorrect(const TYPE &returnedFloat, FLOAT_TYPE param, REF_FUNCTION refFunction, TestLog &log)
3409 {
3410     if (returnedFloat.isInf() && returnedFloat.signBit())
3411         return true;
3412 
3413     const double expected  = refFunction(getVariableTypeAsDouble(param));
3414     const double precision = getPrecisionAt(expected, 3.0, returnedFloat.MANTISSA_BITS);
3415 
3416     if (deAbs(returnedFloat.asDouble() - expected) < precision)
3417         return true;
3418 
3419     log << TestLog::Message << "Expected result to be -INF or in range"
3420         << " (" << expected - precision << ", " << expected + precision << "), got " << returnedFloat.asDouble()
3421         << TestLog::EndMessage;
3422     return false;
3423 }
3424 
3425 template <typename TYPE, typename FLOAT_TYPE>
isInverseSqrtResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,TestLog & log)3426 bool isInverseSqrtResultCorrect(const TYPE &returnedFloat, FLOAT_TYPE param, TestLog &log)
3427 {
3428     if (returnedFloat.isInf() && !returnedFloat.signBit())
3429         return true;
3430 
3431     const double expected  = 1.0 / deSqrt(getVariableTypeAsDouble(param));
3432     const double precision = getPrecisionAt(expected, 2.0, returnedFloat.MANTISSA_BITS);
3433 
3434     if (deAbs(returnedFloat.asDouble() - expected) < precision)
3435         return true;
3436 
3437     log << TestLog::Message << "Expected result to be INF or in range"
3438         << " (" << expected - precision << ", " << expected + precision << "), got " << returnedFloat.asDouble()
3439         << TestLog::EndMessage;
3440     return false;
3441 }
3442 
3443 template <typename TYPE, typename FLOAT_TYPE>
isSqrtResultCorrect(const TYPE & returnedFloat,FLOAT_TYPE param,TestLog & log)3444 bool isSqrtResultCorrect(const TYPE &returnedFloat, FLOAT_TYPE param, TestLog &log)
3445 {
3446     if (returnedFloat.isZero() && !returnedFloat.signBit())
3447         return true;
3448 
3449     const double expected             = deSqrt(getVariableTypeAsDouble(param));
3450     const double expectedInverseSqrt  = 1.0 / expected;
3451     const double inverseSqrtPrecision = getPrecisionAt(expectedInverseSqrt, 2.0, returnedFloat.MANTISSA_BITS);
3452 
3453     double expectedMin =
3454         deMin(1.0 / (expectedInverseSqrt - inverseSqrtPrecision), 1.0 / (expectedInverseSqrt + inverseSqrtPrecision));
3455     double expectedMax =
3456         deMax(1.0 / (expectedInverseSqrt - inverseSqrtPrecision), 1.0 / (expectedInverseSqrt + inverseSqrtPrecision));
3457 
3458     expectedMin -= getPrecisionAt(expectedMin, 2.5, returnedFloat.MANTISSA_BITS);
3459     expectedMax += getPrecisionAt(expectedMax, 2.5, returnedFloat.MANTISSA_BITS);
3460 
3461     if (returnedFloat.asDouble() >= expectedMin && returnedFloat.asDouble() <= expectedMax)
3462         return true;
3463 
3464     log << TestLog::Message << "Expected result to be +0 or in range"
3465         << " (" << expectedMin << ", " << expectedMax << "), got " << returnedFloat.asDouble() << TestLog::EndMessage;
3466     return false;
3467 }
3468 
3469 // Function used to compare test result with expected output.
3470 // TYPE can be Float16, Float32 or Float64.
3471 // FLOAT_TYPE can be deFloat16, float, double.
3472 template <typename TYPE, typename FLOAT_TYPE>
compareBytes(vector<uint8_t> & expectedBytes,AllocationSp outputAlloc,TestLog & log)3473 bool compareBytes(vector<uint8_t> &expectedBytes, AllocationSp outputAlloc, TestLog &log)
3474 {
3475     const TYPE *returned = static_cast<const TYPE *>(outputAlloc->getHostPtr());
3476     const TYPE *fValueId = reinterpret_cast<const TYPE *>(&expectedBytes.front());
3477 
3478     // all test return single value
3479     // Fp16 nostorage tests get their values from a uint32_t value, but we create the
3480     // buffer with the same size for both cases: 4 bytes.
3481     if (sizeof(TYPE) == 2u)
3482         DE_ASSERT((expectedBytes.size() / sizeof(TYPE)) == 2);
3483     else
3484         DE_ASSERT((expectedBytes.size() / sizeof(TYPE)) == 1);
3485 
3486     // during test setup we do not store expected value but id that can be used to
3487     // retrieve actual value - this is done to handle special cases like multiple
3488     // allowed results or epsilon checks for some cases
3489     // note that this is workaround - this should be done by changing
3490     // ComputerShaderCase and GraphicsShaderCase so that additional arguments can
3491     // be passed to this verification callback
3492     typedef typename TYPE::StorageType SType;
3493     SType expectedInt       = fValueId[0].bits();
3494     ValueId expectedValueId = static_cast<ValueId>(expectedInt);
3495 
3496     // something went wrong, expected value cant be V_UNUSED,
3497     // if this is the case then test shouldn't be created at all
3498     DE_ASSERT(expectedValueId != V_UNUSED);
3499 
3500     TYPE returnedFloat = returned[0];
3501 
3502     log << TestLog::Message << "Calculated result: " << toHex(returnedFloat.bits()) << " (" << returnedFloat.asFloat()
3503         << ")" << TestLog::EndMessage;
3504 
3505     if (expectedValueId == V_NAN)
3506     {
3507         if (returnedFloat.isNaN())
3508             return true;
3509 
3510         log << TestLog::Message << "Expected NaN" << TestLog::EndMessage;
3511         return false;
3512     }
3513 
3514     if (expectedValueId == V_DENORM)
3515     {
3516         if (returnedFloat.isDenorm())
3517             return true;
3518 
3519         log << TestLog::Message << "Expected Denorm" << TestLog::EndMessage;
3520         return false;
3521     }
3522 
3523     // handle multiple acceptable results cases
3524     if (expectedValueId == V_ZERO_OR_MINUS_ZERO)
3525     {
3526         if (returnedFloat.isZero())
3527             return true;
3528 
3529         log << TestLog::Message << "Expected 0 or -0" << TestLog::EndMessage;
3530         return false;
3531     }
3532     if (expectedValueId == V_ZERO_OR_ONE)
3533         return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_ONE, log);
3534     if ((expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP32) || (expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP64))
3535         return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_SMALLER, log);
3536     if (expectedValueId == V_ZERO_OR_FP32_DENORM_TO_FP64)
3537         return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_BIGGER, log);
3538     if (expectedValueId == V_ZERO_OR_DENORM_TIMES_TWO)
3539     {
3540         // this expected value is only needed for fp16
3541         DE_ASSERT(returnedFloat.EXPONENT_BIAS == 15);
3542         return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_DENORM_TIMES_TWO, log);
3543     }
3544     if (expectedValueId == V_MINUS_ONE_OR_CLOSE)
3545     {
3546         // this expected value is only needed for fp16
3547         DE_ASSERT(returnedFloat.EXPONENT_BIAS == 15);
3548         typename TYPE::StorageType returnedValue = returnedFloat.bits();
3549         return (returnedValue == 0xbc00) || (returnedValue == 0xbbff);
3550     }
3551 
3552     // handle trigonometric operations precision errors
3553     if (expectedValueId == V_TRIG_ONE)
3554         return isCosResultCorrect<TYPE>(returnedFloat, log);
3555 
3556     // handle acos(0) case
3557     if (expectedValueId == V_PI_DIV_2)
3558         return isAcosResultCorrect<TYPE>(returnedFloat, log);
3559 
3560     TypeValues<FLOAT_TYPE> typeValues;
3561 
3562     if (expectedValueId == V_MINUS_INF_OR_LOG_DENORM)
3563         return isLogResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), deLog, log);
3564 
3565     if (expectedValueId == V_MINUS_INF_OR_LOG2_DENORM)
3566         return isLogResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), deLog2, log);
3567 
3568     if (expectedValueId == V_ZERO_OR_SQRT_DENORM)
3569         return isSqrtResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), log);
3570 
3571     if (expectedValueId == V_INF_OR_INV_SQRT_DENORM)
3572         return isInverseSqrtResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), log);
3573 
3574     typename RawConvert<FLOAT_TYPE, SType>::Value value;
3575     value.fp = typeValues.getValue(expectedValueId);
3576 
3577     if (returnedFloat.bits() == value.ui)
3578         return true;
3579 
3580     log << TestLog::Message << "Expected " << toHex(value.ui) << " (" << value.fp << ")" << TestLog::EndMessage;
3581     return false;
3582 }
3583 
3584 template <typename TYPE, typename FLOAT_TYPE>
checkFloats(const vector<Resource> &,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,TestLog & log)3585 bool checkFloats(const vector<Resource> &, const vector<AllocationSp> &outputAllocs,
3586                  const vector<Resource> &expectedOutputs, TestLog &log)
3587 {
3588     if (outputAllocs.size() != expectedOutputs.size())
3589         return false;
3590 
3591     for (uint32_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
3592     {
3593         vector<uint8_t> expectedBytes;
3594         expectedOutputs[outputNdx].getBytes(expectedBytes);
3595 
3596         if (!compareBytes<TYPE, FLOAT_TYPE>(expectedBytes, outputAllocs[outputNdx], log))
3597             return false;
3598     }
3599 
3600     return true;
3601 }
3602 
checkMixedFloats(const vector<Resource> &,const vector<AllocationSp> & outputAllocs,const vector<Resource> & expectedOutputs,TestLog & log)3603 bool checkMixedFloats(const vector<Resource> &, const vector<AllocationSp> &outputAllocs,
3604                       const vector<Resource> &expectedOutputs, TestLog &log)
3605 {
3606     // this function validates buffers containing floats of diferent widths, order is not important
3607 
3608     if (outputAllocs.size() != expectedOutputs.size())
3609         return false;
3610 
3611     // The comparison function depends on the data type stored in the resource.
3612     using compareFun = bool (*)(vector<uint8_t> &expectedBytes, AllocationSp outputAlloc, TestLog &log);
3613     const map<BufferDataType, compareFun> compareMap = {
3614         {BufferDataType::DATA_FP16, compareBytes<Float16, deFloat16>},
3615         {BufferDataType::DATA_FP32, compareBytes<Float32, float>},
3616         {BufferDataType::DATA_FP64, compareBytes<Float64, double>},
3617     };
3618 
3619     vector<uint8_t> expectedBytes;
3620     bool allResultsAreCorrect = true;
3621     int resultIndex           = static_cast<int>(outputAllocs.size());
3622 
3623     while (resultIndex--)
3624     {
3625         expectedOutputs[resultIndex].getBytes(expectedBytes);
3626         BufferDataType type =
3627             static_cast<BufferDataType>(reinterpret_cast<std::uintptr_t>(expectedOutputs[resultIndex].getUserData()));
3628         allResultsAreCorrect &= compareMap.at(type)(expectedBytes, outputAllocs[resultIndex], log);
3629     }
3630 
3631     return allResultsAreCorrect;
3632 }
3633 
3634 // Base class for ComputeTestGroupBuilder and GrephicstestGroupBuilder classes.
3635 // It contains all functionalities that are used by both child classes.
3636 class TestGroupBuilderBase
3637 {
3638 public:
3639     TestGroupBuilderBase();
3640     virtual ~TestGroupBuilderBase() = default;
3641 
3642     virtual void createOperationTests(TestCaseGroup *parentGroup, const char *groupName, VariableType variableType,
3643                                       bool argumentsFromInput) = 0;
3644 
3645     virtual void createSettingsTests(TestCaseGroup *parentGroup) = 0;
3646 
3647 protected:
3648     typedef vector<OperationTestCase> TestCaseVect;
3649 
3650     // Structure containing all data required to create single operation test.
3651     struct OperationTestCaseInfo
3652     {
3653         VariableType outVariableType;
3654         bool argumentsFromInput;
3655         VkShaderStageFlagBits testedStage;
3656         const Operation &operation;
3657         const OperationTestCase &testCase;
3658     };
3659 
3660     // Mode used by SettingsTestCaseInfo to specify what settings do we want to test.
3661     enum SettingsMode
3662     {
3663         SM_ROUNDING = 0,
3664         SM_DENORMS
3665     };
3666 
3667     // Enum containing available options. When rounding is tested only SO_RTE and SO_RTZ
3668     // should be used. SO_FLUSH and SO_PRESERVE should be used only for denorm tests.
3669     enum SettingsOption
3670     {
3671         SO_UNUSED = 0,
3672         SO_RTE,
3673         SO_RTZ,
3674         SO_FLUSH,
3675         SO_PRESERVE
3676     };
3677 
3678     // Structure containing all data required to create single settings test.
3679     struct SettingsTestCaseInfo
3680     {
3681         const char *name;
3682         SettingsMode testedMode;
3683         VkShaderFloatControlsIndependence independenceSetting;
3684 
3685         SettingsOption fp16Option;
3686         SettingsOption fp32Option;
3687         SettingsOption fp64Option;
3688         bool fp16Without16BitStorage;
3689     };
3690 
3691     void specializeOperation(const OperationTestCaseInfo &testCaseInfo,
3692                              SpecializedOperation &specializedOperation) const;
3693 
3694     void getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags, const string inBitWidth,
3695                                                const string outBitWidth, string &capability,
3696                                                string &executionMode) const;
3697 
3698     void setupFloatControlsProperties(VariableType inVariableType, VariableType outVariableType,
3699                                       BehaviorFlags behaviorFlags,
3700                                       vk::VkPhysicalDeviceFloatControlsProperties &props) const;
3701 
3702 protected:
3703     struct TypeData
3704     {
3705         TypeValuesSP values;
3706         TypeSnippetsSP snippets;
3707         TypeTestResultsSP testResults;
3708     };
3709 
3710     // Type specific parameters are stored in this map.
3711     map<VariableType, TypeData> m_typeData;
3712 
3713     // Map converting behaviuor id to OpCapability instruction
3714     typedef map<BehaviorFlagBits, string> BehaviorNameMap;
3715     BehaviorNameMap m_behaviorToName;
3716 };
3717 
TestGroupBuilderBase()3718 TestGroupBuilderBase::TestGroupBuilderBase()
3719 {
3720     m_typeData[FP16]               = TypeData();
3721     m_typeData[FP16].values        = TypeValuesSP(new TypeValues<deFloat16>);
3722     m_typeData[FP16].snippets      = TypeSnippetsSP(new TypeSnippets<deFloat16>);
3723     m_typeData[FP16].testResults   = TypeTestResultsSP(new TypeTestResults<deFloat16>);
3724     m_typeData[FP32]               = TypeData();
3725     m_typeData[FP32].values        = TypeValuesSP(new TypeValues<float>);
3726     m_typeData[FP32].snippets      = TypeSnippetsSP(new TypeSnippets<float>);
3727     m_typeData[FP32].testResults   = TypeTestResultsSP(new TypeTestResults<float>);
3728     m_typeData[FP64]               = TypeData();
3729     m_typeData[FP64].values        = TypeValuesSP(new TypeValues<double>);
3730     m_typeData[FP64].snippets      = TypeSnippetsSP(new TypeSnippets<double>);
3731     m_typeData[FP64].testResults   = TypeTestResultsSP(new TypeTestResults<double>);
3732     m_typeData[UINT32]             = TypeData();
3733     m_typeData[UINT32].values      = TypeValuesSP(new TypeValues<float>);
3734     m_typeData[UINT32].snippets    = TypeSnippetsSP(new TypeSnippets<float>(false));
3735     m_typeData[UINT32].testResults = TypeTestResultsSP(new TypeTestResults<float>);
3736     m_typeData[UINT64]             = TypeData();
3737     m_typeData[UINT64].values      = TypeValuesSP(new TypeValues<double>);
3738     m_typeData[UINT64].snippets    = TypeSnippetsSP(new TypeSnippets<double>(false));
3739     m_typeData[UINT64].testResults = TypeTestResultsSP(new TypeTestResults<double>);
3740     m_typeData[INT32]              = TypeData();
3741     m_typeData[INT32].values       = TypeValuesSP(new TypeValues<float>);
3742     m_typeData[INT32].snippets     = TypeSnippetsSP(new TypeSnippets<float>(false, true));
3743     m_typeData[INT32].testResults  = TypeTestResultsSP(new TypeTestResults<float>);
3744     m_typeData[INT64]              = TypeData();
3745     m_typeData[INT64].values       = TypeValuesSP(new TypeValues<double>);
3746     m_typeData[INT64].snippets     = TypeSnippetsSP(new TypeSnippets<double>(false, true));
3747     m_typeData[INT64].testResults  = TypeTestResultsSP(new TypeTestResults<double>);
3748 
3749     m_behaviorToName[B_DENORM_PRESERVE] = "DenormPreserve";
3750     m_behaviorToName[B_DENORM_FLUSH]    = "DenormFlushToZero";
3751     m_behaviorToName[B_ZIN_PRESERVE]    = "SignedZeroInfNanPreserve";
3752     m_behaviorToName[B_RTE_ROUNDING]    = "RoundingModeRTE";
3753     m_behaviorToName[B_RTZ_ROUNDING]    = "RoundingModeRTZ";
3754 }
3755 
specializeOperation(const OperationTestCaseInfo & testCaseInfo,SpecializedOperation & specializedOperation) const3756 void TestGroupBuilderBase::specializeOperation(const OperationTestCaseInfo &testCaseInfo,
3757                                                SpecializedOperation &specializedOperation) const
3758 {
3759     const string typeToken  = "_valueType";
3760     const string widthToken = "${float_width}";
3761 
3762     VariableType outVariableType         = testCaseInfo.outVariableType;
3763     const Operation &operation           = testCaseInfo.operation;
3764     const TypeSnippetsSP outTypeSnippets = m_typeData.at(outVariableType).snippets;
3765     const bool inputRestricted           = operation.isInputTypeRestricted;
3766     VariableType inVariableType          = operation.restrictedInputType;
3767 
3768     // usually input type is same as output but this is not the case for conversion
3769     // operations; in those cases operation definitions have restricted input type
3770     inVariableType = inputRestricted ? inVariableType : outVariableType;
3771 
3772     TypeSnippetsSP inTypeSnippets = m_typeData.at(inVariableType).snippets;
3773 
3774     const string inTypePrefix  = string("_") + inTypeSnippets->getValueTypeString() + inTypeSnippets->bitWidth;
3775     const string outTypePrefix = string("_") + outTypeSnippets->getValueTypeString() + outTypeSnippets->bitWidth;
3776 
3777     std::string byteWidthToken = std::to_string(std::stoi(outTypeSnippets->bitWidth) / 8);
3778 
3779     specializedOperation.constants   = replace(operation.constants, typeToken, inTypePrefix);
3780     specializedOperation.annotations = replace(operation.annotations, widthToken, byteWidthToken);
3781     specializedOperation.types       = replace(operation.types, typeToken, outTypePrefix);
3782     specializedOperation.variables   = replace(operation.variables, typeToken, outTypePrefix);
3783     specializedOperation.functions   = replace(operation.functions, typeToken, outTypePrefix);
3784     specializedOperation.commands    = replace(operation.commands, typeToken, outTypePrefix);
3785 
3786     specializedOperation.inVariableType             = inVariableType;
3787     specializedOperation.inTypeSnippets             = inTypeSnippets;
3788     specializedOperation.outTypeSnippets            = outTypeSnippets;
3789     specializedOperation.argumentsUsesFloatConstant = 0;
3790 
3791     if (operation.isSpecConstant)
3792         return;
3793 
3794     // select way arguments are prepared
3795     if (testCaseInfo.argumentsFromInput)
3796     {
3797         // read arguments from input SSBO in main function
3798         specializedOperation.arguments = inTypeSnippets->argumentsFromInputSnippet;
3799 
3800         if (inVariableType == FP16 && testCaseInfo.testCase.fp16Without16BitStorage)
3801             specializedOperation.arguments = inTypeSnippets->argumentsFromInputFp16Snippet;
3802     }
3803     else
3804     {
3805         // generate proper values in main function
3806         const string arg1 = "%arg1                 = ";
3807         const string arg2 = "%arg2                 = ";
3808 
3809         const ValueId *inputArguments = testCaseInfo.testCase.input;
3810         if (inputArguments[0] != V_UNUSED)
3811         {
3812             specializedOperation.arguments = arg1 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[0]);
3813             specializedOperation.argumentsUsesFloatConstant |= B_STATEMENT_USAGE_ARGS_CONST_FLOAT;
3814         }
3815         if (inputArguments[1] != V_UNUSED)
3816         {
3817             specializedOperation.arguments += arg2 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[1]);
3818             specializedOperation.argumentsUsesFloatConstant |= B_STATEMENT_USAGE_ARGS_CONST_FLOAT;
3819         }
3820     }
3821 }
3822 
getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,const string inBitWidth,const string outBitWidth,string & capability,string & executionMode) const3823 void TestGroupBuilderBase::getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags, const string inBitWidth,
3824                                                                  const string outBitWidth, string &capability,
3825                                                                  string &executionMode) const
3826 {
3827     // iterate over all behaviours and request those that are needed
3828     BehaviorNameMap::const_iterator it = m_behaviorToName.begin();
3829     while (it != m_behaviorToName.end())
3830     {
3831         BehaviorFlagBits behaviorId = it->first;
3832         string behaviorName         = it->second;
3833 
3834         if (behaviorFlags & behaviorId)
3835         {
3836             capability += "OpCapability " + behaviorName + "\n";
3837 
3838             // rounding mode should be obeyed for destination type
3839             bool rounding = (behaviorId == B_RTE_ROUNDING) || (behaviorId == B_RTZ_ROUNDING);
3840             executionMode +=
3841                 "OpExecutionMode %main " + behaviorName + " " + (rounding ? outBitWidth : inBitWidth) + "\n";
3842         }
3843 
3844         ++it;
3845     }
3846 
3847     DE_ASSERT(!capability.empty() && !executionMode.empty());
3848 }
3849 
setupFloatControlsProperties(VariableType inVariableType,VariableType outVariableType,BehaviorFlags behaviorFlags,vk::VkPhysicalDeviceFloatControlsProperties & props) const3850 void TestGroupBuilderBase::setupFloatControlsProperties(VariableType inVariableType, VariableType outVariableType,
3851                                                         BehaviorFlags behaviorFlags,
3852                                                         vk::VkPhysicalDeviceFloatControlsProperties &props) const
3853 {
3854     // rounding mode should obey the destination type
3855     bool rteRounding = (behaviorFlags & B_RTE_ROUNDING) != 0;
3856     bool rtzRounding = (behaviorFlags & B_RTZ_ROUNDING) != 0;
3857     if (rteRounding || rtzRounding)
3858     {
3859         switch (outVariableType)
3860         {
3861         case FP16:
3862             props.shaderRoundingModeRTEFloat16 = rteRounding;
3863             props.shaderRoundingModeRTZFloat16 = rtzRounding;
3864             return;
3865         case FP32:
3866             props.shaderRoundingModeRTEFloat32 = rteRounding;
3867             props.shaderRoundingModeRTZFloat32 = rtzRounding;
3868             return;
3869         case FP64:
3870             props.shaderRoundingModeRTEFloat64 = rteRounding;
3871             props.shaderRoundingModeRTZFloat64 = rtzRounding;
3872             return;
3873         case UINT32:
3874         case INT32:
3875         case UINT64:
3876         case INT64:
3877             return;
3878         }
3879     }
3880 
3881     switch (inVariableType)
3882     {
3883     case FP16:
3884         props.shaderDenormPreserveFloat16           = behaviorFlags & B_DENORM_PRESERVE;
3885         props.shaderDenormFlushToZeroFloat16        = behaviorFlags & B_DENORM_FLUSH;
3886         props.shaderSignedZeroInfNanPreserveFloat16 = behaviorFlags & B_ZIN_PRESERVE;
3887         return;
3888     case FP32:
3889         props.shaderDenormPreserveFloat32           = behaviorFlags & B_DENORM_PRESERVE;
3890         props.shaderDenormFlushToZeroFloat32        = behaviorFlags & B_DENORM_FLUSH;
3891         props.shaderSignedZeroInfNanPreserveFloat32 = behaviorFlags & B_ZIN_PRESERVE;
3892         return;
3893     case FP64:
3894         props.shaderDenormPreserveFloat64           = behaviorFlags & B_DENORM_PRESERVE;
3895         props.shaderDenormFlushToZeroFloat64        = behaviorFlags & B_DENORM_FLUSH;
3896         props.shaderSignedZeroInfNanPreserveFloat64 = behaviorFlags & B_ZIN_PRESERVE;
3897         return;
3898     case UINT32:
3899     case INT32:
3900     case UINT64:
3901     case INT64:
3902         return;
3903     }
3904 }
3905 
3906 // Test case not related to SPIR-V but executed with compute tests. It checks if specified
3907 // features are set to the same value when specific independence settings are used.
verifyIndependenceSettings(Context & context)3908 tcu::TestStatus verifyIndependenceSettings(Context &context)
3909 {
3910     if (!context.isDeviceFunctionalitySupported("VK_KHR_shader_float_controls"))
3911         TCU_THROW(NotSupportedError, "VK_KHR_shader_float_controls not supported");
3912 
3913     vk::VkPhysicalDeviceFloatControlsProperties fcProperties;
3914     fcProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES;
3915     fcProperties.pNext = DE_NULL;
3916 
3917     vk::VkPhysicalDeviceProperties2 deviceProperties;
3918     deviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
3919     deviceProperties.pNext = &fcProperties;
3920 
3921     auto fail = [](const string &featureGroup)
3922     { return tcu::TestStatus::fail(featureGroup + " features should be set to the same value"); };
3923 
3924     const VkPhysicalDevice physicalDevice          = context.getPhysicalDevice();
3925     const vk::InstanceInterface &instanceInterface = context.getInstanceInterface();
3926     instanceInterface.getPhysicalDeviceProperties2(physicalDevice, &deviceProperties);
3927 
3928     if (fcProperties.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE)
3929     {
3930         vk::VkBool32 fp16rte = fcProperties.shaderRoundingModeRTEFloat16;
3931         vk::VkBool32 fp32rte = fcProperties.shaderRoundingModeRTEFloat32;
3932         vk::VkBool32 fp64rte = fcProperties.shaderRoundingModeRTEFloat64;
3933         if ((fp16rte != fp32rte) || (fp32rte != fp64rte))
3934             return fail("shaderRoundingModeRTEFloat*");
3935 
3936         vk::VkBool32 fp16rtz = fcProperties.shaderRoundingModeRTZFloat16;
3937         vk::VkBool32 fp32rtz = fcProperties.shaderRoundingModeRTZFloat32;
3938         vk::VkBool32 fp64rtz = fcProperties.shaderRoundingModeRTZFloat64;
3939         if ((fp16rtz != fp32rtz) || (fp32rtz != fp64rtz))
3940             return fail("shaderRoundingModeRTZFloat*");
3941     }
3942     else if (fcProperties.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY)
3943     {
3944         vk::VkBool32 fp16rte = fcProperties.shaderRoundingModeRTEFloat16;
3945         vk::VkBool32 fp64rte = fcProperties.shaderRoundingModeRTEFloat64;
3946         if ((fp16rte != fp64rte))
3947             return fail("shaderRoundingModeRTEFloat16 and 64");
3948 
3949         vk::VkBool32 fp16rtz = fcProperties.shaderRoundingModeRTZFloat16;
3950         vk::VkBool32 fp64rtz = fcProperties.shaderRoundingModeRTZFloat64;
3951         if ((fp16rtz != fp64rtz))
3952             return fail("shaderRoundingModeRTZFloat16 and 64");
3953     }
3954 
3955     if (fcProperties.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE)
3956     {
3957         vk::VkBool32 fp16flush = fcProperties.shaderDenormFlushToZeroFloat16;
3958         vk::VkBool32 fp32flush = fcProperties.shaderDenormFlushToZeroFloat32;
3959         vk::VkBool32 fp64flush = fcProperties.shaderDenormFlushToZeroFloat64;
3960         if ((fp16flush != fp32flush) || (fp32flush != fp64flush))
3961             return fail("shaderDenormFlushToZeroFloat*");
3962 
3963         vk::VkBool32 fp16preserve = fcProperties.shaderDenormPreserveFloat16;
3964         vk::VkBool32 fp32preserve = fcProperties.shaderDenormPreserveFloat32;
3965         vk::VkBool32 fp64preserve = fcProperties.shaderDenormPreserveFloat64;
3966         if ((fp16preserve != fp32preserve) || (fp32preserve != fp64preserve))
3967             return fail("shaderDenormPreserveFloat*");
3968     }
3969     else if (fcProperties.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY)
3970     {
3971         vk::VkBool32 fp16flush = fcProperties.shaderDenormFlushToZeroFloat16;
3972         vk::VkBool32 fp64flush = fcProperties.shaderDenormFlushToZeroFloat64;
3973         if ((fp16flush != fp64flush))
3974             return fail("shaderDenormFlushToZeroFloat16 and 64");
3975 
3976         vk::VkBool32 fp16preserve = fcProperties.shaderDenormPreserveFloat16;
3977         vk::VkBool32 fp64preserve = fcProperties.shaderDenormPreserveFloat64;
3978         if ((fp16preserve != fp64preserve))
3979             return fail("shaderDenormPreserveFloat16 and 64");
3980     }
3981 
3982     return tcu::TestStatus::pass("Pass");
3983 }
3984 
// ComputeTestGroupBuilder contains logic that creates compute shaders
// for all test cases. Like most tests in spirv-assembly, it uses functionality
// implemented in vktSpvAsmComputeShaderTestUtil.cpp.
class ComputeTestGroupBuilder : public TestGroupBuilderBase
{
public:
    // Initializes the operation test case builder and sets up both shader templates.
    void init();

    // Adds a child group called groupName to parentGroup and populates it with
    // compute shader cases for the given variable type.
    void createOperationTests(TestCaseGroup *parentGroup, const char *groupName, VariableType variableType,
                              bool argumentsFromInput) override;

    // Adds the "independence_settings" child group to parentGroup.
    void createSettingsTests(TestCaseGroup *parentGroup) override;

protected:
    // Fill csSpec for an operation test case / for a settings test case.
    void fillShaderSpec(const OperationTestCaseInfo &testCaseInfo, ComputeShaderSpec &csSpec) const;
    void fillShaderSpec(const SettingsTestCaseInfo &testCaseInfo, ComputeShaderSpec &csSpec) const;

private:
    // SPIR-V assembly templates; both are set in init().
    StringTemplate m_operationShaderTemplate;
    StringTemplate m_settingsShaderTemplate;
    // Source of the operation test cases used by createOperationTests().
    TestCasesBuilder m_operationTestCaseBuilder;
};
4007 
init()4008 void ComputeTestGroupBuilder::init()
4009 {
4010     m_operationTestCaseBuilder.init();
4011 
4012     // generic compute shader template with common code for all
4013     // float types and all possible operations listed in OperationId enum
4014     m_operationShaderTemplate.setString("OpCapability Shader\n"
4015                                         "${capabilities}"
4016 
4017                                         "OpExtension \"SPV_KHR_float_controls\"\n"
4018                                         "${extensions}"
4019 
4020                                         "%std450            = OpExtInstImport \"GLSL.std.450\"\n"
4021                                         "OpMemoryModel Logical GLSL450\n"
4022                                         "OpEntryPoint GLCompute %main \"main\" %id\n"
4023                                         "OpExecutionMode %main LocalSize 1 1 1\n"
4024                                         "${execution_mode}"
4025 
4026                                         "OpDecorate %id BuiltIn GlobalInvocationId\n"
4027 
4028                                         // some tests require additional annotations
4029                                         "${annotations}"
4030 
4031                                         "%type_void            = OpTypeVoid\n"
4032                                         "%type_voidf           = OpTypeFunction %type_void\n"
4033                                         "%type_bool            = OpTypeBool\n"
4034                                         "%type_u32             = OpTypeInt 32 0\n"
4035                                         "%type_i32             = OpTypeInt 32 1\n"
4036                                         "%type_i32_fptr        = OpTypePointer Function %type_i32\n"
4037                                         "%type_u32_vec2        = OpTypeVector %type_u32 2\n"
4038                                         "%type_u32_vec3        = OpTypeVector %type_u32 3\n"
4039                                         "%type_u32_vec3_ptr    = OpTypePointer Input %type_u32_vec3\n"
4040 
4041                                         "%c_i32_0              = OpConstant %type_i32 0\n"
4042                                         "%c_i32_1              = OpConstant %type_i32 1\n"
4043                                         "%c_i32_2              = OpConstant %type_i32 2\n"
4044                                         "%c_u32_1              = OpConstant %type_u32 1\n"
4045 
4046                                         // if input float type has different width then output then
4047                                         // both types are defined here along with all types derived from
4048                                         // them that are commonly used by tests; some tests also define
4049                                         // their own types (those that are needed just by this single test)
4050                                         "${types}"
4051 
4052                                         // SSBO definitions
4053                                         "${io_definitions}"
4054 
4055                                         "%id                   = OpVariable %type_u32_vec3_ptr Input\n"
4056 
4057                                         // set of default constants per float type is placed here,
4058                                         // operation tests can also define additional constants.
4059                                         "${constants}"
4060 
4061                                         // O_RETURN_VAL defines function here and becouse
4062                                         // of that this token needs to be directly before main function
4063                                         "${functions}"
4064 
4065                                         "%main                 = OpFunction %type_void None %type_voidf\n"
4066                                         "%label                = OpLabel\n"
4067 
4068                                         "${variables}"
4069 
4070                                         // depending on test case arguments are either read from input ssbo
4071                                         // or generated in spir-v code - in later case shader input is not used
4072                                         "${arguments}"
4073 
4074                                         // perform test commands
4075                                         "${commands}"
4076 
4077                                         // save result to SSBO
4078                                         "${save_result}"
4079 
4080                                         "OpReturn\n"
4081                                         "OpFunctionEnd\n");
4082 
4083     m_settingsShaderTemplate.setString("OpCapability Shader\n"
4084                                        "${capabilities}"
4085 
4086                                        "OpExtension \"SPV_KHR_float_controls\"\n"
4087                                        "${extensions}"
4088 
4089                                        "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
4090                                        "OpMemoryModel Logical GLSL450\n"
4091                                        "OpEntryPoint GLCompute %main \"main\" %id\n"
4092                                        "OpExecutionMode %main LocalSize 1 1 1\n"
4093                                        "${execution_modes}"
4094 
4095                                        // annotations
4096                                        "OpDecorate %SSBO_in BufferBlock\n"
4097                                        "OpDecorate %ssbo_in DescriptorSet 0\n"
4098                                        "OpDecorate %ssbo_in Binding 0\n"
4099                                        "OpDecorate %ssbo_in NonWritable\n"
4100                                        "${io_annotations}"
4101 
4102                                        "OpDecorate %id BuiltIn GlobalInvocationId\n"
4103 
4104                                        // types
4105                                        "%type_void            = OpTypeVoid\n"
4106                                        "%type_voidf           = OpTypeFunction %type_void\n"
4107                                        "%type_u32             = OpTypeInt 32 0\n"
4108                                        "%type_i32             = OpTypeInt 32 1\n"
4109                                        "%type_i32_fptr        = OpTypePointer Function %type_i32\n"
4110                                        "%type_u32_vec3        = OpTypeVector %type_u32 3\n"
4111                                        "%type_u32_vec3_ptr    = OpTypePointer Input %type_u32_vec3\n"
4112 
4113                                        "%c_i32_0              = OpConstant %type_i32 0\n"
4114                                        "%c_i32_1              = OpConstant %type_i32 1\n"
4115                                        "%c_i32_2              = OpConstant %type_i32 2\n"
4116 
4117                                        "${types}"
4118 
4119                                        // in SSBO definition
4120                                        "%SSBO_in              = OpTypeStruct ${in_struct}\n"
4121                                        "%up_SSBO_in           = OpTypePointer Uniform %SSBO_in\n"
4122                                        "%ssbo_in              = OpVariable %up_SSBO_in Uniform\n"
4123 
4124                                        // out SSBO definitions
4125                                        "${out_definitions}"
4126 
4127                                        "%id                   = OpVariable %type_u32_vec3_ptr Input\n"
4128                                        "%main                 = OpFunction %type_void None %type_voidf\n"
4129                                        "%label                = OpLabel\n"
4130 
4131                                        "${commands}"
4132 
4133                                        "${save_result}"
4134 
4135                                        "OpReturn\n"
4136                                        "OpFunctionEnd\n");
4137 }
4138 
createOperationTests(TestCaseGroup * parentGroup,const char * groupName,VariableType variableType,bool argumentsFromInput)4139 void ComputeTestGroupBuilder::createOperationTests(TestCaseGroup *parentGroup, const char *groupName,
4140                                                    VariableType variableType, bool argumentsFromInput)
4141 {
4142     TestContext &testCtx = parentGroup->getTestContext();
4143     TestCaseGroup *group = new TestCaseGroup(testCtx, groupName);
4144     parentGroup->addChild(group);
4145 
4146     TestCaseVect testCases;
4147     m_operationTestCaseBuilder.build(testCases, m_typeData[variableType].testResults, argumentsFromInput);
4148 
4149     for (auto &testCase : testCases)
4150     {
4151         // skip cases with undefined output
4152         if (testCase.expectedOutput == V_UNUSED)
4153             continue;
4154 
4155         OperationTestCaseInfo testCaseInfo = {variableType, argumentsFromInput, VK_SHADER_STAGE_COMPUTE_BIT,
4156                                               m_operationTestCaseBuilder.getOperation(testCase.operationId), testCase};
4157 
4158         ComputeShaderSpec csSpec;
4159 
4160         fillShaderSpec(testCaseInfo, csSpec);
4161 
4162         string testName = replace(testCase.baseName, "op", testCaseInfo.operation.name);
4163         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), csSpec));
4164     }
4165 }
4166 
createSettingsTests(TestCaseGroup * parentGroup)4167 void ComputeTestGroupBuilder::createSettingsTests(TestCaseGroup *parentGroup)
4168 {
4169     TestContext &testCtx = parentGroup->getTestContext();
4170     TestCaseGroup *group = new TestCaseGroup(testCtx, "independence_settings");
4171     parentGroup->addChild(group);
4172 
4173     using SFCI                 = VkShaderFloatControlsIndependence;
4174     const SFCI independence32  = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
4175     const SFCI independenceAll = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
4176 
4177     vector<SettingsTestCaseInfo> testCases = {
4178         // name                                                            mode            independenceSetting        fp16Option        fp32Option        fp64Option        fp16Without16bitstorage
4179 
4180         // test rounding modes when only two float widths are available
4181         {"rounding_ind_all_fp16_rte_fp32_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_UNUSED, false},
4182         {"rounding_ind_all_fp16_rtz_fp32_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_UNUSED, false},
4183         {"rounding_ind_32_fp16_rte_fp32_rtz", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_UNUSED, false},
4184         {"rounding_ind_32_fp16_rtz_fp32_rte", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_UNUSED, false},
4185         {"rounding_ind_all_fp16_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_UNUSED, SO_RTZ, false},
4186         {"rounding_ind_all_fp16_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_UNUSED, SO_RTE, false},
4187         {"rounding_ind_all_fp32_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_UNUSED, SO_RTE, SO_RTZ, false},
4188         {"rounding_ind_all_fp32_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_UNUSED, SO_RTZ, SO_RTE, false},
4189         {"rounding_ind_32_fp32_rte_fp64_rtz", SM_ROUNDING, independence32, SO_UNUSED, SO_RTE, SO_RTZ, false},
4190         {"rounding_ind_32_fp32_rtz_fp64_rte", SM_ROUNDING, independence32, SO_UNUSED, SO_RTZ, SO_RTE, false},
4191 
4192         // test rounding modes when three widths are available
4193         {"rounding_ind_all_fp16_rtz_fp32_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTZ, false},
4194         {"rounding_ind_32_fp16_rtz_fp32_rte_fp64_rtz", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_RTZ, false},
4195         {"rounding_ind_all_fp16_rte_fp32_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTE, false},
4196         {"rounding_ind_32_fp16_rte_fp32_rtz_fp64_rte", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_RTE, false},
4197         {"rounding_ind_all_fp16_rtz_fp32_rtz_fp64_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTZ, SO_RTE, false},
4198         {"rounding_ind_all_fp16_rtz_fp32_rte_fp64_rte", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTE, false},
4199         {"rounding_ind_all_fp16_rte_fp32_rte_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_RTE, SO_RTZ, false},
4200         {"rounding_ind_all_fp16_rte_fp32_rtz_fp64_rtz", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTZ, false},
4201 
4202         // test denorm settings when only two float widths are available
4203         {"denorm_ind_all_fp16_flush_fp32_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE, SO_UNUSED,
4204          false},
4205         {"denorm_ind_all_fp16_preserve_fp32_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH, SO_UNUSED,
4206          false},
4207         {"denorm_ind_32_fp16_flush_fp32_preserve", SM_DENORMS, independence32, SO_FLUSH, SO_PRESERVE, SO_UNUSED, false},
4208         {"denorm_ind_32_fp16_preserve_fp32_flush", SM_DENORMS, independence32, SO_PRESERVE, SO_FLUSH, SO_UNUSED, false},
4209         {"denorm_ind_all_fp16_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_UNUSED, SO_PRESERVE,
4210          false},
4211         {"denorm_ind_all_fp16_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_UNUSED, SO_FLUSH,
4212          false},
4213         {"denorm_ind_all_fp32_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_UNUSED, SO_FLUSH, SO_PRESERVE,
4214          false},
4215         {"denorm_ind_all_fp32_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_UNUSED, SO_PRESERVE, SO_FLUSH,
4216          false},
4217         {"denorm_ind_32_fp32_flush_fp64_preserve", SM_DENORMS, independence32, SO_UNUSED, SO_FLUSH, SO_PRESERVE, false},
4218         {"denorm_ind_32_fp32_preserve_fp64_flush", SM_DENORMS, independence32, SO_UNUSED, SO_PRESERVE, SO_FLUSH, false},
4219 
4220         // test denorm settings when three widths are available
4221         {"denorm_ind_all_fp16_preserve_fp32_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH,
4222          SO_PRESERVE, false},
4223         {"denorm_ind_32_fp16_preserve_fp32_flush_fp64_preserve", SM_DENORMS, independence32, SO_PRESERVE, SO_FLUSH,
4224          SO_PRESERVE, false},
4225         {"denorm_ind_all_fp16_flush_fp32_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE,
4226          SO_FLUSH, false},
4227         {"denorm_ind_32_fp16_flush_fp32_preserve_fp64_flush", SM_DENORMS, independence32, SO_FLUSH, SO_PRESERVE,
4228          SO_FLUSH, false},
4229         {"denorm_ind_all_fp16_preserve_fp32_preserve_fp64_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_PRESERVE,
4230          SO_FLUSH, false},
4231         {"denorm_ind_all_fp16_preserve_fp32_flush_fp64_flush", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH,
4232          SO_FLUSH, false},
4233         {"denorm_ind_all_fp16_flush_fp32_flush_fp64_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_FLUSH,
4234          SO_PRESERVE, false},
4235         {"denorm_ind_all_fp16_flush_fp32_preserve_fp64_preserve", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE,
4236          SO_PRESERVE, false},
4237 
4238         // Same fp16 tests but without requiring VK_KHR_16bit_storage
4239         // test rounding modes when only two float widths are available
4240         {"rounding_ind_all_fp16_rte_fp32_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_UNUSED, true},
4241         {"rounding_ind_all_fp16_rtz_fp32_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_UNUSED, true},
4242         {"rounding_ind_32_fp16_rte_fp32_rtz_nostorage", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_UNUSED, true},
4243         {"rounding_ind_32_fp16_rtz_fp32_rte_nostorage", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_UNUSED, true},
4244         {"rounding_ind_all_fp16_rte_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_UNUSED, SO_RTZ, true},
4245         {"rounding_ind_all_fp16_rtz_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_UNUSED, SO_RTE, true},
4246 
4247         // test rounding modes when three widths are available
4248         {"rounding_ind_all_fp16_rtz_fp32_rte_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTZ,
4249          true},
4250         {"rounding_ind_32_fp16_rtz_fp32_rte_fp64_rtz_nostorage", SM_ROUNDING, independence32, SO_RTZ, SO_RTE, SO_RTZ,
4251          true},
4252         {"rounding_ind_all_fp16_rte_fp32_rtz_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTE,
4253          true},
4254         {"rounding_ind_32_fp16_rte_fp32_rtz_fp64_rte_nostorage", SM_ROUNDING, independence32, SO_RTE, SO_RTZ, SO_RTE,
4255          true},
4256         {"rounding_ind_all_fp16_rtz_fp32_rtz_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTZ, SO_RTE,
4257          true},
4258         {"rounding_ind_all_fp16_rtz_fp32_rte_fp64_rte_nostorage", SM_ROUNDING, independenceAll, SO_RTZ, SO_RTE, SO_RTE,
4259          true},
4260         {"rounding_ind_all_fp16_rte_fp32_rte_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTE, SO_RTZ,
4261          true},
4262         {"rounding_ind_all_fp16_rte_fp32_rtz_fp64_rtz_nostorage", SM_ROUNDING, independenceAll, SO_RTE, SO_RTZ, SO_RTZ,
4263          true},
4264 
4265         // test denorm settings when only two float widths are available
4266         {"denorm_ind_all_fp16_flush_fp32_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH, SO_PRESERVE,
4267          SO_UNUSED, true},
4268         {"denorm_ind_all_fp16_preserve_fp32_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE, SO_FLUSH,
4269          SO_UNUSED, true},
4270         {"denorm_ind_32_fp16_flush_fp32_preserve_nostorage", SM_DENORMS, independence32, SO_FLUSH, SO_PRESERVE,
4271          SO_UNUSED, true},
4272         {"denorm_ind_32_fp16_preserve_fp32_flush_nostorage", SM_DENORMS, independence32, SO_PRESERVE, SO_FLUSH,
4273          SO_UNUSED, true},
4274         {"denorm_ind_all_fp16_flush_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH, SO_UNUSED,
4275          SO_PRESERVE, true},
4276         {"denorm_ind_all_fp16_preserve_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE, SO_UNUSED,
4277          SO_FLUSH, true},
4278 
4279         // test denorm settings when three widths are available
4280         {"denorm_ind_all_fp16_preserve_fp32_flush_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE,
4281          SO_FLUSH, SO_PRESERVE, true},
4282         {"denorm_ind_32_fp16_preserve_fp32_flush_fp64_preserve_nostorage", SM_DENORMS, independence32, SO_PRESERVE,
4283          SO_FLUSH, SO_PRESERVE, true},
4284         {"denorm_ind_all_fp16_flush_fp32_preserve_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_FLUSH,
4285          SO_PRESERVE, SO_FLUSH, true},
4286         {"denorm_ind_32_fp16_flush_fp32_preserve_fp64_flush_nostorage", SM_DENORMS, independence32, SO_FLUSH,
4287          SO_PRESERVE, SO_FLUSH, true},
4288         {"denorm_ind_all_fp16_preserve_fp32_preserve_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE,
4289          SO_PRESERVE, SO_FLUSH, true},
4290         {"denorm_ind_all_fp16_preserve_fp32_flush_fp64_flush_nostorage", SM_DENORMS, independenceAll, SO_PRESERVE,
4291          SO_FLUSH, SO_FLUSH, true},
4292         {"denorm_ind_all_fp16_flush_fp32_flush_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH,
4293          SO_FLUSH, SO_PRESERVE, true},
4294         {"denorm_ind_all_fp16_flush_fp32_preserve_fp64_preserve_nostorage", SM_DENORMS, independenceAll, SO_FLUSH,
4295          SO_PRESERVE, SO_PRESERVE, true},
4296     };
4297 
4298     for (const auto &testCase : testCases)
4299     {
4300         ComputeShaderSpec csSpec;
4301         fillShaderSpec(testCase, csSpec);
4302         group->addChild(new SpvAsmComputeShaderCase(testCtx, testCase.name, csSpec));
4303     }
4304 
4305     addFunctionCase(group, "independence_settings", verifyIndependenceSettings);
4306 }
4307 
fillShaderSpec(const OperationTestCaseInfo & testCaseInfo,ComputeShaderSpec & csSpec) const4308 void ComputeTestGroupBuilder::fillShaderSpec(const OperationTestCaseInfo &testCaseInfo, ComputeShaderSpec &csSpec) const
4309 {
4310     // LUT storing functions used to verify test results
4311     const VerifyIOFunc checkFloatsLUT[] = {checkFloats<Float16, deFloat16>, checkFloats<Float32, float>,
4312                                            checkFloats<Float64, double>};
4313 
4314     const Operation &testOperation    = testCaseInfo.operation;
4315     const OperationTestCase &testCase = testCaseInfo.testCase;
4316     VariableType outVariableType      = testCaseInfo.outVariableType;
4317 
4318     SpecializedOperation specOpData;
4319     specializeOperation(testCaseInfo, specOpData);
4320 
4321     TypeSnippetsSP inTypeSnippets  = specOpData.inTypeSnippets;
4322     TypeSnippetsSP outTypeSnippets = specOpData.outTypeSnippets;
4323     VariableType inVariableType    = specOpData.inVariableType;
4324 
4325     bool outFp16WithoutStorage = (outVariableType == FP16) && testCase.fp16Without16BitStorage;
4326     bool inFp16WithoutStorage  = (inVariableType == FP16) && testCase.fp16Without16BitStorage;
4327 
4328     // The feature is required if OpCapability StorageUniform16 is used in the shader.
4329     bool requiresUniformAndStorage16BitBufferAccess = false;
4330 
4331     // UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
4332     // internaly operates on fp16 and this type should be used by float controls
4333     VariableType inVariableTypeForCaps = inVariableType;
4334     string inFloatWidthForCaps         = inTypeSnippets->bitWidth;
4335     if (testCase.operationId == OID_UPH_DENORM)
4336     {
4337         inVariableTypeForCaps = FP16;
4338         inFloatWidthForCaps   = "16";
4339     }
4340 
4341     string behaviorCapability;
4342     string behaviorExecutionMode;
4343     getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags, inFloatWidthForCaps, outTypeSnippets->bitWidth,
4344                                           behaviorCapability, behaviorExecutionMode);
4345 
4346     string capabilities = behaviorCapability + outTypeSnippets->capabilities;
4347     string extensions   = outTypeSnippets->extensions;
4348     string annotations  = inTypeSnippets->inputAnnotationsSnippet + outTypeSnippets->outputAnnotationsSnippet +
4349                          outTypeSnippets->typeAnnotationsSnippet;
4350     string types         = outTypeSnippets->typeDefinitionsSnippet;
4351     string constants     = outTypeSnippets->constantsDefinitionsSnippet;
4352     string ioDefinitions = "";
4353 
4354     // Getting rid of 16bit_storage dependency imply replacing lots of snippets.
4355     {
4356         if (inFp16WithoutStorage)
4357         {
4358             ioDefinitions = inTypeSnippets->inputDefinitionsFp16Snippet;
4359         }
4360         else
4361         {
4362             ioDefinitions = inTypeSnippets->inputDefinitionsSnippet;
4363         }
4364 
4365         if (outFp16WithoutStorage)
4366         {
4367             extensions   = outTypeSnippets->extensionsFp16Without16BitStorage;
4368             capabilities = behaviorCapability + outTypeSnippets->capabilitiesFp16Without16BitStorage;
4369             types += outTypeSnippets->typeDefinitionsFp16Snippet;
4370             annotations += outTypeSnippets->typeAnnotationsFp16Snippet;
4371             ioDefinitions += outTypeSnippets->outputDefinitionsFp16Snippet;
4372         }
4373         else
4374         {
4375             ioDefinitions += outTypeSnippets->outputDefinitionsSnippet;
4376 
4377             requiresUniformAndStorage16BitBufferAccess |= (outVariableType == FP16);
4378         }
4379     }
4380 
4381     bool outFp16TypeUsage = outTypeSnippets->loadStoreRequiresShaderFloat16;
4382     bool inFp16TypeUsage  = false;
4383 
4384     if (testOperation.isInputTypeRestricted)
4385     {
4386         annotations += inTypeSnippets->typeAnnotationsSnippet;
4387         types += inTypeSnippets->typeDefinitionsSnippet;
4388         constants += inTypeSnippets->constantsDefinitionsSnippet;
4389 
4390         if (inFp16WithoutStorage)
4391         {
4392             annotations += inTypeSnippets->typeAnnotationsFp16Snippet;
4393             types += inTypeSnippets->typeDefinitionsFp16Snippet;
4394             capabilities += inTypeSnippets->capabilitiesFp16Without16BitStorage;
4395             extensions += inTypeSnippets->extensionsFp16Without16BitStorage;
4396         }
4397         else
4398         {
4399             capabilities += inTypeSnippets->capabilities;
4400             extensions += inTypeSnippets->extensions;
4401 
4402             requiresUniformAndStorage16BitBufferAccess |= (inVariableType == FP16);
4403         }
4404 
4405         inFp16TypeUsage = inTypeSnippets->loadStoreRequiresShaderFloat16;
4406     }
4407 
4408     map<string, string> specializations;
4409     specializations["extensions"]     = extensions;
4410     specializations["execution_mode"] = behaviorExecutionMode;
4411     specializations["annotations"]    = annotations + specOpData.annotations;
4412     specializations["types"]          = types + specOpData.types;
4413     specializations["io_definitions"] = ioDefinitions;
4414     specializations["variables"]      = specOpData.variables;
4415     specializations["functions"]      = specOpData.functions;
4416     specializations["save_result"] =
4417         (outFp16WithoutStorage ? outTypeSnippets->storeResultsFp16Snippet : outTypeSnippets->storeResultsSnippet);
4418     specializations["arguments"] = specOpData.arguments;
4419     specializations["commands"]  = specOpData.commands;
4420 
4421     // Build constants. They are only needed sometimes.
4422     const FloatStatementUsageFlags argsAnyFloatConstMask =
4423         B_STATEMENT_USAGE_ARGS_CONST_FLOAT | B_STATEMENT_USAGE_ARGS_CONST_FP16 | B_STATEMENT_USAGE_ARGS_CONST_FP32 |
4424         B_STATEMENT_USAGE_ARGS_CONST_FP64;
4425     const bool argsUseFPConstants = (specOpData.argumentsUsesFloatConstant & argsAnyFloatConstMask) != 0;
4426     const FloatStatementUsageFlags commandsAnyFloatConstMask =
4427         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_CONST_FP16 |
4428         B_STATEMENT_USAGE_COMMANDS_CONST_FP32 | B_STATEMENT_USAGE_COMMANDS_CONST_FP64;
4429     const bool commandsUseFPConstants = (testCaseInfo.operation.statementUsageFlags & commandsAnyFloatConstMask) != 0;
4430     const bool needConstants          = argsUseFPConstants || commandsUseFPConstants;
4431     const FloatStatementUsageFlags constsFloatTypeMask =
4432         B_STATEMENT_USAGE_CONSTS_TYPE_FLOAT | B_STATEMENT_USAGE_CONSTS_TYPE_FP16;
4433     const bool constsUsesFP16Type             = (testCaseInfo.operation.statementUsageFlags & constsFloatTypeMask) != 0;
4434     const bool loadStoreRequiresShaderFloat16 = inFp16TypeUsage || outFp16TypeUsage;
4435     const bool usesFP16Constants              = constsUsesFP16Type || (needConstants && loadStoreRequiresShaderFloat16);
4436 
4437     specializations["constants"] = "";
4438     if (needConstants || outFp16WithoutStorage)
4439     {
4440         specializations["constants"] = constants;
4441     }
4442     specializations["constants"] += specOpData.constants;
4443 
4444     // check which format features are needed
4445     bool float16FeatureRequired = (outVariableType == FP16) || (inVariableType == FP16);
4446     bool float64FeatureRequired = (outVariableType == FP64) || (inVariableType == FP64);
4447     bool int64FeatureRequired   = ((outVariableType == UINT64) || (outVariableType == INT64)) ||
4448                                 ((inVariableType == UINT64) || (inVariableType == INT64));
4449 
4450     // Determine required capabilities.
4451     bool float16CapabilityAlreadyAdded = inFp16WithoutStorage || outFp16WithoutStorage;
4452     if ((testOperation.floatUsage == FLOAT_ARITHMETIC && float16FeatureRequired && !float16CapabilityAlreadyAdded) ||
4453         usesFP16Constants)
4454     {
4455         capabilities += "OpCapability Float16\n";
4456     }
4457     specializations["capabilities"] = capabilities;
4458 
4459     // specialize shader
4460     const string shaderCode = m_operationShaderTemplate.specialize(specializations);
4461 
4462     // construct input and output buffers of proper types
4463     TypeValuesSP inTypeValues  = m_typeData.at(inVariableType).values;
4464     TypeValuesSP outTypeValues = m_typeData.at(outVariableType).values;
4465     BufferSp inBufferSp        = inTypeValues->constructInputBuffer(testCase.input);
4466     BufferSp outBufferSp       = outTypeValues->constructOutputBuffer(testCase.expectedOutput);
4467     csSpec.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4468     csSpec.outputs.push_back(Resource(outBufferSp));
4469 
4470     // check which features/properties are needed
4471     csSpec.assembly      = shaderCode;
4472     csSpec.numWorkGroups = IVec3(1, 1, 1);
4473     csSpec.verifyIO      = checkFloatsLUT[outVariableType];
4474 
4475     csSpec.extensions.push_back("VK_KHR_shader_float_controls");
4476 
4477     csSpec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = float64FeatureRequired;
4478     csSpec.requestedVulkanFeatures.coreFeatures.shaderInt64   = int64FeatureRequired;
4479     csSpec.requestedVulkanFeatures.ext16BitStorage.uniformAndStorageBuffer16BitAccess =
4480         float16FeatureRequired && requiresUniformAndStorage16BitBufferAccess;
4481     csSpec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 =
4482         float16CapabilityAlreadyAdded || usesFP16Constants ||
4483         (float16FeatureRequired && requiresUniformAndStorage16BitBufferAccess &&
4484          testOperation.floatUsage == FLOAT_ARITHMETIC);
4485 
4486     setupFloatControlsProperties(
4487         inVariableTypeForCaps, // usualy same as inFloatType - different only for UnpackHalf2x16
4488         outVariableType, testCase.behaviorFlags, csSpec.requestedVulkanFeatures.floatControlsProperties);
4489 }
4490 
fillShaderSpec(const SettingsTestCaseInfo & testCaseInfo,ComputeShaderSpec & csSpec) const4491 void ComputeTestGroupBuilder::fillShaderSpec(const SettingsTestCaseInfo &testCaseInfo, ComputeShaderSpec &csSpec) const
4492 {
4493     string capabilities;
4494     string fp16behaviorName;
4495     string fp32behaviorName;
4496     string fp64behaviorName;
4497 
4498     ValueId addArgs[2];
4499     ValueId fp16resultValue;
4500     ValueId fp32resultValue;
4501     ValueId fp64resultValue;
4502 
4503     vk::VkPhysicalDeviceFloatControlsProperties &floatControls = csSpec.requestedVulkanFeatures.floatControlsProperties;
4504     bool fp16Required                                          = testCaseInfo.fp16Option != SO_UNUSED;
4505     bool fp32Required                                          = testCaseInfo.fp32Option != SO_UNUSED;
4506     bool fp64Required                                          = testCaseInfo.fp64Option != SO_UNUSED;
4507 
4508     if (testCaseInfo.testedMode == SM_ROUNDING)
4509     {
4510         // make sure that only rounding options are used
4511         DE_ASSERT((testCaseInfo.fp16Option != SO_FLUSH) && (testCaseInfo.fp16Option != SO_PRESERVE) &&
4512                   (testCaseInfo.fp32Option != SO_FLUSH) && (testCaseInfo.fp32Option != SO_PRESERVE) &&
4513                   (testCaseInfo.fp64Option != SO_FLUSH) && (testCaseInfo.fp64Option != SO_PRESERVE));
4514 
4515         bool fp16RteRounding = testCaseInfo.fp16Option == SO_RTE;
4516         bool fp32RteRounding = testCaseInfo.fp32Option == SO_RTE;
4517         bool fp64RteRounding = testCaseInfo.fp64Option == SO_RTE;
4518 
4519         const string &rte = m_behaviorToName.at(B_RTE_ROUNDING);
4520         const string &rtz = m_behaviorToName.at(B_RTZ_ROUNDING);
4521 
4522         fp16behaviorName = fp16RteRounding ? rte : rtz;
4523         fp32behaviorName = fp32RteRounding ? rte : rtz;
4524         fp64behaviorName = fp64RteRounding ? rte : rtz;
4525 
4526         addArgs[0]      = V_ADD_ARG_A;
4527         addArgs[1]      = V_ADD_ARG_B;
4528         fp16resultValue = fp16RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
4529         fp32resultValue = fp32RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
4530         fp64resultValue = fp64RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
4531 
4532         capabilities = "OpCapability " + rte +
4533                        "\n"
4534                        "OpCapability " +
4535                        rtz + "\n";
4536 
4537         floatControls.roundingModeIndependence     = testCaseInfo.independenceSetting;
4538         floatControls.denormBehaviorIndependence   = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;
4539         floatControls.shaderRoundingModeRTEFloat16 = fp16RteRounding;
4540         floatControls.shaderRoundingModeRTZFloat16 = fp16Required && !fp16RteRounding;
4541         floatControls.shaderRoundingModeRTEFloat32 = fp32RteRounding;
4542         floatControls.shaderRoundingModeRTZFloat32 = fp32Required && !fp32RteRounding;
4543         floatControls.shaderRoundingModeRTEFloat64 = fp64RteRounding;
4544         floatControls.shaderRoundingModeRTZFloat64 = fp64Required && !fp64RteRounding;
4545     }
4546     else // SM_DENORMS
4547     {
4548         // make sure that only denorm options are used
4549         DE_ASSERT((testCaseInfo.fp16Option != SO_RTE) && (testCaseInfo.fp16Option != SO_RTZ) &&
4550                   (testCaseInfo.fp32Option != SO_RTE) && (testCaseInfo.fp32Option != SO_RTZ) &&
4551                   (testCaseInfo.fp64Option != SO_RTE) && (testCaseInfo.fp64Option != SO_RTZ));
4552 
4553         bool fp16DenormPreserve = testCaseInfo.fp16Option == SO_PRESERVE;
4554         bool fp32DenormPreserve = testCaseInfo.fp32Option == SO_PRESERVE;
4555         bool fp64DenormPreserve = testCaseInfo.fp64Option == SO_PRESERVE;
4556 
4557         const string &preserve = m_behaviorToName.at(B_DENORM_PRESERVE);
4558         const string &flush    = m_behaviorToName.at(B_DENORM_FLUSH);
4559 
4560         fp16behaviorName = fp16DenormPreserve ? preserve : flush;
4561         fp32behaviorName = fp32DenormPreserve ? preserve : flush;
4562         fp64behaviorName = fp64DenormPreserve ? preserve : flush;
4563 
4564         addArgs[0]      = V_DENORM;
4565         addArgs[1]      = V_DENORM;
4566         fp16resultValue = fp16DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO_OR_DENORM_TIMES_TWO;
4567         fp32resultValue = fp32DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO;
4568         fp64resultValue = fp64DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO;
4569 
4570         capabilities = "OpCapability " + preserve +
4571                        "\n"
4572                        "OpCapability " +
4573                        flush + "\n";
4574 
4575         floatControls.denormBehaviorIndependence     = testCaseInfo.independenceSetting;
4576         floatControls.roundingModeIndependence       = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;
4577         floatControls.shaderDenormPreserveFloat16    = fp16DenormPreserve;
4578         floatControls.shaderDenormFlushToZeroFloat16 = fp16Required && !fp16DenormPreserve;
4579         floatControls.shaderDenormPreserveFloat32    = fp32DenormPreserve;
4580         floatControls.shaderDenormFlushToZeroFloat32 = fp32Required && !fp32DenormPreserve;
4581         floatControls.shaderDenormPreserveFloat64    = fp64DenormPreserve;
4582         floatControls.shaderDenormFlushToZeroFloat64 = fp64Required && !fp64DenormPreserve;
4583     }
4584 
4585     const auto &fp64Data = m_typeData.at(FP64);
4586     const auto &fp32Data = m_typeData.at(FP32);
4587     const auto &fp16Data = m_typeData.at(FP16);
4588 
4589     uint32_t attributeIndex  = 0;
4590     uint32_t attributeOffset = 0;
4591     string attribute;
4592     string extensions     = "";
4593     string executionModes = "";
4594     string ioAnnotations  = "";
4595     string types          = "";
4596     string inStruct       = "";
4597     string outDefinitions = "";
4598     string commands       = "";
4599     string saveResult     = "";
4600 
4601     // construct single input buffer containing arguments for all float widths
4602     // (maxPerStageDescriptorStorageBuffers can be min 4 and we need 3 for outputs)
4603     uint32_t inputOffset = 0;
4604     std::vector<uint8_t> inputData((fp64Required * sizeof(double) + sizeof(float) + fp16Required * sizeof(deFloat16)) *
4605                                    2);
4606 
4607     // to follow storage buffer layout rules we store data in ssbo in order 64 -> 16
4608     if (fp64Required)
4609     {
4610         capabilities += fp64Data.snippets->capabilities;
4611         executionModes += "OpExecutionMode %main " + fp64behaviorName + " 64\n";
4612         attribute = to_string(attributeIndex);
4613         ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) + "\n" +
4614                          fp64Data.snippets->multiOutputAnnotationsSnippet + "OpDecorate %ssbo_f64_out Binding " +
4615                          to_string(attributeIndex + 1) + "\n";
4616         types += fp64Data.snippets->minTypeDefinitionsSnippet;
4617         inStruct += " %type_f64_arr_2";
4618         outDefinitions += fp64Data.snippets->multiOutputDefinitionsSnippet;
4619         commands += replace(fp64Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
4620                     "%result64             = OpFAdd %type_f64 %arg1_f64 %arg2_f64\n";
4621         saveResult += fp64Data.snippets->multiStoreResultsSnippet;
4622         attributeOffset += 2 * static_cast<uint32_t>(sizeof(double));
4623         attributeIndex++;
4624 
4625         fp64Data.values->fillInputData(addArgs, inputData, inputOffset);
4626 
4627         // construct separate buffers for outputs to make validation easier
4628         BufferSp fp64OutBufferSp = fp64Data.values->constructOutputBuffer(fp64resultValue);
4629         csSpec.outputs.push_back(Resource(fp64OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
4630                                           reinterpret_cast<void *>(BufferDataType::DATA_FP64)));
4631 
4632         csSpec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
4633     }
4634     if (fp32Required)
4635     {
4636         executionModes += "OpExecutionMode %main " + fp32behaviorName + " 32\n";
4637         attribute = to_string(attributeIndex);
4638         ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) + "\n" +
4639                          fp32Data.snippets->multiOutputAnnotationsSnippet + "OpDecorate %ssbo_f32_out Binding " +
4640                          to_string(attributeIndex + 1) + "\n";
4641         types += fp32Data.snippets->minTypeDefinitionsSnippet;
4642         inStruct += " %type_f32_arr_2";
4643         outDefinitions += fp32Data.snippets->multiOutputDefinitionsSnippet;
4644         commands += replace(fp32Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
4645                     "%result32             = OpFAdd %type_f32 %arg1_f32 %arg2_f32\n";
4646         saveResult += fp32Data.snippets->multiStoreResultsSnippet;
4647         attributeOffset += 2 * static_cast<uint32_t>(sizeof(float));
4648         attributeIndex++;
4649 
4650         fp32Data.values->fillInputData(addArgs, inputData, inputOffset);
4651 
4652         BufferSp fp32OutBufferSp = fp32Data.values->constructOutputBuffer(fp32resultValue);
4653         csSpec.outputs.push_back(Resource(fp32OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
4654                                           reinterpret_cast<void *>(BufferDataType::DATA_FP32)));
4655     }
4656     if (fp16Required)
4657     {
4658         if (testCaseInfo.fp16Without16BitStorage)
4659         {
4660             capabilities += fp16Data.snippets->capabilitiesFp16Without16BitStorage;
4661             extensions += fp16Data.snippets->extensionsFp16Without16BitStorage;
4662             executionModes += "OpExecutionMode %main " + fp16behaviorName + " 16\n";
4663             attribute = to_string(attributeIndex);
4664             ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) + "\n" +
4665                              fp16Data.snippets->multiOutputAnnotationsFp16Snippet +
4666                              "OpDecorate %ssbo_u32_out Binding " + to_string(attributeIndex + 1) + "\n";
4667             types += fp16Data.snippets->minTypeDefinitionsSnippet + fp16Data.snippets->typeDefinitionsFp16Snippet +
4668                      "%type_f16_vec2        = OpTypeVector %type_f16 2\n";
4669             inStruct += " %type_u32_arr_1";
4670             outDefinitions += fp16Data.snippets->multiOutputDefinitionsFp16Snippet;
4671             commands += replace(fp16Data.snippets->multiArgumentsFromInputFp16Snippet, "${attr}", attribute) +
4672                         "%result16             = OpFAdd %type_f16 %arg1_f16 %arg2_f16\n";
4673             saveResult += fp16Data.snippets->multiStoreResultsFp16Snippet;
4674 
4675             csSpec.extensions.push_back("VK_KHR_shader_float16_int8");
4676             csSpec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
4677         }
4678         else
4679         {
4680             capabilities += fp16Data.snippets->capabilities + "OpCapability Float16\n";
4681             extensions += fp16Data.snippets->extensions;
4682             executionModes += "OpExecutionMode %main " + fp16behaviorName + " 16\n";
4683             attribute = to_string(attributeIndex);
4684             ioAnnotations += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) + "\n" +
4685                              fp16Data.snippets->multiOutputAnnotationsSnippet + "OpDecorate %ssbo_f16_out Binding " +
4686                              to_string(attributeIndex + 1) + "\n";
4687             types += fp16Data.snippets->minTypeDefinitionsSnippet;
4688             inStruct += " %type_f16_arr_2";
4689             outDefinitions += fp16Data.snippets->multiOutputDefinitionsSnippet;
4690             commands += replace(fp16Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
4691                         "%result16             = OpFAdd %type_f16 %arg1_f16 %arg2_f16\n";
4692             saveResult += fp16Data.snippets->multiStoreResultsSnippet;
4693 
4694             csSpec.extensions.push_back("VK_KHR_16bit_storage");
4695             csSpec.requestedVulkanFeatures.ext16BitStorage.uniformAndStorageBuffer16BitAccess = true;
4696         }
4697 
4698         fp16Data.values->fillInputData(addArgs, inputData, inputOffset);
4699 
4700         BufferSp fp16OutBufferSp = fp16Data.values->constructOutputBuffer(fp16resultValue);
4701         csSpec.outputs.push_back(Resource(fp16OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
4702                                           reinterpret_cast<void *>(BufferDataType::DATA_FP16)));
4703     }
4704 
4705     BufferSp inBufferSp(new Buffer<uint8_t>(inputData));
4706     csSpec.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4707 
4708     map<string, string> specializations = {
4709         {"capabilities", capabilities},      {"extensions", extensions}, {"execution_modes", executionModes},
4710         {"io_annotations", ioAnnotations},   {"types", types},           {"in_struct", inStruct},
4711         {"out_definitions", outDefinitions}, {"commands", commands},     {"save_result", saveResult}};
4712 
4713     // specialize shader
4714     const string shaderCode = m_settingsShaderTemplate.specialize(specializations);
4715 
4716     csSpec.assembly      = shaderCode;
4717     csSpec.numWorkGroups = IVec3(1, 1, 1);
4718     csSpec.verifyIO      = checkMixedFloats;
4719     csSpec.extensions.push_back("VK_KHR_shader_float_controls");
4720 }
4721 
getGraphicsShaderCode(vk::SourceCollections & dst,InstanceContext context)4722 void getGraphicsShaderCode(vk::SourceCollections &dst, InstanceContext context)
4723 {
4724     // this function is used only by GraphicsTestGroupBuilder but it couldn't
4725     // be implemented as a method because of how addFunctionCaseWithPrograms
4726     // was implemented
4727 
4728     SpirvVersion targetSpirvVersion = context.resources.spirvVersion;
4729     const uint32_t vulkanVersion    = dst.usedVulkanVersion;
4730 
4731     static const string vertexTemplate =
4732         "OpCapability Shader\n"
4733         "${vert_capabilities}"
4734 
4735         "OpExtension \"SPV_KHR_float_controls\"\n"
4736         "${vert_extensions}"
4737 
4738         "%std450            = OpExtInstImport \"GLSL.std.450\"\n"
4739         "OpMemoryModel Logical GLSL450\n"
4740         "OpEntryPoint Vertex %main \"main\" %BP_stream %BP_position %BP_color %BP_gl_VertexIndex %BP_gl_InstanceIndex "
4741         "%BP_vertex_color %BP_vertex_result \n"
4742         "${vert_execution_mode}"
4743 
4744         "OpMemberDecorate %BP_gl_PerVertex 0 BuiltIn Position\n"
4745         "OpMemberDecorate %BP_gl_PerVertex 1 BuiltIn PointSize\n"
4746         "OpMemberDecorate %BP_gl_PerVertex 2 BuiltIn ClipDistance\n"
4747         "OpMemberDecorate %BP_gl_PerVertex 3 BuiltIn CullDistance\n"
4748         "OpDecorate %BP_gl_PerVertex Block\n"
4749         "OpDecorate %BP_position Location 0\n"
4750         "OpDecorate %BP_color Location 1\n"
4751         "OpDecorate %BP_vertex_color Location 1\n"
4752         "OpDecorate %BP_vertex_result Location 2\n"
4753         "OpDecorate %BP_vertex_result Flat\n"
4754         "OpDecorate %BP_gl_VertexIndex BuiltIn VertexIndex\n"
4755         "OpDecorate %BP_gl_InstanceIndex BuiltIn InstanceIndex\n"
4756 
4757         // some tests require additional annotations
4758         "${vert_annotations}"
4759 
4760         // types required by most of tests
4761         "%type_void            = OpTypeVoid\n"
4762         "%type_voidf           = OpTypeFunction %type_void\n"
4763         "%type_bool            = OpTypeBool\n"
4764         "%type_i32             = OpTypeInt 32 1\n"
4765         "%type_u32             = OpTypeInt 32 0\n"
4766         "%type_u32_vec2        = OpTypeVector %type_u32 2\n"
4767         "%type_i32_iptr        = OpTypePointer Input %type_i32\n"
4768         "%type_i32_optr        = OpTypePointer Output %type_i32\n"
4769         "%type_i32_fptr        = OpTypePointer Function %type_i32\n"
4770 
4771         // constants required by most of tests
4772         "%c_i32_0              = OpConstant %type_i32 0\n"
4773         "%c_i32_1              = OpConstant %type_i32 1\n"
4774         "%c_i32_2              = OpConstant %type_i32 2\n"
4775         "%c_u32_1              = OpConstant %type_u32 1\n"
4776 
4777         // if input float type has different width then output then
4778         // both types are defined here along with all types derived from
4779         // them that are commonly used by tests; some tests also define
4780         // their own types (those that are needed just by this single test)
4781         "${vert_types}"
4782 
4783         // SSBO is not universally supported for storing
4784         // data in vertex stages - it is onle read here
4785         "${vert_io_definitions}"
4786 
4787         "%BP_gl_PerVertex      = OpTypeStruct %type_f32_vec4 %type_f32 %type_f32_arr_1 %type_f32_arr_1\n"
4788         "%BP_gl_PerVertex_optr = OpTypePointer Output %BP_gl_PerVertex\n"
4789         "%BP_stream            = OpVariable %BP_gl_PerVertex_optr Output\n"
4790         "%BP_position          = OpVariable %type_f32_vec4_iptr Input\n"
4791         "%BP_color             = OpVariable %type_f32_vec4_iptr Input\n"
4792         "%BP_gl_VertexIndex    = OpVariable %type_i32_iptr Input\n"
4793         "%BP_gl_InstanceIndex  = OpVariable %type_i32_iptr Input\n"
4794         "%BP_vertex_color      = OpVariable %type_f32_vec4_optr Output\n"
4795 
4796         // set of default constants per float type is placed here,
4797         // operation tests can also define additional constants.
4798         "${vert_constants}"
4799 
4800         // O_RETURN_VAL defines function here and because
4801         // of that this token needs to be directly before main function.
4802         "${vert_functions}"
4803 
4804         "%main                 = OpFunction %type_void None %type_voidf\n"
4805         "%label                = OpLabel\n"
4806 
4807         "${vert_variables}"
4808 
4809         "%position             = OpLoad %type_f32_vec4 %BP_position\n"
4810         "%gl_pos               = OpAccessChain %type_f32_vec4_optr %BP_stream %c_i32_0\n"
4811         "OpStore %gl_pos %position\n"
4812         "%color                = OpLoad %type_f32_vec4 %BP_color\n"
4813         "OpStore %BP_vertex_color %color\n"
4814 
4815         // this token is filled only when vertex stage is tested;
4816         // depending on test case arguments are either read from input ssbo
4817         // or generated in spir-v code - in later case ssbo is not used
4818         "${vert_arguments}"
4819 
4820         // when vertex shader is tested then test operations are performed
4821         // here and passed to fragment stage; if fragment stage ts tested
4822         // then ${comands} and ${vert_process_result} are rplaced with nop
4823         "${vert_commands}"
4824 
4825         "${vert_process_result}"
4826 
4827         "OpReturn\n"
4828         "OpFunctionEnd\n";
4829 
4830     static const string fragmentTemplate =
4831         "OpCapability Shader\n"
4832         "${frag_capabilities}"
4833 
4834         "OpExtension \"SPV_KHR_float_controls\"\n"
4835         "${frag_extensions}"
4836 
4837         "%std450            = OpExtInstImport \"GLSL.std.450\"\n"
4838         "OpMemoryModel Logical GLSL450\n"
4839         "OpEntryPoint Fragment %main \"main\" %BP_vertex_color %BP_vertex_result %BP_fragColor %BP_gl_FragCoord \n"
4840         "OpExecutionMode %main OriginUpperLeft\n"
4841         "${frag_execution_mode}"
4842 
4843         "OpDecorate %BP_fragColor Location 0\n"
4844         "OpDecorate %BP_vertex_color Location 1\n"
4845         "OpDecorate %BP_vertex_result Location 2\n"
4846         "OpDecorate %BP_vertex_result Flat\n"
4847         "OpDecorate %BP_gl_FragCoord BuiltIn FragCoord\n"
4848 
4849         // some tests require additional annotations
4850         "${frag_annotations}"
4851 
4852         // types required by most of tests
4853         "%type_void            = OpTypeVoid\n"
4854         "%type_voidf           = OpTypeFunction %type_void\n"
4855         "%type_bool            = OpTypeBool\n"
4856         "%type_i32             = OpTypeInt 32 1\n"
4857         "%type_u32             = OpTypeInt 32 0\n"
4858         "%type_u32_vec2        = OpTypeVector %type_u32 2\n"
4859         "%type_i32_iptr        = OpTypePointer Input %type_i32\n"
4860         "%type_i32_optr        = OpTypePointer Output %type_i32\n"
4861         "%type_i32_fptr        = OpTypePointer Function %type_i32\n"
4862 
4863         // constants required by most of tests
4864         "%c_i32_0              = OpConstant %type_i32 0\n"
4865         "%c_i32_1              = OpConstant %type_i32 1\n"
4866         "%c_i32_2              = OpConstant %type_i32 2\n"
4867         "%c_u32_1              = OpConstant %type_u32 1\n"
4868 
4869         // if input float type has different width then output then
4870         // both types are defined here along with all types derived from
4871         // them that are commonly used by tests; some tests also define
4872         // their own types (those that are needed just by this single test)
4873         "${frag_types}"
4874 
4875         "%BP_gl_FragCoord      = OpVariable %type_f32_vec4_iptr Input\n"
4876         "%BP_vertex_color      = OpVariable %type_f32_vec4_iptr Input\n"
4877         "%BP_fragColor         = OpVariable %type_f32_vec4_optr Output\n"
4878 
4879         // SSBO definitions
4880         "${frag_io_definitions}"
4881 
4882         // set of default constants per float type is placed here,
4883         // operation tests can also define additional constants.
4884         "${frag_constants}"
4885 
4886         // O_RETURN_VAL defines function here and because
4887         // of that this token needs to be directly before main function.
4888         "${frag_functions}"
4889 
4890         "%main                 = OpFunction %type_void None %type_voidf\n"
4891         "%label                = OpLabel\n"
4892 
4893         "${frag_variables}"
4894 
4895         // just pass vertex color - rendered image is not important in our case
4896         "%vertex_color         = OpLoad %type_f32_vec4 %BP_vertex_color\n"
4897         "OpStore %BP_fragColor %vertex_color\n"
4898 
4899         // this token is filled only when fragment stage is tested;
4900         // depending on test case arguments are either read from input ssbo or
4901         // generated in spir-v code - in later case ssbo is used only for output
4902         "${frag_arguments}"
4903 
4904         // when fragment shader is tested then test operations are performed
4905         // here and saved to ssbo; if vertex stage was tested then its
4906         // result is just saved to ssbo here
4907         "${frag_commands}"
4908         "${frag_process_result}"
4909 
4910         "OpReturn\n"
4911         "OpFunctionEnd\n";
4912 
4913     dst.spirvAsmSources.add("vert", DE_NULL) << StringTemplate(vertexTemplate).specialize(context.testCodeFragments)
4914                                              << SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4915     dst.spirvAsmSources.add("frag", DE_NULL) << StringTemplate(fragmentTemplate).specialize(context.testCodeFragments)
4916                                              << SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4917 }
4918 
// GraphicsTestGroupBuilder iterates over all test cases and creates tests for both
// vertex and fragment stages. As in most spirv-assembly tests, tests here are also
// executed using functionality defined in vktSpvAsmGraphicsShaderTestUtil.cpp, but
// because one of the requirements during development was that SSBO won't be used in
// the vertex stage we couldn't use createTestForStage functions - we need a custom
// version for both vertex and fragment shaders at the same time. This was required
// as we needed to pass the result from the vertex stage to the fragment stage where it
// could be saved to ssbo. To achieve that InstanceContext is created manually in
// createInstanceContext method.
4928 class GraphicsTestGroupBuilder : public TestGroupBuilderBase
4929 {
4930 public:
4931     void init();
4932 
4933     void createOperationTests(TestCaseGroup *parentGroup, const char *groupName, VariableType variableType,
4934                               bool argumentsFromInput) override;
4935     void createSettingsTests(TestCaseGroup *parentGroup) override;
4936 
4937 protected:
4938     InstanceContext createInstanceContext(const OperationTestCaseInfo &testCaseInfo) const;
4939 
4940 private:
4941     TestCasesBuilder m_testCaseBuilder;
4942 };
4943 
init()4944 void GraphicsTestGroupBuilder::init()
4945 {
4946     m_testCaseBuilder.init();
4947 }
4948 
createOperationTests(TestCaseGroup * parentGroup,const char * groupName,VariableType variableType,bool argumentsFromInput)4949 void GraphicsTestGroupBuilder::createOperationTests(TestCaseGroup *parentGroup, const char *groupName,
4950                                                     VariableType variableType, bool argumentsFromInput)
4951 {
4952     TestContext &testCtx = parentGroup->getTestContext();
4953     TestCaseGroup *group = new TestCaseGroup(testCtx, groupName);
4954     parentGroup->addChild(group);
4955 
4956     // create test cases for vertex stage
4957     TestCaseVect testCases;
4958     m_testCaseBuilder.build(testCases, m_typeData[variableType].testResults, argumentsFromInput);
4959 
4960     for (auto &testCase : testCases)
4961     {
4962         // skip cases with undefined output
4963         if (testCase.expectedOutput == V_UNUSED)
4964             continue;
4965 
4966         // FPRoundingMode decoration can be applied only to conversion instruction that is used as the object
4967         // argument of an OpStore storing through a pointer to a 16-bit floating-point object in Uniform, or
4968         // PushConstant, or Input, or Output Storage Classes. SSBO writes are not commonly supported
4969         // in VS so this test case needs to be skiped for vertex stage.
4970         if ((testCase.operationId == OID_ORTZ_ROUND) || (testCase.operationId == OID_ORTE_ROUND))
4971             continue;
4972 
4973         OperationTestCaseInfo testCaseInfo = {variableType, argumentsFromInput, VK_SHADER_STAGE_VERTEX_BIT,
4974                                               m_testCaseBuilder.getOperation(testCase.operationId), testCase};
4975 
4976         InstanceContext ctxVertex = createInstanceContext(testCaseInfo);
4977         string testName           = replace(testCase.baseName, "op", testCaseInfo.operation.name);
4978 
4979         addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_vert", getGraphicsShaderCode,
4980                                                      runAndVerifyDefaultPipeline, ctxVertex);
4981     }
4982 
4983     // create test cases for fragment stage
4984     testCases.clear();
4985     m_testCaseBuilder.build(testCases, m_typeData[variableType].testResults, argumentsFromInput);
4986 
4987     for (auto &testCase : testCases)
4988     {
4989         // skip cases with undefined output
4990         if (testCase.expectedOutput == V_UNUSED)
4991             continue;
4992 
4993         OperationTestCaseInfo testCaseInfo = {variableType, argumentsFromInput, VK_SHADER_STAGE_FRAGMENT_BIT,
4994                                               m_testCaseBuilder.getOperation(testCase.operationId), testCase};
4995 
4996         InstanceContext ctxFragment = createInstanceContext(testCaseInfo);
4997         string testName             = replace(testCase.baseName, "op", testCaseInfo.operation.name);
4998 
4999         addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_frag", getGraphicsShaderCode,
5000                                                      runAndVerifyDefaultPipeline, ctxFragment);
5001     }
5002 }
5003 
createSettingsTests(TestCaseGroup * parentGroup)5004 void GraphicsTestGroupBuilder::createSettingsTests(TestCaseGroup *parentGroup)
5005 {
5006     DE_UNREF(parentGroup);
5007 
5008     // WG decided that testing settings only for compute stage is sufficient
5009 }
5010 
createInstanceContext(const OperationTestCaseInfo & testCaseInfo) const5011 InstanceContext GraphicsTestGroupBuilder::createInstanceContext(const OperationTestCaseInfo &testCaseInfo) const
5012 {
5013     // LUT storing functions used to verify test results
5014     const VerifyIOFunc checkFloatsLUT[] = {checkFloats<Float16, deFloat16>, checkFloats<Float32, float>,
5015                                            checkFloats<Float64, double>};
5016 
5017     // 32-bit float types are always needed for standard operations on color
5018     // if tested operation does not require fp32 for either input or output
5019     // then this minimal type definitions must be appended to types section
5020     const string f32TypeMinimalRequired = "%type_f32             = OpTypeFloat 32\n"
5021                                           "%type_f32_arr_1       = OpTypeArray %type_f32 %c_i32_1\n"
5022                                           "%type_f32_iptr        = OpTypePointer Input %type_f32\n"
5023                                           "%type_f32_optr        = OpTypePointer Output %type_f32\n"
5024                                           "%type_f32_vec4        = OpTypeVector %type_f32 4\n"
5025                                           "%type_f32_vec4_iptr   = OpTypePointer Input %type_f32_vec4\n"
5026                                           "%type_f32_vec4_optr   = OpTypePointer Output %type_f32_vec4\n";
5027 
5028     const Operation &testOperation    = testCaseInfo.operation;
5029     const OperationTestCase &testCase = testCaseInfo.testCase;
5030     VariableType outVariableType      = testCaseInfo.outVariableType;
5031     VkShaderStageFlagBits testedStage = testCaseInfo.testedStage;
5032 
5033     DE_ASSERT((testedStage == VK_SHADER_STAGE_VERTEX_BIT) || (testedStage == VK_SHADER_STAGE_FRAGMENT_BIT));
5034 
5035     SpecializedOperation specOpData;
5036     specializeOperation(testCaseInfo, specOpData);
5037 
5038     TypeSnippetsSP inTypeSnippets  = specOpData.inTypeSnippets;
5039     TypeSnippetsSP outTypeSnippets = specOpData.outTypeSnippets;
5040     VariableType inVariableType    = specOpData.inVariableType;
5041 
5042     bool outFp16WithoutStorage = (outVariableType == FP16) && testCase.fp16Without16BitStorage;
5043     bool inFp16WithoutStorage  = (inVariableType == FP16) && testCase.fp16Without16BitStorage;
5044 
5045     // The feature is required if OpCapability StorageUniform16 is used in the shader.
5046     bool requiresUniformAndStorage16BitBufferAccess = false;
5047 
5048     // There may be several reasons why we need the shaderFloat16 Vulkan feature.
5049     bool needsShaderFloat16 = inFp16WithoutStorage || outFp16WithoutStorage;
5050     // There are some weird cases where we need the constants, but would otherwise drop them.
5051     bool needsSpecialConstants = false;
5052 
5053     // UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
5054     // internaly operates on fp16 and this type should be used by float controls
5055     VariableType inVariableTypeForCaps = inVariableType;
5056     string inFloatWidthForCaps         = inTypeSnippets->bitWidth;
5057     if (testCase.operationId == OID_UPH_DENORM)
5058     {
5059         inVariableTypeForCaps = FP16;
5060         inFloatWidthForCaps   = "16";
5061     }
5062 
5063     string behaviorCapability;
5064     string behaviorExecutionMode;
5065     getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags, inFloatWidthForCaps, outTypeSnippets->bitWidth,
5066                                           behaviorCapability, behaviorExecutionMode);
5067 
5068     // check which format features are needed
5069     bool float16FeatureRequired = (inVariableType == FP16) || (outVariableType == FP16);
5070     bool float64FeatureRequired = (inVariableType == FP64) || (outVariableType == FP64);
5071     bool int64FeatureRequired   = ((inVariableType == UINT64) || (inVariableType == INT64)) ||
5072                                 ((outVariableType == UINT64) || (outVariableType == INT64));
5073 
5074     string vertExecutionMode;
5075     string fragExecutionMode;
5076     string vertCapabilities;
5077     string fragCapabilities;
5078     string vertExtensions;
5079     string fragExtensions;
5080     string vertAnnotations;
5081     string fragAnnotations;
5082     string vertTypes;
5083     string fragTypes;
5084     string vertConstants;
5085     string fragConstants;
5086     string vertFunctions;
5087     string fragFunctions;
5088     string vertIODefinitions;
5089     string fragIODefinitions;
5090     string vertArguments;
5091     string fragArguments;
5092     string vertVariables;
5093     string fragVariables;
5094     string vertCommands;
5095     string fragCommands;
5096     string vertProcessResult;
5097     string fragProcessResult;
5098 
5099     // check if operation should be executed in vertex stage
5100     if (testedStage == VK_SHADER_STAGE_VERTEX_BIT)
5101     {
5102         vertAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet;
5103         fragAnnotations = outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
5104         vertFunctions   = specOpData.functions;
5105 
5106         // check if input type is different from tested type (conversion operations)
5107         if (testOperation.isInputTypeRestricted)
5108         {
5109             vertCapabilities = behaviorCapability + inTypeSnippets->capabilities + outTypeSnippets->capabilities;
5110             fragCapabilities = outTypeSnippets->capabilities;
5111             vertExtensions   = inTypeSnippets->extensions + outTypeSnippets->extensions;
5112             fragExtensions   = outTypeSnippets->extensions;
5113             vertTypes        = inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet +
5114                         outTypeSnippets->varyingsTypesSnippet;
5115             if (inFp16WithoutStorage)
5116                 vertTypes += inTypeSnippets->typeDefinitionsFp16Snippet;
5117 
5118             fragTypes     = outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
5119             vertConstants = inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
5120             fragConstants = outTypeSnippets->constantsDefinitionsSnippet;
5121 
5122             requiresUniformAndStorage16BitBufferAccess |= (inVariableType == FP16);
5123         }
5124         else
5125         {
5126             // input and output types are the same (majority of operations)
5127 
5128             vertCapabilities = behaviorCapability + outTypeSnippets->capabilities;
5129             fragCapabilities = vertCapabilities;
5130             vertExtensions   = outTypeSnippets->extensions;
5131             fragExtensions   = vertExtensions;
5132             vertTypes        = outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
5133             fragTypes        = vertTypes;
5134             vertConstants    = outTypeSnippets->constantsDefinitionsSnippet;
5135             fragConstants    = outTypeSnippets->constantsDefinitionsSnippet;
5136         }
5137 
5138         requiresUniformAndStorage16BitBufferAccess |= (outVariableType == FP16);
5139 
5140         if (outVariableType != FP32)
5141         {
5142             fragTypes += f32TypeMinimalRequired;
5143             if (inVariableType != FP32)
5144                 vertTypes += f32TypeMinimalRequired;
5145         }
5146 
5147         vertAnnotations += specOpData.annotations;
5148         vertTypes += specOpData.types;
5149         vertConstants += specOpData.constants;
5150 
5151         vertExecutionMode = behaviorExecutionMode;
5152         fragExecutionMode = "";
5153         vertIODefinitions = inTypeSnippets->inputDefinitionsSnippet + outTypeSnippets->outputVaryingsSnippet;
5154         fragIODefinitions = outTypeSnippets->inputVaryingsSnippet + outTypeSnippets->outputDefinitionsSnippet;
5155         vertArguments     = specOpData.arguments;
5156         fragArguments     = "";
5157         vertVariables     = specOpData.variables;
5158         fragVariables     = "";
5159         vertCommands      = specOpData.commands;
5160         fragCommands      = "";
5161         vertProcessResult = outTypeSnippets->storeVertexResultSnippet;
5162         fragProcessResult = outTypeSnippets->loadVertexResultSnippet + outTypeSnippets->storeResultsSnippet;
5163 
5164         if (inFp16WithoutStorage)
5165         {
5166             vertAnnotations += inTypeSnippets->typeAnnotationsFp16Snippet;
5167             vertIODefinitions = inTypeSnippets->inputDefinitionsFp16Snippet + outTypeSnippets->outputVaryingsSnippet;
5168         }
5169 
5170         if (outFp16WithoutStorage)
5171         {
5172             vertTypes += outTypeSnippets->typeDefinitionsFp16Snippet;
5173             fragTypes += outTypeSnippets->typeDefinitionsFp16Snippet;
5174             fragAnnotations += outTypeSnippets->typeAnnotationsFp16Snippet;
5175             fragIODefinitions = outTypeSnippets->inputVaryingsSnippet + outTypeSnippets->outputDefinitionsFp16Snippet;
5176             fragProcessResult = outTypeSnippets->loadVertexResultSnippet + outTypeSnippets->storeResultsFp16Snippet;
5177         }
5178 
5179         needsShaderFloat16 |= outTypeSnippets->loadStoreRequiresShaderFloat16;
5180     }
5181     else // perform test in fragment stage - vertex stage is empty
5182     {
5183         fragFunctions = specOpData.functions;
5184         // check if input type is different from tested type
5185         if (testOperation.isInputTypeRestricted)
5186         {
5187             fragAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
5188                               outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
5189             fragCapabilities = behaviorCapability +
5190                                (inFp16WithoutStorage ? inTypeSnippets->capabilitiesFp16Without16BitStorage :
5191                                                        inTypeSnippets->capabilities) +
5192                                (outFp16WithoutStorage ? outTypeSnippets->capabilitiesFp16Without16BitStorage :
5193                                                         outTypeSnippets->capabilities);
5194             fragExtensions = (inFp16WithoutStorage ? inTypeSnippets->extensionsFp16Without16BitStorage :
5195                                                      inTypeSnippets->extensions) +
5196                              (outFp16WithoutStorage ? outTypeSnippets->extensionsFp16Without16BitStorage :
5197                                                       outTypeSnippets->extensions);
5198             fragTypes     = inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet;
5199             fragConstants = inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
5200             ;
5201             requiresUniformAndStorage16BitBufferAccess |=
5202                 ((inVariableType == FP16) && (testCase.fp16Without16BitStorage == false));
5203         }
5204         else
5205         {
5206             // input and output types are the same
5207 
5208             fragAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
5209                               outTypeSnippets->outputAnnotationsSnippet;
5210             fragCapabilities =
5211                 behaviorCapability + (outFp16WithoutStorage ? outTypeSnippets->capabilitiesFp16Without16BitStorage :
5212                                                               outTypeSnippets->capabilities);
5213             fragExtensions = (outFp16WithoutStorage ? outTypeSnippets->extensionsFp16Without16BitStorage :
5214                                                       outTypeSnippets->extensions);
5215             fragTypes      = outTypeSnippets->typeDefinitionsSnippet;
5216             fragConstants  = outTypeSnippets->constantsDefinitionsSnippet;
5217         }
5218 
5219         requiresUniformAndStorage16BitBufferAccess |=
5220             ((outVariableType == FP16) && (testCase.fp16Without16BitStorage == false));
5221 
5222         // varying is not used but it needs to be specified so lets use type_i32 for it
5223         string unusedVertVarying = "%BP_vertex_result     = OpVariable %type_i32_optr Output\n";
5224         string unusedFragVarying = "%BP_vertex_result     = OpVariable %type_i32_iptr Input\n";
5225 
5226         vertCapabilities = "";
5227         vertExtensions   = "";
5228         vertAnnotations  = "OpDecorate %type_f32_arr_1 ArrayStride 4\n";
5229         vertTypes        = f32TypeMinimalRequired;
5230         vertConstants    = "";
5231 
5232         if ((outVariableType != FP32) && (inVariableType != FP32))
5233             fragTypes += f32TypeMinimalRequired;
5234 
5235         fragAnnotations += specOpData.annotations;
5236         fragTypes += specOpData.types;
5237         fragConstants += specOpData.constants;
5238 
5239         vertExecutionMode = "";
5240         fragExecutionMode = behaviorExecutionMode;
5241         vertIODefinitions = unusedVertVarying;
5242         fragIODefinitions = unusedFragVarying;
5243 
5244         vertArguments     = "";
5245         fragArguments     = specOpData.arguments;
5246         vertVariables     = "";
5247         fragVariables     = specOpData.variables;
5248         vertCommands      = "";
5249         fragCommands      = specOpData.commands;
5250         vertProcessResult = "";
5251         fragProcessResult = outTypeSnippets->storeResultsSnippet;
5252 
5253         if (inFp16WithoutStorage)
5254         {
5255             fragAnnotations += inTypeSnippets->typeAnnotationsFp16Snippet;
5256             if (testOperation.isInputTypeRestricted)
5257             {
5258                 fragTypes += inTypeSnippets->typeDefinitionsFp16Snippet;
5259             }
5260             fragIODefinitions += inTypeSnippets->inputDefinitionsFp16Snippet;
5261         }
5262         else
5263         {
5264             fragIODefinitions += inTypeSnippets->inputDefinitionsSnippet;
5265         }
5266 
5267         if (outFp16WithoutStorage)
5268         {
5269             if (testOperation.isInputTypeRestricted)
5270             {
5271                 fragAnnotations += outTypeSnippets->typeAnnotationsFp16Snippet;
5272             }
5273             fragTypes += outTypeSnippets->typeDefinitionsFp16Snippet;
5274             fragIODefinitions += outTypeSnippets->outputDefinitionsFp16Snippet;
5275             fragProcessResult = outTypeSnippets->storeResultsFp16Snippet;
5276         }
5277         else
5278         {
5279             fragIODefinitions += outTypeSnippets->outputDefinitionsSnippet;
5280         }
5281 
5282         if (!testCaseInfo.argumentsFromInput)
5283         {
5284             switch (testCaseInfo.testCase.operationId)
5285             {
5286             case OID_CONV_FROM_FP32:
5287             case OID_CONV_FROM_FP64:
5288                 needsSpecialConstants = true;
5289                 break;
5290             default:
5291                 break;
5292             }
5293         }
5294     }
5295 
5296     // Another reason we need shaderFloat16 is the executable instructions uses fp16
5297     // in a way not supported by the 16bit storage extension.
5298     needsShaderFloat16 |= float16FeatureRequired && testOperation.floatUsage == FLOAT_ARITHMETIC;
5299 
5300     // Constants are only needed sometimes.  Drop them in the fp16 case if the code doesn't need
5301     // them, and if we don't otherwise need shaderFloat16.
5302     bool needsFP16Constants = needsShaderFloat16 || needsSpecialConstants || outFp16WithoutStorage;
5303 
5304     if (!needsFP16Constants && float16FeatureRequired)
5305     {
5306         // Check various code fragments
5307         const FloatStatementUsageFlags commandsFloatConstMask =
5308             B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_CONST_FP16;
5309         const bool commandsUsesFloatConstant =
5310             (testCaseInfo.operation.statementUsageFlags & commandsFloatConstMask) != 0;
5311         const FloatStatementUsageFlags argumentsFloatConstMask =
5312             B_STATEMENT_USAGE_ARGS_CONST_FLOAT | B_STATEMENT_USAGE_ARGS_CONST_FP16;
5313         const bool argumentsUsesFloatConstant = (specOpData.argumentsUsesFloatConstant & argumentsFloatConstMask) != 0;
5314         bool hasFP16ConstsInCommandsOrArguments = commandsUsesFloatConstant || argumentsUsesFloatConstant;
5315 
5316         needsFP16Constants |= hasFP16ConstsInCommandsOrArguments;
5317 
5318         if (!needsFP16Constants)
5319         {
5320             vertConstants = "";
5321             fragConstants = "";
5322         }
5323     }
5324     needsShaderFloat16 |= needsFP16Constants;
5325 
5326     if (needsShaderFloat16)
5327     {
5328         vertCapabilities += "OpCapability Float16\n";
5329         fragCapabilities += "OpCapability Float16\n";
5330     }
5331 
5332     map<string, string> specializations;
5333     specializations["vert_capabilities"]   = vertCapabilities;
5334     specializations["vert_extensions"]     = vertExtensions;
5335     specializations["vert_execution_mode"] = vertExecutionMode;
5336     specializations["vert_annotations"]    = vertAnnotations;
5337     specializations["vert_types"]          = vertTypes;
5338     specializations["vert_constants"]      = vertConstants;
5339     specializations["vert_io_definitions"] = vertIODefinitions;
5340     specializations["vert_arguments"]      = vertArguments;
5341     specializations["vert_variables"]      = vertVariables;
5342     specializations["vert_functions"]      = vertFunctions;
5343     specializations["vert_commands"]       = vertCommands;
5344     specializations["vert_process_result"] = vertProcessResult;
5345     specializations["frag_capabilities"]   = fragCapabilities;
5346     specializations["frag_extensions"]     = fragExtensions;
5347     specializations["frag_execution_mode"] = fragExecutionMode;
5348     specializations["frag_annotations"]    = fragAnnotations;
5349     specializations["frag_types"]          = fragTypes;
5350     specializations["frag_constants"]      = fragConstants;
5351     specializations["frag_functions"]      = fragFunctions;
5352     specializations["frag_io_definitions"] = fragIODefinitions;
5353     specializations["frag_arguments"]      = fragArguments;
5354     specializations["frag_variables"]      = fragVariables;
5355     specializations["frag_commands"]       = fragCommands;
5356     specializations["frag_process_result"] = fragProcessResult;
5357 
5358     // colors are not used by the test - input is passed via uniform buffer
5359     RGBA defaultColors[4] = {RGBA::white(), RGBA::red(), RGBA::green(), RGBA::blue()};
5360 
5361     // construct input and output buffers of proper types
5362     TypeValuesSP inTypeValues  = m_typeData.at(inVariableType).values;
5363     TypeValuesSP outTypeValues = m_typeData.at(outVariableType).values;
5364     BufferSp inBufferSp        = inTypeValues->constructInputBuffer(testCase.input);
5365     BufferSp outBufferSp       = outTypeValues->constructOutputBuffer(testCase.expectedOutput);
5366 
5367     vkt::SpirVAssembly::GraphicsResources resources;
5368     resources.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5369     resources.outputs.push_back(Resource(outBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
5370     resources.verifyIO = checkFloatsLUT[outVariableType];
5371 
5372     StageToSpecConstantMap noSpecConstants;
5373     PushConstants noPushConstants;
5374     GraphicsInterfaces noInterfaces;
5375 
5376     VulkanFeatures vulkanFeatures;
5377     setupFloatControlsProperties(
5378         inVariableTypeForCaps, // usualy same as inFloatType - different only for UnpackHalf2x16
5379         outVariableType, testCase.behaviorFlags, vulkanFeatures.floatControlsProperties);
5380     vulkanFeatures.coreFeatures.fragmentStoresAndAtomics = true;
5381     vulkanFeatures.coreFeatures.shaderFloat64            = float64FeatureRequired;
5382     vulkanFeatures.coreFeatures.shaderInt64              = int64FeatureRequired;
5383     vulkanFeatures.extFloat16Int8.shaderFloat16          = needsShaderFloat16;
5384     vulkanFeatures.ext16BitStorage.uniformAndStorageBuffer16BitAccess =
5385         float16FeatureRequired && requiresUniformAndStorage16BitBufferAccess;
5386 
5387     vector<string> extensions;
5388     extensions.push_back("VK_KHR_shader_float_controls");
5389 
5390     InstanceContext ctx(defaultColors, defaultColors, specializations, noSpecConstants, noPushConstants, resources,
5391                         noInterfaces, extensions, vulkanFeatures, testedStage);
5392 
5393     ctx.moduleMap["vert"].push_back(std::make_pair("main", VK_SHADER_STAGE_VERTEX_BIT));
5394     ctx.moduleMap["frag"].push_back(std::make_pair("main", VK_SHADER_STAGE_FRAGMENT_BIT));
5395 
5396     ctx.requiredStages = static_cast<VkShaderStageFlagBits>(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
5397     ctx.failResult     = QP_TEST_RESULT_FAIL;
5398     ctx.failMessageTemplate = "Output doesn't match with expected";
5399 
5400     return ctx;
5401 }
5402 
5403 } // namespace
5404 
createFloatControlsTestGroup(TestContext & testCtx,TestGroupBuilderBase * groupBuilder)5405 tcu::TestCaseGroup *createFloatControlsTestGroup(TestContext &testCtx, TestGroupBuilderBase *groupBuilder)
5406 {
5407     de::MovePtr<TestCaseGroup> group(new TestCaseGroup(testCtx, "float_controls"));
5408 
5409     struct TestGroup
5410     {
5411         VariableType variableType;
5412         const char *groupName;
5413     };
5414     TestGroup testGroups[] = {
5415         {FP16, "fp16"},
5416         {FP32, "fp32"},
5417         {FP64, "fp64"},
5418     };
5419 
5420     for (int i = 0; i < DE_LENGTH_OF_ARRAY(testGroups); ++i)
5421     {
5422         const TestGroup &testGroup = testGroups[i];
5423         TestCaseGroup *typeGroup   = new TestCaseGroup(testCtx, testGroup.groupName);
5424         group->addChild(typeGroup);
5425 
5426         groupBuilder->createOperationTests(typeGroup, "input_args", testGroup.variableType, true);
5427         groupBuilder->createOperationTests(typeGroup, "generated_args", testGroup.variableType, false);
5428     }
5429 
5430     groupBuilder->createSettingsTests(group.get());
5431 
5432     return group.release();
5433 }
5434 
createFloatControlsComputeGroup(TestContext & testCtx)5435 tcu::TestCaseGroup *createFloatControlsComputeGroup(TestContext &testCtx)
5436 {
5437     ComputeTestGroupBuilder computeTestGroupBuilder;
5438     computeTestGroupBuilder.init();
5439 
5440     return createFloatControlsTestGroup(testCtx, &computeTestGroupBuilder);
5441 }
5442 
createFloatControlsGraphicsGroup(TestContext & testCtx)5443 tcu::TestCaseGroup *createFloatControlsGraphicsGroup(TestContext &testCtx)
5444 {
5445     GraphicsTestGroupBuilder graphicsTestGroupBuilder;
5446     graphicsTestGroupBuilder.init();
5447 
5448     return createFloatControlsTestGroup(testCtx, &graphicsTestGroupBuilder);
5449 }
5450 
5451 } // namespace SpirVAssembly
5452 } // namespace vkt
5453