xref: /aosp_15_r20/external/deqp/external/vulkancts/modules/vulkan/shaderexecutor/vktShaderFConvertTests.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 Valve Corporation.
6  * Copyright (c) 2019 The Khronos Group Inc.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief OpFConvert tests.
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktShaderFConvertTests.hpp"
26 #include "vktTestCase.hpp"
27 
28 #include "vkBufferWithMemory.hpp"
29 #include "vkObjUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkCmdUtil.hpp"
32 #include "vkPrograms.hpp"
33 
34 #include "deDefs.hpp"
35 #include "deRandom.hpp"
36 
37 #include "tcuFloat.hpp"
38 #include "tcuTestLog.hpp"
39 #include "tcuFormatUtil.hpp"
40 
41 #include <vector>
42 #include <iterator>
43 #include <algorithm>
44 #include <memory>
45 #include <sstream>
46 #include <iomanip>
47 #include <string>
48 #include <limits>
49 #include <cassert>
50 
51 namespace vkt
52 {
53 namespace shaderexecutor
54 {
55 
56 namespace
57 {
58 
// Seed used to generate the pseudorandom input values.
constexpr uint32_t kRandomSeed = 0xdeadbeef;
// Number of random values generated for each floating point type.
constexpr size_t kRandomSourcesPerType = 240;
// Supported vector lengths, from scalars up to 4 components.
constexpr size_t kMinVectorLength = 1;
constexpr size_t kMaxVectorLength = 4;
// Alignment of each array element in the buffers, in bytes.
constexpr size_t kArrayAlignment = 16;
// Effective length of a vector of size i: a 3-component vector takes the room of 4 components.
constexpr size_t kEffectiveLength[kMaxVectorLength + 1] = {0, 1, 2, 4, 4};
// The number of floats in a test is padded to a multiple of this value. Despite the "GCF" in its name, this is a
// common multiple (the least one) of all the supported vector lengths, so the same list of values can be split
// into whole vectors of any supported length.
constexpr size_t kGCFNumFloats = 12;

static_assert(kMinVectorLength == 1 && kMaxVectorLength == 4,
              "kEffectiveLength and kGCFNumFloats assume vector lengths from 1 to 4");
static_assert(kGCFNumFloats % 2 == 0 && kGCFNumFloats % 3 == 0 && kGCFNumFloats % 4 == 0,
              "kGCFNumFloats must be a multiple of every supported vector length");
66 
67 // Get a random normal number.
68 // Works for implementations of tcu::Float as T.
69 template <class T>
getRandomNormal(de::Random & rnd)70 T getRandomNormal(de::Random &rnd)
71 {
72     static constexpr typename T::StorageType kLeadingMantissaBit =
73         (static_cast<typename T::StorageType>(1) << T::MANTISSA_BITS);
74     static constexpr int kSignValues[] = {-1, 1};
75 
76     int signBit  = rnd.getInt(0, 1);
77     int exponent = rnd.getInt(1 - T::EXPONENT_BIAS, T::EXPONENT_BIAS + 1);
78     typename T::StorageType mantissa =
79         static_cast<typename T::StorageType>(rnd.getUint64() & static_cast<uint64_t>(kLeadingMantissaBit - 1));
80 
81     // Construct number.
82     return T::construct(kSignValues[signBit], exponent, (kLeadingMantissaBit | mantissa));
83 }
84 
// Hand-picked samples for the tcu::Float class T: both signed zeros followed by the largest and the smallest normal
// numbers of each sign. Infinities and NaN are deliberately not part of the list.
template <class T>
const std::vector<T> &interestingSamples()
{
    // Built once on first use. The element order is kept stable because random picks are made from this list.
    static const std::vector<T> kSamples = []() -> std::vector<T>
    {
        std::vector<T> list;
        list.reserve(6);
        list.push_back(T::zero(-1));
        list.push_back(T::zero(1));
        list.push_back(T::largestNormal(-1));
        list.push_back(T::largestNormal(1));
        list.push_back(T::smallestNormal(-1));
        list.push_back(T::smallestNormal(1));
        return list;
    }();

    return kSamples;
}
103 
104 // Get some random interesting numbers.
105 // Works for implementations of tcu::Float as T.
106 template <class T>
getRandomInteresting(de::Random & rnd,size_t numSamples)107 std::vector<T> getRandomInteresting(de::Random &rnd, size_t numSamples)
108 {
109     auto &samples = interestingSamples<T>();
110     std::vector<T> result;
111 
112     result.reserve(numSamples);
113     std::generate_n(std::back_inserter(result), numSamples,
114                     [&rnd, &samples]() { return rnd.choose<T>(begin(samples), end(samples)); });
115 
116     return result;
117 }
118 
119 // Helper class to build each vector only once in a thread-safe way.
120 template <class T>
121 struct StaticVectorHelper
122 {
123     std::vector<T> v;
124 
StaticVectorHelpervkt::shaderexecutor::__anonce1e3d900111::StaticVectorHelper125     StaticVectorHelper(de::Random &rnd)
126     {
127         v.reserve(kRandomSourcesPerType);
128         for (size_t i = 0; i < kRandomSourcesPerType; ++i)
129             v.push_back(getRandomNormal<T>(rnd));
130     }
131 };
132 
133 // Get a list of random normal input values for type T.
134 template <class T>
getRandomNormals(de::Random & rnd)135 const std::vector<T> &getRandomNormals(de::Random &rnd)
136 {
137     static StaticVectorHelper<T> helper(rnd);
138     return helper.v;
139 }
140 
// Converts every element of orig from type T1 to type T2 using T2::convert(), preserving the element order.
template <class T1, class T2>
std::vector<T2> convertVector(const std::vector<T1> &orig)
{
    std::vector<T2> converted;
    converted.reserve(orig.size());

    for (const T1 &value : orig)
        converted.push_back(T2::convert(value));

    return converted;
}
152 
153 // Get converted normal values for other tcu::Float types smaller than T, which should be exact conversions when converting back to
154 // those types.
155 template <class T>
156 std::vector<T> getOtherNormals(de::Random &rnd);
157 
158 template <>
getOtherNormals(de::Random &)159 std::vector<tcu::Float16> getOtherNormals<tcu::Float16>(de::Random &)
160 {
161     // Nothing below tcu::Float16.
162     return std::vector<tcu::Float16>();
163 }
164 
165 template <>
getOtherNormals(de::Random & rnd)166 std::vector<tcu::Float32> getOtherNormals<tcu::Float32>(de::Random &rnd)
167 {
168     // The ones from tcu::Float16.
169     return convertVector<tcu::Float16, tcu::Float32>(getRandomNormals<tcu::Float16>(rnd));
170 }
171 
172 template <>
getOtherNormals(de::Random & rnd)173 std::vector<tcu::Float64> getOtherNormals<tcu::Float64>(de::Random &rnd)
174 {
175     // The ones from both tcu::Float16 and tcu::Float64.
176     auto v1 = convertVector<tcu::Float16, tcu::Float64>(getRandomNormals<tcu::Float16>(rnd));
177     auto v2 = convertVector<tcu::Float32, tcu::Float64>(getRandomNormals<tcu::Float32>(rnd));
178 
179     v1.reserve(v1.size() + v2.size());
180     std::copy(begin(v2), end(v2), std::back_inserter(v1));
181     return v1;
182 }
183 
184 // Get the full list of input values for type T.
185 template <class T>
getInputValues(de::Random & rnd)186 std::vector<T> getInputValues(de::Random &rnd)
187 {
188     auto &interesting = interestingSamples<T>();
189     auto &normals     = getRandomNormals<T>(rnd);
190     auto otherNormals = getOtherNormals<T>(rnd);
191 
192     const size_t numValues   = interesting.size() + normals.size() + otherNormals.size();
193     const size_t extraValues = numValues % kGCFNumFloats;
194     const size_t needed      = ((extraValues == 0) ? 0 : (kGCFNumFloats - extraValues));
195 
196     auto extra = getRandomInteresting<T>(rnd, needed);
197 
198     std::vector<T> values;
199     values.reserve(interesting.size() + normals.size() + otherNormals.size() + extra.size());
200 
201     std::copy(begin(interesting), end(interesting), std::back_inserter(values));
202     std::copy(begin(normals), end(normals), std::back_inserter(values));
203     std::copy(begin(otherNormals), end(otherNormals), std::back_inserter(values));
204     std::copy(begin(extra), end(extra), std::back_inserter(values));
205 
206     // Shuffle samples around a bit to make it more interesting.
207     rnd.shuffle(begin(values), end(values));
208 
209     return values;
210 }
211 
212 // This singleton makes sure generated samples are stable no matter the test order.
213 class InputGenerator
214 {
215 public:
getInstance()216     static const InputGenerator &getInstance()
217     {
218         static InputGenerator instance;
219         return instance;
220     }
221 
getInputValues16() const222     const std::vector<tcu::Float16> &getInputValues16() const
223     {
224         return m_values16;
225     }
226 
getInputValues32() const227     const std::vector<tcu::Float32> &getInputValues32() const
228     {
229         return m_values32;
230     }
231 
getInputValues64() const232     const std::vector<tcu::Float64> &getInputValues64() const
233     {
234         return m_values64;
235     }
236 
237 private:
InputGenerator()238     InputGenerator()
239         : m_rnd(kRandomSeed)
240         , m_values16(getInputValues<tcu::Float16>(m_rnd))
241         , m_values32(getInputValues<tcu::Float32>(m_rnd))
242         , m_values64(getInputValues<tcu::Float64>(m_rnd))
243     {
244     }
245 
246     // Cannot copy or assign.
247     InputGenerator(const InputGenerator &)            = delete;
248     InputGenerator &operator=(const InputGenerator &) = delete;
249 
250     de::Random m_rnd;
251     std::vector<tcu::Float16> m_values16;
252     std::vector<tcu::Float32> m_values32;
253     std::vector<tcu::Float64> m_values64;
254 };
255 
256 // Check single result is as expected.
257 // Works for implementations of tcu::Float as T1 and T2.
258 template <class T1, class T2>
validConversion(const T1 & orig,const T2 & result)259 bool validConversion(const T1 &orig, const T2 &result)
260 {
261     const T2 acceptedResults[] = {T2::convert(orig, tcu::ROUND_DOWNWARD), T2::convert(orig, tcu::ROUND_UPWARD)};
262     bool valid                 = false;
263 
264     for (const auto &validResult : acceptedResults)
265     {
266         if (validResult.isNaN() && result.isNaN())
267             valid = true;
268         else if (validResult.isInf() && result.isInf())
269             valid = true;
270         else if (validResult.isZero() && result.isZero())
271             valid = true;
272         else if (validResult.isDenorm() && (result.isDenorm() || result.isZero()))
273             valid = true;
274         else if (validResult.bits() == result.bits()) // Exact conversion, up or down.
275             valid = true;
276     }
277 
278     return valid;
279 }
280 
281 // Check results vector is as expected.
282 template <class T1, class T2>
validConversion(const std::vector<T1> & orig,const std::vector<T2> & converted,tcu::TestLog & log)283 bool validConversion(const std::vector<T1> &orig, const std::vector<T2> &converted, tcu::TestLog &log)
284 {
285     DE_ASSERT(orig.size() == converted.size());
286 
287     bool allValid = true;
288 
289     for (size_t i = 0; i < orig.size(); ++i)
290     {
291         const bool valid = validConversion(orig[i], converted[i]);
292 
293         {
294             const double origD = orig[i].asDouble();
295             const double convD = converted[i].asDouble();
296 
297             std::ostringstream msg;
298             msg << "[" << i << "] " << std::setprecision(std::numeric_limits<double>::digits10 + 2) << std::scientific
299                 << origD << " converted to " << convD << ": " << (valid ? "OK" : "FAILURE");
300 
301             log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
302         }
303 
304         if (!valid)
305             allValid = false;
306     }
307 
308     return allValid;
309 }
310 
311 // Helps calculate buffer sizes and other parameters for the given number of values and vector length using a given floating point
312 // type. This is mostly used in packFloats() below, but we also need this information in the iterate() method for the test instance,
313 // so it has been separated.
314 struct BufferSizeInfo
315 {
316     template <class T>
calculatevkt::shaderexecutor::__anonce1e3d900111::BufferSizeInfo317     static BufferSizeInfo calculate(size_t numValues_, size_t vectorLength_)
318     {
319         // The vector length must be a known number.
320         DE_ASSERT(vectorLength_ >= kMinVectorLength && vectorLength_ <= kMaxVectorLength);
321         // The number of values must be appropriate for the vector length.
322         DE_ASSERT(numValues_ % vectorLength_ == 0);
323 
324         BufferSizeInfo info;
325 
326         info.numValues    = numValues_;
327         info.vectorLength = vectorLength_;
328         info.totalVectors = numValues_ / vectorLength_;
329 
330         const size_t elementSize     = sizeof(typename T::StorageType);
331         const size_t effectiveLength = kEffectiveLength[vectorLength_];
332         const size_t vectorSize      = elementSize * effectiveLength;
333         const size_t extraBytes      = vectorSize % kArrayAlignment;
334 
335         info.vectorStrideBytes = vectorSize + ((extraBytes == 0) ? 0 : (kArrayAlignment - extraBytes));
336         info.memorySizeBytes   = info.vectorStrideBytes * info.totalVectors;
337 
338         return info;
339     }
340 
341     size_t numValues;
342     size_t vectorLength;
343     size_t totalVectors;
344     size_t vectorStrideBytes;
345     size_t memorySizeBytes;
346 };
347 
348 // Pack an array of tcu::Float values into a buffer to be read from a shader, as if it was an array of vectors with each vector
349 // having size vectorLength (e.g. 3 for a vec3). Note: assumes std140.
350 template <class T>
packFloats(const std::vector<T> & values,size_t vectorLength)351 std::vector<uint8_t> packFloats(const std::vector<T> &values, size_t vectorLength)
352 {
353     BufferSizeInfo sizeInfo = BufferSizeInfo::calculate<T>(values.size(), vectorLength);
354 
355     std::vector<uint8_t> memory(sizeInfo.memorySizeBytes);
356     for (size_t i = 0; i < sizeInfo.totalVectors; ++i)
357     {
358         T *vectorPtr = reinterpret_cast<T *>(memory.data() + sizeInfo.vectorStrideBytes * i);
359         for (size_t j = 0; j < vectorLength; ++j)
360             vectorPtr[j] = values[i * vectorLength + j];
361     }
362 
363     return memory;
364 }
365 
366 // Unpack an array of vectors into an array of values, undoing what packFloats would do.
367 // expectedNumValues is used for verification.
368 template <class T>
unpackFloats(const std::vector<uint8_t> & memory,size_t vectorLength,size_t expectedNumValues)369 std::vector<T> unpackFloats(const std::vector<uint8_t> &memory, size_t vectorLength, size_t expectedNumValues)
370 {
371     DE_ASSERT(vectorLength >= kMinVectorLength && vectorLength <= kMaxVectorLength);
372 
373     const size_t effectiveLength = kEffectiveLength[vectorLength];
374     const size_t elementSize     = sizeof(typename T::StorageType);
375     const size_t vectorSize      = elementSize * effectiveLength;
376     const size_t extraBytes      = vectorSize % kArrayAlignment;
377     const size_t vectorBlockSize = vectorSize + ((extraBytes == 0) ? 0 : (kArrayAlignment - extraBytes));
378 
379     DE_ASSERT(memory.size() % vectorBlockSize == 0);
380     const size_t numStoredVectors = memory.size() / vectorBlockSize;
381     const size_t numStoredValues  = numStoredVectors * vectorLength;
382 
383     DE_UNREF(expectedNumValues); // For release builds.
384     DE_ASSERT(numStoredValues == expectedNumValues);
385     std::vector<T> values;
386     values.reserve(numStoredValues);
387 
388     for (size_t i = 0; i < numStoredVectors; ++i)
389     {
390         const T *vectorPtr = reinterpret_cast<const T *>(memory.data() + vectorBlockSize * i);
391         for (size_t j = 0; j < vectorLength; ++j)
392             values.push_back(vectorPtr[j]);
393     }
394 
395     return values;
396 }
397 
398 enum FloatType
399 {
400     FLOAT_TYPE_16_BITS = 0,
401     FLOAT_TYPE_32_BITS,
402     FLOAT_TYPE_64_BITS,
403     FLOAT_TYPE_MAX_ENUM,
404 };
405 
406 static const char *const kFloatNames[FLOAT_TYPE_MAX_ENUM] = {
407     "f16",
408     "f32",
409     "f64",
410 };
411 
412 static const char *const kGLSLTypes[][kMaxVectorLength + 1] = {
413     {nullptr, "float16_t", "f16vec2", "f16vec3", "f16vec4"},
414     {nullptr, "float", "vec2", "vec3", "vec4"},
415     {nullptr, "double", "dvec2", "dvec3", "dvec4"},
416 };
417 
418 struct TestParams
419 {
420     FloatType from;
421     FloatType to;
422     size_t vectorLength;
423 
getInputTypeStrvkt::shaderexecutor::__anonce1e3d900111::TestParams424     std::string getInputTypeStr() const
425     {
426         DE_ASSERT(from >= 0 && from < FLOAT_TYPE_MAX_ENUM);
427         DE_ASSERT(vectorLength >= kMinVectorLength && vectorLength <= kMaxVectorLength);
428         return kGLSLTypes[from][vectorLength];
429     }
430 
getOutputTypeStrvkt::shaderexecutor::__anonce1e3d900111::TestParams431     std::string getOutputTypeStr() const
432     {
433         DE_ASSERT(to >= 0 && to < FLOAT_TYPE_MAX_ENUM);
434         DE_ASSERT(vectorLength >= kMinVectorLength && vectorLength <= kMaxVectorLength);
435         return kGLSLTypes[to][vectorLength];
436     }
437 };
438 
439 class FConvertTestInstance : public TestInstance
440 {
441 public:
FConvertTestInstance(Context & context,const TestParams & params)442     FConvertTestInstance(Context &context, const TestParams &params) : TestInstance(context), m_params(params)
443     {
444     }
445 
446     virtual tcu::TestStatus iterate(void);
447 
448 private:
449     TestParams m_params;
450 };
451 
452 class FConvertTestCase : public TestCase
453 {
454 public:
FConvertTestCase(tcu::TestContext & context,const std::string & name,const TestParams & params)455     FConvertTestCase(tcu::TestContext &context, const std::string &name, const TestParams &params)
456         : TestCase(context, name)
457         , m_params(params)
458     {
459     }
460 
~FConvertTestCase(void)461     ~FConvertTestCase(void)
462     {
463     }
createInstance(Context & context) const464     virtual TestInstance *createInstance(Context &context) const
465     {
466         return new FConvertTestInstance(context, m_params);
467     }
468     virtual void initPrograms(vk::SourceCollections &programCollection) const;
469     virtual void checkSupport(Context &context) const;
470 
471 private:
472     TestParams m_params;
473 };
474 
initPrograms(vk::SourceCollections & programCollection) const475 void FConvertTestCase::initPrograms(vk::SourceCollections &programCollection) const
476 {
477     const std::string inputType          = m_params.getInputTypeStr();
478     const std::string outputType         = m_params.getOutputTypeStr();
479     const InputGenerator &inputGenerator = InputGenerator::getInstance();
480 
481     size_t numValues = 0;
482     switch (m_params.from)
483     {
484     case FLOAT_TYPE_16_BITS:
485         numValues = inputGenerator.getInputValues16().size();
486         break;
487     case FLOAT_TYPE_32_BITS:
488         numValues = inputGenerator.getInputValues32().size();
489         break;
490     case FLOAT_TYPE_64_BITS:
491         numValues = inputGenerator.getInputValues64().size();
492         break;
493     default:
494         DE_ASSERT(false);
495         break;
496     }
497 
498     const size_t arraySize = numValues / m_params.vectorLength;
499 
500     std::ostringstream shader;
501 
502     shader
503         << "#version 450 core\n"
504         << ((m_params.from == FLOAT_TYPE_16_BITS || m_params.to == FLOAT_TYPE_16_BITS) ?
505                 "#extension GL_EXT_shader_16bit_storage: require\n" // This is needed to use 16-bit float types in buffers.
506                 "#extension GL_EXT_shader_explicit_arithmetic_types: require\n" // This is needed for some conversions.
507                 :
508                 "")
509         << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
510         << "layout(set = 0, binding = 0, std140) buffer issbodef { " << inputType << " val[" << arraySize
511         << "]; } issbo;\n"
512         << "layout(set = 0, binding = 1, std140) buffer ossbodef { " << outputType << " val[" << arraySize
513         << "]; } ossbo;\n"
514         << "void main()\n"
515         << "{\n"
516         << "    ossbo.val[gl_WorkGroupID.x] = " << outputType << "(issbo.val[gl_WorkGroupID.x]);\n"
517         << "}\n";
518 
519     programCollection.glslSources.add("comp") << glu::ComputeSource(shader.str());
520 }
521 
checkSupport(Context & context) const522 void FConvertTestCase::checkSupport(Context &context) const
523 {
524     if (m_params.from == FLOAT_TYPE_64_BITS || m_params.to == FLOAT_TYPE_64_BITS)
525     {
526         // Check for 64-bit float support.
527         auto features = context.getDeviceFeatures();
528         if (!features.shaderFloat64)
529             TCU_THROW(NotSupportedError, "64-bit floats not supported in shader code");
530     }
531 
532     if (m_params.from == FLOAT_TYPE_16_BITS || m_params.to == FLOAT_TYPE_16_BITS)
533     {
534         // Check for 16-bit float support.
535         auto &features16 = context.getShaderFloat16Int8Features();
536         if (!features16.shaderFloat16)
537             TCU_THROW(NotSupportedError, "16-bit floats not supported in shader code");
538 
539         auto &storage16 = context.get16BitStorageFeatures();
540         if (!storage16.storageBuffer16BitAccess)
541             TCU_THROW(NotSupportedError, "16-bit floats not supported for storage buffers");
542     }
543 }
544 
iterate(void)545 tcu::TestStatus FConvertTestInstance::iterate(void)
546 {
547     BufferSizeInfo inputBufferSizeInfo;
548     BufferSizeInfo outputBufferSizeInfo;
549     std::vector<uint8_t> inputMemory;
550 
551     assert(m_params.from == FLOAT_TYPE_16_BITS || m_params.from == FLOAT_TYPE_32_BITS ||
552            m_params.from == FLOAT_TYPE_64_BITS);
553 
554     if (m_params.from == FLOAT_TYPE_16_BITS)
555     {
556         auto &inputValues   = InputGenerator::getInstance().getInputValues16();
557         inputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float16>(inputValues.size(), m_params.vectorLength);
558         inputMemory         = packFloats(inputValues, m_params.vectorLength);
559     }
560     else if (m_params.from == FLOAT_TYPE_32_BITS)
561     {
562         auto &inputValues   = InputGenerator::getInstance().getInputValues32();
563         inputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float32>(inputValues.size(), m_params.vectorLength);
564         inputMemory         = packFloats(inputValues, m_params.vectorLength);
565     }
566     else
567     {
568         auto &inputValues   = InputGenerator::getInstance().getInputValues64();
569         inputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float64>(inputValues.size(), m_params.vectorLength);
570         inputMemory         = packFloats(inputValues, m_params.vectorLength);
571     }
572 
573     switch (m_params.to)
574     {
575     case FLOAT_TYPE_16_BITS:
576         outputBufferSizeInfo =
577             BufferSizeInfo::calculate<tcu::Float16>(inputBufferSizeInfo.numValues, m_params.vectorLength);
578         break;
579     case FLOAT_TYPE_32_BITS:
580         outputBufferSizeInfo =
581             BufferSizeInfo::calculate<tcu::Float32>(inputBufferSizeInfo.numValues, m_params.vectorLength);
582         break;
583     case FLOAT_TYPE_64_BITS:
584         outputBufferSizeInfo =
585             BufferSizeInfo::calculate<tcu::Float64>(inputBufferSizeInfo.numValues, m_params.vectorLength);
586         break;
587     default:
588         assert(false);
589         break;
590     }
591 
592     // Prepare input and output buffers.
593     auto &vkd       = m_context.getDeviceInterface();
594     auto device     = m_context.getDevice();
595     auto &allocator = m_context.getDefaultAllocator();
596 
597     de::MovePtr<vk::BufferWithMemory> inputBuffer(new vk::BufferWithMemory(
598         vkd, device, allocator,
599         vk::makeBufferCreateInfo(inputBufferSizeInfo.memorySizeBytes, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
600         vk::MemoryRequirement::HostVisible));
601 
602     de::MovePtr<vk::BufferWithMemory> outputBuffer(new vk::BufferWithMemory(
603         vkd, device, allocator,
604         vk::makeBufferCreateInfo(outputBufferSizeInfo.memorySizeBytes, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
605         vk::MemoryRequirement::HostVisible));
606 
607     // Copy values to input buffer.
608     {
609         auto &alloc = inputBuffer->getAllocation();
610         deMemcpy(reinterpret_cast<uint8_t *>(alloc.getHostPtr()) + alloc.getOffset(), inputMemory.data(),
611                  inputMemory.size());
612         vk::flushAlloc(vkd, device, alloc);
613     }
614 
615     // Create an array with the input and output buffers to make it easier to iterate below.
616     const vk::VkBuffer buffers[] = {inputBuffer->get(), outputBuffer->get()};
617 
618     // Create descriptor set layout.
619     std::vector<vk::VkDescriptorSetLayoutBinding> bindings;
620     for (int i = 0; i < DE_LENGTH_OF_ARRAY(buffers); ++i)
621     {
622         const vk::VkDescriptorSetLayoutBinding binding = {
623             static_cast<uint32_t>(i),              // uint32_t              binding;
624             vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // VkDescriptorType      descriptorType;
625             1u,                                    // uint32_t              descriptorCount;
626             vk::VK_SHADER_STAGE_COMPUTE_BIT,       // VkShaderStageFlags    stageFlags;
627             DE_NULL,                               // const VkSampler*      pImmutableSamplers;
628         };
629         bindings.push_back(binding);
630     }
631 
632     const vk::VkDescriptorSetLayoutCreateInfo layoutCreateInfo = {
633         vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, // VkStructureType                        sType;
634         DE_NULL,                                                 // const void*                            pNext;
635         0,                                                       // VkDescriptorSetLayoutCreateFlags       flags;
636         static_cast<uint32_t>(bindings.size()),                  // uint32_t                               bindingCount;
637         bindings.data()                                          // const VkDescriptorSetLayoutBinding*    pBindings;
638     };
639     auto descriptorSetLayout = vk::createDescriptorSetLayout(vkd, device, &layoutCreateInfo);
640 
641     // Create descriptor set.
642     vk::DescriptorPoolBuilder poolBuilder;
643     for (const auto &b : bindings)
644         poolBuilder.addType(b.descriptorType, 1u);
645     auto descriptorPool = poolBuilder.build(vkd, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
646 
647     const vk::VkDescriptorSetAllocateInfo allocateInfo = {
648         vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // VkStructureType                 sType;
649         DE_NULL,                                            // const void*                     pNext;
650         *descriptorPool,                                    // VkDescriptorPool                descriptorPool;
651         1u,                                                 // uint32_t                        descriptorSetCount;
652         &descriptorSetLayout.get()                          // const VkDescriptorSetLayout*    pSetLayouts;
653     };
654     auto descriptorSet = vk::allocateDescriptorSet(vkd, device, &allocateInfo);
655 
656     // Update descriptor set.
657     std::vector<vk::VkDescriptorBufferInfo> descriptorBufferInfos;
658     std::vector<vk::VkWriteDescriptorSet> descriptorWrites;
659 
660     for (const auto &buffer : buffers)
661     {
662         const vk::VkDescriptorBufferInfo bufferInfo = {
663             buffer,        // VkBuffer        buffer;
664             0u,            // VkDeviceSize    offset;
665             VK_WHOLE_SIZE, // VkDeviceSize    range;
666         };
667         descriptorBufferInfos.push_back(bufferInfo);
668     }
669 
670     for (size_t i = 0; i < bindings.size(); ++i)
671     {
672         const vk::VkWriteDescriptorSet write = {
673             vk::VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // VkStructureType                  sType;
674             DE_NULL,                                    // const void*                      pNext;
675             *descriptorSet,                             // VkDescriptorSet                  dstSet;
676             static_cast<uint32_t>(i),                   // uint32_t                         dstBinding;
677             0u,                                         // uint32_t                         dstArrayElement;
678             1u,                                         // uint32_t                         descriptorCount;
679             bindings[i].descriptorType,                 // VkDescriptorType                 descriptorType;
680             DE_NULL,                                    // const VkDescriptorImageInfo*     pImageInfo;
681             &descriptorBufferInfos[i],                  // const VkDescriptorBufferInfo*    pBufferInfo;
682             DE_NULL,                                    // const VkBufferView*              pTexelBufferView;
683         };
684         descriptorWrites.push_back(write);
685     }
686     vkd.updateDescriptorSets(device, static_cast<uint32_t>(descriptorWrites.size()), descriptorWrites.data(), 0u,
687                              DE_NULL);
688 
689     // Prepare barriers in advance so data is visible to the shaders and the host.
690     std::vector<vk::VkBufferMemoryBarrier> hostToDevBarriers;
691     std::vector<vk::VkBufferMemoryBarrier> devToHostBarriers;
692     for (int i = 0; i < DE_LENGTH_OF_ARRAY(buffers); ++i)
693     {
694         const vk::VkBufferMemoryBarrier hostToDev = {
695             vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,                      // VkStructureType sType;
696             DE_NULL,                                                          // const void* pNext;
697             vk::VK_ACCESS_HOST_WRITE_BIT,                                     // VkAccessFlags srcAccessMask;
698             (vk::VK_ACCESS_SHADER_READ_BIT | vk::VK_ACCESS_SHADER_WRITE_BIT), // VkAccessFlags dstAccessMask;
699             VK_QUEUE_FAMILY_IGNORED,                                          // uint32_t srcQueueFamilyIndex;
700             VK_QUEUE_FAMILY_IGNORED,                                          // uint32_t dstQueueFamilyIndex;
701             buffers[i],                                                       // VkBuffer buffer;
702             0u,                                                               // VkDeviceSize offset;
703             VK_WHOLE_SIZE,                                                    // VkDeviceSize size;
704         };
705         hostToDevBarriers.push_back(hostToDev);
706 
707         const vk::VkBufferMemoryBarrier devToHost = {
708             vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
709             DE_NULL,                                     // const void* pNext;
710             vk::VK_ACCESS_SHADER_WRITE_BIT,              // VkAccessFlags srcAccessMask;
711             vk::VK_ACCESS_HOST_READ_BIT,                 // VkAccessFlags dstAccessMask;
712             VK_QUEUE_FAMILY_IGNORED,                     // uint32_t srcQueueFamilyIndex;
713             VK_QUEUE_FAMILY_IGNORED,                     // uint32_t dstQueueFamilyIndex;
714             buffers[i],                                  // VkBuffer buffer;
715             0u,                                          // VkDeviceSize offset;
716             VK_WHOLE_SIZE,                               // VkDeviceSize size;
717         };
718         devToHostBarriers.push_back(devToHost);
719     }
720 
721     // Create command pool and command buffer.
722     auto queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
723 
724     const vk::VkCommandPoolCreateInfo cmdPoolCreateInfo = {
725         vk::VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, // VkStructureType sType;
726         DE_NULL,                                        // const void* pNext;
727         vk::VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,       // VkCommandPoolCreateFlags flags;
728         queueFamilyIndex,                               // uint32_t queueFamilyIndex;
729     };
730     auto cmdPool = vk::createCommandPool(vkd, device, &cmdPoolCreateInfo);
731 
732     const vk::VkCommandBufferAllocateInfo cmdBufferAllocateInfo = {
733         vk::VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
734         DE_NULL,                                            // const void* pNext;
735         *cmdPool,                                           // VkCommandPool commandPool;
736         vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY,                // VkCommandBufferLevel level;
737         1u,                                                 // uint32_t commandBufferCount;
738     };
739     auto cmdBuffer = vk::allocateCommandBuffer(vkd, device, &cmdBufferAllocateInfo);
740 
741     // Create pipeline layout.
742     const vk::VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
743         vk::VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
744         DE_NULL,                                           // const void* pNext;
745         0,                                                 // VkPipelineLayoutCreateFlags flags;
746         1u,                                                // uint32_t setLayoutCount;
747         &descriptorSetLayout.get(),                        // const VkDescriptorSetLayout* pSetLayouts;
748         0u,                                                // uint32_t pushConstantRangeCount;
749         DE_NULL,                                           // const VkPushConstantRange* pPushConstantRanges;
750     };
751     auto pipelineLayout = vk::createPipelineLayout(vkd, device, &pipelineLayoutCreateInfo);
752 
753     // Create compute pipeline.
754     const vk::Unique<vk::VkShaderModule> shader(
755         vk::createShaderModule(vkd, device, m_context.getBinaryCollection().get("comp"), 0));
756 
757     const vk::VkComputePipelineCreateInfo computeCreateInfo = {
758         vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType                    sType;
759         DE_NULL,                                            // const void*                        pNext;
760         0,                                                  // VkPipelineCreateFlags              flags;
761         {
762             // VkPipelineShaderStageCreateInfo    stage;
763             vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType                     sType;
764             DE_NULL,                                                 // const void*                         pNext;
765             0,                                                       // VkPipelineShaderStageCreateFlags    flags;
766             vk::VK_SHADER_STAGE_COMPUTE_BIT,                         // VkShaderStageFlagBits               stage;
767             *shader,                                                 // VkShaderModule                      module;
768             "main",                                                  // const char*                         pName;
769             DE_NULL, // const VkSpecializationInfo*         pSpecializationInfo;
770         },
771         *pipelineLayout, // VkPipelineLayout                   layout;
772         DE_NULL,         // VkPipeline                         basePipelineHandle;
773         0,               // int32_t                            basePipelineIndex;
774     };
775     auto computePipeline = vk::createComputePipeline(vkd, device, DE_NULL, &computeCreateInfo);
776 
777     // Run the shader.
778     vk::beginCommandBuffer(vkd, *cmdBuffer);
779     vkd.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
780     vkd.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1u,
781                               &descriptorSet.get(), 0u, DE_NULL);
782     vkd.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0u,
783                            DE_NULL, static_cast<uint32_t>(hostToDevBarriers.size()), hostToDevBarriers.data(), 0u,
784                            DE_NULL);
785     vkd.cmdDispatch(*cmdBuffer, static_cast<uint32_t>(inputBufferSizeInfo.totalVectors), 1u, 1u);
786     vkd.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, 0, 0u,
787                            DE_NULL, static_cast<uint32_t>(devToHostBarriers.size()), devToHostBarriers.data(), 0u,
788                            DE_NULL);
789     vk::endCommandBuffer(vkd, *cmdBuffer);
790     vk::submitCommandsAndWait(vkd, device, m_context.getUniversalQueue(), *cmdBuffer);
791 
792     // Invalidate output allocation.
793     vk::invalidateAlloc(vkd, device, outputBuffer->getAllocation());
794 
795     // Copy output buffer data.
796     std::vector<uint8_t> outputMemory(outputBufferSizeInfo.memorySizeBytes);
797     {
798         auto &alloc = outputBuffer->getAllocation();
799         deMemcpy(outputMemory.data(), reinterpret_cast<uint8_t *>(alloc.getHostPtr()) + alloc.getOffset(),
800                  outputBufferSizeInfo.memorySizeBytes);
801     }
802 
803     // Unpack and verify output data.
804     auto &testLog     = m_context.getTestContext().getLog();
805     bool conversionOk = false;
806     switch (m_params.to)
807     {
808     case FLOAT_TYPE_16_BITS:
809     {
810         auto outputValues =
811             unpackFloats<tcu::Float16>(outputMemory, m_params.vectorLength, inputBufferSizeInfo.numValues);
812         switch (m_params.from)
813         {
814         case FLOAT_TYPE_32_BITS:
815         {
816             auto &inputValues = InputGenerator::getInstance().getInputValues32();
817             conversionOk      = validConversion(inputValues, outputValues, testLog);
818         }
819         break;
820 
821         case FLOAT_TYPE_64_BITS:
822         {
823             auto &inputValues = InputGenerator::getInstance().getInputValues64();
824             conversionOk      = validConversion(inputValues, outputValues, testLog);
825         }
826         break;
827 
828         default:
829             DE_ASSERT(false);
830             break;
831         }
832     }
833     break;
834 
835     case FLOAT_TYPE_32_BITS:
836     {
837         auto outputValues =
838             unpackFloats<tcu::Float32>(outputMemory, m_params.vectorLength, inputBufferSizeInfo.numValues);
839         switch (m_params.from)
840         {
841         case FLOAT_TYPE_16_BITS:
842         {
843             auto &inputValues = InputGenerator::getInstance().getInputValues16();
844             conversionOk      = validConversion(inputValues, outputValues, testLog);
845         }
846         break;
847 
848         case FLOAT_TYPE_64_BITS:
849         {
850             auto &inputValues = InputGenerator::getInstance().getInputValues64();
851             conversionOk      = validConversion(inputValues, outputValues, testLog);
852         }
853         break;
854 
855         default:
856             DE_ASSERT(false);
857             break;
858         }
859     }
860     break;
861 
862     case FLOAT_TYPE_64_BITS:
863     {
864         auto outputValues =
865             unpackFloats<tcu::Float64>(outputMemory, m_params.vectorLength, inputBufferSizeInfo.numValues);
866         switch (m_params.from)
867         {
868         case FLOAT_TYPE_16_BITS:
869         {
870             auto &inputValues = InputGenerator::getInstance().getInputValues16();
871             conversionOk      = validConversion(inputValues, outputValues, testLog);
872         }
873         break;
874 
875         case FLOAT_TYPE_32_BITS:
876         {
877             auto &inputValues = InputGenerator::getInstance().getInputValues32();
878             conversionOk      = validConversion(inputValues, outputValues, testLog);
879         }
880         break;
881 
882         default:
883             DE_ASSERT(false);
884             break;
885         }
886     }
887     break;
888 
889     default:
890         DE_ASSERT(false);
891         break;
892     }
893 
894     return (conversionOk ? tcu::TestStatus::pass("Pass") : tcu::TestStatus::fail("Fail"));
895 }
896 
897 } // namespace
898 
createPrecisionFconvertGroup(tcu::TestContext & testCtx)899 tcu::TestCaseGroup *createPrecisionFconvertGroup(tcu::TestContext &testCtx)
900 {
901     tcu::TestCaseGroup *newGroup = new tcu::TestCaseGroup(testCtx, "precision_fconvert");
902 
903     for (int i = 0; i < FLOAT_TYPE_MAX_ENUM; ++i)
904         for (int j = 0; j < FLOAT_TYPE_MAX_ENUM; ++j)
905             for (size_t k = kMinVectorLength; k <= kMaxVectorLength; ++k)
906             {
907                 // No actual conversion if the types are the same.
908                 if (i == j)
909                     continue;
910 
911                 TestParams params = {
912                     static_cast<FloatType>(i),
913                     static_cast<FloatType>(j),
914                     k,
915                 };
916 
917                 std::string testName =
918                     std::string() + kFloatNames[i] + "_to_" + kFloatNames[j] + "_size_" + std::to_string(k);
919 
920                 newGroup->addChild(new FConvertTestCase(testCtx, testName, params));
921             }
922 
923     return newGroup;
924 }
925 
926 } // namespace shaderexecutor
927 } // namespace vkt
928