1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 Valve Corporation.
6 * Copyright (c) 2019 The Khronos Group Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief OpFConvert tests.
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktShaderFConvertTests.hpp"
26 #include "vktTestCase.hpp"
27
28 #include "vkBufferWithMemory.hpp"
29 #include "vkObjUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkCmdUtil.hpp"
32 #include "vkPrograms.hpp"
33
34 #include "deDefs.hpp"
35 #include "deRandom.hpp"
36
37 #include "tcuFloat.hpp"
38 #include "tcuTestLog.hpp"
39 #include "tcuFormatUtil.hpp"
40
41 #include <vector>
42 #include <iterator>
43 #include <algorithm>
44 #include <memory>
45 #include <sstream>
46 #include <iomanip>
47 #include <string>
48 #include <limits>
49 #include <cassert>
50
51 namespace vkt
52 {
53 namespace shaderexecutor
54 {
55
56 namespace
57 {
58
// Seed of the shared pseudo-random generator; fixed so that the generated inputs are reproducible.
constexpr uint32_t kRandomSeed = 0xdeadbeefu;
// Number of random values generated for each floating point type.
constexpr size_t kRandomSourcesPerType = 240;
// Supported vector lengths, from scalars to 4-component vectors.
constexpr size_t kMinVectorLength = 1;
constexpr size_t kMaxVectorLength = 4;
// Alignment, in bytes, of each array element when values are packed into a buffer.
constexpr size_t kArrayAlignment = 16;
// Number of components of storage used by a vector of length i (index 0 is unused): a 3-component vector uses the space of 4.
constexpr size_t kEffectiveLength[kMaxVectorLength + 1] = {0, 1, 2, 4, 4};
// The number of input floats is padded to a multiple of this value, which is divisible by every supported vector length.
constexpr size_t kGCFNumFloats = 12;
66
67 // Get a random normal number.
68 // Works for implementations of tcu::Float as T.
69 template <class T>
getRandomNormal(de::Random & rnd)70 T getRandomNormal(de::Random &rnd)
71 {
72 static constexpr typename T::StorageType kLeadingMantissaBit =
73 (static_cast<typename T::StorageType>(1) << T::MANTISSA_BITS);
74 static constexpr int kSignValues[] = {-1, 1};
75
76 int signBit = rnd.getInt(0, 1);
77 int exponent = rnd.getInt(1 - T::EXPONENT_BIAS, T::EXPONENT_BIAS + 1);
78 typename T::StorageType mantissa =
79 static_cast<typename T::StorageType>(rnd.getUint64() & static_cast<uint64_t>(kLeadingMantissaBit - 1));
80
81 // Construct number.
82 return T::construct(kSignValues[signBit], exponent, (kLeadingMantissaBit | mantissa));
83 }
84
// Hand-picked values of type T (an implementation of tcu::Float) that are always part of the inputs: both signed zeros and
// the largest and smallest normals of each sign. Infinities and NaN are not included.
// The list is built on the first call and the same vector is returned afterwards; callers rely on the element order.
template <class T>
const std::vector<T> &interestingSamples()
{
    static const std::vector<T> kSamples = []()
    {
        std::vector<T> list;
        list.reserve(6);
        list.push_back(T::zero(-1));
        list.push_back(T::zero(1));
        list.push_back(T::largestNormal(-1));
        list.push_back(T::largestNormal(1));
        list.push_back(T::smallestNormal(-1));
        list.push_back(T::smallestNormal(1));
        return list;
    }();

    return kSamples;
}
103
104 // Get some random interesting numbers.
105 // Works for implementations of tcu::Float as T.
106 template <class T>
getRandomInteresting(de::Random & rnd,size_t numSamples)107 std::vector<T> getRandomInteresting(de::Random &rnd, size_t numSamples)
108 {
109 auto &samples = interestingSamples<T>();
110 std::vector<T> result;
111
112 result.reserve(numSamples);
113 std::generate_n(std::back_inserter(result), numSamples,
114 [&rnd, &samples]() { return rnd.choose<T>(begin(samples), end(samples)); });
115
116 return result;
117 }
118
119 // Helper class to build each vector only once in a thread-safe way.
120 template <class T>
121 struct StaticVectorHelper
122 {
123 std::vector<T> v;
124
StaticVectorHelpervkt::shaderexecutor::__anonce1e3d900111::StaticVectorHelper125 StaticVectorHelper(de::Random &rnd)
126 {
127 v.reserve(kRandomSourcesPerType);
128 for (size_t i = 0; i < kRandomSourcesPerType; ++i)
129 v.push_back(getRandomNormal<T>(rnd));
130 }
131 };
132
133 // Get a list of random normal input values for type T.
134 template <class T>
getRandomNormals(de::Random & rnd)135 const std::vector<T> &getRandomNormals(de::Random &rnd)
136 {
137 static StaticVectorHelper<T> helper(rnd);
138 return helper.v;
139 }
140
// Convert every element of orig from type T1 to type T2 using T2::convert, keeping the element order.
// T1 and T2 are expected to be tcu::Float implementations.
template <class T1, class T2>
std::vector<T2> convertVector(const std::vector<T1> &orig)
{
    std::vector<T2> converted;
    converted.reserve(orig.size());

    for (const T1 &value : orig)
        converted.push_back(T2::convert(value));

    return converted;
}
152
153 // Get converted normal values for other tcu::Float types smaller than T, which should be exact conversions when converting back to
154 // those types.
155 template <class T>
156 std::vector<T> getOtherNormals(de::Random &rnd);
157
158 template <>
getOtherNormals(de::Random &)159 std::vector<tcu::Float16> getOtherNormals<tcu::Float16>(de::Random &)
160 {
161 // Nothing below tcu::Float16.
162 return std::vector<tcu::Float16>();
163 }
164
165 template <>
getOtherNormals(de::Random & rnd)166 std::vector<tcu::Float32> getOtherNormals<tcu::Float32>(de::Random &rnd)
167 {
168 // The ones from tcu::Float16.
169 return convertVector<tcu::Float16, tcu::Float32>(getRandomNormals<tcu::Float16>(rnd));
170 }
171
172 template <>
getOtherNormals(de::Random & rnd)173 std::vector<tcu::Float64> getOtherNormals<tcu::Float64>(de::Random &rnd)
174 {
175 // The ones from both tcu::Float16 and tcu::Float64.
176 auto v1 = convertVector<tcu::Float16, tcu::Float64>(getRandomNormals<tcu::Float16>(rnd));
177 auto v2 = convertVector<tcu::Float32, tcu::Float64>(getRandomNormals<tcu::Float32>(rnd));
178
179 v1.reserve(v1.size() + v2.size());
180 std::copy(begin(v2), end(v2), std::back_inserter(v1));
181 return v1;
182 }
183
184 // Get the full list of input values for type T.
185 template <class T>
getInputValues(de::Random & rnd)186 std::vector<T> getInputValues(de::Random &rnd)
187 {
188 auto &interesting = interestingSamples<T>();
189 auto &normals = getRandomNormals<T>(rnd);
190 auto otherNormals = getOtherNormals<T>(rnd);
191
192 const size_t numValues = interesting.size() + normals.size() + otherNormals.size();
193 const size_t extraValues = numValues % kGCFNumFloats;
194 const size_t needed = ((extraValues == 0) ? 0 : (kGCFNumFloats - extraValues));
195
196 auto extra = getRandomInteresting<T>(rnd, needed);
197
198 std::vector<T> values;
199 values.reserve(interesting.size() + normals.size() + otherNormals.size() + extra.size());
200
201 std::copy(begin(interesting), end(interesting), std::back_inserter(values));
202 std::copy(begin(normals), end(normals), std::back_inserter(values));
203 std::copy(begin(otherNormals), end(otherNormals), std::back_inserter(values));
204 std::copy(begin(extra), end(extra), std::back_inserter(values));
205
206 // Shuffle samples around a bit to make it more interesting.
207 rnd.shuffle(begin(values), end(values));
208
209 return values;
210 }
211
212 // This singleton makes sure generated samples are stable no matter the test order.
213 class InputGenerator
214 {
215 public:
getInstance()216 static const InputGenerator &getInstance()
217 {
218 static InputGenerator instance;
219 return instance;
220 }
221
getInputValues16() const222 const std::vector<tcu::Float16> &getInputValues16() const
223 {
224 return m_values16;
225 }
226
getInputValues32() const227 const std::vector<tcu::Float32> &getInputValues32() const
228 {
229 return m_values32;
230 }
231
getInputValues64() const232 const std::vector<tcu::Float64> &getInputValues64() const
233 {
234 return m_values64;
235 }
236
237 private:
InputGenerator()238 InputGenerator()
239 : m_rnd(kRandomSeed)
240 , m_values16(getInputValues<tcu::Float16>(m_rnd))
241 , m_values32(getInputValues<tcu::Float32>(m_rnd))
242 , m_values64(getInputValues<tcu::Float64>(m_rnd))
243 {
244 }
245
246 // Cannot copy or assign.
247 InputGenerator(const InputGenerator &) = delete;
248 InputGenerator &operator=(const InputGenerator &) = delete;
249
250 de::Random m_rnd;
251 std::vector<tcu::Float16> m_values16;
252 std::vector<tcu::Float32> m_values32;
253 std::vector<tcu::Float64> m_values64;
254 };
255
256 // Check single result is as expected.
257 // Works for implementations of tcu::Float as T1 and T2.
258 template <class T1, class T2>
validConversion(const T1 & orig,const T2 & result)259 bool validConversion(const T1 &orig, const T2 &result)
260 {
261 const T2 acceptedResults[] = {T2::convert(orig, tcu::ROUND_DOWNWARD), T2::convert(orig, tcu::ROUND_UPWARD)};
262 bool valid = false;
263
264 for (const auto &validResult : acceptedResults)
265 {
266 if (validResult.isNaN() && result.isNaN())
267 valid = true;
268 else if (validResult.isInf() && result.isInf())
269 valid = true;
270 else if (validResult.isZero() && result.isZero())
271 valid = true;
272 else if (validResult.isDenorm() && (result.isDenorm() || result.isZero()))
273 valid = true;
274 else if (validResult.bits() == result.bits()) // Exact conversion, up or down.
275 valid = true;
276 }
277
278 return valid;
279 }
280
281 // Check results vector is as expected.
282 template <class T1, class T2>
validConversion(const std::vector<T1> & orig,const std::vector<T2> & converted,tcu::TestLog & log)283 bool validConversion(const std::vector<T1> &orig, const std::vector<T2> &converted, tcu::TestLog &log)
284 {
285 DE_ASSERT(orig.size() == converted.size());
286
287 bool allValid = true;
288
289 for (size_t i = 0; i < orig.size(); ++i)
290 {
291 const bool valid = validConversion(orig[i], converted[i]);
292
293 {
294 const double origD = orig[i].asDouble();
295 const double convD = converted[i].asDouble();
296
297 std::ostringstream msg;
298 msg << "[" << i << "] " << std::setprecision(std::numeric_limits<double>::digits10 + 2) << std::scientific
299 << origD << " converted to " << convD << ": " << (valid ? "OK" : "FAILURE");
300
301 log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
302 }
303
304 if (!valid)
305 allValid = false;
306 }
307
308 return allValid;
309 }
310
311 // Helps calculate buffer sizes and other parameters for the given number of values and vector length using a given floating point
312 // type. This is mostly used in packFloats() below, but we also need this information in the iterate() method for the test instance,
313 // so it has been separated.
314 struct BufferSizeInfo
315 {
316 template <class T>
calculatevkt::shaderexecutor::__anonce1e3d900111::BufferSizeInfo317 static BufferSizeInfo calculate(size_t numValues_, size_t vectorLength_)
318 {
319 // The vector length must be a known number.
320 DE_ASSERT(vectorLength_ >= kMinVectorLength && vectorLength_ <= kMaxVectorLength);
321 // The number of values must be appropriate for the vector length.
322 DE_ASSERT(numValues_ % vectorLength_ == 0);
323
324 BufferSizeInfo info;
325
326 info.numValues = numValues_;
327 info.vectorLength = vectorLength_;
328 info.totalVectors = numValues_ / vectorLength_;
329
330 const size_t elementSize = sizeof(typename T::StorageType);
331 const size_t effectiveLength = kEffectiveLength[vectorLength_];
332 const size_t vectorSize = elementSize * effectiveLength;
333 const size_t extraBytes = vectorSize % kArrayAlignment;
334
335 info.vectorStrideBytes = vectorSize + ((extraBytes == 0) ? 0 : (kArrayAlignment - extraBytes));
336 info.memorySizeBytes = info.vectorStrideBytes * info.totalVectors;
337
338 return info;
339 }
340
341 size_t numValues;
342 size_t vectorLength;
343 size_t totalVectors;
344 size_t vectorStrideBytes;
345 size_t memorySizeBytes;
346 };
347
348 // Pack an array of tcu::Float values into a buffer to be read from a shader, as if it was an array of vectors with each vector
349 // having size vectorLength (e.g. 3 for a vec3). Note: assumes std140.
350 template <class T>
packFloats(const std::vector<T> & values,size_t vectorLength)351 std::vector<uint8_t> packFloats(const std::vector<T> &values, size_t vectorLength)
352 {
353 BufferSizeInfo sizeInfo = BufferSizeInfo::calculate<T>(values.size(), vectorLength);
354
355 std::vector<uint8_t> memory(sizeInfo.memorySizeBytes);
356 for (size_t i = 0; i < sizeInfo.totalVectors; ++i)
357 {
358 T *vectorPtr = reinterpret_cast<T *>(memory.data() + sizeInfo.vectorStrideBytes * i);
359 for (size_t j = 0; j < vectorLength; ++j)
360 vectorPtr[j] = values[i * vectorLength + j];
361 }
362
363 return memory;
364 }
365
366 // Unpack an array of vectors into an array of values, undoing what packFloats would do.
367 // expectedNumValues is used for verification.
368 template <class T>
unpackFloats(const std::vector<uint8_t> & memory,size_t vectorLength,size_t expectedNumValues)369 std::vector<T> unpackFloats(const std::vector<uint8_t> &memory, size_t vectorLength, size_t expectedNumValues)
370 {
371 DE_ASSERT(vectorLength >= kMinVectorLength && vectorLength <= kMaxVectorLength);
372
373 const size_t effectiveLength = kEffectiveLength[vectorLength];
374 const size_t elementSize = sizeof(typename T::StorageType);
375 const size_t vectorSize = elementSize * effectiveLength;
376 const size_t extraBytes = vectorSize % kArrayAlignment;
377 const size_t vectorBlockSize = vectorSize + ((extraBytes == 0) ? 0 : (kArrayAlignment - extraBytes));
378
379 DE_ASSERT(memory.size() % vectorBlockSize == 0);
380 const size_t numStoredVectors = memory.size() / vectorBlockSize;
381 const size_t numStoredValues = numStoredVectors * vectorLength;
382
383 DE_UNREF(expectedNumValues); // For release builds.
384 DE_ASSERT(numStoredValues == expectedNumValues);
385 std::vector<T> values;
386 values.reserve(numStoredValues);
387
388 for (size_t i = 0; i < numStoredVectors; ++i)
389 {
390 const T *vectorPtr = reinterpret_cast<const T *>(memory.data() + vectorBlockSize * i);
391 for (size_t j = 0; j < vectorLength; ++j)
392 values.push_back(vectorPtr[j]);
393 }
394
395 return values;
396 }
397
398 enum FloatType
399 {
400 FLOAT_TYPE_16_BITS = 0,
401 FLOAT_TYPE_32_BITS,
402 FLOAT_TYPE_64_BITS,
403 FLOAT_TYPE_MAX_ENUM,
404 };
405
406 static const char *const kFloatNames[FLOAT_TYPE_MAX_ENUM] = {
407 "f16",
408 "f32",
409 "f64",
410 };
411
412 static const char *const kGLSLTypes[][kMaxVectorLength + 1] = {
413 {nullptr, "float16_t", "f16vec2", "f16vec3", "f16vec4"},
414 {nullptr, "float", "vec2", "vec3", "vec4"},
415 {nullptr, "double", "dvec2", "dvec3", "dvec4"},
416 };
417
418 struct TestParams
419 {
420 FloatType from;
421 FloatType to;
422 size_t vectorLength;
423
getInputTypeStrvkt::shaderexecutor::__anonce1e3d900111::TestParams424 std::string getInputTypeStr() const
425 {
426 DE_ASSERT(from >= 0 && from < FLOAT_TYPE_MAX_ENUM);
427 DE_ASSERT(vectorLength >= kMinVectorLength && vectorLength <= kMaxVectorLength);
428 return kGLSLTypes[from][vectorLength];
429 }
430
getOutputTypeStrvkt::shaderexecutor::__anonce1e3d900111::TestParams431 std::string getOutputTypeStr() const
432 {
433 DE_ASSERT(to >= 0 && to < FLOAT_TYPE_MAX_ENUM);
434 DE_ASSERT(vectorLength >= kMinVectorLength && vectorLength <= kMaxVectorLength);
435 return kGLSLTypes[to][vectorLength];
436 }
437 };
438
439 class FConvertTestInstance : public TestInstance
440 {
441 public:
FConvertTestInstance(Context & context,const TestParams & params)442 FConvertTestInstance(Context &context, const TestParams ¶ms) : TestInstance(context), m_params(params)
443 {
444 }
445
446 virtual tcu::TestStatus iterate(void);
447
448 private:
449 TestParams m_params;
450 };
451
452 class FConvertTestCase : public TestCase
453 {
454 public:
FConvertTestCase(tcu::TestContext & context,const std::string & name,const TestParams & params)455 FConvertTestCase(tcu::TestContext &context, const std::string &name, const TestParams ¶ms)
456 : TestCase(context, name)
457 , m_params(params)
458 {
459 }
460
~FConvertTestCase(void)461 ~FConvertTestCase(void)
462 {
463 }
createInstance(Context & context) const464 virtual TestInstance *createInstance(Context &context) const
465 {
466 return new FConvertTestInstance(context, m_params);
467 }
468 virtual void initPrograms(vk::SourceCollections &programCollection) const;
469 virtual void checkSupport(Context &context) const;
470
471 private:
472 TestParams m_params;
473 };
474
initPrograms(vk::SourceCollections & programCollection) const475 void FConvertTestCase::initPrograms(vk::SourceCollections &programCollection) const
476 {
477 const std::string inputType = m_params.getInputTypeStr();
478 const std::string outputType = m_params.getOutputTypeStr();
479 const InputGenerator &inputGenerator = InputGenerator::getInstance();
480
481 size_t numValues = 0;
482 switch (m_params.from)
483 {
484 case FLOAT_TYPE_16_BITS:
485 numValues = inputGenerator.getInputValues16().size();
486 break;
487 case FLOAT_TYPE_32_BITS:
488 numValues = inputGenerator.getInputValues32().size();
489 break;
490 case FLOAT_TYPE_64_BITS:
491 numValues = inputGenerator.getInputValues64().size();
492 break;
493 default:
494 DE_ASSERT(false);
495 break;
496 }
497
498 const size_t arraySize = numValues / m_params.vectorLength;
499
500 std::ostringstream shader;
501
502 shader
503 << "#version 450 core\n"
504 << ((m_params.from == FLOAT_TYPE_16_BITS || m_params.to == FLOAT_TYPE_16_BITS) ?
505 "#extension GL_EXT_shader_16bit_storage: require\n" // This is needed to use 16-bit float types in buffers.
506 "#extension GL_EXT_shader_explicit_arithmetic_types: require\n" // This is needed for some conversions.
507 :
508 "")
509 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
510 << "layout(set = 0, binding = 0, std140) buffer issbodef { " << inputType << " val[" << arraySize
511 << "]; } issbo;\n"
512 << "layout(set = 0, binding = 1, std140) buffer ossbodef { " << outputType << " val[" << arraySize
513 << "]; } ossbo;\n"
514 << "void main()\n"
515 << "{\n"
516 << " ossbo.val[gl_WorkGroupID.x] = " << outputType << "(issbo.val[gl_WorkGroupID.x]);\n"
517 << "}\n";
518
519 programCollection.glslSources.add("comp") << glu::ComputeSource(shader.str());
520 }
521
checkSupport(Context & context) const522 void FConvertTestCase::checkSupport(Context &context) const
523 {
524 if (m_params.from == FLOAT_TYPE_64_BITS || m_params.to == FLOAT_TYPE_64_BITS)
525 {
526 // Check for 64-bit float support.
527 auto features = context.getDeviceFeatures();
528 if (!features.shaderFloat64)
529 TCU_THROW(NotSupportedError, "64-bit floats not supported in shader code");
530 }
531
532 if (m_params.from == FLOAT_TYPE_16_BITS || m_params.to == FLOAT_TYPE_16_BITS)
533 {
534 // Check for 16-bit float support.
535 auto &features16 = context.getShaderFloat16Int8Features();
536 if (!features16.shaderFloat16)
537 TCU_THROW(NotSupportedError, "16-bit floats not supported in shader code");
538
539 auto &storage16 = context.get16BitStorageFeatures();
540 if (!storage16.storageBuffer16BitAccess)
541 TCU_THROW(NotSupportedError, "16-bit floats not supported for storage buffers");
542 }
543 }
544
iterate(void)545 tcu::TestStatus FConvertTestInstance::iterate(void)
546 {
547 BufferSizeInfo inputBufferSizeInfo;
548 BufferSizeInfo outputBufferSizeInfo;
549 std::vector<uint8_t> inputMemory;
550
551 assert(m_params.from == FLOAT_TYPE_16_BITS || m_params.from == FLOAT_TYPE_32_BITS ||
552 m_params.from == FLOAT_TYPE_64_BITS);
553
554 if (m_params.from == FLOAT_TYPE_16_BITS)
555 {
556 auto &inputValues = InputGenerator::getInstance().getInputValues16();
557 inputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float16>(inputValues.size(), m_params.vectorLength);
558 inputMemory = packFloats(inputValues, m_params.vectorLength);
559 }
560 else if (m_params.from == FLOAT_TYPE_32_BITS)
561 {
562 auto &inputValues = InputGenerator::getInstance().getInputValues32();
563 inputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float32>(inputValues.size(), m_params.vectorLength);
564 inputMemory = packFloats(inputValues, m_params.vectorLength);
565 }
566 else
567 {
568 auto &inputValues = InputGenerator::getInstance().getInputValues64();
569 inputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float64>(inputValues.size(), m_params.vectorLength);
570 inputMemory = packFloats(inputValues, m_params.vectorLength);
571 }
572
573 switch (m_params.to)
574 {
575 case FLOAT_TYPE_16_BITS:
576 outputBufferSizeInfo =
577 BufferSizeInfo::calculate<tcu::Float16>(inputBufferSizeInfo.numValues, m_params.vectorLength);
578 break;
579 case FLOAT_TYPE_32_BITS:
580 outputBufferSizeInfo =
581 BufferSizeInfo::calculate<tcu::Float32>(inputBufferSizeInfo.numValues, m_params.vectorLength);
582 break;
583 case FLOAT_TYPE_64_BITS:
584 outputBufferSizeInfo =
585 BufferSizeInfo::calculate<tcu::Float64>(inputBufferSizeInfo.numValues, m_params.vectorLength);
586 break;
587 default:
588 assert(false);
589 break;
590 }
591
592 // Prepare input and output buffers.
593 auto &vkd = m_context.getDeviceInterface();
594 auto device = m_context.getDevice();
595 auto &allocator = m_context.getDefaultAllocator();
596
597 de::MovePtr<vk::BufferWithMemory> inputBuffer(new vk::BufferWithMemory(
598 vkd, device, allocator,
599 vk::makeBufferCreateInfo(inputBufferSizeInfo.memorySizeBytes, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
600 vk::MemoryRequirement::HostVisible));
601
602 de::MovePtr<vk::BufferWithMemory> outputBuffer(new vk::BufferWithMemory(
603 vkd, device, allocator,
604 vk::makeBufferCreateInfo(outputBufferSizeInfo.memorySizeBytes, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
605 vk::MemoryRequirement::HostVisible));
606
607 // Copy values to input buffer.
608 {
609 auto &alloc = inputBuffer->getAllocation();
610 deMemcpy(reinterpret_cast<uint8_t *>(alloc.getHostPtr()) + alloc.getOffset(), inputMemory.data(),
611 inputMemory.size());
612 vk::flushAlloc(vkd, device, alloc);
613 }
614
615 // Create an array with the input and output buffers to make it easier to iterate below.
616 const vk::VkBuffer buffers[] = {inputBuffer->get(), outputBuffer->get()};
617
618 // Create descriptor set layout.
619 std::vector<vk::VkDescriptorSetLayoutBinding> bindings;
620 for (int i = 0; i < DE_LENGTH_OF_ARRAY(buffers); ++i)
621 {
622 const vk::VkDescriptorSetLayoutBinding binding = {
623 static_cast<uint32_t>(i), // uint32_t binding;
624 vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // VkDescriptorType descriptorType;
625 1u, // uint32_t descriptorCount;
626 vk::VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlags stageFlags;
627 DE_NULL, // const VkSampler* pImmutableSamplers;
628 };
629 bindings.push_back(binding);
630 }
631
632 const vk::VkDescriptorSetLayoutCreateInfo layoutCreateInfo = {
633 vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, // VkStructureType sType;
634 DE_NULL, // const void* pNext;
635 0, // VkDescriptorSetLayoutCreateFlags flags;
636 static_cast<uint32_t>(bindings.size()), // uint32_t bindingCount;
637 bindings.data() // const VkDescriptorSetLayoutBinding* pBindings;
638 };
639 auto descriptorSetLayout = vk::createDescriptorSetLayout(vkd, device, &layoutCreateInfo);
640
641 // Create descriptor set.
642 vk::DescriptorPoolBuilder poolBuilder;
643 for (const auto &b : bindings)
644 poolBuilder.addType(b.descriptorType, 1u);
645 auto descriptorPool = poolBuilder.build(vkd, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
646
647 const vk::VkDescriptorSetAllocateInfo allocateInfo = {
648 vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // VkStructureType sType;
649 DE_NULL, // const void* pNext;
650 *descriptorPool, // VkDescriptorPool descriptorPool;
651 1u, // uint32_t descriptorSetCount;
652 &descriptorSetLayout.get() // const VkDescriptorSetLayout* pSetLayouts;
653 };
654 auto descriptorSet = vk::allocateDescriptorSet(vkd, device, &allocateInfo);
655
656 // Update descriptor set.
657 std::vector<vk::VkDescriptorBufferInfo> descriptorBufferInfos;
658 std::vector<vk::VkWriteDescriptorSet> descriptorWrites;
659
660 for (const auto &buffer : buffers)
661 {
662 const vk::VkDescriptorBufferInfo bufferInfo = {
663 buffer, // VkBuffer buffer;
664 0u, // VkDeviceSize offset;
665 VK_WHOLE_SIZE, // VkDeviceSize range;
666 };
667 descriptorBufferInfos.push_back(bufferInfo);
668 }
669
670 for (size_t i = 0; i < bindings.size(); ++i)
671 {
672 const vk::VkWriteDescriptorSet write = {
673 vk::VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // VkStructureType sType;
674 DE_NULL, // const void* pNext;
675 *descriptorSet, // VkDescriptorSet dstSet;
676 static_cast<uint32_t>(i), // uint32_t dstBinding;
677 0u, // uint32_t dstArrayElement;
678 1u, // uint32_t descriptorCount;
679 bindings[i].descriptorType, // VkDescriptorType descriptorType;
680 DE_NULL, // const VkDescriptorImageInfo* pImageInfo;
681 &descriptorBufferInfos[i], // const VkDescriptorBufferInfo* pBufferInfo;
682 DE_NULL, // const VkBufferView* pTexelBufferView;
683 };
684 descriptorWrites.push_back(write);
685 }
686 vkd.updateDescriptorSets(device, static_cast<uint32_t>(descriptorWrites.size()), descriptorWrites.data(), 0u,
687 DE_NULL);
688
689 // Prepare barriers in advance so data is visible to the shaders and the host.
690 std::vector<vk::VkBufferMemoryBarrier> hostToDevBarriers;
691 std::vector<vk::VkBufferMemoryBarrier> devToHostBarriers;
692 for (int i = 0; i < DE_LENGTH_OF_ARRAY(buffers); ++i)
693 {
694 const vk::VkBufferMemoryBarrier hostToDev = {
695 vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
696 DE_NULL, // const void* pNext;
697 vk::VK_ACCESS_HOST_WRITE_BIT, // VkAccessFlags srcAccessMask;
698 (vk::VK_ACCESS_SHADER_READ_BIT | vk::VK_ACCESS_SHADER_WRITE_BIT), // VkAccessFlags dstAccessMask;
699 VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex;
700 VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex;
701 buffers[i], // VkBuffer buffer;
702 0u, // VkDeviceSize offset;
703 VK_WHOLE_SIZE, // VkDeviceSize size;
704 };
705 hostToDevBarriers.push_back(hostToDev);
706
707 const vk::VkBufferMemoryBarrier devToHost = {
708 vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
709 DE_NULL, // const void* pNext;
710 vk::VK_ACCESS_SHADER_WRITE_BIT, // VkAccessFlags srcAccessMask;
711 vk::VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask;
712 VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex;
713 VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex;
714 buffers[i], // VkBuffer buffer;
715 0u, // VkDeviceSize offset;
716 VK_WHOLE_SIZE, // VkDeviceSize size;
717 };
718 devToHostBarriers.push_back(devToHost);
719 }
720
721 // Create command pool and command buffer.
722 auto queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
723
724 const vk::VkCommandPoolCreateInfo cmdPoolCreateInfo = {
725 vk::VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, // VkStructureType sType;
726 DE_NULL, // const void* pNext;
727 vk::VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, // VkCommandPoolCreateFlags flags;
728 queueFamilyIndex, // uint32_t queueFamilyIndex;
729 };
730 auto cmdPool = vk::createCommandPool(vkd, device, &cmdPoolCreateInfo);
731
732 const vk::VkCommandBufferAllocateInfo cmdBufferAllocateInfo = {
733 vk::VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
734 DE_NULL, // const void* pNext;
735 *cmdPool, // VkCommandPool commandPool;
736 vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY, // VkCommandBufferLevel level;
737 1u, // uint32_t commandBufferCount;
738 };
739 auto cmdBuffer = vk::allocateCommandBuffer(vkd, device, &cmdBufferAllocateInfo);
740
741 // Create pipeline layout.
742 const vk::VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
743 vk::VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
744 DE_NULL, // const void* pNext;
745 0, // VkPipelineLayoutCreateFlags flags;
746 1u, // uint32_t setLayoutCount;
747 &descriptorSetLayout.get(), // const VkDescriptorSetLayout* pSetLayouts;
748 0u, // uint32_t pushConstantRangeCount;
749 DE_NULL, // const VkPushConstantRange* pPushConstantRanges;
750 };
751 auto pipelineLayout = vk::createPipelineLayout(vkd, device, &pipelineLayoutCreateInfo);
752
753 // Create compute pipeline.
754 const vk::Unique<vk::VkShaderModule> shader(
755 vk::createShaderModule(vkd, device, m_context.getBinaryCollection().get("comp"), 0));
756
757 const vk::VkComputePipelineCreateInfo computeCreateInfo = {
758 vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
759 DE_NULL, // const void* pNext;
760 0, // VkPipelineCreateFlags flags;
761 {
762 // VkPipelineShaderStageCreateInfo stage;
763 vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
764 DE_NULL, // const void* pNext;
765 0, // VkPipelineShaderStageCreateFlags flags;
766 vk::VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
767 *shader, // VkShaderModule module;
768 "main", // const char* pName;
769 DE_NULL, // const VkSpecializationInfo* pSpecializationInfo;
770 },
771 *pipelineLayout, // VkPipelineLayout layout;
772 DE_NULL, // VkPipeline basePipelineHandle;
773 0, // int32_t basePipelineIndex;
774 };
775 auto computePipeline = vk::createComputePipeline(vkd, device, DE_NULL, &computeCreateInfo);
776
777 // Run the shader.
778 vk::beginCommandBuffer(vkd, *cmdBuffer);
779 vkd.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
780 vkd.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1u,
781 &descriptorSet.get(), 0u, DE_NULL);
782 vkd.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0u,
783 DE_NULL, static_cast<uint32_t>(hostToDevBarriers.size()), hostToDevBarriers.data(), 0u,
784 DE_NULL);
785 vkd.cmdDispatch(*cmdBuffer, static_cast<uint32_t>(inputBufferSizeInfo.totalVectors), 1u, 1u);
786 vkd.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, 0, 0u,
787 DE_NULL, static_cast<uint32_t>(devToHostBarriers.size()), devToHostBarriers.data(), 0u,
788 DE_NULL);
789 vk::endCommandBuffer(vkd, *cmdBuffer);
790 vk::submitCommandsAndWait(vkd, device, m_context.getUniversalQueue(), *cmdBuffer);
791
792 // Invalidate output allocation.
793 vk::invalidateAlloc(vkd, device, outputBuffer->getAllocation());
794
795 // Copy output buffer data.
796 std::vector<uint8_t> outputMemory(outputBufferSizeInfo.memorySizeBytes);
797 {
798 auto &alloc = outputBuffer->getAllocation();
799 deMemcpy(outputMemory.data(), reinterpret_cast<uint8_t *>(alloc.getHostPtr()) + alloc.getOffset(),
800 outputBufferSizeInfo.memorySizeBytes);
801 }
802
803 // Unpack and verify output data.
804 auto &testLog = m_context.getTestContext().getLog();
805 bool conversionOk = false;
806 switch (m_params.to)
807 {
808 case FLOAT_TYPE_16_BITS:
809 {
810 auto outputValues =
811 unpackFloats<tcu::Float16>(outputMemory, m_params.vectorLength, inputBufferSizeInfo.numValues);
812 switch (m_params.from)
813 {
814 case FLOAT_TYPE_32_BITS:
815 {
816 auto &inputValues = InputGenerator::getInstance().getInputValues32();
817 conversionOk = validConversion(inputValues, outputValues, testLog);
818 }
819 break;
820
821 case FLOAT_TYPE_64_BITS:
822 {
823 auto &inputValues = InputGenerator::getInstance().getInputValues64();
824 conversionOk = validConversion(inputValues, outputValues, testLog);
825 }
826 break;
827
828 default:
829 DE_ASSERT(false);
830 break;
831 }
832 }
833 break;
834
835 case FLOAT_TYPE_32_BITS:
836 {
837 auto outputValues =
838 unpackFloats<tcu::Float32>(outputMemory, m_params.vectorLength, inputBufferSizeInfo.numValues);
839 switch (m_params.from)
840 {
841 case FLOAT_TYPE_16_BITS:
842 {
843 auto &inputValues = InputGenerator::getInstance().getInputValues16();
844 conversionOk = validConversion(inputValues, outputValues, testLog);
845 }
846 break;
847
848 case FLOAT_TYPE_64_BITS:
849 {
850 auto &inputValues = InputGenerator::getInstance().getInputValues64();
851 conversionOk = validConversion(inputValues, outputValues, testLog);
852 }
853 break;
854
855 default:
856 DE_ASSERT(false);
857 break;
858 }
859 }
860 break;
861
862 case FLOAT_TYPE_64_BITS:
863 {
864 auto outputValues =
865 unpackFloats<tcu::Float64>(outputMemory, m_params.vectorLength, inputBufferSizeInfo.numValues);
866 switch (m_params.from)
867 {
868 case FLOAT_TYPE_16_BITS:
869 {
870 auto &inputValues = InputGenerator::getInstance().getInputValues16();
871 conversionOk = validConversion(inputValues, outputValues, testLog);
872 }
873 break;
874
875 case FLOAT_TYPE_32_BITS:
876 {
877 auto &inputValues = InputGenerator::getInstance().getInputValues32();
878 conversionOk = validConversion(inputValues, outputValues, testLog);
879 }
880 break;
881
882 default:
883 DE_ASSERT(false);
884 break;
885 }
886 }
887 break;
888
889 default:
890 DE_ASSERT(false);
891 break;
892 }
893
894 return (conversionOk ? tcu::TestStatus::pass("Pass") : tcu::TestStatus::fail("Fail"));
895 }
896
897 } // namespace
898
createPrecisionFconvertGroup(tcu::TestContext & testCtx)899 tcu::TestCaseGroup *createPrecisionFconvertGroup(tcu::TestContext &testCtx)
900 {
901 tcu::TestCaseGroup *newGroup = new tcu::TestCaseGroup(testCtx, "precision_fconvert");
902
903 for (int i = 0; i < FLOAT_TYPE_MAX_ENUM; ++i)
904 for (int j = 0; j < FLOAT_TYPE_MAX_ENUM; ++j)
905 for (size_t k = kMinVectorLength; k <= kMaxVectorLength; ++k)
906 {
907 // No actual conversion if the types are the same.
908 if (i == j)
909 continue;
910
911 TestParams params = {
912 static_cast<FloatType>(i),
913 static_cast<FloatType>(j),
914 k,
915 };
916
917 std::string testName =
918 std::string() + kFloatNames[i] + "_to_" + kFloatNames[j] + "_size_" + std::to_string(k);
919
920 newGroup->addChild(new FConvertTestCase(testCtx, testName, params));
921 }
922
923 return newGroup;
924 }
925
926 } // namespace shaderexecutor
927 } // namespace vkt
928