1 #ifndef _VKTSPVASMCOMPUTESHADERTESTUTIL_HPP
2 #define _VKTSPVASMCOMPUTESHADERTESTUTIL_HPP
3 /*-------------------------------------------------------------------------
4  * Vulkan Conformance Tests
5  * ------------------------
6  *
7  * Copyright (c) 2015 Google Inc.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Compute Shader Based Test Case Utility Structs/Functions
24  *//*--------------------------------------------------------------------*/
25 
26 #include "deDefs.h"
27 #include "deFloat16.h"
28 #include "deRandom.hpp"
29 #include "tcuTestLog.hpp"
30 #include "tcuVector.hpp"
31 #include "tcuTestLog.hpp"
32 #include "vkMemUtil.hpp"
33 #include "vktSpvAsmUtils.hpp"
34 
35 #include <string>
36 #include <vector>
37 #include <map>
38 
39 using namespace vk;
40 
41 namespace vkt
42 {
43 namespace SpirVAssembly
44 {
45 
// Atomic operation exercised by an OpAtomic test case. The per-value notes describe how
// OpAtomicBuffer::getBytes() derives the expected output buffer for that operation.
enum OpAtomicType
{
    OPATOMIC_IADD   = 0, // every input value is added to output element 0
    OPATOMIC_ISUB   = 1, // every input value is subtracted from output element 0
    OPATOMIC_IINC   = 2, // output element 0 is incremented once per input value
    OPATOMIC_IDEC   = 3, // output element 0 is decremented once per input value
    OPATOMIC_LOAD   = 4, // output element i equals input element i
    OPATOMIC_STORE  = 5, // output element i equals input element i
    OPATOMIC_COMPEX = 6, // output element i is -1 for an even input value, 1 for an odd one

    OPATOMIC_LAST = 7 // number of operation types; not an operation itself
};
58 
// Role of an OpAtomicBuffer; selects which contents OpAtomicBuffer::getBytes() generates.
enum BufferType
{
    BUFFERTYPE_INPUT      = 0, // the generated input values
    BUFFERTYPE_EXPECTED   = 1, // the expected contents of the output buffer
    BUFFERTYPE_ATOMIC_RET = 2, // the buffer holding the values returned by the atomic operations

    BUFFERTYPE_LAST = 3 // number of buffer types; not a buffer type itself
};
67 
fillRandomScalars(de::Random & rnd,int32_t minValue,int32_t maxValue,int32_t * dst,int32_t numValues)68 static void fillRandomScalars(de::Random &rnd, int32_t minValue, int32_t maxValue, int32_t *dst, int32_t numValues)
69 {
70     for (int i = 0; i < numValues; i++)
71         dst[i] = rnd.getInt(minValue, maxValue);
72 }
73 
74 /*--------------------------------------------------------------------*//*!
75 * \brief Concrete class for an input/output storage buffer object used for OpAtomic tests
76 *//*--------------------------------------------------------------------*/
77 class OpAtomicBuffer : public BufferInterface
78 {
79 public:
OpAtomicBuffer(const uint32_t numInputElements,const uint32_t numOuptutElements,const OpAtomicType opAtomic,const BufferType type)80     OpAtomicBuffer(const uint32_t numInputElements, const uint32_t numOuptutElements, const OpAtomicType opAtomic,
81                    const BufferType type)
82         : m_numInputElements(numInputElements)
83         , m_numOutputElements(numOuptutElements)
84         , m_opAtomic(opAtomic)
85         , m_type(type)
86     {
87     }
88 
getBytes(std::vector<uint8_t> & bytes) const89     void getBytes(std::vector<uint8_t> &bytes) const
90     {
91         std::vector<int32_t> inputInts(m_numInputElements, 0);
92         de::Random rnd(m_opAtomic);
93 
94         fillRandomScalars(rnd, 1, 100, &inputInts.front(), m_numInputElements);
95 
96         // Return input values as is
97         if (m_type == BUFFERTYPE_INPUT)
98         {
99             size_t inputSize = m_numInputElements * sizeof(int32_t);
100 
101             bytes.resize(inputSize);
102             deMemcpy(&bytes.front(), &inputInts.front(), inputSize);
103         }
104         // Calculate expected output values
105         else if (m_type == BUFFERTYPE_EXPECTED)
106         {
107             size_t outputSize = m_numOutputElements * sizeof(int32_t);
108             bytes.resize(outputSize, 0xffu);
109 
110             for (size_t ndx = 0; ndx < m_numInputElements; ndx++)
111             {
112                 int32_t *const bytesAsInt = reinterpret_cast<int32_t *>(&bytes.front());
113 
114                 switch (m_opAtomic)
115                 {
116                 case OPATOMIC_IADD:
117                     bytesAsInt[0] += inputInts[ndx];
118                     break;
119                 case OPATOMIC_ISUB:
120                     bytesAsInt[0] -= inputInts[ndx];
121                     break;
122                 case OPATOMIC_IINC:
123                     bytesAsInt[0]++;
124                     break;
125                 case OPATOMIC_IDEC:
126                     bytesAsInt[0]--;
127                     break;
128                 case OPATOMIC_LOAD:
129                     bytesAsInt[ndx] = inputInts[ndx];
130                     break;
131                 case OPATOMIC_STORE:
132                     bytesAsInt[ndx] = inputInts[ndx];
133                     break;
134                 case OPATOMIC_COMPEX:
135                     bytesAsInt[ndx] = (inputInts[ndx] % 2) == 0 ? -1 : 1;
136                     break;
137                 default:
138                     DE_FATAL("Unknown OpAtomic type");
139                 }
140             }
141         }
142         else if (m_type == BUFFERTYPE_ATOMIC_RET)
143         {
144             bytes.resize(m_numInputElements * sizeof(int32_t), 0xff);
145 
146             if (m_opAtomic == OPATOMIC_COMPEX)
147             {
148                 int32_t *const bytesAsInt = reinterpret_cast<int32_t *>(&bytes.front());
149                 for (size_t ndx = 0; ndx < m_numInputElements; ndx++)
150                     bytesAsInt[ndx] = inputInts[ndx] % 2;
151             }
152         }
153         else
154             DE_FATAL("Unknown buffer type");
155     }
156 
getPackedBytes(std::vector<uint8_t> & bytes) const157     void getPackedBytes(std::vector<uint8_t> &bytes) const
158     {
159         return getBytes(bytes);
160     }
161 
getByteSize(void) const162     size_t getByteSize(void) const
163     {
164         switch (m_type)
165         {
166         case BUFFERTYPE_ATOMIC_RET:
167         case BUFFERTYPE_INPUT:
168             return m_numInputElements * sizeof(int32_t);
169         case BUFFERTYPE_EXPECTED:
170             return m_numOutputElements * sizeof(int32_t);
171         default:
172             DE_FATAL("Unknown buffer type");
173             return 0;
174         }
175     }
176 
177     template <int OpAtomic>
compareWithRetvals(const std::vector<Resource> & inputs,const std::vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,tcu::TestLog & log)178     static bool compareWithRetvals(const std::vector<Resource> &inputs, const std::vector<AllocationSp> &outputAllocs,
179                                    const std::vector<Resource> &expectedOutputs, tcu::TestLog &log)
180     {
181         if (outputAllocs.size() != 2 || inputs.size() != 1)
182             DE_FATAL("Wrong number of buffers to compare");
183 
184         for (size_t i = 0; i < outputAllocs.size(); ++i)
185         {
186             const uint32_t *values = reinterpret_cast<uint32_t *>(outputAllocs[i]->getHostPtr());
187 
188             if (i == 1 && OpAtomic != OPATOMIC_COMPEX)
189             {
190                 // BUFFERTYPE_ATOMIC_RET for arithmetic operations must be verified manually by matching return values to inputs
191                 std::vector<uint8_t> inputBytes;
192                 inputs[0].getBytes(inputBytes);
193 
194                 const uint32_t *inputValues   = reinterpret_cast<uint32_t *>(&inputBytes.front());
195                 const size_t inputValuesCount = inputBytes.size() / sizeof(uint32_t);
196 
197                 // result of all atomic operations
198                 const uint32_t resultValue = *reinterpret_cast<uint32_t *>(outputAllocs[0]->getHostPtr());
199 
200                 if (!compareRetVals<OpAtomic>(inputValues, inputValuesCount, resultValue, values))
201                 {
202                     log << tcu::TestLog::Message
203                         << "Wrong contents of buffer with return values after atomic operation."
204                         << tcu::TestLog::EndMessage;
205                     return false;
206                 }
207             }
208             else
209             {
210                 const BufferSp &expectedOutput = expectedOutputs[i].getBuffer();
211                 std::vector<uint8_t> expectedBytes;
212 
213                 expectedOutput->getBytes(expectedBytes);
214 
215                 if (deMemCmp(&expectedBytes.front(), values, expectedBytes.size()))
216                 {
217                     log << tcu::TestLog::Message << "Wrong contents of buffer after atomic operation"
218                         << tcu::TestLog::EndMessage;
219                     return false;
220                 }
221             }
222         }
223         return true;
224     }
225 
226     template <int OpAtomic>
compareRetVals(const uint32_t * inputValues,const size_t inputValuesCount,const uint32_t resultValue,const uint32_t * returnValues)227     static bool compareRetVals(const uint32_t *inputValues, const size_t inputValuesCount, const uint32_t resultValue,
228                                const uint32_t *returnValues)
229     {
230         // as the order of execution is undefined, validation of return values for atomic operations is tricky:
231         // each inputValue stands for one atomic operation. Iterate through all of
232         // done operations in time, each time finding one matching current result and un-doing it.
233 
234         std::vector<bool> operationsUndone(inputValuesCount, false);
235         uint32_t currentResult = resultValue;
236 
237         for (size_t operationUndone = 0; operationUndone < inputValuesCount; ++operationUndone)
238         {
239             // find which of operations was done at this moment
240             size_t ndx;
241             for (ndx = 0; ndx < inputValuesCount; ++ndx)
242             {
243                 if (operationsUndone[ndx])
244                     continue;
245 
246                 uint32_t previousResult = currentResult;
247 
248                 switch (OpAtomic)
249                 {
250                 // operations are undone here, so the actual opeation is reversed
251                 case OPATOMIC_IADD:
252                     previousResult -= inputValues[ndx];
253                     break;
254                 case OPATOMIC_ISUB:
255                     previousResult += inputValues[ndx];
256                     break;
257                 case OPATOMIC_IINC:
258                     previousResult--;
259                     break;
260                 case OPATOMIC_IDEC:
261                     previousResult++;
262                     break;
263                 default:
264                     DE_FATAL("Unsupported OpAtomic type for return value compare");
265                 }
266 
267                 if (previousResult == returnValues[ndx])
268                 {
269                     // found matching operation
270                     currentResult         = returnValues[ndx];
271                     operationsUndone[ndx] = true;
272                     break;
273                 }
274             }
275             if (ndx == inputValuesCount)
276             {
277                 // no operation matches the current result value
278                 return false;
279             }
280         }
281         return true;
282     }
283 
284 private:
285     const uint32_t m_numInputElements;
286     const uint32_t m_numOutputElements;
287     const OpAtomicType m_opAtomic;
288     const BufferType m_type;
289 };
290 
291 /*--------------------------------------------------------------------*//*!
292  * \brief Concrete class for an input/output storage buffer object
293  *//*--------------------------------------------------------------------*/
294 template <typename E>
295 class Buffer : public BufferInterface
296 {
297 public:
Buffer(const std::vector<E> & elements,uint32_t padding=0)298     Buffer(const std::vector<E> &elements, uint32_t padding = 0 /* in bytes */)
299         : m_elements(elements)
300         , m_padding(padding)
301     {
302     }
303 
getBytes(std::vector<uint8_t> & bytes) const304     void getBytes(std::vector<uint8_t> &bytes) const
305     {
306         const size_t count          = m_elements.size();
307         const size_t perSegmentSize = sizeof(E) + m_padding;
308         const size_t size           = count * perSegmentSize;
309 
310         bytes.resize(size);
311 
312         if (m_padding == 0)
313         {
314             deMemcpy(&bytes.front(), &m_elements.front(), size);
315         }
316         else
317         {
318             deMemset(&bytes.front(), 0xff, size);
319 
320             for (uint32_t elementIdx = 0; elementIdx < count; ++elementIdx)
321                 deMemcpy(&bytes[elementIdx * perSegmentSize], &m_elements[elementIdx], sizeof(E));
322         }
323     }
324 
getPackedBytes(std::vector<uint8_t> & bytes) const325     void getPackedBytes(std::vector<uint8_t> &bytes) const
326     {
327         const size_t size = m_elements.size() * sizeof(E);
328 
329         bytes.resize(size);
330 
331         deMemcpy(&bytes.front(), &m_elements.front(), size);
332     }
333 
getByteSize(void) const334     size_t getByteSize(void) const
335     {
336         return m_elements.size() * (sizeof(E) + m_padding);
337     }
338 
339 private:
340     std::vector<E> m_elements;
341     uint32_t m_padding;
342 };
343 
344 DE_STATIC_ASSERT(sizeof(tcu::Vec4) == 4 * sizeof(float));
345 
346 typedef Buffer<float> Float32Buffer;
347 typedef Buffer<deFloat16> Float16Buffer;
348 typedef Buffer<double> Float64Buffer;
349 typedef Buffer<int64_t> Int64Buffer;
350 typedef Buffer<int32_t> Int32Buffer;
351 typedef Buffer<int16_t> Int16Buffer;
352 typedef Buffer<int8_t> Int8Buffer;
353 typedef Buffer<uint8_t> Uint8Buffer;
354 typedef Buffer<uint16_t> Uint16Buffer;
355 typedef Buffer<uint32_t> Uint32Buffer;
356 typedef Buffer<uint64_t> Uint64Buffer;
357 typedef Buffer<tcu::Vec4> Vec4Buffer;
358 
359 typedef bool (*ComputeVerifyBinaryFunc)(const ProgramBinary &binary);
360 
361 /*--------------------------------------------------------------------*//*!
362  * \brief Specification for a compute shader.
363  *
364  * This struct bundles SPIR-V assembly code, input and expected output
365  * together.
366  *//*--------------------------------------------------------------------*/
367 struct ComputeShaderSpec
368 {
369     std::string assembly;
370     std::string entryPoint;
371     std::vector<Resource> inputs;
372     std::vector<Resource> outputs;
373     vk::VkFormat inputFormat = vk::VK_FORMAT_R32G32B32A32_SFLOAT;
374     tcu::IVec3 numWorkGroups;
375     SpecConstants specConstants;
376     BufferSp pushConstants;
377     std::vector<std::string> extensions;
378     VulkanFeatures requestedVulkanFeatures;
379     qpTestResult failResult;
380     std::string failMessage;
381     // If null, a default verification will be performed by comparing the memory pointed to by outputAllocations
382     // and the contents of expectedOutputs. Otherwise the function pointed to by verifyIO will be called.
383     // If true is returned, then the test case is assumed to have passed, if false is returned, then the test
384     // case is assumed to have failed. Exact meaning of failure can be customized with failResult.
385     VerifyIOFunc verifyIO;
386     ComputeVerifyBinaryFunc verifyBinary;
387     SpirvVersion spirvVersion;
388     bool coherentMemory;
389     bool usesPhysStorageBuffer;
390     const bool graphicsFeaturesRequired;
391 
ComputeShaderSpecvkt::SpirVAssembly::ComputeShaderSpec392     ComputeShaderSpec(void)
393         : entryPoint("main")
394         , pushConstants(DE_NULL)
395         , requestedVulkanFeatures()
396         , failResult(QP_TEST_RESULT_FAIL)
397         , failMessage("Output doesn't match with expected")
398         , verifyIO(DE_NULL)
399         , verifyBinary(DE_NULL)
400         , spirvVersion(SPIRV_VERSION_1_0)
401         , coherentMemory(false)
402         , usesPhysStorageBuffer(false)
403         , graphicsFeaturesRequired(false)
404     {
405     }
406 };
407 
408 /*--------------------------------------------------------------------*//*!
409  * \brief Helper functions for SPIR-V assembly shared by various tests
410  *//*--------------------------------------------------------------------*/
411 
// Returns the compute shader assembly preamble as a string. Every argument is optional and
// defaults to an empty string.
// NOTE(review): how the arguments are spliced into the preamble is defined by the
// implementation, which is not part of this header.
std::string getComputeAsmShaderPreamble(const std::string &capabilities = "", const std::string &extensions = "",
                                        const std::string &exeModes = "", const std::string &extraEntryPoints = "",
                                        const std::string &extraEntryPointsArguments = "");
// NOTE(review): presumably the same preamble without a local-size declaration - confirm
// against the implementation.
const char *getComputeAsmShaderPreambleWithoutLocalSize();
// Returns the common type declarations as a string; blockStorageClass defaults to "Uniform".
std::string getComputeAsmCommonTypes(std::string blockStorageClass = "Uniform");
// NOTE(review): presumably the 64-bit integer counterpart of the common types - confirm
// against the implementation.
const char *getComputeAsmCommonInt64Types();
418 
/*--------------------------------------------------------------------*//*!
 * Declares two uniform variables (indata, outdata) of type
 * "struct { float[] }". Depends on type "f32arr" (for "float[]").
 *
 * blockStorageClass defaults to "Uniform".
 *//*--------------------------------------------------------------------*/
std::string getComputeAsmInputOutputBuffer(std::string blockStorageClass = "Uniform");
/*--------------------------------------------------------------------*//*!
 * Declares buffer type and layout for uniform variables indata and
 * outdata. Both of them are SSBOs bound to descriptor set 0.
 * indata is at binding point 0, while outdata is at 1.
 *
 * blockStorageClass defaults to "BufferBlock".
 *//*--------------------------------------------------------------------*/
std::string getComputeAsmInputOutputBufferTraits(std::string blockStorageClass = "BufferBlock");
430 
// Verification callback taking the inputs (unnamed and therefore not documented here), the
// output allocations, the expected outputs and a log; returns a bool verdict.
// NOTE(review): presumably the default byte-wise comparison of outputAllocs against
// expectedOutputs - confirm against the implementation.
bool verifyOutput(const std::vector<Resource> &, const std::vector<AllocationSp> &outputAllocs,
                  const std::vector<Resource> &expectedOutputs, tcu::TestLog &log);

// Creates compute-shader assembly by specializing a boilerplate StringTemplate
// with the given fragments.
// NOTE(review): this comment previously said "vertex-shader", which contradicts the
// function name - confirm against the implementation.

std::string makeComputeShaderAssembly(const std::map<std::string, std::string> &fragments);
437 
438 } // namespace SpirVAssembly
439 } // namespace vkt
440 
441 #endif // _VKTSPVASMCOMPUTESHADERTESTUTIL_HPP
442