1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2021 Arm Limited.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Functional integer dot product tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "tcuTestLog.hpp"
25 #include "tcuVectorUtil.hpp"
26 
27 #include "deRandom.hpp"
28 
29 #include "vktSpvAsmComputeShaderCase.hpp"
30 #include "vktSpvAsmComputeShaderTestUtil.hpp"
31 #include "vktSpvAsmIntegerDotProductTests.hpp"
32 
33 #include <limits>
34 #include <string>
35 
36 // VK_KHR_shader_integer_dot_product tests
37 
// Note: these tests also request the following features/extensions, which
// are not required by the VK_KHR_shader_integer_dot_product extension itself:
//    * shaderInt8 together with VK_KHR_8bit_storage (storageBuffer8BitAccess)
//      when unpacked 8-bit operands or 8-bit results are used
//    * shaderInt16 together with VK_KHR_16bit_storage (storageBuffer16BitAccess)
//      when 16-bit operands or 16-bit results are used
42 
43 namespace vkt
44 {
45 namespace SpirVAssembly
46 {
47 
48 using namespace vk;
49 using std::string;
50 
51 namespace
52 {
53 using std::vector;
54 using tcu::IVec3;
55 using tcu::TestLog;
56 
57 template <typename T>
fillRandomScalars(de::Random & rnd,T minValue,T maxValue,void * dst,int numValues,int offset=0)58 static void fillRandomScalars(de::Random &rnd, T minValue, T maxValue, void *dst, int numValues, int offset = 0)
59 {
60     T *const typedPtr = (T *)dst;
61     for (int ndx = 0; ndx < numValues; ndx++)
62         typedPtr[offset + ndx] = de::randomScalar<T>(rnd, minValue, maxValue);
63 }
64 
// Returns v1 after checking with DE_ASSERT that v1 and v2 are equal.
// v2 is only read by the assertion, hence the explicit void cast
// (presumably to avoid an unused-parameter warning when DE_ASSERT compiles out).
template <typename T>
T getEqualValue(T v1, T v2)
{
    DE_ASSERT(v1 == v2);
    static_cast<void>(v2);
    return v1;
}
72 
// Returns true when val is representable in the integer type T.
//
// Negative values are only representable by signed T and are compared against
// T's minimum. Non-negative values are compared against T's maximum in the
// unsigned 64-bit domain, so that a maximum above INT64_MAX (T = uint64_t)
// is not truncated to a negative number by a cast to int64_t.
template <class T>
bool withinLimits(int64_t val)
{
    using Limits = std::numeric_limits<T>;

    if (val < 0)
        return Limits::is_signed && static_cast<int64_t>(Limits::min()) <= val;

    return static_cast<uint64_t>(val) <= static_cast<uint64_t>(Limits::max());
}
79 
80 template <class T, class LHSOperandT, class RHSOperandT>
dotProduct(vector<LHSOperandT> lhs,vector<RHSOperandT> rhs)81 static T dotProduct(vector<LHSOperandT> lhs, vector<RHSOperandT> rhs)
82 {
83     uint64_t res = 0u;
84     size_t size  = getEqualValue(lhs.size(), rhs.size());
85 
86     for (size_t i = 0; i < size; ++i)
87         res += static_cast<uint64_t>(lhs[i]) * static_cast<uint64_t>(rhs[i]);
88 
89     int64_t signedRes;
90     deMemcpy(&signedRes, &res, sizeof(res));
91     return static_cast<T>(signedRes);
92 }
93 
94 template <class AddendT, class LHSOperandT, class RHSOperandT>
compareDotProductAccSat(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog &)95 bool compareDotProductAccSat(const std::vector<Resource> &inputs, const vector<AllocationSp> &outputAllocs,
96                              const std::vector<Resource> &, TestLog &)
97 {
98     if (inputs.size() != 3 || outputAllocs.size() != 1)
99         return false;
100 
101     vector<uint8_t> lhsBytes;
102     vector<uint8_t> rhsBytes;
103     vector<uint8_t> addendBytes;
104 
105     inputs[0].getBytes(lhsBytes);
106     inputs[1].getBytes(rhsBytes);
107     inputs[2].getBytes(addendBytes);
108 
109     const AddendT *const output      = static_cast<AddendT *const>(outputAllocs[0]->getHostPtr());
110     const AddendT *const addends     = reinterpret_cast<AddendT *const>(&addendBytes.front());
111     const LHSOperandT *const lhsInts = reinterpret_cast<LHSOperandT *const>(&lhsBytes.front());
112     const RHSOperandT *const rhsInts = reinterpret_cast<RHSOperandT *const>(&rhsBytes.front());
113 
114     for (size_t idx = 0; idx < inputs[2].getByteSize() / sizeof(AddendT); ++idx)
115     {
116         size_t vecLen = (inputs[0].getByteSize() / sizeof(LHSOperandT)) / (inputs[2].getByteSize() / sizeof(AddendT));
117 
118         std::vector<LHSOperandT> inputVec1Pos;
119         std::vector<RHSOperandT> inputVec2Pos;
120         inputVec1Pos.reserve(vecLen);
121         inputVec2Pos.reserve(vecLen);
122 
123         std::vector<LHSOperandT> inputVec1Neg;
124         std::vector<RHSOperandT> inputVec2Neg;
125         inputVec1Neg.reserve(vecLen);
126         inputVec2Neg.reserve(vecLen);
127 
128         for (unsigned int vecElem = 0; vecElem < vecLen; ++vecElem)
129         {
130             LHSOperandT elem1 = lhsInts[idx * vecLen + vecElem];
131             RHSOperandT elem2 = rhsInts[idx * vecLen + vecElem];
132 
133             // Note: ordering of components does not matter, provided
134             // that it is consistent between lhs and rhs.
135             if ((elem1 < 0) == (elem2 < 0))
136             {
137                 inputVec1Pos.push_back(elem1);
138                 inputVec2Pos.push_back(elem2);
139                 inputVec1Neg.push_back(0);
140                 inputVec2Neg.push_back(0);
141             }
142             else
143             {
144                 inputVec1Pos.push_back(0);
145                 inputVec2Pos.push_back(0);
146                 inputVec1Neg.push_back(elem1);
147                 inputVec2Neg.push_back(elem2);
148             }
149         }
150 
151         int64_t PosProduct  = dotProduct<int64_t>(inputVec1Pos, inputVec2Pos);
152         int64_t NegProduct  = dotProduct<int64_t>(inputVec1Neg, inputVec2Neg);
153         bool outputOverflow = (!withinLimits<AddendT>(PosProduct) || !withinLimits<AddendT>(NegProduct));
154 
155         if (!outputOverflow)
156         {
157             AddendT expectedOutput = static_cast<AddendT>(PosProduct + NegProduct);
158             const auto &addend     = addends[idx];
159 
160             if (addend < 0)
161             {
162                 if (expectedOutput < std::numeric_limits<AddendT>::min() - addend)
163                     expectedOutput = std::numeric_limits<AddendT>::min();
164                 else
165                     expectedOutput = static_cast<AddendT>(expectedOutput + addend);
166             }
167             else
168             {
169                 if (expectedOutput > std::numeric_limits<AddendT>::max() - addend)
170                     expectedOutput = std::numeric_limits<AddendT>::max();
171                 else
172                     expectedOutput = static_cast<AddendT>(expectedOutput + addend);
173             }
174 
175             if (output[idx] != expectedOutput)
176             {
177                 return false;
178             }
179         }
180     }
181 
182     return true;
183 }
184 
// Operand packing/signedness variant of a dot product test case.
// Initialized positionally as {packed, signedLHS, signedRHS}.
struct DotProductPackingInfo
{
    bool packed;    // operands are packed scalars (PackedVectorFormat4x8BitKHR) instead of vectors
    bool signedLHS; // left-hand operand is signed ("s" instead of "u" in the test name)
    bool signedRHS; // right-hand operand is signed ("s" instead of "u" in the test name)
};
191 
// Shape of a dot product operand vector.
// Initialized positionally as {vecElementSize, vecLen}.
struct DotProductVectorInfo
{
    size_t vecElementSize; // width of one component in bits (8, 16 or 32 in the tables of this file)
    unsigned int vecLen;   // number of components per vector
};
197 
addDotProductExtensionAndFeatures(ComputeShaderSpec & spec,const struct DotProductPackingInfo & packingInfo,size_t elementSize,size_t outSize)198 void addDotProductExtensionAndFeatures(ComputeShaderSpec &spec, const struct DotProductPackingInfo &packingInfo,
199                                        size_t elementSize, size_t outSize)
200 {
201     spec.extensions.push_back("VK_KHR_shader_integer_dot_product");
202     spec.requestedVulkanFeatures.extIntegerDotProduct.shaderIntegerDotProduct = VK_TRUE;
203 
204     DE_ASSERT(!packingInfo.packed || elementSize == 8);
205     if ((!packingInfo.packed && elementSize == 8) || outSize == 8)
206     {
207         spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8              = true;
208         spec.requestedVulkanFeatures.ext8BitStorage.storageBuffer8BitAccess = true;
209         spec.extensions.push_back("VK_KHR_8bit_storage");
210     }
211 
212     if (elementSize == 16 || outSize == 16)
213     {
214         spec.requestedVulkanFeatures.coreFeatures.shaderInt16                 = true;
215         spec.requestedVulkanFeatures.ext16BitStorage.storageBuffer16BitAccess = true;
216         spec.extensions.push_back("VK_KHR_16bit_storage");
217     }
218 }
219 
220 const struct DotProductPackingInfo dotProductPacking[] = {
221     {false, false, false}, {false, false, true}, {false, true, false}, {false, true, true},
222     {true, true, true},    {true, true, false},  {true, false, true},  {true, false, false},
223 };
224 
225 const struct DotProductVectorInfo dotProductVector8[] = {
226     {8, 2},
227     {8, 3},
228     {8, 4},
229 };
230 
231 const struct DotProductVectorInfo dotProductVector16[] = {
232     {16, 2},
233     {16, 3},
234     {16, 4},
235 };
236 
237 const struct DotProductVectorInfo dotProductVector32[] = {
238     {32, 2},
239     {32, 3},
240     {32, 4},
241 };
242 
getAlignedVecLen(const DotProductVectorInfo & vectorInfo)243 unsigned int getAlignedVecLen(const DotProductVectorInfo &vectorInfo)
244 {
245     return (vectorInfo.vecLen == 3 ? 4 : vectorInfo.vecLen);
246 }
247 
generateIntegerDotProductTypeDeclsAndStrideDecors(std::ostringstream & typeDeclsStream,std::ostringstream & strideDecorsStream,const struct DotProductPackingInfo & packingInfo,const struct DotProductVectorInfo & vectorInfo,size_t outSize,bool signedLHSAndResult,bool signedRHS)248 void generateIntegerDotProductTypeDeclsAndStrideDecors(std::ostringstream &typeDeclsStream,
249                                                        std::ostringstream &strideDecorsStream,
250                                                        const struct DotProductPackingInfo &packingInfo,
251                                                        const struct DotProductVectorInfo &vectorInfo, size_t outSize,
252                                                        bool signedLHSAndResult, bool signedRHS)
253 {
254     size_t signedScalarArraysMask   = 0;
255     size_t unsignedScalarArraysMask = 0;
256     bool signedIntVectorNeeded      = false;
257     bool unsignedIntVectorNeeded    = false;
258 
259     if (signedLHSAndResult)
260         signedScalarArraysMask |= static_cast<int>(outSize);
261     else
262         unsignedScalarArraysMask |= static_cast<int>(outSize);
263 
264     if (packingInfo.packed)
265     {
266         if (packingInfo.signedLHS || packingInfo.signedRHS)
267             signedScalarArraysMask |= vectorInfo.vecElementSize * vectorInfo.vecLen;
268         if (!packingInfo.signedLHS || !packingInfo.signedRHS)
269             unsignedScalarArraysMask |= vectorInfo.vecElementSize * vectorInfo.vecLen;
270     }
271     else
272     {
273         if (signedLHSAndResult)
274         {
275             signedIntVectorNeeded = true;
276             signedScalarArraysMask |= vectorInfo.vecElementSize;
277         }
278         if (!signedRHS)
279         {
280             unsignedIntVectorNeeded = true;
281             unsignedScalarArraysMask |= vectorInfo.vecElementSize;
282         }
283     }
284 
285     size_t signedScalarTypesMask   = signedScalarArraysMask;
286     size_t unsignedScalarTypesMask = unsignedScalarArraysMask;
287 
288     for (unsigned int size = 8; size <= 64; size *= 2)
289     {
290         if (size != 32)
291         {
292             string sizeStr(de::toString(size));
293             if ((signedScalarTypesMask & size))
294                 typeDeclsStream << "%i" << sizeStr << " = OpTypeInt " << sizeStr << " 1\n";
295             if ((unsignedScalarTypesMask & size))
296                 typeDeclsStream << "%u" << sizeStr << " = OpTypeInt " << sizeStr << " 0\n";
297         }
298     }
299 
300     for (unsigned int size = 8; size <= 64; size *= 2)
301     {
302         string sizeStr = de::toString(size);
303         if ((signedScalarArraysMask & size))
304         {
305             if (size != 32)
306                 typeDeclsStream << "%i" << sizeStr << "ptr = OpTypePointer Uniform %i" << sizeStr
307                                 << "\n"
308                                    "%i"
309                                 << sizeStr << "arr = OpTypeRuntimeArray %i" << sizeStr << "\n";
310             strideDecorsStream << "OpDecorate %i" << sizeStr << "arr ArrayStride " << de::toString(size / 8) << "\n";
311         }
312         if ((unsignedScalarArraysMask & size))
313         {
314             typeDeclsStream << "%u" << sizeStr << "ptr = OpTypePointer Uniform %u" << sizeStr
315                             << "\n"
316                                "%u"
317                             << sizeStr << "arr = OpTypeRuntimeArray %u" << sizeStr << "\n";
318             strideDecorsStream << "OpDecorate %u" << sizeStr << "arr ArrayStride " << de::toString(size / 8) << "\n";
319         }
320     }
321 
322     if (signedIntVectorNeeded)
323     {
324         string vecType = "%i" + de::toString(vectorInfo.vecElementSize) + "vec" + de::toString(vectorInfo.vecLen);
325         typeDeclsStream << vecType << " = OpTypeVector %i" << vectorInfo.vecElementSize << " " << vectorInfo.vecLen
326                         << "\n"
327                         << vecType << "ptr = OpTypePointer Uniform " << vecType << "\n"
328                         << vecType << "arr = OpTypeRuntimeArray " << vecType << "\n";
329         strideDecorsStream << "OpDecorate " << vecType << "arr ArrayStride "
330                            << (vectorInfo.vecLen == 3 ? 4 : vectorInfo.vecLen) * (vectorInfo.vecElementSize / 8)
331                            << "\n";
332     }
333 
334     if (unsignedIntVectorNeeded)
335     {
336         string vecType      = "%u" + de::toString(vectorInfo.vecElementSize) + "vec" + de::toString(vectorInfo.vecLen);
337         bool changeTypeName = false;
338         if (vectorInfo.vecElementSize == 32 && vectorInfo.vecLen == 3)
339             changeTypeName = true;
340         else
341             typeDeclsStream << vecType << " = OpTypeVector %u" << vectorInfo.vecElementSize << " " << vectorInfo.vecLen
342                             << "\n";
343 
344         typeDeclsStream << vecType << "ptr = OpTypePointer Uniform " << (changeTypeName ? "%uvec3" : vecType) << "\n"
345                         << vecType << "arr = OpTypeRuntimeArray " << (changeTypeName ? "%uvec3" : vecType) << "\n";
346         strideDecorsStream << "OpDecorate " << vecType << "arr ArrayStride "
347                            << (vectorInfo.vecLen == 3 ? 4 : vectorInfo.vecLen) * (vectorInfo.vecElementSize / 8)
348                            << "\n";
349     }
350 }
351 
generateIntegerDotProductCode(const struct DotProductPackingInfo & packingInfo,const struct DotProductVectorInfo & vectorInfo,size_t outSize,bool signedLHSAndResult,bool signedRHS,bool acc)352 string generateIntegerDotProductCode(const struct DotProductPackingInfo &packingInfo,
353                                      const struct DotProductVectorInfo &vectorInfo, size_t outSize,
354                                      bool signedLHSAndResult, bool signedRHS, bool acc)
355 {
356     DE_ASSERT(signedLHSAndResult || !signedRHS);
357 
358     const string insnSignedness(signedLHSAndResult ? (signedRHS ? "S" : "SU") : "U");
359     const string insnName(string("Op") + insnSignedness + "Dot" + (acc ? "AccSat" : "") + "KHR");
360 
361     const string outputCapability(outSize != 32 ? "OpCapability Int" + de::toString(outSize) + "\n" : "");
362     const string elementCapability(!packingInfo.packed && outSize != vectorInfo.vecElementSize &&
363                                            vectorInfo.vecElementSize != 32 ?
364                                        "OpCapability Int" + de::toString(vectorInfo.vecElementSize) + "\n" :
365                                        "");
366 
367     const string dotProductInputCapabilityName(packingInfo.packed              ? "DotProductInput4x8BitPackedKHR" :
368                                                (vectorInfo.vecElementSize > 8) ? "DotProductInputAllKHR" :
369                                                                                  "DotProductInput4x8BitKHR");
370 
371     const string capabilities(outputCapability + elementCapability + "OpCapability " + dotProductInputCapabilityName +
372                               "\n"
373                               "OpCapability DotProductKHR\n");
374     const string extensions("OpExtension \"SPV_KHR_integer_dot_product\"\n");
375 
376     const string outType((signedLHSAndResult ? "i" : "u") + de::toString(outSize));
377 
378     std::ostringstream typeDeclsStream;
379     std::ostringstream strideDecorsStream;
380     generateIntegerDotProductTypeDeclsAndStrideDecors(typeDeclsStream, strideDecorsStream, packingInfo, vectorInfo,
381                                                       outSize, signedLHSAndResult, signedRHS);
382     string typeDecls(typeDeclsStream.str());
383     string strideDecors(strideDecorsStream.str());
384 
385     const string lhsVecType(
386         packingInfo.packed ?
387             string(packingInfo.signedLHS ? "i" : "u") + de::toString(vectorInfo.vecElementSize * vectorInfo.vecLen) :
388             (signedLHSAndResult ? "i" : "u") +
389                 ((!signedLHSAndResult && vectorInfo.vecElementSize == 32 && vectorInfo.vecLen == 3) ?
390                      "" :
391                      de::toString(vectorInfo.vecElementSize)) +
392                 "vec" + de::toString(vectorInfo.vecLen));
393     const string rhsVecType(packingInfo.packed ?
394                                 string(packingInfo.signedRHS ? "i" : "u") +
395                                     de::toString(vectorInfo.vecElementSize * vectorInfo.vecLen) :
396                                 (signedRHS ? "i" : "u") +
397                                     ((!signedRHS && vectorInfo.vecElementSize == 32 && vectorInfo.vecLen == 3) ?
398                                          "" :
399                                          de::toString(vectorInfo.vecElementSize)) +
400                                     "vec" + de::toString(vectorInfo.vecLen));
401     const string lhsVecTypeBase(packingInfo.packed ?
402                                     string(packingInfo.signedLHS ? "i" : "u") +
403                                         de::toString(vectorInfo.vecElementSize * vectorInfo.vecLen) :
404                                     (signedLHSAndResult ? "i" : "u") + de::toString(vectorInfo.vecElementSize) + "vec" +
405                                         de::toString(vectorInfo.vecLen));
406     const string rhsVecTypeBase(packingInfo.packed ? string(packingInfo.signedRHS ? "i" : "u") +
407                                                          de::toString(vectorInfo.vecElementSize * vectorInfo.vecLen) :
408                                                      (signedRHS ? "i" : "u") + de::toString(vectorInfo.vecElementSize) +
409                                                          "vec" + de::toString(vectorInfo.vecLen));
410 
411     const string optFormatParam(packingInfo.packed ? " PackedVectorFormat4x8BitKHR" : "");
412 
413     bool bufferSignednessMatches =
414         (packingInfo.packed ? (packingInfo.signedLHS == packingInfo.signedRHS) : (signedLHSAndResult == signedRHS));
415 
416     return string(getComputeAsmShaderPreamble(capabilities, extensions)) +
417 
418            "OpName %main           \"main\"\n"
419            "OpName %id             \"gl_GlobalInvocationID\"\n"
420 
421            "OpDecorate %id BuiltIn GlobalInvocationId\n" +
422            (bufferSignednessMatches ? "OpDecorate %bufin BufferBlock\n" :
423                                       "OpDecorate %buflhs BufferBlock\n"
424                                       "OpDecorate %bufrhs BufferBlock\n") +
425            "OpDecorate %bufout BufferBlock\n"
426            "OpDecorate %indatalhs DescriptorSet 0\n"
427            "OpDecorate %indatalhs Binding 0\n"
428            "OpDecorate %indatarhs DescriptorSet 0\n"
429            "OpDecorate %indatarhs Binding 1\n" +
430            (acc ? "OpDecorate %indataacc DescriptorSet 0\n"
431                   "OpDecorate %indataacc Binding 2\n" :
432                   "") +
433            "OpDecorate %outdata DescriptorSet 0\n"
434            "OpDecorate %outdata Binding " +
435            (acc ? "3" : "2") + "\n" + strideDecors
436 
437            + (bufferSignednessMatches ? "OpMemberDecorate %bufin 0 Offset 0\n" :
438                                         "OpMemberDecorate %buflhs 0 Offset 0\n"
439                                         "OpMemberDecorate %bufrhs 0 Offset 0\n") +
440            "OpMemberDecorate %bufout 0 Offset 0\n"
441 
442            + getComputeAsmCommonTypes() + typeDecls
443 
444            + (bufferSignednessMatches ? "%bufin     = OpTypeStruct %" + lhsVecTypeBase +
445                                             "arr\n"
446                                             "%bufinptr  = OpTypePointer Uniform %bufin\n" :
447                                         "%buflhs    = OpTypeStruct %" + lhsVecTypeBase +
448                                             "arr\n"
449                                             "%buflhsptr = OpTypePointer Uniform %buflhs\n"
450                                             "%bufrhs    = OpTypeStruct %" +
451                                             rhsVecTypeBase +
452                                             "arr\n"
453                                             "%bufrhsptr = OpTypePointer Uniform %bufrhs\n") +
454            "%bufout    = OpTypeStruct %" + outType +
455            "arr\n"
456            "%bufoutptr = OpTypePointer Uniform %bufout\n"
457            "%indatalhs = OpVariable " +
458            (bufferSignednessMatches ? "%bufinptr" : "%buflhsptr") +
459            " Uniform\n"
460            "%indatarhs = OpVariable " +
461            (bufferSignednessMatches ? "%bufinptr" : "%bufrhsptr") + " Uniform\n" +
462            (acc ? "%indataacc = OpVariable %bufoutptr Uniform\n" : "") +
463            "%outdata   = OpVariable %bufoutptr Uniform\n"
464 
465            "%id        = OpVariable %uvec3ptr Input\n"
466            "%zero      = OpConstant %i32 0\n"
467 
468            "%main      = OpFunction %void None %voidf\n"
469            "%label     = OpLabel\n"
470            "%idval     = OpLoad %uvec3 %id\n"
471            "%x         = OpCompositeExtract %u32 %idval 0\n"
472            "%inloclhs  = OpAccessChain %" +
473            lhsVecTypeBase +
474            "ptr %indatalhs %zero %x\n"
475            "%invallhs  = OpLoad %" +
476            lhsVecType +
477            " %inloclhs\n"
478            "%inlocrhs  = OpAccessChain %" +
479            rhsVecTypeBase +
480            "ptr %indatarhs %zero %x\n"
481            "%invalrhs  = OpLoad %" +
482            rhsVecType + " %inlocrhs\n" +
483            (acc ? "%inlocacc  = OpAccessChain %" + outType +
484                       "ptr %indataacc %zero %x\n"
485                       "%invalacc  = OpLoad %" +
486                       outType + " %inlocacc\n" :
487                   "") +
488            "%res       = " + insnName + " %" + outType + " %invallhs %invalrhs" + (acc ? " %invalacc" : "") +
489            optFormatParam +
490            "\n"
491            "%outloc    = OpAccessChain %" +
492            outType +
493            "ptr %outdata %zero %x\n"
494            "             OpStore %outloc %res\n"
495            "             OpReturn\n"
496            "             OpFunctionEnd\n";
497 }
498 
// Description of the operand data of one test case.
// Initialized positionally as {name, vecLen, vecElemSize}.
struct DotProductInputInfo
{
    std::string name;    // leading part of the generated test name
    unsigned int vecLen; // number of components per operand vector (3 is padded to 4 in the buffers)
    size_t vecElemSize;  // component width; appears after "i" in the generated test name
};
505 
506 template <class OutputT, class LHSOperandT, class RHSOperandT>
fillDotProductOutputs(int numElements,vector<LHSOperandT> & inputInts1,vector<RHSOperandT> & inputInts2,vector<OutputT> & outputInts,const struct DotProductInputInfo & inputInfo)507 void fillDotProductOutputs(int numElements, vector<LHSOperandT> &inputInts1, vector<RHSOperandT> &inputInts2,
508                            vector<OutputT> &outputInts, const struct DotProductInputInfo &inputInfo)
509 {
510     unsigned int alignedVecLen = inputInfo.vecLen == 3 ? 4 : inputInfo.vecLen;
511     for (int ndx = 0; ndx < numElements; ++ndx)
512     {
513         std::vector<LHSOperandT> inputVec1;
514         std::vector<RHSOperandT> inputVec2;
515         inputVec1.reserve(alignedVecLen);
516         inputVec2.reserve(alignedVecLen);
517 
518         for (unsigned int vecElem = 0; vecElem < alignedVecLen; ++vecElem)
519         {
520             // Note: ordering of components does not matter, provided
521             // that it is consistent between lhs and rhs.
522             inputVec1.push_back(inputInts1[ndx * alignedVecLen + vecElem]);
523             inputVec2.push_back(inputInts2[ndx * alignedVecLen + vecElem]);
524         }
525 
526         outputInts[ndx] = dotProduct<OutputT>(inputVec1, inputVec2);
527     }
528 }
529 
getDotProductTestName(const struct DotProductInputInfo & inputInfo,const struct DotProductPackingInfo & packingInfo,size_t outSize)530 string getDotProductTestName(const struct DotProductInputInfo &inputInfo,
531                              const struct DotProductPackingInfo &packingInfo, size_t outSize)
532 {
533     return inputInfo.name + (packingInfo.packed ? string("_packed_") : "_") + (packingInfo.signedLHS ? "s" : "u") +
534            (packingInfo.signedRHS ? "s" : "u") + "_v" + de::toString(inputInfo.vecLen) + "i" +
535            de::toString(inputInfo.vecElemSize) + "_out" + de::toString(outSize);
536 }
537 
538 template <class InBufferT, class OutBufferT, class OutputT, class OperandT>
addOpSDotKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,int numElements,vector<OperandT> & inputInts1,vector<OperandT> & inputInts2,const struct DotProductInputInfo & inputInfo,const struct DotProductPackingInfo & packingInfo,const struct DotProductVectorInfo & vectorInfo)539 void addOpSDotKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, int numElements,
540                               vector<OperandT> &inputInts1, vector<OperandT> &inputInts2,
541                               const struct DotProductInputInfo &inputInfo,
542                               const struct DotProductPackingInfo &packingInfo,
543                               const struct DotProductVectorInfo &vectorInfo)
544 {
545     ComputeShaderSpec spec;
546     size_t outSize = sizeof(OutputT) * 8;
547     vector<OutputT> outputInts(numElements, 0);
548 
549     fillDotProductOutputs(numElements, inputInts1, inputInts2, outputInts, inputInfo);
550 
551     spec.assembly = generateIntegerDotProductCode(packingInfo, vectorInfo, outSize, true, true, false);
552     addDotProductExtensionAndFeatures(spec, packingInfo, vectorInfo.vecElementSize, outSize);
553 
554     spec.inputs.push_back(BufferSp(new InBufferT(inputInts1)));
555     spec.inputs.push_back(BufferSp(new InBufferT(inputInts2)));
556     spec.outputs.push_back(BufferSp(new OutBufferT(outputInts)));
557     spec.numWorkGroups = IVec3(numElements, 1, 1);
558     spec.failResult    = QP_TEST_RESULT_FAIL;
559     spec.failMessage   = "Output doesn't match with expected";
560 
561     string qualTestName(getDotProductTestName(inputInfo, packingInfo, outSize));
562 
563     group->addChild(new SpvAsmComputeShaderCase(testCtx, qualTestName.data(), spec));
564 }
565 
566 template <class InBufferT, class T>
addOpSDotKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,const struct DotProductPackingInfo dotProductPackingInfo[],unsigned dotProductPackingInfoSize,const struct DotProductVectorInfo dotProductVectorInfo[],unsigned dotProductVectorInfoSize,T vecMin,T vecMax)567 void addOpSDotKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd, string name,
568                               const struct DotProductPackingInfo dotProductPackingInfo[],
569                               unsigned dotProductPackingInfoSize,
570                               const struct DotProductVectorInfo dotProductVectorInfo[],
571                               unsigned dotProductVectorInfoSize, T vecMin, T vecMax)
572 {
573     const int numElements = 200;
574     // Note: this test does not currently cover 64-bit integer results
575     for (unsigned int j = 0; j < dotProductVectorInfoSize; j++)
576     {
577         const struct DotProductVectorInfo &vectorInfo = dotProductVectorInfo[j];
578         unsigned int alignedVecLen                    = getAlignedVecLen(vectorInfo);
579         struct DotProductInputInfo inputInfo          = {name, vectorInfo.vecLen, vectorInfo.vecElementSize};
580         vector<T> inputInts1(numElements * alignedVecLen, 0);
581         vector<T> inputInts2(numElements * alignedVecLen, 0);
582 
583         fillRandomScalars(rnd, vecMin, vecMax, &inputInts1[0], numElements * alignedVecLen);
584         fillRandomScalars(rnd, vecMin, vecMax, &inputInts2[0], numElements * alignedVecLen);
585 
586         if (vectorInfo.vecLen == 3)
587             for (unsigned int ndx = 0; ndx < numElements; ++ndx)
588                 inputInts1[ndx * 4 + 3] = inputInts2[ndx * 4 + 3] = 0;
589 
590         for (unsigned int i = 0; i < dotProductPackingInfoSize; i++)
591         {
592             const struct DotProductPackingInfo &packingInfo = dotProductPackingInfo[i];
593             if (packingInfo.packed && (vectorInfo.vecElementSize != 8 || vectorInfo.vecLen != 4))
594                 continue;
595 
596             if (vectorInfo.vecElementSize <= 32)
597                 addOpSDotKHRComputeTests<InBufferT, Int32Buffer, int32_t>(
598                     testCtx, group, numElements, inputInts1, inputInts2, inputInfo, packingInfo, vectorInfo);
599             if (vectorInfo.vecElementSize <= 16)
600                 addOpSDotKHRComputeTests<InBufferT, Int16Buffer, int16_t>(
601                     testCtx, group, numElements, inputInts1, inputInts2, inputInfo, packingInfo, vectorInfo);
602             if (vectorInfo.vecElementSize <= 8)
603                 addOpSDotKHRComputeTests<InBufferT, Int8Buffer, int8_t>(testCtx, group, numElements, inputInts1,
604                                                                         inputInts2, inputInfo, packingInfo, vectorInfo);
605         }
606     }
607 }
608 
609 template <class T>
add32bitOpSDotKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,T vecMin,T vecMax)610 void add32bitOpSDotKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd, string name,
611                                    T vecMin, T vecMax)
612 {
613     addOpSDotKHRComputeTests<Int32Buffer>(testCtx, group, rnd, name, dotProductPacking,
614                                           DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector32,
615                                           DE_LENGTH_OF_ARRAY(dotProductVector32), vecMin, vecMax);
616 }
617 
618 template <class T>
add16bitOpSDotKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,T vecMin,T vecMax)619 void add16bitOpSDotKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd, string name,
620                                    T vecMin, T vecMax)
621 {
622     addOpSDotKHRComputeTests<Int16Buffer>(testCtx, group, rnd, name, dotProductPacking,
623                                           DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector16,
624                                           DE_LENGTH_OF_ARRAY(dotProductVector16), vecMin, vecMax);
625 }
626 
627 template <class T>
add8bitOpSDotKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,T vecMin,T vecMax)628 void add8bitOpSDotKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd, string name,
629                                   T vecMin, T vecMax)
630 {
631     addOpSDotKHRComputeTests<Int8Buffer>(testCtx, group, rnd, name, dotProductPacking,
632                                          DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector8,
633                                          DE_LENGTH_OF_ARRAY(dotProductVector8), vecMin, vecMax);
634 }
635 
636 template <class InBufferT, class OutBufferT, class OutputT, class OperandT>
addOpUDotKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,int numElements,vector<OperandT> & inputInts1,vector<OperandT> & inputInts2,const struct DotProductInputInfo & inputInfo,const struct DotProductPackingInfo & packingInfo,const struct DotProductVectorInfo & vectorInfo)637 void addOpUDotKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, int numElements,
638                               vector<OperandT> &inputInts1, vector<OperandT> &inputInts2,
639                               const struct DotProductInputInfo &inputInfo,
640                               const struct DotProductPackingInfo &packingInfo,
641                               const struct DotProductVectorInfo &vectorInfo)
642 {
643     ComputeShaderSpec spec;
644     size_t outSize = sizeof(OutputT) * 8;
645     vector<OutputT> outputInts(numElements, 0);
646 
647     fillDotProductOutputs(numElements, inputInts1, inputInts2, outputInts, inputInfo);
648 
649     spec.assembly = generateIntegerDotProductCode(packingInfo, vectorInfo, outSize, false, false, false);
650 
651     addDotProductExtensionAndFeatures(spec, packingInfo, vectorInfo.vecElementSize, outSize);
652 
653     spec.inputs.push_back(BufferSp(new InBufferT(inputInts1)));
654     spec.inputs.push_back(BufferSp(new InBufferT(inputInts2)));
655     spec.outputs.push_back(BufferSp(new OutBufferT(outputInts)));
656     spec.numWorkGroups = IVec3(numElements, 1, 1);
657     spec.failResult    = QP_TEST_RESULT_FAIL;
658     spec.failMessage   = "Output doesn't match with expected";
659 
660     string qualTestName(getDotProductTestName(inputInfo, packingInfo, outSize));
661 
662     group->addChild(new SpvAsmComputeShaderCase(testCtx, qualTestName.data(), spec));
663 }
664 
665 template <class InBufferT, class T>
addOpUDotKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,const struct DotProductPackingInfo dotProductPackingInfo[],unsigned dotProductPackingInfoSize,const struct DotProductVectorInfo dotProductVectorInfo[],unsigned dotProductVectorInfoSize,T vecMin,T vecMax)666 void addOpUDotKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd, string name,
667                               const struct DotProductPackingInfo dotProductPackingInfo[],
668                               unsigned dotProductPackingInfoSize,
669                               const struct DotProductVectorInfo dotProductVectorInfo[],
670                               unsigned dotProductVectorInfoSize, T vecMin, T vecMax)
671 {
672     const int numElements = 200;
673 
674     for (unsigned int j = 0; j < dotProductVectorInfoSize; j++)
675     {
676         const struct DotProductVectorInfo &vectorInfo = dotProductVectorInfo[j];
677         unsigned int alignedVecLen                    = getAlignedVecLen(vectorInfo);
678         struct DotProductInputInfo inputInfo          = {name, vectorInfo.vecLen, vectorInfo.vecElementSize};
679         vector<T> inputInts1(numElements * alignedVecLen, 0);
680         vector<T> inputInts2(numElements * alignedVecLen, 0);
681 
682         fillRandomScalars(rnd, vecMin, vecMax, &inputInts1[0], numElements * alignedVecLen);
683         fillRandomScalars(rnd, vecMin, vecMax, &inputInts2[0], numElements * alignedVecLen);
684 
685         if (vectorInfo.vecLen == 3)
686             for (unsigned int ndx = 0; ndx < numElements; ++ndx)
687                 inputInts1[ndx * 4 + 3] = inputInts2[ndx * 4 + 3] = 0;
688 
689         for (unsigned int i = 0; i < dotProductPackingInfoSize; i++)
690         {
691             const struct DotProductPackingInfo &packingInfo = dotProductPackingInfo[i];
692             if (packingInfo.packed && (vectorInfo.vecElementSize != 8 || vectorInfo.vecLen != 4))
693                 continue;
694 
695             if (vectorInfo.vecElementSize <= 32)
696                 addOpUDotKHRComputeTests<InBufferT, Uint32Buffer, uint32_t>(
697                     testCtx, group, numElements, inputInts1, inputInts2, inputInfo, packingInfo, vectorInfo);
698             if (vectorInfo.vecElementSize <= 16)
699                 addOpUDotKHRComputeTests<InBufferT, Uint16Buffer, uint16_t>(
700                     testCtx, group, numElements, inputInts1, inputInts2, inputInfo, packingInfo, vectorInfo);
701             if (vectorInfo.vecElementSize <= 8)
702                 addOpUDotKHRComputeTests<InBufferT, Uint8Buffer, uint8_t>(
703                     testCtx, group, numElements, inputInts1, inputInts2, inputInfo, packingInfo, vectorInfo);
704         }
705     }
706 }
707 
708 template <class T>
add32bitOpUDotKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,T vecMin,T vecMax)709 void add32bitOpUDotKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd, string name,
710                                    T vecMin, T vecMax)
711 {
712     addOpUDotKHRComputeTests<Uint32Buffer>(testCtx, group, rnd, name, dotProductPacking,
713                                            DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector32,
714                                            DE_LENGTH_OF_ARRAY(dotProductVector32), vecMin, vecMax);
715 }
716 
717 template <class T>
add16bitOpUDotKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,T vecMin,T vecMax)718 void add16bitOpUDotKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd, string name,
719                                    T vecMin, T vecMax)
720 {
721     addOpUDotKHRComputeTests<Uint16Buffer>(testCtx, group, rnd, name, dotProductPacking,
722                                            DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector16,
723                                            DE_LENGTH_OF_ARRAY(dotProductVector16), vecMin, vecMax);
724 }
725 
726 template <class T>
add8bitOpUDotKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,T vecMin,T vecMax)727 void add8bitOpUDotKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd, string name,
728                                   T vecMin, T vecMax)
729 {
730     addOpUDotKHRComputeTests<Uint8Buffer>(testCtx, group, rnd, name, dotProductPacking,
731                                           DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector8,
732                                           DE_LENGTH_OF_ARRAY(dotProductVector8), vecMin, vecMax);
733 }
734 
735 template <class LHSBufferT, class RHSBufferT, class OutBufferT, class OutputT, class LHSOperandT, class RHSOperandT>
addOpSUDotKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,int numElements,vector<LHSOperandT> & inputInts1,vector<RHSOperandT> & inputInts2,const struct DotProductInputInfo & inputInfo,const struct DotProductPackingInfo & packingInfo,const struct DotProductVectorInfo & vectorInfo)736 void addOpSUDotKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, int numElements,
737                                vector<LHSOperandT> &inputInts1, vector<RHSOperandT> &inputInts2,
738                                const struct DotProductInputInfo &inputInfo,
739                                const struct DotProductPackingInfo &packingInfo,
740                                const struct DotProductVectorInfo &vectorInfo)
741 {
742     ComputeShaderSpec spec;
743     size_t outSize = sizeof(OutputT) * 8;
744     vector<OutputT> outputInts(numElements, 0);
745 
746     fillDotProductOutputs(numElements, inputInts1, inputInts2, outputInts, inputInfo);
747 
748     spec.assembly = generateIntegerDotProductCode(packingInfo, vectorInfo, outSize, true, false, false);
749     addDotProductExtensionAndFeatures(spec, packingInfo, vectorInfo.vecElementSize, outSize);
750 
751     spec.inputs.push_back(BufferSp(new LHSBufferT(inputInts1)));
752     spec.inputs.push_back(BufferSp(new RHSBufferT(inputInts2)));
753     spec.outputs.push_back(BufferSp(new OutBufferT(outputInts)));
754     spec.numWorkGroups = IVec3(numElements, 1, 1);
755     spec.failResult    = QP_TEST_RESULT_FAIL;
756     spec.failMessage   = "Output doesn't match with expected";
757 
758     string qualTestName(getDotProductTestName(inputInfo, packingInfo, outSize));
759 
760     group->addChild(new SpvAsmComputeShaderCase(testCtx, qualTestName.data(), spec));
761 }
762 
763 template <class LHSBufferT, class RHSBufferT, class LHSOperandT, class RHSOperandT>
addOpSUDotKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,const struct DotProductPackingInfo dotProductPackingInfo[],unsigned dotProductPackingInfoSize,const struct DotProductVectorInfo dotProductVectorInfo[],unsigned dotProductVectorInfoSize,LHSOperandT lhsVecMin,LHSOperandT lhsVecMax,RHSOperandT rhsVecMin,RHSOperandT rhsVecMax)764 void addOpSUDotKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd, string name,
765                                const struct DotProductPackingInfo dotProductPackingInfo[],
766                                unsigned dotProductPackingInfoSize,
767                                const struct DotProductVectorInfo dotProductVectorInfo[],
768                                unsigned dotProductVectorInfoSize, LHSOperandT lhsVecMin, LHSOperandT lhsVecMax,
769                                RHSOperandT rhsVecMin, RHSOperandT rhsVecMax)
770 {
771     const int numElements = 200;
772     // Note: this test does not currently cover 64-bit integer results
773     for (unsigned int j = 0; j < dotProductVectorInfoSize; j++)
774     {
775         const struct DotProductVectorInfo &vectorInfo = dotProductVectorInfo[j];
776         unsigned int alignedVecLen                    = getAlignedVecLen(vectorInfo);
777         struct DotProductInputInfo inputInfo          = {name, vectorInfo.vecLen, vectorInfo.vecElementSize};
778         vector<LHSOperandT> inputInts1(numElements * alignedVecLen, 0);
779         vector<RHSOperandT> inputInts2(numElements * alignedVecLen, 0);
780 
781         fillRandomScalars(rnd, lhsVecMin, lhsVecMax, &inputInts1[0], numElements * alignedVecLen);
782         fillRandomScalars(rnd, rhsVecMin, rhsVecMax, &inputInts2[0], numElements * alignedVecLen);
783 
784         if (vectorInfo.vecLen == 3)
785             for (unsigned int ndx = 0; ndx < numElements; ++ndx)
786                 inputInts1[ndx * 4 + 3] = inputInts2[ndx * 4 + 3] = 0;
787 
788         for (unsigned int i = 0; i < dotProductPackingInfoSize; i++)
789         {
790             const struct DotProductPackingInfo &packingInfo = dotProductPackingInfo[i];
791             if (packingInfo.packed && (vectorInfo.vecElementSize != 8 || vectorInfo.vecLen != 4))
792                 continue;
793 
794             if (vectorInfo.vecElementSize <= 32)
795                 addOpSUDotKHRComputeTests<LHSBufferT, RHSBufferT, Int32Buffer, int32_t>(
796                     testCtx, group, numElements, inputInts1, inputInts2, inputInfo, packingInfo, vectorInfo);
797             if (vectorInfo.vecElementSize <= 16)
798                 addOpSUDotKHRComputeTests<LHSBufferT, RHSBufferT, Int16Buffer, int16_t>(
799                     testCtx, group, numElements, inputInts1, inputInts2, inputInfo, packingInfo, vectorInfo);
800             if (vectorInfo.vecElementSize <= 8)
801                 addOpSUDotKHRComputeTests<LHSBufferT, RHSBufferT, Int8Buffer, int8_t>(
802                     testCtx, group, numElements, inputInts1, inputInts2, inputInfo, packingInfo, vectorInfo);
803         }
804     }
805 }
806 
807 template <class LHSOperandT, class RHSOperandT>
add32bitOpSUDotKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,LHSOperandT lhsVecMin,LHSOperandT lhsVecMax,RHSOperandT rhsVecMin,RHSOperandT rhsVecMax)808 void add32bitOpSUDotKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd, string name,
809                                     LHSOperandT lhsVecMin, LHSOperandT lhsVecMax, RHSOperandT rhsVecMin,
810                                     RHSOperandT rhsVecMax)
811 {
812     addOpSUDotKHRComputeTests<Int32Buffer, Uint32Buffer>(
813         testCtx, group, rnd, name, dotProductPacking, DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector32,
814         DE_LENGTH_OF_ARRAY(dotProductVector32), lhsVecMin, lhsVecMax, rhsVecMin, rhsVecMax);
815 }
816 
817 template <class LHSOperandT, class RHSOperandT>
add16bitOpSUDotKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,LHSOperandT lhsVecMin,LHSOperandT lhsVecMax,RHSOperandT rhsVecMin,RHSOperandT rhsVecMax)818 void add16bitOpSUDotKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd, string name,
819                                     LHSOperandT lhsVecMin, LHSOperandT lhsVecMax, RHSOperandT rhsVecMin,
820                                     RHSOperandT rhsVecMax)
821 {
822     addOpSUDotKHRComputeTests<Int16Buffer, Uint16Buffer>(
823         testCtx, group, rnd, name, dotProductPacking, DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector16,
824         DE_LENGTH_OF_ARRAY(dotProductVector16), lhsVecMin, lhsVecMax, rhsVecMin, rhsVecMax);
825 }
826 
827 template <class LHSOperandT, class RHSOperandT>
add8bitOpSUDotKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,LHSOperandT lhsVecMin,LHSOperandT lhsVecMax,RHSOperandT rhsVecMin,RHSOperandT rhsVecMax)828 void add8bitOpSUDotKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd, string name,
829                                    LHSOperandT lhsVecMin, LHSOperandT lhsVecMax, RHSOperandT rhsVecMin,
830                                    RHSOperandT rhsVecMax)
831 {
832     addOpSUDotKHRComputeTests<Int8Buffer, Uint8Buffer>(
833         testCtx, group, rnd, name, dotProductPacking, DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector8,
834         DE_LENGTH_OF_ARRAY(dotProductVector8), lhsVecMin, lhsVecMax, rhsVecMin, rhsVecMax);
835 }
836 
837 template <class InBufferT, class AddendBufferT, class AddendT, class OperandT>
addOpSDotAccSatKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,int numElements,vector<OperandT> & inputInts1,vector<OperandT> & inputInts2,const struct DotProductInputInfo & inputInfo,const struct DotProductPackingInfo & packingInfo,const struct DotProductVectorInfo & vectorInfo,bool useMaxAddend)838 void addOpSDotAccSatKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd,
839                                     int numElements, vector<OperandT> &inputInts1, vector<OperandT> &inputInts2,
840                                     const struct DotProductInputInfo &inputInfo,
841                                     const struct DotProductPackingInfo &packingInfo,
842                                     const struct DotProductVectorInfo &vectorInfo, bool useMaxAddend)
843 {
844     ComputeShaderSpec spec;
845     size_t addendSize = sizeof(AddendT) * 8;
846     vector<AddendT> inputInts3(numElements, 0);
847     vector<AddendT> outputInts(numElements, 0);
848 
849     if (useMaxAddend)
850         fillRandomScalars(rnd, (AddendT)(std::numeric_limits<AddendT>::max() - 20),
851                           (AddendT)(std::numeric_limits<AddendT>::max()), &inputInts3[0], numElements);
852     else
853         fillRandomScalars(rnd, (AddendT)(std::numeric_limits<AddendT>::min()),
854                           (AddendT)(std::numeric_limits<AddendT>::min() + 20), &inputInts3[0], numElements);
855 
856     spec.assembly = generateIntegerDotProductCode(packingInfo, vectorInfo, addendSize, true, true, true);
857 
858     addDotProductExtensionAndFeatures(spec, packingInfo, vectorInfo.vecElementSize, addendSize);
859     spec.inputs.push_back(BufferSp(new InBufferT(inputInts1)));
860     spec.inputs.push_back(BufferSp(new InBufferT(inputInts2)));
861     spec.inputs.push_back(BufferSp(new AddendBufferT(inputInts3)));
862     spec.outputs.push_back(BufferSp(new AddendBufferT(outputInts)));
863     spec.numWorkGroups = IVec3(numElements, 1, 1);
864     spec.verifyIO      = &compareDotProductAccSat<AddendT, OperandT, OperandT>;
865     spec.failResult    = QP_TEST_RESULT_FAIL;
866     spec.failMessage   = "Output doesn't match with expected";
867 
868     string qualTestName(getDotProductTestName(inputInfo, packingInfo, addendSize));
869 
870     group->addChild(new SpvAsmComputeShaderCase(testCtx, qualTestName.data(), spec));
871 }
872 
873 template <class InBufferT, class T>
addOpSDotAccSatKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,const struct DotProductPackingInfo dotProductPackingInfo[],unsigned dotProductPackingInfoSize,const struct DotProductVectorInfo dotProductVectorInfo[],unsigned dotProductVectorInfoSize,T vecMin,T vecMax,bool useMaxAddend)874 void addOpSDotAccSatKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd, string name,
875                                     const struct DotProductPackingInfo dotProductPackingInfo[],
876                                     unsigned dotProductPackingInfoSize,
877                                     const struct DotProductVectorInfo dotProductVectorInfo[],
878                                     unsigned dotProductVectorInfoSize, T vecMin, T vecMax, bool useMaxAddend)
879 {
880     const int numElements = 200;
881     // Note: this test does not currently cover 64-bit integer results
882     for (unsigned int j = 0; j < dotProductVectorInfoSize; j++)
883     {
884         const struct DotProductVectorInfo &vectorInfo = dotProductVectorInfo[j];
885         unsigned int alignedVecLen                    = getAlignedVecLen(vectorInfo);
886         struct DotProductInputInfo inputInfo          = {name, vectorInfo.vecLen, vectorInfo.vecElementSize};
887         vector<T> inputInts1(numElements * alignedVecLen, 0);
888         vector<T> inputInts2(numElements * alignedVecLen, 0);
889 
890         fillRandomScalars(rnd, vecMin, vecMax, &inputInts1[0], numElements * alignedVecLen);
891         fillRandomScalars(rnd, vecMin, vecMax, &inputInts2[0], numElements * alignedVecLen);
892 
893         if (vectorInfo.vecLen == 3)
894             for (unsigned int ndx = 0; ndx < numElements; ++ndx)
895                 inputInts1[ndx * 4 + 3] = inputInts2[ndx * 4 + 3] = 0;
896 
897         for (unsigned int i = 0; i < dotProductPackingInfoSize; i++)
898         {
899             const struct DotProductPackingInfo &packingInfo = dotProductPackingInfo[i];
900             if (packingInfo.packed && (vectorInfo.vecElementSize != 8 || vectorInfo.vecLen != 4))
901                 continue;
902 
903             if (vectorInfo.vecElementSize <= 32)
904                 addOpSDotAccSatKHRComputeTests<InBufferT, Int32Buffer, int32_t>(testCtx, group, rnd, numElements,
905                                                                                 inputInts1, inputInts2, inputInfo,
906                                                                                 packingInfo, vectorInfo, useMaxAddend);
907             if (vectorInfo.vecElementSize <= 16)
908                 addOpSDotAccSatKHRComputeTests<InBufferT, Int16Buffer, int16_t>(testCtx, group, rnd, numElements,
909                                                                                 inputInts1, inputInts2, inputInfo,
910                                                                                 packingInfo, vectorInfo, useMaxAddend);
911             if (vectorInfo.vecElementSize <= 8)
912                 addOpSDotAccSatKHRComputeTests<InBufferT, Int8Buffer, int8_t>(testCtx, group, rnd, numElements,
913                                                                               inputInts1, inputInts2, inputInfo,
914                                                                               packingInfo, vectorInfo, useMaxAddend);
915         }
916     }
917 }
918 
919 template <class T>
add32bitOpSDotAccSatKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,T vecMin,T vecMax,bool useMaxAddend=true)920 void add32bitOpSDotAccSatKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd,
921                                          string name, T vecMin, T vecMax, bool useMaxAddend = true)
922 {
923     addOpSDotAccSatKHRComputeTests<Int32Buffer>(testCtx, group, rnd, name, dotProductPacking,
924                                                 DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector32,
925                                                 DE_LENGTH_OF_ARRAY(dotProductVector32), vecMin, vecMax, useMaxAddend);
926 }
927 
928 template <class T>
add16bitOpSDotAccSatKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,T vecMin,T vecMax,bool useMaxAddend=true)929 void add16bitOpSDotAccSatKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd,
930                                          string name, T vecMin, T vecMax, bool useMaxAddend = true)
931 {
932     addOpSDotAccSatKHRComputeTests<Int16Buffer>(testCtx, group, rnd, name, dotProductPacking,
933                                                 DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector16,
934                                                 DE_LENGTH_OF_ARRAY(dotProductVector16), vecMin, vecMax, useMaxAddend);
935 }
936 
937 template <class T>
add8bitOpSDotAccSatKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,T vecMin,T vecMax,bool useMaxAddend=true)938 void add8bitOpSDotAccSatKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd,
939                                         string name, T vecMin, T vecMax, bool useMaxAddend = true)
940 {
941     addOpSDotAccSatKHRComputeTests<Int8Buffer>(testCtx, group, rnd, name, dotProductPacking,
942                                                DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector8,
943                                                DE_LENGTH_OF_ARRAY(dotProductVector8), vecMin, vecMax, useMaxAddend);
944 }
945 
946 template <class InBufferT, class AddendBufferT, class AddendT, class OperandT>
addOpUDotAccSatKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,int numElements,vector<OperandT> & inputInts1,vector<OperandT> & inputInts2,const struct DotProductInputInfo & inputInfo,const struct DotProductPackingInfo & packingInfo,const struct DotProductVectorInfo & vectorInfo,bool useMaxAddend)947 void addOpUDotAccSatKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd,
948                                     int numElements, vector<OperandT> &inputInts1, vector<OperandT> &inputInts2,
949                                     const struct DotProductInputInfo &inputInfo,
950                                     const struct DotProductPackingInfo &packingInfo,
951                                     const struct DotProductVectorInfo &vectorInfo, bool useMaxAddend)
952 {
953     ComputeShaderSpec spec;
954     size_t addendSize = sizeof(AddendT) * 8;
955     vector<AddendT> inputInts3(numElements, 0);
956     vector<AddendT> outputInts(numElements, 0);
957 
958     if (useMaxAddend)
959         fillRandomScalars(rnd, (AddendT)(std::numeric_limits<AddendT>::max() - 20),
960                           (AddendT)(std::numeric_limits<AddendT>::max()), &inputInts3[0], numElements);
961     else
962         fillRandomScalars(rnd, (AddendT)(std::numeric_limits<AddendT>::min()),
963                           (AddendT)(std::numeric_limits<AddendT>::min() + 20), &inputInts3[0], numElements);
964 
965     spec.assembly = generateIntegerDotProductCode(packingInfo, vectorInfo, addendSize, false, false, true);
966 
967     addDotProductExtensionAndFeatures(spec, packingInfo, vectorInfo.vecElementSize, addendSize);
968     spec.inputs.push_back(BufferSp(new InBufferT(inputInts1)));
969     spec.inputs.push_back(BufferSp(new InBufferT(inputInts2)));
970     spec.inputs.push_back(BufferSp(new AddendBufferT(inputInts3)));
971     spec.outputs.push_back(BufferSp(new AddendBufferT(outputInts)));
972     spec.numWorkGroups = IVec3(numElements, 1, 1);
973     spec.verifyIO      = &compareDotProductAccSat<AddendT, OperandT, OperandT>;
974     spec.failResult    = QP_TEST_RESULT_FAIL;
975     spec.failMessage   = "Output doesn't match with expected";
976 
977     string qualTestName(getDotProductTestName(inputInfo, packingInfo, addendSize));
978 
979     group->addChild(new SpvAsmComputeShaderCase(testCtx, qualTestName.data(), spec));
980 }
981 
982 template <class InBufferT, class T>
addOpUDotAccSatKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,const struct DotProductPackingInfo dotProductPackingInfo[],unsigned dotProductPackingInfoSize,const struct DotProductVectorInfo dotProductVectorInfo[],unsigned dotProductVectorInfoSize,T vecMin,T vecMax,bool useMaxAddend)983 void addOpUDotAccSatKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd, string name,
984                                     const struct DotProductPackingInfo dotProductPackingInfo[],
985                                     unsigned dotProductPackingInfoSize,
986                                     const struct DotProductVectorInfo dotProductVectorInfo[],
987                                     unsigned dotProductVectorInfoSize, T vecMin, T vecMax, bool useMaxAddend)
988 {
989     const int numElements = 200;
990     // Note: this test does not currently cover 64-bit integer results
991 
992     for (unsigned int j = 0; j < dotProductVectorInfoSize; j++)
993     {
994         const struct DotProductVectorInfo &vectorInfo = dotProductVectorInfo[j];
995         unsigned int alignedVecLen                    = getAlignedVecLen(vectorInfo);
996         struct DotProductInputInfo inputInfo          = {name, vectorInfo.vecLen, vectorInfo.vecElementSize};
997         vector<T> inputInts1(numElements * alignedVecLen, 0);
998         vector<T> inputInts2(numElements * alignedVecLen, 0);
999 
1000         fillRandomScalars(rnd, vecMin, vecMax, &inputInts1[0], numElements * alignedVecLen);
1001         fillRandomScalars(rnd, vecMin, vecMax, &inputInts2[0], numElements * alignedVecLen);
1002 
1003         if (vectorInfo.vecLen == 3)
1004             for (unsigned int ndx = 0; ndx < numElements; ++ndx)
1005                 inputInts1[ndx * 4 + 3] = inputInts2[ndx * 4 + 3] = 0;
1006 
1007         for (unsigned int i = 0; i < dotProductPackingInfoSize; i++)
1008         {
1009             const struct DotProductPackingInfo &packingInfo = dotProductPackingInfo[i];
1010             if (packingInfo.packed && (vectorInfo.vecElementSize != 8 || vectorInfo.vecLen != 4))
1011                 continue;
1012 
1013             if (vectorInfo.vecElementSize <= 32)
1014                 addOpUDotAccSatKHRComputeTests<InBufferT, Uint32Buffer, uint32_t>(
1015                     testCtx, group, rnd, numElements, inputInts1, inputInts2, inputInfo, packingInfo, vectorInfo,
1016                     useMaxAddend);
1017             if (vectorInfo.vecElementSize <= 16)
1018                 addOpUDotAccSatKHRComputeTests<InBufferT, Uint16Buffer, uint16_t>(
1019                     testCtx, group, rnd, numElements, inputInts1, inputInts2, inputInfo, packingInfo, vectorInfo,
1020                     useMaxAddend);
1021             if (vectorInfo.vecElementSize <= 8)
1022                 addOpUDotAccSatKHRComputeTests<InBufferT, Uint8Buffer, uint8_t>(testCtx, group, rnd, numElements,
1023                                                                                 inputInts1, inputInts2, inputInfo,
1024                                                                                 packingInfo, vectorInfo, useMaxAddend);
1025         }
1026     }
1027 }
1028 
1029 template <class T>
add32bitOpUDotAccSatKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,T vecMin,T vecMax,bool useMaxAddend=true)1030 void add32bitOpUDotAccSatKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd,
1031                                          string name, T vecMin, T vecMax, bool useMaxAddend = true)
1032 {
1033     addOpUDotAccSatKHRComputeTests<Uint32Buffer>(testCtx, group, rnd, name, dotProductPacking,
1034                                                  DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector32,
1035                                                  DE_LENGTH_OF_ARRAY(dotProductVector32), vecMin, vecMax, useMaxAddend);
1036 }
1037 
1038 template <class T>
add16bitOpUDotAccSatKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,T vecMin,T vecMax,bool useMaxAddend=true)1039 void add16bitOpUDotAccSatKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd,
1040                                          string name, T vecMin, T vecMax, bool useMaxAddend = true)
1041 {
1042     addOpUDotAccSatKHRComputeTests<Uint16Buffer>(testCtx, group, rnd, name, dotProductPacking,
1043                                                  DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector16,
1044                                                  DE_LENGTH_OF_ARRAY(dotProductVector16), vecMin, vecMax, useMaxAddend);
1045 }
1046 
1047 template <class T>
add8bitOpUDotAccSatKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,T vecMin,T vecMax,bool useMaxAddend=true)1048 void add8bitOpUDotAccSatKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd,
1049                                         string name, T vecMin, T vecMax, bool useMaxAddend = true)
1050 {
1051     addOpUDotAccSatKHRComputeTests<Uint8Buffer>(testCtx, group, rnd, name, dotProductPacking,
1052                                                 DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector8,
1053                                                 DE_LENGTH_OF_ARRAY(dotProductVector8), vecMin, vecMax, useMaxAddend);
1054 }
1055 
1056 template <class LHSBufferT, class RHSBufferT, class AddendBufferT, class AddendT, class LHSOperandT, class RHSOperandT>
addOpSUDotAccSatKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,int numElements,vector<LHSOperandT> & inputInts1,vector<RHSOperandT> & inputInts2,const struct DotProductInputInfo & inputInfo,const struct DotProductPackingInfo & packingInfo,const struct DotProductVectorInfo & vectorInfo,bool useMaxAddend)1057 void addOpSUDotAccSatKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd,
1058                                      int numElements, vector<LHSOperandT> &inputInts1, vector<RHSOperandT> &inputInts2,
1059                                      const struct DotProductInputInfo &inputInfo,
1060                                      const struct DotProductPackingInfo &packingInfo,
1061                                      const struct DotProductVectorInfo &vectorInfo, bool useMaxAddend)
1062 {
1063     ComputeShaderSpec spec;
1064     size_t addendSize = sizeof(AddendT) * 8;
1065     vector<AddendT> inputInts3(numElements, 0);
1066     vector<AddendT> outputInts(numElements, 0);
1067 
1068     // Populate the accumulation buffer with large values to attempt to guarantee saturation
1069     if (useMaxAddend)
1070         fillRandomScalars(rnd, (AddendT)(std::numeric_limits<AddendT>::max() - 20),
1071                           (AddendT)(std::numeric_limits<AddendT>::max()), &inputInts3[0], numElements);
1072     else
1073         fillRandomScalars(rnd, (AddendT)(std::numeric_limits<AddendT>::min()),
1074                           (AddendT)(std::numeric_limits<AddendT>::min() + 20), &inputInts3[0], numElements);
1075 
1076     spec.assembly = generateIntegerDotProductCode(packingInfo, vectorInfo, addendSize, true, false, true);
1077     addDotProductExtensionAndFeatures(spec, packingInfo, vectorInfo.vecElementSize, addendSize);
1078     spec.inputs.push_back(BufferSp(new LHSBufferT(inputInts1)));
1079     spec.inputs.push_back(BufferSp(new RHSBufferT(inputInts2)));
1080     spec.inputs.push_back(BufferSp(new AddendBufferT(inputInts3)));
1081     spec.outputs.push_back(BufferSp(new AddendBufferT(outputInts)));
1082     spec.numWorkGroups = IVec3(numElements, 1, 1);
1083     spec.verifyIO      = &compareDotProductAccSat<AddendT, LHSOperandT, RHSOperandT>;
1084     spec.failResult    = QP_TEST_RESULT_FAIL;
1085     spec.failMessage   = "Output doesn't match with expected";
1086 
1087     string qualTestName(getDotProductTestName(inputInfo, packingInfo, addendSize));
1088 
1089     group->addChild(new SpvAsmComputeShaderCase(testCtx, qualTestName.data(), spec));
1090 }
1091 
1092 template <class LHSBufferT, class RHSBufferT, class LHSOperandT, class RHSOperandT>
addOpSUDotAccSatKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,const struct DotProductPackingInfo dotProductPackingInfo[],unsigned dotProductPackingInfoSize,const struct DotProductVectorInfo dotProductVectorInfo[],unsigned dotProductVectorInfoSize,LHSOperandT lhsVecMin,LHSOperandT lhsVecMax,RHSOperandT rhsVecMin,RHSOperandT rhsVecMax,bool useMaxAddend)1093 void addOpSUDotAccSatKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd, string name,
1094                                      const struct DotProductPackingInfo dotProductPackingInfo[],
1095                                      unsigned dotProductPackingInfoSize,
1096                                      const struct DotProductVectorInfo dotProductVectorInfo[],
1097                                      unsigned dotProductVectorInfoSize, LHSOperandT lhsVecMin, LHSOperandT lhsVecMax,
1098                                      RHSOperandT rhsVecMin, RHSOperandT rhsVecMax, bool useMaxAddend)
1099 {
1100     const int numElements = 200;
1101     // Note: this test does not currently cover 64-bit integer results
1102 
1103     for (unsigned int j = 0; j < dotProductVectorInfoSize; j++)
1104     {
1105         const struct DotProductVectorInfo &vectorInfo = dotProductVectorInfo[j];
1106         unsigned int alignedVecLen                    = getAlignedVecLen(vectorInfo);
1107         struct DotProductInputInfo inputInfo          = {name, vectorInfo.vecLen, vectorInfo.vecElementSize};
1108         vector<LHSOperandT> inputInts1(numElements * alignedVecLen, 0);
1109         vector<RHSOperandT> inputInts2(numElements * alignedVecLen, 0);
1110 
1111         fillRandomScalars(rnd, lhsVecMin, lhsVecMax, &inputInts1[0], numElements * alignedVecLen);
1112         fillRandomScalars(rnd, rhsVecMin, rhsVecMax, &inputInts2[0], numElements * alignedVecLen);
1113 
1114         if (vectorInfo.vecLen == 3)
1115             for (unsigned int ndx = 0; ndx < numElements; ++ndx)
1116                 inputInts1[ndx * 4 + 3] = inputInts2[ndx * 4 + 3] = 0;
1117 
1118         for (unsigned int i = 0; i < dotProductPackingInfoSize; i++)
1119         {
1120             const struct DotProductPackingInfo &packingInfo = dotProductPackingInfo[i];
1121             if (packingInfo.packed && (vectorInfo.vecElementSize != 8 || vectorInfo.vecLen != 4))
1122                 continue;
1123 
1124             if (vectorInfo.vecElementSize <= 32)
1125                 addOpSUDotAccSatKHRComputeTests<LHSBufferT, RHSBufferT, Int32Buffer, int32_t>(
1126                     testCtx, group, rnd, numElements, inputInts1, inputInts2, inputInfo, packingInfo, vectorInfo,
1127                     useMaxAddend);
1128             if (vectorInfo.vecElementSize <= 16)
1129                 addOpSUDotAccSatKHRComputeTests<LHSBufferT, RHSBufferT, Int16Buffer, int16_t>(
1130                     testCtx, group, rnd, numElements, inputInts1, inputInts2, inputInfo, packingInfo, vectorInfo,
1131                     useMaxAddend);
1132             if (vectorInfo.vecElementSize <= 8)
1133                 addOpSUDotAccSatKHRComputeTests<LHSBufferT, RHSBufferT, Int8Buffer, int8_t>(
1134                     testCtx, group, rnd, numElements, inputInts1, inputInts2, inputInfo, packingInfo, vectorInfo,
1135                     useMaxAddend);
1136         }
1137     }
1138 }
1139 
1140 template <class LHSOperandT, class RHSOperandT>
add32bitOpSUDotAccSatKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,LHSOperandT lhsVecMin,LHSOperandT lhsVecMax,RHSOperandT rhsVecMin,RHSOperandT rhsVecMax,bool useMaxAddend=true)1141 void add32bitOpSUDotAccSatKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd,
1142                                           string name, LHSOperandT lhsVecMin, LHSOperandT lhsVecMax,
1143                                           RHSOperandT rhsVecMin, RHSOperandT rhsVecMax, bool useMaxAddend = true)
1144 {
1145     addOpSUDotAccSatKHRComputeTests<Int32Buffer, Uint32Buffer>(
1146         testCtx, group, rnd, name, dotProductPacking, DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector32,
1147         DE_LENGTH_OF_ARRAY(dotProductVector32), lhsVecMin, lhsVecMax, rhsVecMin, rhsVecMax, useMaxAddend);
1148 }
1149 
1150 template <class LHSOperandT, class RHSOperandT>
add16bitOpSUDotAccSatKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,LHSOperandT lhsVecMin,LHSOperandT lhsVecMax,RHSOperandT rhsVecMin,RHSOperandT rhsVecMax,bool useMaxAddend=true)1151 void add16bitOpSUDotAccSatKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd,
1152                                           string name, LHSOperandT lhsVecMin, LHSOperandT lhsVecMax,
1153                                           RHSOperandT rhsVecMin, RHSOperandT rhsVecMax, bool useMaxAddend = true)
1154 {
1155     addOpSUDotAccSatKHRComputeTests<Int16Buffer, Uint16Buffer>(
1156         testCtx, group, rnd, name, dotProductPacking, DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector16,
1157         DE_LENGTH_OF_ARRAY(dotProductVector16), lhsVecMin, lhsVecMax, rhsVecMin, rhsVecMax, useMaxAddend);
1158 }
1159 
1160 template <class LHSOperandT, class RHSOperandT>
add8bitOpSUDotAccSatKHRComputeTests(tcu::TestContext & testCtx,tcu::TestCaseGroup * group,de::Random & rnd,string name,LHSOperandT lhsVecMin,LHSOperandT lhsVecMax,RHSOperandT rhsVecMin,RHSOperandT rhsVecMax,bool useMaxAddend=true)1161 void add8bitOpSUDotAccSatKHRComputeTests(tcu::TestContext &testCtx, tcu::TestCaseGroup *group, de::Random &rnd,
1162                                          string name, LHSOperandT lhsVecMin, LHSOperandT lhsVecMax,
1163                                          RHSOperandT rhsVecMin, RHSOperandT rhsVecMax, bool useMaxAddend = true)
1164 {
1165     addOpSUDotAccSatKHRComputeTests<Int8Buffer, Uint8Buffer>(
1166         testCtx, group, rnd, name, dotProductPacking, DE_LENGTH_OF_ARRAY(dotProductPacking), dotProductVector8,
1167         DE_LENGTH_OF_ARRAY(dotProductVector8), lhsVecMin, lhsVecMax, rhsVecMin, rhsVecMax, useMaxAddend);
1168 }
1169 
1170 } // namespace
1171 
createOpSDotKHRComputeGroup(tcu::TestContext & testCtx)1172 tcu::TestCaseGroup *createOpSDotKHRComputeGroup(tcu::TestContext &testCtx)
1173 {
1174     // Test the OpSDotKHR instruction
1175     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsdotkhr"));
1176     de::Random rnd(deStringHash(group->getName()));
1177 
1178     add8bitOpSDotKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<int8_t>::min(),
1179                                  std::numeric_limits<int8_t>::max());
1180     add8bitOpSDotKHRComputeTests(testCtx, group.get(), rnd, string("small"), (int8_t)-20, (int8_t)20);
1181     add16bitOpSDotKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<int16_t>::min(),
1182                                   std::numeric_limits<int16_t>::max());
1183     add32bitOpSDotKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<int32_t>::min(),
1184                                   std::numeric_limits<int32_t>::max());
1185 
1186     return group.release();
1187 }
1188 
createOpUDotKHRComputeGroup(tcu::TestContext & testCtx)1189 tcu::TestCaseGroup *createOpUDotKHRComputeGroup(tcu::TestContext &testCtx)
1190 {
1191     // Test the OpUDotKHR instruction
1192     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opudotkhr"));
1193     de::Random rnd(deStringHash(group->getName()));
1194 
1195     add8bitOpUDotKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<uint8_t>::min(),
1196                                  std::numeric_limits<uint8_t>::max());
1197     add8bitOpUDotKHRComputeTests(testCtx, group.get(), rnd, string("small"), (uint8_t)0, (uint8_t)20);
1198     add16bitOpUDotKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<uint16_t>::min(),
1199                                   std::numeric_limits<uint16_t>::max());
1200     add32bitOpUDotKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<uint32_t>::min(),
1201                                   std::numeric_limits<uint32_t>::max());
1202 
1203     return group.release();
1204 }
1205 
createOpSUDotKHRComputeGroup(tcu::TestContext & testCtx)1206 tcu::TestCaseGroup *createOpSUDotKHRComputeGroup(tcu::TestContext &testCtx)
1207 {
1208     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsudotkhr"));
1209     de::Random rnd(deStringHash(group->getName()));
1210 
1211     add8bitOpSUDotKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<int8_t>::min(),
1212                                   std::numeric_limits<int8_t>::max(), std::numeric_limits<uint8_t>::min(),
1213                                   std::numeric_limits<uint8_t>::max());
1214     add8bitOpSUDotKHRComputeTests(testCtx, group.get(), rnd, string("small"), (int8_t)-20, (int8_t)20, (uint8_t)0,
1215                                   (uint8_t)20);
1216     add16bitOpSUDotKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<int16_t>::min(),
1217                                    std::numeric_limits<int16_t>::max(), std::numeric_limits<uint16_t>::min(),
1218                                    std::numeric_limits<uint16_t>::max());
1219     add32bitOpSUDotKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<int32_t>::min(),
1220                                    std::numeric_limits<int32_t>::max(), std::numeric_limits<uint32_t>::min(),
1221                                    std::numeric_limits<uint32_t>::max());
1222 
1223     return group.release();
1224 }
1225 
createOpSDotAccSatKHRComputeGroup(tcu::TestContext & testCtx)1226 tcu::TestCaseGroup *createOpSDotAccSatKHRComputeGroup(tcu::TestContext &testCtx)
1227 {
1228     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsdotaccsatkhr"));
1229     de::Random rnd(deStringHash(group->getName()));
1230 
1231     add8bitOpSDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<int8_t>::min(),
1232                                        std::numeric_limits<int8_t>::max());
1233     add8bitOpSDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("limits"), (int8_t)(12), (int8_t)(20));
1234     add8bitOpSDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("limits-neg"), (int8_t)(-20), (int8_t)(-12),
1235                                        false);
1236     add8bitOpSDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("small"), (int8_t)-4, (int8_t)4);
1237     add8bitOpSDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("small-neg"), (int8_t)-4, (int8_t)4, false);
1238     add16bitOpSDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<int16_t>::min(),
1239                                         std::numeric_limits<int16_t>::max());
1240     add16bitOpSDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("limits"), (int16_t)(-20), (int16_t)(20));
1241     add16bitOpSDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("limits-neg"), (int16_t)(-20), (int16_t)(20),
1242                                         false);
1243     add32bitOpSDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<int32_t>::min(),
1244                                         std::numeric_limits<int32_t>::max());
1245     add32bitOpSDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("limits"),
1246                                         (int32_t)(std::numeric_limits<int8_t>::min()),
1247                                         (int32_t)(std::numeric_limits<int8_t>::max()));
1248     add32bitOpSDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("limits-neg"),
1249                                         (int32_t)(std::numeric_limits<int8_t>::min()),
1250                                         (int32_t)(std::numeric_limits<int8_t>::max()), false);
1251 
1252     return group.release();
1253 }
1254 
createOpUDotAccSatKHRComputeGroup(tcu::TestContext & testCtx)1255 tcu::TestCaseGroup *createOpUDotAccSatKHRComputeGroup(tcu::TestContext &testCtx)
1256 {
1257     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opudotaccsatkhr"));
1258     de::Random rnd(deStringHash(group->getName()));
1259 
1260     add8bitOpUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<uint8_t>::min(),
1261                                        std::numeric_limits<uint8_t>::max());
1262     add8bitOpUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("limits"), (uint8_t)(12), (uint8_t)(20));
1263     add8bitOpUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("small"), (uint8_t)1, (uint8_t)8);
1264     add8bitOpUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("small-nosat"), (uint8_t)1, (uint8_t)8, false);
1265     add16bitOpUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<uint16_t>::min(),
1266                                         std::numeric_limits<uint16_t>::max());
1267     add16bitOpUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("limits"), (uint16_t)(12), (uint16_t)(20));
1268     add16bitOpUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("nosat"), (uint16_t)(12), (uint16_t)(20),
1269                                         false);
1270     add32bitOpUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<uint32_t>::min(),
1271                                         std::numeric_limits<uint32_t>::max());
1272     add32bitOpUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("limits"),
1273                                         (uint32_t)(std::numeric_limits<uint8_t>::max() - 40),
1274                                         (uint32_t)(std::numeric_limits<uint8_t>::max() - 20));
1275     add32bitOpUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("nosat"),
1276                                         (uint32_t)(std::numeric_limits<uint8_t>::max() - 40),
1277                                         (uint32_t)(std::numeric_limits<uint8_t>::max() - 20), false);
1278 
1279     return group.release();
1280 }
1281 
createOpSUDotAccSatKHRComputeGroup(tcu::TestContext & testCtx)1282 tcu::TestCaseGroup *createOpSUDotAccSatKHRComputeGroup(tcu::TestContext &testCtx)
1283 {
1284     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opsudotaccsatkhr"));
1285     de::Random rnd(deStringHash(group->getName()));
1286 
1287     add8bitOpSUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<int8_t>::min(),
1288                                         std::numeric_limits<int8_t>::max(), std::numeric_limits<uint8_t>::min(),
1289                                         std::numeric_limits<uint8_t>::max());
1290     add8bitOpSUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("limits"), (int8_t)(12), (int8_t)(20),
1291                                         (uint8_t)(12), (uint8_t)(20));
1292     add8bitOpSUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("limits-neg"), (int8_t)(-20), (int8_t)(-12),
1293                                         (uint8_t)(12), (uint8_t)(20), false);
1294     add8bitOpSUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("small"), (int8_t)-4, (int8_t)4, (uint8_t)1,
1295                                         (uint8_t)8);
1296     add8bitOpSUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("small-neg"), (int8_t)-4, (int8_t)4,
1297                                         (uint8_t)1, (uint8_t)8, false);
1298     add16bitOpSUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<int16_t>::min(),
1299                                          std::numeric_limits<int16_t>::max(), std::numeric_limits<uint16_t>::min(),
1300                                          std::numeric_limits<uint16_t>::max());
1301     add16bitOpSUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("limits"), (int16_t)(-20), (int16_t)(20),
1302                                          (uint16_t)(12), (uint16_t)(20));
1303     add16bitOpSUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("limits-neg"), (int16_t)(-20), (int16_t)(20),
1304                                          (uint16_t)(12), (uint16_t)(20), false);
1305     add32bitOpSUDotAccSatKHRComputeTests(testCtx, group.get(), rnd, string("all"), std::numeric_limits<int32_t>::min(),
1306                                          std::numeric_limits<int32_t>::max(), std::numeric_limits<uint32_t>::min(),
1307                                          std::numeric_limits<uint32_t>::max());
1308     add32bitOpSUDotAccSatKHRComputeTests(
1309         testCtx, group.get(), rnd, string("limits"), (int32_t)(std::numeric_limits<int8_t>::min()),
1310         (int32_t)(std::numeric_limits<int8_t>::max()), (uint32_t)(std::numeric_limits<uint8_t>::min()),
1311         (uint32_t)(std::numeric_limits<uint8_t>::max()));
1312     add32bitOpSUDotAccSatKHRComputeTests(
1313         testCtx, group.get(), rnd, string("limits-neg"), (int32_t)(std::numeric_limits<int8_t>::min()),
1314         (int32_t)(std::numeric_limits<int8_t>::max()), (uint32_t)(std::numeric_limits<uint8_t>::max()),
1315         (uint32_t)(std::numeric_limits<uint8_t>::max()), false);
1316 
1317     return group.release();
1318 }
1319 
1320 } // namespace SpirVAssembly
1321 } // namespace vkt
1322