1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 The Khronos Group Inc.
6  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7  * Copyright (c) 2016 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Floating-point packing and unpacking function tests.
24  *//*--------------------------------------------------------------------*/
25 
26 #include "vktShaderPackingFunctionTests.hpp"
27 #include "vktShaderExecutor.hpp"
28 #include "tcuTestLog.hpp"
29 #include "tcuFormatUtil.hpp"
30 #include "tcuFloat.hpp"
31 #include "tcuVectorUtil.hpp"
32 #include "deRandom.hpp"
33 #include "deMath.h"
34 #include "deString.h"
35 #include "deSharedPtr.hpp"
36 
37 namespace vkt
38 {
39 namespace shaderexecutor
40 {
41 
42 using namespace shaderexecutor;
43 
44 using std::string;
45 using tcu::TestLog;
46 
47 namespace
48 {
49 
getUlpDiff(float a,float b)50 inline uint32_t getUlpDiff(float a, float b)
51 {
52     const uint32_t aBits = tcu::Float32(a).bits();
53     const uint32_t bBits = tcu::Float32(b).bits();
54     return aBits > bBits ? aBits - bBits : bBits - aBits;
55 }
56 
// Thin wrapper around a float. It exists so that a float can be streamed
// through the HexFloat overload of operator<< instead of the plain float one.
struct HexFloat
{
    // Implicit conversion from float is intentional; the value is fixed at construction.
    HexFloat(const float value_) : value(value_)
    {
    }

    const float value; // Wrapped value, never modified after construction.
};
64 
operator <<(std::ostream & str,const HexFloat & v)65 std::ostream &operator<<(std::ostream &str, const HexFloat &v)
66 {
67     return str << v.value << " / " << tcu::toHex(tcu::Float32(v.value).bits());
68 }
69 
70 } // namespace
71 
72 // ShaderPackingFunctionCase
73 
74 class ShaderPackingFunctionCase : public TestCase
75 {
76 public:
77     ShaderPackingFunctionCase(tcu::TestContext &testCtx, const char *name, glu::ShaderType shaderType);
78     ~ShaderPackingFunctionCase(void);
79 
80     void checkSupport(Context &context) const;
initPrograms(vk::SourceCollections & programCollection) const81     virtual void initPrograms(vk::SourceCollections &programCollection) const
82     {
83         generateSources(m_shaderType, m_spec, programCollection);
84     }
85 
86 protected:
87     const glu::ShaderType m_shaderType;
88     ShaderSpec m_spec;
89 
90 private:
91     ShaderPackingFunctionCase(const ShaderPackingFunctionCase &other);
92     ShaderPackingFunctionCase &operator=(const ShaderPackingFunctionCase &other);
93 };
94 
ShaderPackingFunctionCase(tcu::TestContext & testCtx,const char * name,glu::ShaderType shaderType)95 ShaderPackingFunctionCase::ShaderPackingFunctionCase(tcu::TestContext &testCtx, const char *name,
96                                                      glu::ShaderType shaderType)
97     : TestCase(testCtx, name)
98     , m_shaderType(shaderType)
99 {
100 }
101 
~ShaderPackingFunctionCase(void)102 ShaderPackingFunctionCase::~ShaderPackingFunctionCase(void)
103 {
104 }
105 
checkSupport(Context & context) const106 void ShaderPackingFunctionCase::checkSupport(Context &context) const
107 {
108     checkSupportShader(context, m_shaderType);
109 }
110 
111 // ShaderPackingFunctionTestInstance
112 
113 class ShaderPackingFunctionTestInstance : public TestInstance
114 {
115 public:
ShaderPackingFunctionTestInstance(Context & context,glu::ShaderType shaderType,const ShaderSpec & spec,const char * name)116     ShaderPackingFunctionTestInstance(Context &context, glu::ShaderType shaderType, const ShaderSpec &spec,
117                                       const char *name)
118         : TestInstance(context)
119         , m_testCtx(context.getTestContext())
120         , m_shaderType(shaderType)
121         , m_spec(spec)
122         , m_name(name)
123         , m_executor(createExecutor(context, m_shaderType, m_spec))
124     {
125     }
126     virtual tcu::TestStatus iterate(void) = 0;
127 
128 protected:
129     tcu::TestContext &m_testCtx;
130     const glu::ShaderType m_shaderType;
131     ShaderSpec m_spec;
132     const char *m_name;
133     de::UniquePtr<ShaderExecutor> m_executor;
134 };
135 
136 // Test cases
137 
138 class PackSnorm2x16CaseInstance : public ShaderPackingFunctionTestInstance
139 {
140 public:
PackSnorm2x16CaseInstance(Context & context,glu::ShaderType shaderType,const ShaderSpec & spec,glu::Precision precision,const char * name)141     PackSnorm2x16CaseInstance(Context &context, glu::ShaderType shaderType, const ShaderSpec &spec,
142                               glu::Precision precision, const char *name)
143         : ShaderPackingFunctionTestInstance(context, shaderType, spec, name)
144         , m_precision(precision)
145     {
146     }
147 
iterate(void)148     tcu::TestStatus iterate(void)
149     {
150         de::Random rnd(deStringHash(m_name) ^ 0x776002);
151         std::vector<tcu::Vec2> inputs;
152         std::vector<uint32_t> outputs;
153         const int                    maxDiff = m_precision == glu::PRECISION_HIGHP    ? 1        : // Rounding only.
154                                                   m_precision == glu::PRECISION_MEDIUMP    ? 33    : // (2^-10) * (2^15) + 1
155                                                   m_precision == glu::PRECISION_LOWP    ? 129    : 0;    // (2^-8) * (2^15) + 1
156 
157         // Special values to check.
158         inputs.push_back(tcu::Vec2(0.0f, 0.0f));
159         inputs.push_back(tcu::Vec2(-1.0f, 1.0f));
160         inputs.push_back(tcu::Vec2(0.5f, -0.5f));
161         inputs.push_back(tcu::Vec2(-1.5f, 1.5f));
162         inputs.push_back(tcu::Vec2(0.25f, -0.75f));
163 
164         // Random values, mostly in range.
165         for (int ndx = 0; ndx < 15; ndx++)
166         {
167             inputs.push_back(tcu::randomVector<float, 2>(rnd, tcu::Vec2(-1.25f), tcu::Vec2(1.25f)));
168         }
169 
170         // Large random values.
171         for (int ndx = 0; ndx < 80; ndx++)
172         {
173             inputs.push_back(tcu::randomVector<float, 2>(rnd, tcu::Vec2(-0.5e6f), tcu::Vec2(0.5e6f)));
174         }
175 
176         outputs.resize(inputs.size());
177 
178         m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values"
179                            << tcu::TestLog::EndMessage;
180 
181         {
182             const void *in = &inputs[0];
183             void *out      = &outputs[0];
184 
185             m_executor->execute((int)inputs.size(), &in, &out);
186         }
187 
188         // Verify
189         {
190             const int numValues = (int)inputs.size();
191             const int maxPrints = 10;
192             int numFailed       = 0;
193 
194             for (int valNdx = 0; valNdx < numValues; valNdx++)
195             {
196                 const uint16_t ref0 =
197                     (uint16_t)de::clamp(deRoundFloatToInt32(de::clamp(inputs[valNdx].x(), -1.0f, 1.0f) * 32767.0f),
198                                         -(1 << 15), (1 << 15) - 1);
199                 const uint16_t ref1 =
200                     (uint16_t)de::clamp(deRoundFloatToInt32(de::clamp(inputs[valNdx].y(), -1.0f, 1.0f) * 32767.0f),
201                                         -(1 << 15), (1 << 15) - 1);
202                 const uint32_t ref  = (ref1 << 16) | ref0;
203                 const uint32_t res  = outputs[valNdx];
204                 const uint16_t res0 = (uint16_t)(res & 0xffff);
205                 const uint16_t res1 = (uint16_t)(res >> 16);
206                 const int diff0     = de::abs((int)ref0 - (int)res0);
207                 const int diff1     = de::abs((int)ref1 - (int)res1);
208 
209                 if (diff0 > maxDiff || diff1 > maxDiff)
210                 {
211                     if (numFailed < maxPrints)
212                     {
213                         m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx
214                                            << ", expected packSnorm2x16(" << inputs[valNdx] << ") = " << tcu::toHex(ref)
215                                            << ", got " << tcu::toHex(res) << "\n  diffs = (" << diff0 << ", " << diff1
216                                            << "), max diff = " << maxDiff << TestLog::EndMessage;
217                     }
218                     else if (numFailed == maxPrints)
219                         m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
220 
221                     numFailed += 1;
222                 }
223             }
224 
225             m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed"
226                                << TestLog::EndMessage;
227 
228             if (numFailed == 0)
229                 return tcu::TestStatus::pass("Pass");
230             else
231                 return tcu::TestStatus::fail("Result comparison failed");
232         }
233     }
234 
235 private:
236     const glu::Precision m_precision;
237 };
238 
239 class PackSnorm2x16Case : public ShaderPackingFunctionCase
240 {
241 public:
PackSnorm2x16Case(tcu::TestContext & testCtx,glu::ShaderType shaderType,glu::Precision precision)242     PackSnorm2x16Case(tcu::TestContext &testCtx, glu::ShaderType shaderType, glu::Precision precision)
243         : ShaderPackingFunctionCase(
244               testCtx,
245               (string("packsnorm2x16") + getPrecisionPostfix(precision) + getShaderTypePostfix(shaderType)).c_str(),
246               shaderType)
247         , m_precision(precision)
248     {
249         m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_FLOAT_VEC2, precision)));
250         m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
251 
252         m_spec.source = "out0 = packSnorm2x16(in0);";
253     }
254 
createInstance(Context & ctx) const255     TestInstance *createInstance(Context &ctx) const
256     {
257         return new PackSnorm2x16CaseInstance(ctx, m_shaderType, m_spec, m_precision, getName());
258     }
259 
260 private:
261     const glu::Precision m_precision;
262 };
263 
264 class UnpackSnorm2x16CaseInstance : public ShaderPackingFunctionTestInstance
265 {
266 public:
UnpackSnorm2x16CaseInstance(Context & context,glu::ShaderType shaderType,const ShaderSpec & spec,const char * name)267     UnpackSnorm2x16CaseInstance(Context &context, glu::ShaderType shaderType, const ShaderSpec &spec, const char *name)
268         : ShaderPackingFunctionTestInstance(context, shaderType, spec, name)
269     {
270     }
271 
iterate(void)272     tcu::TestStatus iterate(void)
273     {
274         const uint32_t maxDiff = 1; // Rounding error.
275         de::Random rnd(deStringHash(m_name) ^ 0x776002);
276         std::vector<uint32_t> inputs;
277         std::vector<tcu::Vec2> outputs;
278 
279         inputs.push_back(0x00000000u);
280         inputs.push_back(0x7fff8000u);
281         inputs.push_back(0x80007fffu);
282         inputs.push_back(0xffffffffu);
283         inputs.push_back(0x0001fffeu);
284 
285         // Random values.
286         for (int ndx = 0; ndx < 95; ndx++)
287             inputs.push_back(rnd.getUint32());
288 
289         outputs.resize(inputs.size());
290 
291         m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values"
292                            << tcu::TestLog::EndMessage;
293 
294         {
295             const void *in = &inputs[0];
296             void *out      = &outputs[0];
297 
298             m_executor->execute((int)inputs.size(), &in, &out);
299         }
300 
301         // Verify
302         {
303             const int numValues = (int)inputs.size();
304             const int maxPrints = 10;
305             int numFailed       = 0;
306 
307             for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
308             {
309                 const int16_t in0 = (int16_t)(uint16_t)(inputs[valNdx] & 0xffff);
310                 const int16_t in1 = (int16_t)(uint16_t)(inputs[valNdx] >> 16);
311                 const float ref0  = de::clamp(float(in0) / 32767.f, -1.0f, 1.0f);
312                 const float ref1  = de::clamp(float(in1) / 32767.f, -1.0f, 1.0f);
313                 const float res0  = outputs[valNdx].x();
314                 const float res1  = outputs[valNdx].y();
315 
316                 const uint32_t diff0 = getUlpDiff(ref0, res0);
317                 const uint32_t diff1 = getUlpDiff(ref1, res1);
318 
319                 if (diff0 > maxDiff || diff1 > maxDiff)
320                 {
321                     if (numFailed < maxPrints)
322                     {
323                         m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx << ",\n"
324                                            << "  expected unpackSnorm2x16(" << tcu::toHex(inputs[valNdx]) << ") = "
325                                            << "vec2(" << HexFloat(ref0) << ", " << HexFloat(ref1) << ")"
326                                            << ", got vec2(" << HexFloat(res0) << ", " << HexFloat(res1) << ")"
327                                            << "\n  ULP diffs = (" << diff0 << ", " << diff1
328                                            << "), max diff = " << maxDiff << TestLog::EndMessage;
329                     }
330                     else if (numFailed == maxPrints)
331                         m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
332 
333                     numFailed += 1;
334                 }
335             }
336 
337             m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed"
338                                << TestLog::EndMessage;
339 
340             if (numFailed == 0)
341                 return tcu::TestStatus::pass("Pass");
342             else
343                 return tcu::TestStatus::fail("Result comparison failed");
344         }
345     }
346 };
347 
348 class UnpackSnorm2x16Case : public ShaderPackingFunctionCase
349 {
350 public:
UnpackSnorm2x16Case(tcu::TestContext & testCtx,glu::ShaderType shaderType)351     UnpackSnorm2x16Case(tcu::TestContext &testCtx, glu::ShaderType shaderType)
352         : ShaderPackingFunctionCase(testCtx, (string("unpacksnorm2x16") + getShaderTypePostfix(shaderType)).c_str(),
353                                     shaderType)
354     {
355         m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
356         m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_FLOAT_VEC2, glu::PRECISION_HIGHP)));
357 
358         m_spec.source = "out0 = unpackSnorm2x16(in0);";
359     }
360 
createInstance(Context & ctx) const361     TestInstance *createInstance(Context &ctx) const
362     {
363         return new UnpackSnorm2x16CaseInstance(ctx, m_shaderType, m_spec, getName());
364     }
365 };
366 
367 class PackUnorm2x16CaseInstance : public ShaderPackingFunctionTestInstance
368 {
369 public:
PackUnorm2x16CaseInstance(Context & context,glu::ShaderType shaderType,const ShaderSpec & spec,glu::Precision precision,const char * name)370     PackUnorm2x16CaseInstance(Context &context, glu::ShaderType shaderType, const ShaderSpec &spec,
371                               glu::Precision precision, const char *name)
372         : ShaderPackingFunctionTestInstance(context, shaderType, spec, name)
373         , m_precision(precision)
374     {
375     }
376 
iterate(void)377     tcu::TestStatus iterate(void)
378     {
379         de::Random rnd(deStringHash(m_name) ^ 0x776002);
380         std::vector<tcu::Vec2> inputs;
381         std::vector<uint32_t> outputs;
382         const int                    maxDiff = m_precision == glu::PRECISION_HIGHP    ? 1        : // Rounding only.
383                                                   m_precision == glu::PRECISION_MEDIUMP    ? 65    : // (2^-10) * (2^16) + 1
384                                                   m_precision == glu::PRECISION_LOWP    ? 257    : 0;    // (2^-8) * (2^16) + 1
385 
386         // Special values to check.
387         inputs.push_back(tcu::Vec2(0.0f, 0.0f));
388         inputs.push_back(tcu::Vec2(0.5f, 1.0f));
389         inputs.push_back(tcu::Vec2(1.0f, 0.5f));
390         inputs.push_back(tcu::Vec2(-0.5f, 1.5f));
391         inputs.push_back(tcu::Vec2(0.25f, 0.75f));
392 
393         // Random values, mostly in range.
394         for (int ndx = 0; ndx < 15; ndx++)
395         {
396             inputs.push_back(tcu::randomVector<float, 2>(rnd, tcu::Vec2(0.0f), tcu::Vec2(1.25f)));
397         }
398 
399         // Large random values.
400         for (int ndx = 0; ndx < 80; ndx++)
401         {
402             inputs.push_back(tcu::randomVector<float, 2>(rnd, tcu::Vec2(-1e5f), tcu::Vec2(0.9e6f)));
403         }
404 
405         outputs.resize(inputs.size());
406 
407         m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values"
408                            << tcu::TestLog::EndMessage;
409 
410         {
411             const void *in = &inputs[0];
412             void *out      = &outputs[0];
413 
414             m_executor->execute((int)inputs.size(), &in, &out);
415         }
416 
417         // Verify
418         {
419             const int numValues = (int)inputs.size();
420             const int maxPrints = 10;
421             int numFailed       = 0;
422 
423             for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
424             {
425                 const uint16_t ref0 = (uint16_t)de::clamp(
426                     deRoundFloatToInt32(de::clamp(inputs[valNdx].x(), 0.0f, 1.0f) * 65535.0f), 0, (1 << 16) - 1);
427                 const uint16_t ref1 = (uint16_t)de::clamp(
428                     deRoundFloatToInt32(de::clamp(inputs[valNdx].y(), 0.0f, 1.0f) * 65535.0f), 0, (1 << 16) - 1);
429                 const uint32_t ref  = (ref1 << 16) | ref0;
430                 const uint32_t res  = outputs[valNdx];
431                 const uint16_t res0 = (uint16_t)(res & 0xffff);
432                 const uint16_t res1 = (uint16_t)(res >> 16);
433                 const int diff0     = de::abs((int)ref0 - (int)res0);
434                 const int diff1     = de::abs((int)ref1 - (int)res1);
435 
436                 if (diff0 > maxDiff || diff1 > maxDiff)
437                 {
438                     if (numFailed < maxPrints)
439                     {
440                         m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx
441                                            << ", expected packUnorm2x16(" << inputs[valNdx] << ") = " << tcu::toHex(ref)
442                                            << ", got " << tcu::toHex(res) << "\n  diffs = (" << diff0 << ", " << diff1
443                                            << "), max diff = " << maxDiff << TestLog::EndMessage;
444                     }
445                     else if (numFailed == maxPrints)
446                         m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
447 
448                     numFailed += 1;
449                 }
450             }
451 
452             m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed"
453                                << TestLog::EndMessage;
454 
455             if (numFailed == 0)
456                 return tcu::TestStatus::pass("Pass");
457             else
458                 return tcu::TestStatus::fail("Result comparison failed");
459         }
460     }
461 
462 private:
463     const glu::Precision m_precision;
464 };
465 
466 class PackUnorm2x16Case : public ShaderPackingFunctionCase
467 {
468 public:
PackUnorm2x16Case(tcu::TestContext & testCtx,glu::ShaderType shaderType,glu::Precision precision)469     PackUnorm2x16Case(tcu::TestContext &testCtx, glu::ShaderType shaderType, glu::Precision precision)
470         : ShaderPackingFunctionCase(
471               testCtx,
472               (string("packunorm2x16") + getPrecisionPostfix(precision) + getShaderTypePostfix(shaderType)).c_str(),
473               shaderType)
474         , m_precision(precision)
475     {
476         m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_FLOAT_VEC2, precision)));
477         m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
478 
479         m_spec.source = "out0 = packUnorm2x16(in0);";
480     }
481 
createInstance(Context & ctx) const482     TestInstance *createInstance(Context &ctx) const
483     {
484         return new PackUnorm2x16CaseInstance(ctx, m_shaderType, m_spec, m_precision, getName());
485     }
486 
487 private:
488     const glu::Precision m_precision;
489 };
490 
491 class UnpackUnorm2x16CaseInstance : public ShaderPackingFunctionTestInstance
492 {
493 public:
UnpackUnorm2x16CaseInstance(Context & context,glu::ShaderType shaderType,const ShaderSpec & spec,const char * name)494     UnpackUnorm2x16CaseInstance(Context &context, glu::ShaderType shaderType, const ShaderSpec &spec, const char *name)
495         : ShaderPackingFunctionTestInstance(context, shaderType, spec, name)
496     {
497     }
498 
iterate(void)499     tcu::TestStatus iterate(void)
500     {
501         const uint32_t maxDiff = 1; // Rounding error.
502         de::Random rnd(deStringHash(m_name) ^ 0x776002);
503         std::vector<uint32_t> inputs;
504         std::vector<tcu::Vec2> outputs;
505 
506         inputs.push_back(0x00000000u);
507         inputs.push_back(0x7fff8000u);
508         inputs.push_back(0x80007fffu);
509         inputs.push_back(0xffffffffu);
510         inputs.push_back(0x0001fffeu);
511 
512         // Random values.
513         for (int ndx = 0; ndx < 95; ndx++)
514             inputs.push_back(rnd.getUint32());
515 
516         outputs.resize(inputs.size());
517 
518         m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values"
519                            << tcu::TestLog::EndMessage;
520 
521         {
522             const void *in = &inputs[0];
523             void *out      = &outputs[0];
524 
525             m_executor->execute((int)inputs.size(), &in, &out);
526         }
527 
528         // Verify
529         {
530             const int numValues = (int)inputs.size();
531             const int maxPrints = 10;
532             int numFailed       = 0;
533 
534             for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
535             {
536                 const uint16_t in0 = (uint16_t)(inputs[valNdx] & 0xffff);
537                 const uint16_t in1 = (uint16_t)(inputs[valNdx] >> 16);
538                 const float ref0   = float(in0) / 65535.0f;
539                 const float ref1   = float(in1) / 65535.0f;
540                 const float res0   = outputs[valNdx].x();
541                 const float res1   = outputs[valNdx].y();
542 
543                 const uint32_t diff0 = getUlpDiff(ref0, res0);
544                 const uint32_t diff1 = getUlpDiff(ref1, res1);
545 
546                 if (diff0 > maxDiff || diff1 > maxDiff)
547                 {
548                     if (numFailed < maxPrints)
549                     {
550                         m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx << ",\n"
551                                            << "  expected unpackUnorm2x16(" << tcu::toHex(inputs[valNdx]) << ") = "
552                                            << "vec2(" << HexFloat(ref0) << ", " << HexFloat(ref1) << ")"
553                                            << ", got vec2(" << HexFloat(res0) << ", " << HexFloat(res1) << ")"
554                                            << "\n  ULP diffs = (" << diff0 << ", " << diff1
555                                            << "), max diff = " << maxDiff << TestLog::EndMessage;
556                     }
557                     else if (numFailed == maxPrints)
558                         m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
559 
560                     numFailed += 1;
561                 }
562             }
563 
564             m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed"
565                                << TestLog::EndMessage;
566 
567             if (numFailed == 0)
568                 return tcu::TestStatus::pass("Pass");
569             else
570                 return tcu::TestStatus::fail("Result comparison failed");
571         }
572     }
573 };
574 
575 class UnpackUnorm2x16Case : public ShaderPackingFunctionCase
576 {
577 public:
UnpackUnorm2x16Case(tcu::TestContext & testCtx,glu::ShaderType shaderType)578     UnpackUnorm2x16Case(tcu::TestContext &testCtx, glu::ShaderType shaderType)
579         : ShaderPackingFunctionCase(testCtx, (string("unpackunorm2x16") + getShaderTypePostfix(shaderType)).c_str(),
580                                     shaderType)
581     {
582         m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
583         m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_FLOAT_VEC2, glu::PRECISION_HIGHP)));
584 
585         m_spec.source = "out0 = unpackUnorm2x16(in0);";
586     }
587 
createInstance(Context & ctx) const588     TestInstance *createInstance(Context &ctx) const
589     {
590         return new UnpackUnorm2x16CaseInstance(ctx, m_shaderType, m_spec, getName());
591     }
592 };
593 
594 class PackHalf2x16CaseInstance : public ShaderPackingFunctionTestInstance
595 {
596 public:
PackHalf2x16CaseInstance(Context & context,glu::ShaderType shaderType,const ShaderSpec & spec,const char * name)597     PackHalf2x16CaseInstance(Context &context, glu::ShaderType shaderType, const ShaderSpec &spec, const char *name)
598         : ShaderPackingFunctionTestInstance(context, shaderType, spec, name)
599     {
600     }
601 
iterate(void)602     tcu::TestStatus iterate(void)
603     {
604         const int maxDiff = 0; // Values can be represented exactly in mediump.
605         de::Random rnd(deStringHash(m_name) ^ 0x776002);
606         std::vector<tcu::Vec2> inputs;
607         std::vector<uint32_t> outputs;
608 
609         // Special values to check.
610         inputs.push_back(tcu::Vec2(0.0f, 0.0f));
611         inputs.push_back(tcu::Vec2(0.5f, 1.0f));
612         inputs.push_back(tcu::Vec2(1.0f, 0.5f));
613         inputs.push_back(tcu::Vec2(-0.5f, 1.5f));
614         inputs.push_back(tcu::Vec2(0.25f, 0.75f));
615 
616         // Random values.
617         {
618             const int minExp = -14;
619             const int maxExp = 15;
620 
621             for (int ndx = 0; ndx < 95; ndx++)
622             {
623                 tcu::Vec2 v;
624                 for (int c = 0; c < 2; c++)
625                 {
626                     const int s             = rnd.getBool() ? 1 : -1;
627                     const int exp           = rnd.getInt(minExp, maxExp);
628                     const uint32_t mantissa = rnd.getUint32() & ((1 << 23) - 1);
629 
630                     v[c] = tcu::Float32::construct(s, exp ? exp : 1 /* avoid denormals */, (1u << 23) | mantissa)
631                                .asFloat();
632                 }
633                 inputs.push_back(v);
634             }
635         }
636 
637         // Convert input values to fp16 and back to make sure they can be represented exactly in mediump.
638         for (std::vector<tcu::Vec2>::iterator inVal = inputs.begin(); inVal != inputs.end(); ++inVal)
639             *inVal = tcu::Vec2(tcu::Float16(inVal->x()).asFloat(), tcu::Float16(inVal->y()).asFloat());
640 
641         outputs.resize(inputs.size());
642 
643         m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values"
644                            << tcu::TestLog::EndMessage;
645 
646         {
647             const void *in = &inputs[0];
648             void *out      = &outputs[0];
649 
650             m_executor->execute((int)inputs.size(), &in, &out);
651         }
652 
653         // Verify
654         {
655             const int numValues = (int)inputs.size();
656             const int maxPrints = 10;
657             int numFailed       = 0;
658 
659             for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
660             {
661                 const uint16_t ref0 = (uint16_t)tcu::Float16(inputs[valNdx].x()).bits();
662                 const uint16_t ref1 = (uint16_t)tcu::Float16(inputs[valNdx].y()).bits();
663                 const uint32_t ref  = (ref1 << 16) | ref0;
664                 const uint32_t res  = outputs[valNdx];
665                 const uint16_t res0 = (uint16_t)(res & 0xffff);
666                 const uint16_t res1 = (uint16_t)(res >> 16);
667                 const int diff0     = de::abs((int)ref0 - (int)res0);
668                 const int diff1     = de::abs((int)ref1 - (int)res1);
669 
670                 if (diff0 > maxDiff || diff1 > maxDiff)
671                 {
672                     if (numFailed < maxPrints)
673                     {
674                         m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx
675                                            << ", expected packHalf2x16(" << inputs[valNdx] << ") = " << tcu::toHex(ref)
676                                            << ", got " << tcu::toHex(res) << "\n  diffs = (" << diff0 << ", " << diff1
677                                            << "), max diff = " << maxDiff << TestLog::EndMessage;
678                     }
679                     else if (numFailed == maxPrints)
680                         m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
681 
682                     numFailed += 1;
683                 }
684             }
685 
686             m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed"
687                                << TestLog::EndMessage;
688 
689             if (numFailed == 0)
690                 return tcu::TestStatus::pass("Pass");
691             else
692                 return tcu::TestStatus::fail("Result comparison failed");
693         }
694     }
695 };
696 
697 class PackHalf2x16Case : public ShaderPackingFunctionCase
698 {
699 public:
PackHalf2x16Case(tcu::TestContext & testCtx,glu::ShaderType shaderType)700     PackHalf2x16Case(tcu::TestContext &testCtx, glu::ShaderType shaderType)
701         : ShaderPackingFunctionCase(testCtx, (string("packhalf2x16") + getShaderTypePostfix(shaderType)).c_str(),
702                                     shaderType)
703     {
704         m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_FLOAT_VEC2, glu::PRECISION_HIGHP)));
705         m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
706 
707         m_spec.source = "out0 = packHalf2x16(in0);";
708     }
709 
createInstance(Context & ctx) const710     TestInstance *createInstance(Context &ctx) const
711     {
712         return new PackHalf2x16CaseInstance(ctx, m_shaderType, m_spec, getName());
713     }
714 };
715 
716 class UnpackHalf2x16CaseInstance : public ShaderPackingFunctionTestInstance
717 {
718     enum Sign
719     {
720         POSITIVE = 0,
721         NEGATIVE
722     };
723     enum SubnormalizedConversionType
724     {
725         UNKNOWN = 0,
726         CONVERTED,
727         ZERO_FLUSHED,
728     };
729 
730 public:
UnpackHalf2x16CaseInstance(Context & context,glu::ShaderType shaderType,const ShaderSpec & spec,const char * name)731     UnpackHalf2x16CaseInstance(Context &context, glu::ShaderType shaderType, const ShaderSpec &spec, const char *name)
732         : ShaderPackingFunctionTestInstance(context, shaderType, spec, name)
733     {
734     }
735 
iterate(void)736     tcu::TestStatus iterate(void)
737     {
738         const int minExp           = -14;
739         const int maxExp           = 15;
740         const int mantBits         = 10;
741         const uint32_t mantBitMask = (1u << mantBits) - 1u;
742         tcu::TestLog &log          = m_testCtx.getLog();
743 
744         de::Random rnd(deStringHash(m_name) ^ 0x776002);
745         std::vector<uint32_t> inputs;
746         std::vector<tcu::Vec2> outputs;
747 
748         // Special values.
749         inputs.push_back((tcu::Float16(0.0f).bits() << 16) | tcu::Float16(1.0f).bits());
750         inputs.push_back((tcu::Float16(1.0f).bits() << 16) | tcu::Float16(0.0f).bits());
751         inputs.push_back((tcu::Float16(-1.0f).bits() << 16) | tcu::Float16(0.5f).bits());
752         inputs.push_back((tcu::Float16(0.5f).bits() << 16) | tcu::Float16(-0.5f).bits());
753         // Special subnormal value: single lowest bit set
754         inputs.push_back((tcu::Float16(composeHalfFloat(POSITIVE, 0u, 1u)).bits() << 16) |
755                          tcu::Float16(composeHalfFloat(NEGATIVE, 0u, 1u)).bits());
756         // Special subnormal value: single highest fraction bit set
757         inputs.push_back((tcu::Float16(composeHalfFloat(NEGATIVE, 0u, 1u << (mantBits - 1u))).bits() << 16) |
758                          tcu::Float16(composeHalfFloat(POSITIVE, 0u, 1u << (mantBits - 1u))).bits());
759         // Special subnormal value: all fraction bits set
760         inputs.push_back((tcu::Float16(composeHalfFloat(POSITIVE, 0u, mantBitMask)).bits() << 16) |
761                          tcu::Float16(composeHalfFloat(NEGATIVE, 0u, mantBitMask)).bits());
762 
763         // Construct random values.
764         for (int ndx = 0; ndx < 90; ndx++)
765         {
766             uint32_t inVal = 0;
767             for (int c = 0; c < 2; c++)
768             {
769                 const int s             = rnd.getBool() ? 1 : -1;
770                 const int exp           = rnd.getInt(minExp, maxExp);
771                 const uint32_t mantissa = rnd.getUint32() & mantBitMask;
772                 const uint16_t value    = tcu::Float16::construct(s, exp != 0 ? exp : 1 /* avoid denorm */,
773                                                                   static_cast<uint16_t>((1u << 10) | mantissa))
774                                            .bits();
775 
776                 inVal |= value << (16u * c);
777             }
778             inputs.push_back(inVal);
779         }
780         for (int ndx = 0; ndx < 15; ndx++)
781         {
782             uint32_t inVal = 0;
783             for (int c = 0; c < 2; c++)
784             {
785                 const Sign sign         = rnd.getBool() ? POSITIVE : NEGATIVE;
786                 const uint32_t mantissa = rnd.getUint32() & mantBitMask;
787                 const uint16_t value    = tcu::Float16(composeHalfFloat(sign, 0u /* force denorm */, mantissa)).bits();
788 
789                 inVal |= value << (16u * c);
790             }
791             inputs.push_back(inVal);
792         }
793 
794         outputs.resize(inputs.size());
795 
796         log << TestLog::Message << "Executing shader for " << inputs.size() << " input values"
797             << tcu::TestLog::EndMessage;
798 
799         {
800             const void *in = &inputs[0];
801             void *out      = &outputs[0];
802 
803             m_executor->execute((int)inputs.size(), &in, &out);
804         }
805 
806         // Verify
807         {
808             const int numValues                    = (int)inputs.size();
809             const int maxPrints                    = 10;
810             int numFailed                          = 0;
811             SubnormalizedConversionType conversion = UNKNOWN;
812 
813             for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
814             {
815                 const uint16_t in0 = (uint16_t)(inputs[valNdx] & 0xffff);
816                 const uint16_t in1 = (uint16_t)(inputs[valNdx] >> 16);
817                 const float res0   = outputs[valNdx].x();
818                 const float res1   = outputs[valNdx].y();
819 
820                 const bool value0 = checkValue(in0, res0, conversion);
821                 // note: do not avoid calling checkValue for in1 if it failed for in0 by using && laziness
822                 // checkValue may potentially change 'conversion' parameter if it was set to UNKNOWN so far
823                 const bool value1   = checkValue(in1, res1, conversion);
824                 const bool valuesOK = value0 && value1;
825 
826                 if (!valuesOK)
827                 {
828                     if (numFailed < maxPrints)
829                         printErrorMessage(log, valNdx, in0, in1, res0, res1);
830                     else if (numFailed == maxPrints)
831                         log << TestLog::Message << "..." << TestLog::EndMessage;
832                     ++numFailed;
833                 }
834             }
835 
836             log << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed"
837                 << TestLog::EndMessage;
838 
839             if (numFailed == 0)
840                 return tcu::TestStatus::pass("Pass");
841             else
842                 return tcu::TestStatus::fail("Result comparison failed");
843         }
844     }
845 
846 private:
checkValue(uint16_t inValue,float outValue,SubnormalizedConversionType & conversion)847     bool checkValue(uint16_t inValue, float outValue, SubnormalizedConversionType &conversion)
848     {
849         const tcu::Float16 temp = tcu::Float16(inValue);
850         const float ref         = temp.asFloat();
851         const uint32_t refBits  = tcu::Float32(ref).bits();
852         const uint32_t resBits  = tcu::Float32(outValue).bits();
853         const bool bitMatch     = (refBits ^ resBits) == 0u;
854         const bool denorm       = temp.isDenorm();
855 
856         if (conversion != CONVERTED && denorm)
857         {
858             if (resBits == 0 || (ref < 0 && resBits == 0x80000000UL))
859             {
860                 conversion = ZERO_FLUSHED;
861                 return true;
862             }
863             if (conversion != ZERO_FLUSHED && bitMatch)
864             {
865                 conversion = CONVERTED;
866                 return true;
867             }
868             return false;
869         }
870         else if (bitMatch)
871             return true;
872         return false;
873     }
printErrorMessage(tcu::TestLog & log,uint32_t valNdx,uint16_t in0,uint16_t in1,float out0,float out1)874     void printErrorMessage(tcu::TestLog &log, uint32_t valNdx, uint16_t in0, uint16_t in1, float out0, float out1)
875     {
876         const float ref0        = tcu::Float16(in0).asFloat();
877         const uint32_t refBits0 = tcu::Float32(ref0).bits();
878         const uint32_t resBits0 = tcu::Float32(out0).bits();
879         const float ref1        = tcu::Float16(in1).asFloat();
880         const uint32_t refBits1 = tcu::Float32(ref1).bits();
881         const uint32_t resBits1 = tcu::Float32(out1).bits();
882         log << TestLog::Message << "ERROR: Mismatch in value " << valNdx << ",\n"
883             << "  expected unpackHalf2x16(" << tcu::toHex((in1 << 16u) | in0) << ") = "
884             << "vec2(" << ref0 << " / " << tcu::toHex(refBits0) << ", " << ref1 << " / " << tcu::toHex(refBits1) << ")"
885             << ", got vec2(" << out0 << " / " << tcu::toHex(resBits0) << ", " << out1 << " / " << tcu::toHex(resBits1)
886             << ")" << TestLog::EndMessage;
887     }
composeHalfFloat(Sign sign,uint32_t exponent,uint32_t significand)888     uint16_t composeHalfFloat(Sign sign, uint32_t exponent, uint32_t significand)
889     {
890         const uint32_t BitMask_05 = (1u << 5u) - 1u;
891         const uint32_t BitMask_10 = (1u << 10u) - 1u;
892         const uint32_t BitMask_16 = (1u << 16u) - 1u;
893         DE_UNREF(BitMask_05);
894         DE_UNREF(BitMask_10);
895         DE_UNREF(BitMask_16);
896         DE_ASSERT((exponent & ~BitMask_05) == 0u);
897         DE_ASSERT((significand & ~BitMask_10) == 0u);
898         const uint32_t value = (((sign == NEGATIVE ? 1u : 0u) << 5u | exponent) << 10u) | significand;
899         DE_ASSERT((value & ~BitMask_16) == 0u);
900         return static_cast<uint16_t>(value);
901     }
902 };
903 
904 class UnpackHalf2x16Case : public ShaderPackingFunctionCase
905 {
906 public:
UnpackHalf2x16Case(tcu::TestContext & testCtx,glu::ShaderType shaderType)907     UnpackHalf2x16Case(tcu::TestContext &testCtx, glu::ShaderType shaderType)
908         : ShaderPackingFunctionCase(testCtx, (string("unpackhalf2x16") + getShaderTypePostfix(shaderType)).c_str(),
909                                     shaderType)
910     {
911         m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
912         m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_FLOAT_VEC2, glu::PRECISION_MEDIUMP)));
913 
914         m_spec.source = "out0 = unpackHalf2x16(in0);";
915     }
916 
createInstance(Context & ctx) const917     TestInstance *createInstance(Context &ctx) const
918     {
919         return new UnpackHalf2x16CaseInstance(ctx, m_shaderType, m_spec, getName());
920     }
921 };
922 
923 class PackSnorm4x8CaseInstance : public ShaderPackingFunctionTestInstance
924 {
925 public:
PackSnorm4x8CaseInstance(Context & context,glu::ShaderType shaderType,const ShaderSpec & spec,glu::Precision precision,const char * name)926     PackSnorm4x8CaseInstance(Context &context, glu::ShaderType shaderType, const ShaderSpec &spec,
927                              glu::Precision precision, const char *name)
928         : ShaderPackingFunctionTestInstance(context, shaderType, spec, name)
929         , m_precision(precision)
930     {
931     }
932 
iterate(void)933     tcu::TestStatus iterate(void)
934     {
935         de::Random rnd(deStringHash(m_name) ^ 0x42f2c0);
936         std::vector<tcu::Vec4> inputs;
937         std::vector<uint32_t> outputs;
938         const int                    maxDiff = m_precision == glu::PRECISION_HIGHP    ? 1    : // Rounding only.
939                                                   m_precision == glu::PRECISION_MEDIUMP    ? 1    : // (2^-10) * (2^7) + 1
940                                                   m_precision == glu::PRECISION_LOWP    ? 2    : 0;    // (2^-8) * (2^7) + 1
941 
942         // Special values to check.
943         inputs.push_back(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f));
944         inputs.push_back(tcu::Vec4(-1.0f, 1.0f, -1.0f, 1.0f));
945         inputs.push_back(tcu::Vec4(0.5f, -0.5f, -0.5f, 0.5f));
946         inputs.push_back(tcu::Vec4(-1.5f, 1.5f, -1.5f, 1.5f));
947         inputs.push_back(tcu::Vec4(0.25f, -0.75f, -0.25f, 0.75f));
948 
949         // Random values, mostly in range.
950         for (int ndx = 0; ndx < 15; ndx++)
951         {
952             inputs.push_back(tcu::randomVector<float, 4>(rnd, tcu::Vec4(-1.25f), tcu::Vec4(1.25f)));
953         }
954 
955         // Large random values.
956         for (int ndx = 0; ndx < 80; ndx++)
957         {
958             inputs.push_back(tcu::randomVector<float, 4>(rnd, tcu::Vec4(-0.5e6f), tcu::Vec4(0.5e6f)));
959         }
960 
961         outputs.resize(inputs.size());
962 
963         m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values"
964                            << tcu::TestLog::EndMessage;
965 
966         {
967             const void *in = &inputs[0];
968             void *out      = &outputs[0];
969 
970             m_executor->execute((int)inputs.size(), &in, &out);
971         }
972 
973         // Verify
974         {
975             const int numValues = (int)inputs.size();
976             const int maxPrints = 10;
977             int numFailed       = 0;
978 
979             for (int valNdx = 0; valNdx < numValues; valNdx++)
980             {
981                 const uint16_t ref0 = (uint8_t)de::clamp(
982                     deRoundFloatToInt32(de::clamp(inputs[valNdx].x(), -1.0f, 1.0f) * 127.0f), -(1 << 7), (1 << 7) - 1);
983                 const uint16_t ref1 = (uint8_t)de::clamp(
984                     deRoundFloatToInt32(de::clamp(inputs[valNdx].y(), -1.0f, 1.0f) * 127.0f), -(1 << 7), (1 << 7) - 1);
985                 const uint16_t ref2 = (uint8_t)de::clamp(
986                     deRoundFloatToInt32(de::clamp(inputs[valNdx].z(), -1.0f, 1.0f) * 127.0f), -(1 << 7), (1 << 7) - 1);
987                 const uint16_t ref3 = (uint8_t)de::clamp(
988                     deRoundFloatToInt32(de::clamp(inputs[valNdx].w(), -1.0f, 1.0f) * 127.0f), -(1 << 7), (1 << 7) - 1);
989                 const uint32_t ref =
990                     (uint32_t(ref3) << 24) | (uint32_t(ref2) << 16) | (uint32_t(ref1) << 8) | uint32_t(ref0);
991                 const uint32_t res  = outputs[valNdx];
992                 const uint16_t res0 = (uint8_t)(res & 0xff);
993                 const uint16_t res1 = (uint8_t)((res >> 8) & 0xff);
994                 const uint16_t res2 = (uint8_t)((res >> 16) & 0xff);
995                 const uint16_t res3 = (uint8_t)((res >> 24) & 0xff);
996                 const int diff0     = de::abs((int)ref0 - (int)res0);
997                 const int diff1     = de::abs((int)ref1 - (int)res1);
998                 const int diff2     = de::abs((int)ref2 - (int)res2);
999                 const int diff3     = de::abs((int)ref3 - (int)res3);
1000 
1001                 if (diff0 > maxDiff || diff1 > maxDiff || diff2 > maxDiff || diff3 > maxDiff)
1002                 {
1003                     if (numFailed < maxPrints)
1004                     {
1005                         m_testCtx.getLog()
1006                             << TestLog::Message << "ERROR: Mismatch in value " << valNdx << ", expected packSnorm4x8("
1007                             << inputs[valNdx] << ") = " << tcu::toHex(ref) << ", got " << tcu::toHex(res)
1008                             << "\n  diffs = " << tcu::IVec4(diff0, diff1, diff2, diff3) << ", max diff = " << maxDiff
1009                             << TestLog::EndMessage;
1010                     }
1011                     else if (numFailed == maxPrints)
1012                         m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
1013 
1014                     numFailed += 1;
1015                 }
1016             }
1017 
1018             m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed"
1019                                << TestLog::EndMessage;
1020 
1021             if (numFailed == 0)
1022                 return tcu::TestStatus::pass("Pass");
1023             else
1024                 return tcu::TestStatus::fail("Result comparison failed");
1025         }
1026     }
1027 
1028 private:
1029     const glu::Precision m_precision;
1030 };
1031 
1032 class PackSnorm4x8Case : public ShaderPackingFunctionCase
1033 {
1034 public:
PackSnorm4x8Case(tcu::TestContext & testCtx,glu::ShaderType shaderType,glu::Precision precision)1035     PackSnorm4x8Case(tcu::TestContext &testCtx, glu::ShaderType shaderType, glu::Precision precision)
1036         : ShaderPackingFunctionCase(
1037               testCtx,
1038               (string("packsnorm4x8") + getPrecisionPostfix(precision) + getShaderTypePostfix(shaderType)).c_str(),
1039               shaderType)
1040         , m_precision(precision)
1041     {
1042         m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_FLOAT_VEC4, precision)));
1043         m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
1044 
1045         m_spec.source = "out0 = packSnorm4x8(in0);";
1046     }
1047 
createInstance(Context & ctx) const1048     TestInstance *createInstance(Context &ctx) const
1049     {
1050         return new PackSnorm4x8CaseInstance(ctx, m_shaderType, m_spec, m_precision, getName());
1051     }
1052 
1053 private:
1054     const glu::Precision m_precision;
1055 };
1056 
1057 class UnpackSnorm4x8CaseInstance : public ShaderPackingFunctionTestInstance
1058 {
1059 public:
UnpackSnorm4x8CaseInstance(Context & context,glu::ShaderType shaderType,const ShaderSpec & spec,const char * name)1060     UnpackSnorm4x8CaseInstance(Context &context, glu::ShaderType shaderType, const ShaderSpec &spec, const char *name)
1061         : ShaderPackingFunctionTestInstance(context, shaderType, spec, name)
1062     {
1063     }
1064 
iterate(void)1065     tcu::TestStatus iterate(void)
1066     {
1067         const uint32_t maxDiff = 1; // Rounding error.
1068         de::Random rnd(deStringHash(m_name) ^ 0x776002);
1069         std::vector<uint32_t> inputs;
1070         std::vector<tcu::Vec4> outputs;
1071 
1072         inputs.push_back(0x00000000u);
1073         inputs.push_back(0x7fff8000u);
1074         inputs.push_back(0x80007fffu);
1075         inputs.push_back(0xffffffffu);
1076         inputs.push_back(0x0001fffeu);
1077 
1078         // Random values.
1079         for (int ndx = 0; ndx < 95; ndx++)
1080             inputs.push_back(rnd.getUint32());
1081 
1082         outputs.resize(inputs.size());
1083 
1084         m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values"
1085                            << tcu::TestLog::EndMessage;
1086 
1087         {
1088             const void *in = &inputs[0];
1089             void *out      = &outputs[0];
1090 
1091             m_executor->execute((int)inputs.size(), &in, &out);
1092         }
1093 
1094         // Verify
1095         {
1096             const int numValues = (int)inputs.size();
1097             const int maxPrints = 10;
1098             int numFailed       = 0;
1099 
1100             for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
1101             {
1102                 const int8_t in0 = (int8_t)(uint8_t)(inputs[valNdx] & 0xff);
1103                 const int8_t in1 = (int8_t)(uint8_t)((inputs[valNdx] >> 8) & 0xff);
1104                 const int8_t in2 = (int8_t)(uint8_t)((inputs[valNdx] >> 16) & 0xff);
1105                 const int8_t in3 = (int8_t)(uint8_t)(inputs[valNdx] >> 24);
1106                 const float ref0 = de::clamp(float(in0) / 127.f, -1.0f, 1.0f);
1107                 const float ref1 = de::clamp(float(in1) / 127.f, -1.0f, 1.0f);
1108                 const float ref2 = de::clamp(float(in2) / 127.f, -1.0f, 1.0f);
1109                 const float ref3 = de::clamp(float(in3) / 127.f, -1.0f, 1.0f);
1110                 const float res0 = outputs[valNdx].x();
1111                 const float res1 = outputs[valNdx].y();
1112                 const float res2 = outputs[valNdx].z();
1113                 const float res3 = outputs[valNdx].w();
1114 
1115                 const uint32_t diff0 = getUlpDiff(ref0, res0);
1116                 const uint32_t diff1 = getUlpDiff(ref1, res1);
1117                 const uint32_t diff2 = getUlpDiff(ref2, res2);
1118                 const uint32_t diff3 = getUlpDiff(ref3, res3);
1119 
1120                 if (diff0 > maxDiff || diff1 > maxDiff || diff2 > maxDiff || diff3 > maxDiff)
1121                 {
1122                     if (numFailed < maxPrints)
1123                     {
1124                         m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx << ",\n"
1125                                            << "  expected unpackSnorm4x8(" << tcu::toHex(inputs[valNdx]) << ") = "
1126                                            << "vec4(" << HexFloat(ref0) << ", " << HexFloat(ref1) << ", "
1127                                            << HexFloat(ref2) << ", " << HexFloat(ref3) << ")"
1128                                            << ", got vec4(" << HexFloat(res0) << ", " << HexFloat(res1) << ", "
1129                                            << HexFloat(res2) << ", " << HexFloat(res3) << ")"
1130                                            << "\n  ULP diffs = (" << diff0 << ", " << diff1 << ", " << diff2 << ", "
1131                                            << diff3 << "), max diff = " << maxDiff << TestLog::EndMessage;
1132                     }
1133                     else if (numFailed == maxPrints)
1134                         m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
1135 
1136                     numFailed += 1;
1137                 }
1138             }
1139 
1140             m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed"
1141                                << TestLog::EndMessage;
1142 
1143             if (numFailed == 0)
1144                 return tcu::TestStatus::pass("Pass");
1145             else
1146                 return tcu::TestStatus::fail("Result comparison failed");
1147         }
1148     }
1149 };
1150 
1151 class UnpackSnorm4x8Case : public ShaderPackingFunctionCase
1152 {
1153 public:
UnpackSnorm4x8Case(tcu::TestContext & testCtx,glu::ShaderType shaderType)1154     UnpackSnorm4x8Case(tcu::TestContext &testCtx, glu::ShaderType shaderType)
1155         : ShaderPackingFunctionCase(testCtx, (string("unpacksnorm4x8") + getShaderTypePostfix(shaderType)).c_str(),
1156                                     shaderType)
1157     {
1158         m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
1159         m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_FLOAT_VEC4, glu::PRECISION_HIGHP)));
1160 
1161         m_spec.source = "out0 = unpackSnorm4x8(in0);";
1162     }
1163 
createInstance(Context & ctx) const1164     TestInstance *createInstance(Context &ctx) const
1165     {
1166         return new UnpackSnorm4x8CaseInstance(ctx, m_shaderType, m_spec, getName());
1167     }
1168 };
1169 
1170 class PackUnorm4x8CaseInstance : public ShaderPackingFunctionTestInstance
1171 {
1172 public:
PackUnorm4x8CaseInstance(Context & context,glu::ShaderType shaderType,const ShaderSpec & spec,glu::Precision precision,const char * name)1173     PackUnorm4x8CaseInstance(Context &context, glu::ShaderType shaderType, const ShaderSpec &spec,
1174                              glu::Precision precision, const char *name)
1175         : ShaderPackingFunctionTestInstance(context, shaderType, spec, name)
1176         , m_precision(precision)
1177     {
1178     }
1179 
iterate(void)1180     tcu::TestStatus iterate(void)
1181     {
1182         de::Random rnd(deStringHash(m_name) ^ 0x776002);
1183         std::vector<tcu::Vec4> inputs;
1184         std::vector<uint32_t> outputs;
1185         const int                    maxDiff = m_precision == glu::PRECISION_HIGHP    ? 1    : // Rounding only.
1186                                                   m_precision == glu::PRECISION_MEDIUMP    ? 1    : // (2^-10) * (2^8) + 1
1187                                                   m_precision == glu::PRECISION_LOWP    ? 2    : 0;    // (2^-8) * (2^8) + 1
1188 
1189         // Special values to check.
1190         inputs.push_back(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f));
1191         inputs.push_back(tcu::Vec4(-1.0f, 1.0f, -1.0f, 1.0f));
1192         inputs.push_back(tcu::Vec4(0.5f, -0.5f, -0.5f, 0.5f));
1193         inputs.push_back(tcu::Vec4(-1.5f, 1.5f, -1.5f, 1.5f));
1194         inputs.push_back(tcu::Vec4(0.25f, -0.75f, -0.25f, 0.75f));
1195 
1196         // Random values, mostly in range.
1197         for (int ndx = 0; ndx < 15; ndx++)
1198         {
1199             inputs.push_back(tcu::randomVector<float, 4>(rnd, tcu::Vec4(-0.125f), tcu::Vec4(1.125f)));
1200         }
1201 
1202         // Large random values.
1203         for (int ndx = 0; ndx < 80; ndx++)
1204         {
1205             inputs.push_back(tcu::randomVector<float, 4>(rnd, tcu::Vec4(-1e5f), tcu::Vec4(0.9e6f)));
1206         }
1207 
1208         outputs.resize(inputs.size());
1209 
1210         m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values"
1211                            << tcu::TestLog::EndMessage;
1212 
1213         {
1214             const void *in = &inputs[0];
1215             void *out      = &outputs[0];
1216 
1217             m_executor->execute((int)inputs.size(), &in, &out);
1218         }
1219 
1220         // Verify
1221         {
1222             const int numValues = (int)inputs.size();
1223             const int maxPrints = 10;
1224             int numFailed       = 0;
1225 
1226             for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
1227             {
1228                 const uint16_t ref0 = (uint8_t)de::clamp(
1229                     deRoundFloatToInt32(de::clamp(inputs[valNdx].x(), 0.0f, 1.0f) * 255.0f), 0, (1 << 8) - 1);
1230                 const uint16_t ref1 = (uint8_t)de::clamp(
1231                     deRoundFloatToInt32(de::clamp(inputs[valNdx].y(), 0.0f, 1.0f) * 255.0f), 0, (1 << 8) - 1);
1232                 const uint16_t ref2 = (uint8_t)de::clamp(
1233                     deRoundFloatToInt32(de::clamp(inputs[valNdx].z(), 0.0f, 1.0f) * 255.0f), 0, (1 << 8) - 1);
1234                 const uint16_t ref3 = (uint8_t)de::clamp(
1235                     deRoundFloatToInt32(de::clamp(inputs[valNdx].w(), 0.0f, 1.0f) * 255.0f), 0, (1 << 8) - 1);
1236                 const uint32_t ref =
1237                     (uint32_t(ref3) << 24) | (uint32_t(ref2) << 16) | (uint32_t(ref1) << 8) | uint32_t(ref0);
1238                 const uint32_t res  = outputs[valNdx];
1239                 const uint16_t res0 = (uint8_t)(res & 0xff);
1240                 const uint16_t res1 = (uint8_t)((res >> 8) & 0xff);
1241                 const uint16_t res2 = (uint8_t)((res >> 16) & 0xff);
1242                 const uint16_t res3 = (uint8_t)((res >> 24) & 0xff);
1243                 const int diff0     = de::abs((int)ref0 - (int)res0);
1244                 const int diff1     = de::abs((int)ref1 - (int)res1);
1245                 const int diff2     = de::abs((int)ref2 - (int)res2);
1246                 const int diff3     = de::abs((int)ref3 - (int)res3);
1247 
1248                 if (diff0 > maxDiff || diff1 > maxDiff || diff2 > maxDiff || diff3 > maxDiff)
1249                 {
1250                     if (numFailed < maxPrints)
1251                     {
1252                         m_testCtx.getLog()
1253                             << TestLog::Message << "ERROR: Mismatch in value " << valNdx << ", expected packUnorm4x8("
1254                             << inputs[valNdx] << ") = " << tcu::toHex(ref) << ", got " << tcu::toHex(res)
1255                             << "\n  diffs = " << tcu::IVec4(diff0, diff1, diff2, diff3) << ", max diff = " << maxDiff
1256                             << TestLog::EndMessage;
1257                     }
1258                     else if (numFailed == maxPrints)
1259                         m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
1260 
1261                     numFailed += 1;
1262                 }
1263             }
1264 
1265             m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed"
1266                                << TestLog::EndMessage;
1267 
1268             if (numFailed == 0)
1269                 return tcu::TestStatus::pass("Pass");
1270             else
1271                 return tcu::TestStatus::fail("Result comparison failed");
1272         }
1273     }
1274 
1275 private:
1276     const glu::Precision m_precision;
1277 };
1278 
1279 class PackUnorm4x8Case : public ShaderPackingFunctionCase
1280 {
1281 public:
PackUnorm4x8Case(tcu::TestContext & testCtx,glu::ShaderType shaderType,glu::Precision precision)1282     PackUnorm4x8Case(tcu::TestContext &testCtx, glu::ShaderType shaderType, glu::Precision precision)
1283         : ShaderPackingFunctionCase(
1284               testCtx,
1285               (string("packunorm4x8") + getPrecisionPostfix(precision) + getShaderTypePostfix(shaderType)).c_str(),
1286               shaderType)
1287         , m_precision(precision)
1288     {
1289         m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_FLOAT_VEC4, precision)));
1290         m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
1291 
1292         m_spec.source = "out0 = packUnorm4x8(in0);";
1293     }
1294 
createInstance(Context & ctx) const1295     TestInstance *createInstance(Context &ctx) const
1296     {
1297         return new PackUnorm4x8CaseInstance(ctx, m_shaderType, m_spec, m_precision, getName());
1298     }
1299 
1300 private:
1301     const glu::Precision m_precision;
1302 };
1303 
1304 class UnpackUnorm4x8CaseInstance : public ShaderPackingFunctionTestInstance
1305 {
1306 public:
UnpackUnorm4x8CaseInstance(Context & context,glu::ShaderType shaderType,const ShaderSpec & spec,const char * name)1307     UnpackUnorm4x8CaseInstance(Context &context, glu::ShaderType shaderType, const ShaderSpec &spec, const char *name)
1308         : ShaderPackingFunctionTestInstance(context, shaderType, spec, name)
1309     {
1310     }
1311 
iterate(void)1312     tcu::TestStatus iterate(void)
1313     {
1314         const uint32_t maxDiff = 1; // Rounding error.
1315         de::Random rnd(deStringHash(m_name) ^ 0x776002);
1316         std::vector<uint32_t> inputs;
1317         std::vector<tcu::Vec4> outputs;
1318 
1319         inputs.push_back(0x00000000u);
1320         inputs.push_back(0x7fff8000u);
1321         inputs.push_back(0x80007fffu);
1322         inputs.push_back(0xffffffffu);
1323         inputs.push_back(0x0001fffeu);
1324 
1325         // Random values.
1326         for (int ndx = 0; ndx < 95; ndx++)
1327             inputs.push_back(rnd.getUint32());
1328 
1329         outputs.resize(inputs.size());
1330 
1331         m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values"
1332                            << tcu::TestLog::EndMessage;
1333 
1334         {
1335             const void *in = &inputs[0];
1336             void *out      = &outputs[0];
1337 
1338             m_executor->execute((int)inputs.size(), &in, &out);
1339         }
1340 
1341         // Verify
1342         {
1343             const int numValues = (int)inputs.size();
1344             const int maxPrints = 10;
1345             int numFailed       = 0;
1346 
1347             for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
1348             {
1349                 const uint8_t in0 = (uint8_t)(inputs[valNdx] & 0xff);
1350                 const uint8_t in1 = (uint8_t)((inputs[valNdx] >> 8) & 0xff);
1351                 const uint8_t in2 = (uint8_t)((inputs[valNdx] >> 16) & 0xff);
1352                 const uint8_t in3 = (uint8_t)(inputs[valNdx] >> 24);
1353                 const float ref0  = de::clamp(float(in0) / 255.f, 0.0f, 1.0f);
1354                 const float ref1  = de::clamp(float(in1) / 255.f, 0.0f, 1.0f);
1355                 const float ref2  = de::clamp(float(in2) / 255.f, 0.0f, 1.0f);
1356                 const float ref3  = de::clamp(float(in3) / 255.f, 0.0f, 1.0f);
1357                 const float res0  = outputs[valNdx].x();
1358                 const float res1  = outputs[valNdx].y();
1359                 const float res2  = outputs[valNdx].z();
1360                 const float res3  = outputs[valNdx].w();
1361 
1362                 const uint32_t diff0 = getUlpDiff(ref0, res0);
1363                 const uint32_t diff1 = getUlpDiff(ref1, res1);
1364                 const uint32_t diff2 = getUlpDiff(ref2, res2);
1365                 const uint32_t diff3 = getUlpDiff(ref3, res3);
1366 
1367                 if (diff0 > maxDiff || diff1 > maxDiff || diff2 > maxDiff || diff3 > maxDiff)
1368                 {
1369                     if (numFailed < maxPrints)
1370                     {
1371                         m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx << ",\n"
1372                                            << "  expected unpackUnorm4x8(" << tcu::toHex(inputs[valNdx]) << ") = "
1373                                            << "vec4(" << HexFloat(ref0) << ", " << HexFloat(ref1) << ", "
1374                                            << HexFloat(ref2) << ", " << HexFloat(ref3) << ")"
1375                                            << ", got vec4(" << HexFloat(res0) << ", " << HexFloat(res1) << ", "
1376                                            << HexFloat(res2) << ", " << HexFloat(res3) << ")"
1377                                            << "\n  ULP diffs = (" << diff0 << ", " << diff1 << ", " << diff2 << ", "
1378                                            << diff3 << "), max diff = " << maxDiff << TestLog::EndMessage;
1379                     }
1380                     else if (numFailed == maxPrints)
1381                         m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
1382 
1383                     numFailed += 1;
1384                 }
1385             }
1386 
1387             m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed"
1388                                << TestLog::EndMessage;
1389 
1390             if (numFailed == 0)
1391                 return tcu::TestStatus::pass("Pass");
1392             else
1393                 return tcu::TestStatus::fail("Result comparison failed");
1394         }
1395     }
1396 };
1397 
1398 class UnpackUnorm4x8Case : public ShaderPackingFunctionCase
1399 {
1400 public:
UnpackUnorm4x8Case(tcu::TestContext & testCtx,glu::ShaderType shaderType)1401     UnpackUnorm4x8Case(tcu::TestContext &testCtx, glu::ShaderType shaderType)
1402         : ShaderPackingFunctionCase(testCtx, (string("unpackunorm4x8") + getShaderTypePostfix(shaderType)).c_str(),
1403                                     shaderType)
1404     {
1405         m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
1406         m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_FLOAT_VEC4, glu::PRECISION_HIGHP)));
1407 
1408         m_spec.source = "out0 = unpackUnorm4x8(in0);";
1409     }
1410 
createInstance(Context & ctx) const1411     TestInstance *createInstance(Context &ctx) const
1412     {
1413         return new UnpackUnorm4x8CaseInstance(ctx, m_shaderType, m_spec, getName());
1414     }
1415 };
1416 
ShaderPackingFunctionTests(tcu::TestContext & testCtx)1417 ShaderPackingFunctionTests::ShaderPackingFunctionTests(tcu::TestContext &testCtx)
1418     : tcu::TestCaseGroup(testCtx, "pack_unpack")
1419 {
1420 }
1421 
~ShaderPackingFunctionTests(void)1422 ShaderPackingFunctionTests::~ShaderPackingFunctionTests(void)
1423 {
1424 }
1425 
init(void)1426 void ShaderPackingFunctionTests::init(void)
1427 {
1428     // New built-in functions in GLES 3.1
1429     {
1430         const glu::ShaderType allShaderTypes[] = {glu::SHADERTYPE_VERTEX,
1431                                                   glu::SHADERTYPE_TESSELLATION_CONTROL,
1432                                                   glu::SHADERTYPE_TESSELLATION_EVALUATION,
1433                                                   glu::SHADERTYPE_GEOMETRY,
1434                                                   glu::SHADERTYPE_FRAGMENT,
1435                                                   glu::SHADERTYPE_COMPUTE};
1436 
1437         // packSnorm4x8
1438         for (int prec = glu::PRECISION_MEDIUMP; prec < glu::PRECISION_LAST; prec++)
1439         {
1440             for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(allShaderTypes); shaderTypeNdx++)
1441                 addChild(new PackSnorm4x8Case(m_testCtx, allShaderTypes[shaderTypeNdx], glu::Precision(prec)));
1442         }
1443 
1444         // unpackSnorm4x8
1445         for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(allShaderTypes); shaderTypeNdx++)
1446             addChild(new UnpackSnorm4x8Case(m_testCtx, allShaderTypes[shaderTypeNdx]));
1447 
1448         // packUnorm4x8
1449         for (int prec = glu::PRECISION_MEDIUMP; prec < glu::PRECISION_LAST; prec++)
1450         {
1451             for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(allShaderTypes); shaderTypeNdx++)
1452                 addChild(new PackUnorm4x8Case(m_testCtx, allShaderTypes[shaderTypeNdx], glu::Precision(prec)));
1453         }
1454 
1455         // unpackUnorm4x8
1456         for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(allShaderTypes); shaderTypeNdx++)
1457             addChild(new UnpackUnorm4x8Case(m_testCtx, allShaderTypes[shaderTypeNdx]));
1458     }
1459 
1460     // GLES 3 functions in new shader types.
1461     {
1462         const glu::ShaderType newShaderTypes[] = {glu::SHADERTYPE_GEOMETRY, glu::SHADERTYPE_COMPUTE};
1463 
1464         // packSnorm2x16
1465         for (int prec = glu::PRECISION_MEDIUMP; prec < glu::PRECISION_LAST; prec++)
1466         {
1467             for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(newShaderTypes); shaderTypeNdx++)
1468                 addChild(new PackSnorm2x16Case(m_testCtx, newShaderTypes[shaderTypeNdx], glu::Precision(prec)));
1469         }
1470 
1471         // unpackSnorm2x16
1472         for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(newShaderTypes); shaderTypeNdx++)
1473             addChild(new UnpackSnorm2x16Case(m_testCtx, newShaderTypes[shaderTypeNdx]));
1474 
1475         // packUnorm2x16
1476         for (int prec = glu::PRECISION_MEDIUMP; prec < glu::PRECISION_LAST; prec++)
1477         {
1478             for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(newShaderTypes); shaderTypeNdx++)
1479                 addChild(new PackUnorm2x16Case(m_testCtx, newShaderTypes[shaderTypeNdx], glu::Precision(prec)));
1480         }
1481 
1482         // unpackUnorm2x16
1483         for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(newShaderTypes); shaderTypeNdx++)
1484             addChild(new UnpackUnorm2x16Case(m_testCtx, newShaderTypes[shaderTypeNdx]));
1485 
1486         // packHalf2x16
1487         for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(newShaderTypes); shaderTypeNdx++)
1488             addChild(new PackHalf2x16Case(m_testCtx, newShaderTypes[shaderTypeNdx]));
1489 
1490         // unpackHalf2x16
1491         for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(newShaderTypes); shaderTypeNdx++)
1492             addChild(new UnpackHalf2x16Case(m_testCtx, newShaderTypes[shaderTypeNdx]));
1493     }
1494 }
1495 
1496 } // namespace shaderexecutor
1497 } // namespace vkt
1498