1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  * Copyright (c) 2016 Samsung Electronics Co., Ltd.
7  * Copyright (c) 2016 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Shader derivate function tests.
24  *
25  * \todo [2013-06-25 pyry] Missing features:
26  *  - lines and points
27  *  - projected coordinates
28  *  - continuous non-trivial functions (sin, exp)
29  *  - non-continuous functions (step)
30  *//*--------------------------------------------------------------------*/
31 
32 #include "vktShaderRenderDerivateTests.hpp"
33 #include "vktShaderRender.hpp"
34 #include "subgroups/vktSubgroupsTestsUtils.hpp"
35 #include "vkImageUtil.hpp"
36 #include "vkQueryUtil.hpp"
37 
38 #include "gluTextureUtil.hpp"
39 
40 #include "tcuStringTemplate.hpp"
41 #include "tcuSurface.hpp"
42 #include "tcuTestLog.hpp"
43 #include "tcuVectorUtil.hpp"
44 #include "tcuTextureUtil.hpp"
45 #include "tcuRGBA.hpp"
46 #include "tcuFloat.hpp"
47 #include "tcuInterval.hpp"
48 
49 #include "deUniquePtr.hpp"
50 #include "glwEnums.hpp"
51 
52 #include <sstream>
53 #include <string>
54 
55 namespace vkt
56 {
57 namespace sr
58 {
59 namespace
60 {
61 
62 using namespace vk;
63 
64 using std::map;
65 using std::ostringstream;
66 using std::string;
67 using std::vector;
68 using tcu::TestLog;
69 
// File-wide integer constants: size of the render target used by the cases
// and the maximum number of per-pixel failure messages written to the log.
enum
{
    MAX_FAILED_MESSAGES = 10,

    VIEWPORT_WIDTH  = 99,
    VIEWPORT_HEIGHT = 133
};
76 
// Derivative functions covered by the tests, grouped by family.
enum DerivateFunc
{
    // Derivatives along x.
    DERIVATE_DFDX = 0,
    DERIVATE_DFDXFINE,
    DERIVATE_DFDXCOARSE,
    DERIVATE_DFDXSUBGROUP,

    // Derivatives along y.
    DERIVATE_DFDY,
    DERIVATE_DFDYFINE,
    DERIVATE_DFDYCOARSE,
    DERIVATE_DFDYSUBGROUP,

    // fwidth family.
    DERIVATE_FWIDTH,
    DERIVATE_FWIDTHFINE,
    DERIVATE_FWIDTHCOARSE,

    DERIVATE_LAST //!< Number of functions; also used as the "unset" value.
};
95 
// Kind of render target a case draws into.
enum SurfaceType
{
    SURFACETYPE_UNORM_FBO = 0,

    // \note Uses RGBA32UI fbo actually, since FP rendertargets are not in core spec.
    SURFACETYPE_FLOAT_FBO,

    SURFACETYPE_LAST
};
103 
104 // Utilities
105 
getDerivateFuncName(DerivateFunc func)106 static const char *getDerivateFuncName(DerivateFunc func)
107 {
108     switch (func)
109     {
110     case DERIVATE_DFDX:
111         return "dFdx";
112     case DERIVATE_DFDXFINE:
113         return "dFdxFine";
114     case DERIVATE_DFDXCOARSE:
115         return "dFdxCoarse";
116     case DERIVATE_DFDXSUBGROUP:
117         return "dFdxSubgroup";
118     case DERIVATE_DFDY:
119         return "dFdy";
120     case DERIVATE_DFDYFINE:
121         return "dFdyFine";
122     case DERIVATE_DFDYCOARSE:
123         return "dFdyCoarse";
124     case DERIVATE_DFDYSUBGROUP:
125         return "dFdySubgroup";
126     case DERIVATE_FWIDTH:
127         return "fwidth";
128     case DERIVATE_FWIDTHFINE:
129         return "fwidthFine";
130     case DERIVATE_FWIDTHCOARSE:
131         return "fwidthCoarse";
132     default:
133         DE_ASSERT(false);
134         return DE_NULL;
135     }
136 }
137 
getDerivateFuncCaseName(DerivateFunc func)138 static const char *getDerivateFuncCaseName(DerivateFunc func)
139 {
140     switch (func)
141     {
142     case DERIVATE_DFDX:
143         return "dfdx";
144     case DERIVATE_DFDXFINE:
145         return "dfdxfine";
146     case DERIVATE_DFDXCOARSE:
147         return "dfdxcoarse";
148     case DERIVATE_DFDXSUBGROUP:
149         return "dfdxsubgroup";
150     case DERIVATE_DFDY:
151         return "dfdy";
152     case DERIVATE_DFDYFINE:
153         return "dfdyfine";
154     case DERIVATE_DFDYCOARSE:
155         return "dfdycoarse";
156     case DERIVATE_DFDYSUBGROUP:
157         return "dfdysubgroup";
158     case DERIVATE_FWIDTH:
159         return "fwidth";
160     case DERIVATE_FWIDTHFINE:
161         return "fwidthfine";
162     case DERIVATE_FWIDTHCOARSE:
163         return "fwidthcoarse";
164     default:
165         DE_ASSERT(false);
166         return DE_NULL;
167     }
168 }
169 
isDfdxFunc(DerivateFunc func)170 static inline bool isDfdxFunc(DerivateFunc func)
171 {
172     return func == DERIVATE_DFDX || func == DERIVATE_DFDXFINE || func == DERIVATE_DFDXCOARSE ||
173            func == DERIVATE_DFDXSUBGROUP;
174 }
175 
isDfdyFunc(DerivateFunc func)176 static inline bool isDfdyFunc(DerivateFunc func)
177 {
178     return func == DERIVATE_DFDY || func == DERIVATE_DFDYFINE || func == DERIVATE_DFDYCOARSE ||
179            func == DERIVATE_DFDYSUBGROUP;
180 }
181 
isFwidthFunc(DerivateFunc func)182 static inline bool isFwidthFunc(DerivateFunc func)
183 {
184     return func == DERIVATE_FWIDTH || func == DERIVATE_FWIDTHFINE || func == DERIVATE_FWIDTHCOARSE;
185 }
186 
isSubgroupFunc(DerivateFunc func)187 static inline bool isSubgroupFunc(DerivateFunc func)
188 {
189     return func == DERIVATE_DFDXSUBGROUP || func == DERIVATE_DFDYSUBGROUP;
190 }
191 
getDerivateMask(glu::DataType type)192 static inline tcu::BVec4 getDerivateMask(glu::DataType type)
193 {
194     switch (type)
195     {
196     case glu::TYPE_FLOAT:
197         return tcu::BVec4(true, false, false, false);
198     case glu::TYPE_FLOAT_VEC2:
199         return tcu::BVec4(true, true, false, false);
200     case glu::TYPE_FLOAT_VEC3:
201         return tcu::BVec4(true, true, true, false);
202     case glu::TYPE_FLOAT_VEC4:
203         return tcu::BVec4(true, true, true, true);
204     default:
205         DE_ASSERT(false);
206         return tcu::BVec4(true);
207     }
208 }
209 
isSkippedPixel(const tcu::ConstPixelBufferAccess & surface,int x,int y)210 static inline bool isSkippedPixel(const tcu::ConstPixelBufferAccess &surface, int x, int y)
211 {
212     const tcu::Vec4 skipValue(0.7843f, 0.2039f, 0.4706f, 0.0f);
213     const tcu::Vec4 value = surface.getPixel(x, y);
214     return tcu::allEqual(tcu::lessThanEqual(tcu::abs(value - skipValue), tcu::Vec4(0.01f)), tcu::BVec4(true));
215 }
216 
// Decodes the derivative stored at pixel (x, y): subtracts derivBias from the pixel
// value and divides the result by derivScale, component-wise.
static inline tcu::Vec4 readDerivate(const tcu::ConstPixelBufferAccess &surface, const tcu::Vec4 &derivScale,
                                     const tcu::Vec4 &derivBias, int x, int y)
{
    const tcu::Vec4 encoded  = surface.getPixel(x, y);
    const tcu::Vec4 unbiased = encoded - derivBias;

    return unbiased / derivScale;
}
222 
// Returns, for each of the four components of v, the exponent bits of its 32-bit
// float representation as reported by tcu::Float32::exponentBits().
static inline tcu::UVec4 getCompExpBits(const tcu::Vec4 &v)
{
    tcu::UVec4 expBits;

    for (int comp = 0; comp < 4; ++comp)
        expBits[comp] = tcu::Float32(v[comp]).exponentBits();

    return expBits;
}
228 
computeFloatingPointError(const float value,const int numAccurateBits)229 float computeFloatingPointError(const float value, const int numAccurateBits)
230 {
231     const int numGarbageBits = 23 - numAccurateBits;
232     const uint32_t mask      = (1u << numGarbageBits) - 1u;
233     const int exp            = tcu::Float32(value).exponent();
234 
235     return tcu::Float32::construct(+1, exp, (1u << 23) | mask).asFloat() -
236            tcu::Float32::construct(+1, exp, 1u << 23).asFloat();
237 }
238 
getNumMantissaBits(const glu::Precision precision)239 static int getNumMantissaBits(const glu::Precision precision)
240 {
241     switch (precision)
242     {
243     case glu::PRECISION_HIGHP:
244         return 23;
245     case glu::PRECISION_MEDIUMP:
246         return 10;
247     case glu::PRECISION_LOWP:
248         return 6;
249     default:
250         DE_ASSERT(false);
251         return 0;
252     }
253 }
254 
getMinExponent(const glu::Precision precision)255 static int getMinExponent(const glu::Precision precision)
256 {
257     switch (precision)
258     {
259     case glu::PRECISION_HIGHP:
260         return -126;
261     case glu::PRECISION_MEDIUMP:
262         return -14;
263     case glu::PRECISION_LOWP:
264         return -8;
265     default:
266         DE_ASSERT(false);
267         return 0;
268     }
269 }
270 
getSingleULPForExponent(int exp,int numMantissaBits)271 static float getSingleULPForExponent(int exp, int numMantissaBits)
272 {
273     if (numMantissaBits > 0)
274     {
275         DE_ASSERT(numMantissaBits <= 23);
276 
277         const int ulpBitNdx = 23 - numMantissaBits;
278         return tcu::Float32::construct(+1, exp, (1 << 23) | (1 << ulpBitNdx)).asFloat() -
279                tcu::Float32::construct(+1, exp, (1 << 23)).asFloat();
280     }
281     else
282     {
283         DE_ASSERT(numMantissaBits == 0);
284         return tcu::Float32::construct(+1, exp, (1 << 23)).asFloat();
285     }
286 }
287 
getSingleULPForValue(float value,int numMantissaBits)288 static float getSingleULPForValue(float value, int numMantissaBits)
289 {
290     const int exp = tcu::Float32(value).exponent();
291     return getSingleULPForExponent(exp, numMantissaBits);
292 }
293 
convertFloatFlushToZeroRtn(float value,int minExponent,int numAccurateBits)294 static float convertFloatFlushToZeroRtn(float value, int minExponent, int numAccurateBits)
295 {
296     if (value == 0.0f)
297     {
298         return 0.0f;
299     }
300     else
301     {
302         const tcu::Float32 inputFloat = tcu::Float32(value);
303         const int numTruncatedBits    = 23 - numAccurateBits;
304         const uint32_t truncMask      = (1u << numTruncatedBits) - 1u;
305 
306         if (value > 0.0f)
307         {
308             if (value > 0.0f && tcu::Float32(value).exponent() < minExponent)
309             {
310                 // flush to zero if possible
311                 return 0.0f;
312             }
313             else
314             {
315                 // just mask away non-representable bits
316                 return tcu::Float32::construct(+1, inputFloat.exponent(), inputFloat.mantissa() & ~truncMask).asFloat();
317             }
318         }
319         else
320         {
321             if (inputFloat.mantissa() & truncMask)
322             {
323                 // decrement one ulp if truncated bits are non-zero (i.e. if value is not representable)
324                 return tcu::Float32::construct(-1, inputFloat.exponent(), inputFloat.mantissa() & ~truncMask)
325                            .asFloat() -
326                        getSingleULPForExponent(inputFloat.exponent(), numAccurateBits);
327             }
328             else
329             {
330                 // value is representable, no need to do anything
331                 return value;
332             }
333         }
334     }
335 }
336 
convertFloatFlushToZeroRtp(float value,int minExponent,int numAccurateBits)337 static float convertFloatFlushToZeroRtp(float value, int minExponent, int numAccurateBits)
338 {
339     return -convertFloatFlushToZeroRtn(-value, minExponent, numAccurateBits);
340 }
341 
addErrorUlp(float value,float numUlps,int numMantissaBits)342 static float addErrorUlp(float value, float numUlps, int numMantissaBits)
343 {
344     return value + numUlps * getSingleULPForValue(value, numMantissaBits);
345 }
346 
enum
{
    // Number of mantissa bits allowed to be lost in varying interpolation.
    INTERPOLATION_LOST_BITS = 3,
};
351 
getDerivateThreshold(const glu::Precision precision,const tcu::Vec4 & valueMin,const tcu::Vec4 & valueMax,const tcu::Vec4 & expectedDerivate)352 static inline tcu::Vec4 getDerivateThreshold(const glu::Precision precision, const tcu::Vec4 &valueMin,
353                                              const tcu::Vec4 &valueMax, const tcu::Vec4 &expectedDerivate)
354 {
355     const int baseBits           = getNumMantissaBits(precision);
356     const tcu::UVec4 derivExp    = getCompExpBits(expectedDerivate);
357     const tcu::UVec4 maxValueExp = max(getCompExpBits(valueMin), getCompExpBits(valueMax));
358     const tcu::UVec4 numBitsLost = maxValueExp - min(maxValueExp, derivExp);
359     const tcu::IVec4 numAccurateBits =
360         max(baseBits - numBitsLost.asInt() - (int)INTERPOLATION_LOST_BITS, tcu::IVec4(0));
361 
362     return tcu::Vec4(computeFloatingPointError(expectedDerivate[0], numAccurateBits[0]),
363                      computeFloatingPointError(expectedDerivate[1], numAccurateBits[1]),
364                      computeFloatingPointError(expectedDerivate[2], numAccurateBits[2]),
365                      computeFloatingPointError(expectedDerivate[3], numAccurateBits[3]));
366 }
367 
368 struct LogVecComps
369 {
370     const tcu::Vec4 &v;
371     int numComps;
372 
LogVecCompsvkt::sr::__anon71e082ae0111::LogVecComps373     LogVecComps(const tcu::Vec4 &v_, int numComps_) : v(v_), numComps(numComps_)
374     {
375     }
376 };
377 
operator <<(std::ostream & str,const LogVecComps & v)378 std::ostream &operator<<(std::ostream &str, const LogVecComps &v)
379 {
380     DE_ASSERT(de::inRange(v.numComps, 1, 4));
381     if (v.numComps == 1)
382         return str << v.v[0];
383     else if (v.numComps == 2)
384         return str << v.v.toWidth<2>();
385     else if (v.numComps == 3)
386         return str << v.v.toWidth<3>();
387     else
388         return str << v.v;
389 }
390 
// Selects whether a verification routine writes messages to the test log.
enum VerificationLogging
{
    LOG_ALL = 0, //!< Log the expectation, failing pixels and the summary.
    LOG_NOTHING  //!< Stay silent.
};
396 
verifyConstantDerivate(tcu::TestLog & log,const tcu::ConstPixelBufferAccess & result,const tcu::PixelBufferAccess & errorMask,glu::DataType dataType,const tcu::Vec4 & reference,const tcu::Vec4 & threshold,const tcu::Vec4 & scale,const tcu::Vec4 & bias,VerificationLogging logPolicy=LOG_ALL,bool demoteToHelperInvocation=false)397 static bool verifyConstantDerivate(tcu::TestLog &log, const tcu::ConstPixelBufferAccess &result,
398                                    const tcu::PixelBufferAccess &errorMask, glu::DataType dataType,
399                                    const tcu::Vec4 &reference, const tcu::Vec4 &threshold, const tcu::Vec4 &scale,
400                                    const tcu::Vec4 &bias, VerificationLogging logPolicy = LOG_ALL,
401                                    bool demoteToHelperInvocation = false)
402 {
403     const int numComps    = glu::getDataTypeFloatScalars(dataType);
404     const tcu::BVec4 mask = tcu::logicalNot(getDerivateMask(dataType));
405     int numFailedPixels   = 0;
406 
407     if (logPolicy == LOG_ALL)
408         log << TestLog::Message << "Expecting " << LogVecComps(reference, numComps) << " with threshold "
409             << LogVecComps(threshold, numComps) << TestLog::EndMessage;
410 
411     for (int y = 0; y < result.getHeight(); y++)
412     {
413         for (int x = 0; x < result.getWidth(); x++)
414         {
415             if (isSkippedPixel(result, x, y))
416                 continue;
417 
418             if (demoteToHelperInvocation && deMod(y, 2) == 1)
419                 continue;
420 
421             const tcu::Vec4 resDerivate = readDerivate(result, scale, bias, x, y);
422             const bool isOk =
423                 tcu::allEqual(tcu::logicalOr(tcu::lessThanEqual(tcu::abs(reference - resDerivate), threshold), mask),
424                               tcu::BVec4(true));
425 
426             if (!isOk)
427             {
428                 if (numFailedPixels < MAX_FAILED_MESSAGES && logPolicy == LOG_ALL)
429                     log << TestLog::Message << "FAIL: got " << LogVecComps(resDerivate, numComps)
430                         << ", diff = " << LogVecComps(tcu::abs(reference - resDerivate), numComps) << ", at x = " << x
431                         << ", y = " << y << TestLog::EndMessage;
432                 numFailedPixels += 1;
433                 errorMask.setPixel(tcu::RGBA::red().toVec(), x, y);
434             }
435         }
436     }
437 
438     if (numFailedPixels >= MAX_FAILED_MESSAGES && logPolicy == LOG_ALL)
439         log << TestLog::Message << "..." << TestLog::EndMessage;
440 
441     if (numFailedPixels > 0 && logPolicy == LOG_ALL)
442         log << TestLog::Message << "FAIL: found " << numFailedPixels << " failed pixels" << TestLog::EndMessage;
443 
444     return numFailedPixels == 0;
445 }
446 
447 struct Linear2DFunctionEvaluator
448 {
449     tcu::Matrix<float, 4, 3> matrix;
450 
451     //      .-----.
452     //      | s_x |
453     //  M x | s_y |
454     //      | 1.0 |
455     //      '-----'
456     tcu::Vec4 evaluateAt(float screenX, float screenY) const;
457 };
458 
evaluateAt(float screenX,float screenY) const459 tcu::Vec4 Linear2DFunctionEvaluator::evaluateAt(float screenX, float screenY) const
460 {
461     const tcu::Vec3 position(screenX, screenY, 1.0f);
462     return matrix * position;
463 }
464 
reverifyConstantDerivateWithFlushRelaxations(tcu::TestLog & log,const tcu::ConstPixelBufferAccess & result,const tcu::PixelBufferAccess & errorMask,glu::DataType dataType,glu::Precision precision,const tcu::Vec4 & derivScale,const tcu::Vec4 & derivBias,const tcu::Vec4 & surfaceThreshold,DerivateFunc derivateFunc,const Linear2DFunctionEvaluator & function)465 static bool reverifyConstantDerivateWithFlushRelaxations(tcu::TestLog &log, const tcu::ConstPixelBufferAccess &result,
466                                                          const tcu::PixelBufferAccess &errorMask,
467                                                          glu::DataType dataType, glu::Precision precision,
468                                                          const tcu::Vec4 &derivScale, const tcu::Vec4 &derivBias,
469                                                          const tcu::Vec4 &surfaceThreshold, DerivateFunc derivateFunc,
470                                                          const Linear2DFunctionEvaluator &function)
471 {
472     DE_ASSERT(result.getWidth() == errorMask.getWidth());
473     DE_ASSERT(result.getHeight() == errorMask.getHeight());
474     DE_ASSERT(isDfdxFunc(derivateFunc) || isDfdyFunc(derivateFunc));
475 
476     const tcu::IVec4 red(255, 0, 0, 255);
477     const tcu::IVec4 green(0, 255, 0, 255);
478     const float divisionErrorUlps = 2.5f;
479 
480     const int numComponents = glu::getDataTypeFloatScalars(dataType);
481     const int numBits       = getNumMantissaBits(precision);
482     const int minExponent   = getMinExponent(precision);
483 
484     const int numVaryingSampleBits = numBits - INTERPOLATION_LOST_BITS;
485     int numFailedPixels            = 0;
486 
487     tcu::clear(errorMask, green);
488 
489     // search for failed pixels
490     for (int y = 0; y < result.getHeight(); ++y)
491         for (int x = 0; x < result.getWidth(); ++x)
492         {
493             if (isSkippedPixel(result, x, y))
494                 continue;
495 
496             //                 flushToZero?(f2z?(functionValueCurrent) - f2z?(functionValueBefore))
497             // flushToZero? ( ------------------------------------------------------------------------ +- 2.5 ULP )
498             //                                                  dx
499 
500             const tcu::Vec4 resultDerivative = readDerivate(result, derivScale, derivBias, x, y);
501 
502             // sample at the front of the back pixel and the back of the front pixel to cover the whole area of
503             // legal sample positions. In general case this is NOT OK, but we know that the target funtion is
504             // (mostly*) linear which allows us to take the sample points at arbitrary points. This gets us the
505             // maximum difference possible in exponents which are used in error bound calculations.
506             // * non-linearity may happen around zero or with very high function values due to subnorms not
507             //   behaving well.
508             const tcu::Vec4 functionValueForward  = (isDfdxFunc(derivateFunc)) ?
509                                                         (function.evaluateAt((float)x + 2.0f, (float)y + 0.5f)) :
510                                                         (function.evaluateAt((float)x + 0.5f, (float)y + 2.0f));
511             const tcu::Vec4 functionValueBackward = (isDfdyFunc(derivateFunc)) ?
512                                                         (function.evaluateAt((float)x - 1.0f, (float)y + 0.5f)) :
513                                                         (function.evaluateAt((float)x + 0.5f, (float)y - 1.0f));
514 
515             bool anyComponentFailed = false;
516 
517             // check components separately
518             for (int c = 0; c < numComponents; ++c)
519             {
520                 // Simulate interpolation. Add allowed interpolation error and round to target precision. Allow one half ULP (i.e. correct rounding)
521                 const tcu::Interval forwardComponent(
522                     convertFloatFlushToZeroRtn(addErrorUlp((float)functionValueForward[c], -0.5f, numVaryingSampleBits),
523                                                minExponent, numBits),
524                     convertFloatFlushToZeroRtp(addErrorUlp((float)functionValueForward[c], +0.5f, numVaryingSampleBits),
525                                                minExponent, numBits));
526                 const tcu::Interval backwardComponent(
527                     convertFloatFlushToZeroRtn(
528                         addErrorUlp((float)functionValueBackward[c], -0.5f, numVaryingSampleBits), minExponent,
529                         numBits),
530                     convertFloatFlushToZeroRtp(
531                         addErrorUlp((float)functionValueBackward[c], +0.5f, numVaryingSampleBits), minExponent,
532                         numBits));
533                 const int maxValueExp = de::max(de::max(tcu::Float32(forwardComponent.lo()).exponent(),
534                                                         tcu::Float32(forwardComponent.hi()).exponent()),
535                                                 de::max(tcu::Float32(backwardComponent.lo()).exponent(),
536                                                         tcu::Float32(backwardComponent.hi()).exponent()));
537 
538                 // subtraction in numerator will likely cause a cancellation of the most
539                 // significant bits. Apply error bounds.
540 
541                 const tcu::Interval numerator(forwardComponent - backwardComponent);
542                 const int numeratorLoExp      = tcu::Float32(numerator.lo()).exponent();
543                 const int numeratorHiExp      = tcu::Float32(numerator.hi()).exponent();
544                 const int numeratorLoBitsLost = de::max(
545                     0,
546                     maxValueExp -
547                         numeratorLoExp); //!< must clamp to zero since if forward and backward components have different
548                 const int numeratorHiBitsLost = de::max(
549                     0, maxValueExp - numeratorHiExp); //!< sign, numerator might have larger exponent than its operands.
550                 const int numeratorLoBits = de::max(0, numBits - numeratorLoBitsLost);
551                 const int numeratorHiBits = de::max(0, numBits - numeratorHiBitsLost);
552 
553                 const tcu::Interval numeratorRange(
554                     convertFloatFlushToZeroRtn((float)numerator.lo(), minExponent, numeratorLoBits),
555                     convertFloatFlushToZeroRtp((float)numerator.hi(), minExponent, numeratorHiBits));
556 
557                 const tcu::Interval divisionRange =
558                     numeratorRange /
559                     3.0f; // legal sample area is anywhere within this and neighboring pixels (i.e. size = 3)
560                 const tcu::Interval divisionResultRange(
561                     convertFloatFlushToZeroRtn(addErrorUlp((float)divisionRange.lo(), -divisionErrorUlps, numBits),
562                                                minExponent, numBits),
563                     convertFloatFlushToZeroRtp(addErrorUlp((float)divisionRange.hi(), +divisionErrorUlps, numBits),
564                                                minExponent, numBits));
565                 const tcu::Interval finalResultRange(divisionResultRange.lo() - surfaceThreshold[c],
566                                                      divisionResultRange.hi() + surfaceThreshold[c]);
567 
568                 if (resultDerivative[c] >= finalResultRange.lo() && resultDerivative[c] <= finalResultRange.hi())
569                 {
570                     // value ok
571                 }
572                 else
573                 {
574                     if (numFailedPixels < MAX_FAILED_MESSAGES)
575                         log << tcu::TestLog::Message << "Error in pixel at " << x << ", " << y << " with component "
576                             << c << " (channel " << ("rgba"[c]) << ")\n"
577                             << "\tGot pixel value " << result.getPixelInt(x, y) << "\n"
578                             << "\t\tdFd" << ((isDfdxFunc(derivateFunc)) ? ('x') : ('y'))
579                             << " ~= " << resultDerivative[c] << "\n"
580                             << "\t\tdifference to a valid range: "
581                             << ((resultDerivative[c] < finalResultRange.lo()) ? ("-") : ("+"))
582                             << ((resultDerivative[c] < finalResultRange.lo()) ?
583                                     (finalResultRange.lo() - resultDerivative[c]) :
584                                     (resultDerivative[c] - finalResultRange.hi()))
585                             << "\n"
586                             << "\tDerivative value range:\n"
587                             << "\t\tMin: " << finalResultRange.lo() << "\n"
588                             << "\t\tMax: " << finalResultRange.hi() << "\n"
589                             << tcu::TestLog::EndMessage;
590 
591                     ++numFailedPixels;
592                     anyComponentFailed = true;
593                 }
594             }
595 
596             if (anyComponentFailed)
597                 errorMask.setPixel(red, x, y);
598         }
599 
600     if (numFailedPixels >= MAX_FAILED_MESSAGES)
601         log << TestLog::Message << "..." << TestLog::EndMessage;
602 
603     if (numFailedPixels > 0)
604         log << TestLog::Message << "FAIL: found " << numFailedPixels << " failed pixels" << TestLog::EndMessage;
605 
606     return numFailedPixels == 0;
607 }
608 
609 // TestCase utils
610 
611 struct DerivateCaseDefinition
612 {
DerivateCaseDefinitionvkt::sr::__anon71e082ae0111::DerivateCaseDefinition613     DerivateCaseDefinition(void)
614     {
615         func                     = DERIVATE_LAST;
616         dataType                 = glu::TYPE_LAST;
617         precision                = glu::PRECISION_LAST;
618         inNonUniformControlFlow  = false;
619         coordDataType            = glu::TYPE_LAST;
620         coordPrecision           = glu::PRECISION_LAST;
621         surfaceType              = SURFACETYPE_UNORM_FBO;
622         numSamples               = 0;
623         demoteToHelperInvocation = false;
624     }
625 
626     DerivateFunc func;
627     glu::DataType dataType;
628     glu::Precision precision;
629     bool inNonUniformControlFlow;
630 
631     glu::DataType coordDataType;
632     glu::Precision coordPrecision;
633 
634     SurfaceType surfaceType;
635     int numSamples;
636 
637     bool demoteToHelperInvocation;
638 };
639 
640 struct DerivateCaseValues
641 {
642     tcu::Vec4 coordMin;
643     tcu::Vec4 coordMax;
644     tcu::Vec4 derivScale;
645     tcu::Vec4 derivBias;
646 };
647 
648 struct TextureCaseValues
649 {
650     tcu::Vec4 texValueMin;
651     tcu::Vec4 texValueMax;
652 };
653 
654 class DerivateUniformSetup : public UniformSetup
655 {
656 public:
657     DerivateUniformSetup(bool useSampler);
658     virtual ~DerivateUniformSetup(void);
659 
660     virtual void setup(ShaderRenderCaseInstance &instance, const tcu::Vec4 &) const;
661 
662 private:
663     const bool m_useSampler;
664 };
665 
DerivateUniformSetup(bool useSampler)666 DerivateUniformSetup::DerivateUniformSetup(bool useSampler) : m_useSampler(useSampler)
667 {
668 }
669 
~DerivateUniformSetup(void)670 DerivateUniformSetup::~DerivateUniformSetup(void)
671 {
672 }
673 
674 // TriangleDerivateCaseInstance
675 
676 class TriangleDerivateCaseInstance : public ShaderRenderCaseInstance
677 {
678 public:
679     TriangleDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
680                                  const DerivateCaseDefinition &definitions, const DerivateCaseValues &values);
681     virtual ~TriangleDerivateCaseInstance(void);
682     virtual tcu::TestStatus iterate(void);
getDerivateCaseDefinition(void)683     DerivateCaseDefinition getDerivateCaseDefinition(void)
684     {
685         return m_definitions;
686     }
getDerivateCaseValues(void)687     DerivateCaseValues getDerivateCaseValues(void)
688     {
689         return m_values;
690     }
691 
692 protected:
693     virtual bool verify(const tcu::ConstPixelBufferAccess &result, const tcu::PixelBufferAccess &errorMask) = 0;
694     tcu::Vec4 getSurfaceThreshold(void) const;
695     virtual void setupDefaultInputs(void);
696 
697     const DerivateCaseDefinition &m_definitions;
698     const DerivateCaseValues &m_values;
699 };
700 
getVkSampleCount(int numSamples)701 static VkSampleCountFlagBits getVkSampleCount(int numSamples)
702 {
703     switch (numSamples)
704     {
705     case 0:
706         return VK_SAMPLE_COUNT_1_BIT;
707     case 2:
708         return VK_SAMPLE_COUNT_2_BIT;
709     case 4:
710         return VK_SAMPLE_COUNT_4_BIT;
711     default:
712         DE_ASSERT(false);
713         return (VkSampleCountFlagBits)0;
714     }
715 }
716 
TriangleDerivateCaseInstance(Context & context,const UniformSetup & uniformSetup,const DerivateCaseDefinition & definitions,const DerivateCaseValues & values)717 TriangleDerivateCaseInstance::TriangleDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
718                                                            const DerivateCaseDefinition &definitions,
719                                                            const DerivateCaseValues &values)
720     : ShaderRenderCaseInstance(context, true, DE_NULL, uniformSetup, DE_NULL)
721     , m_definitions(definitions)
722     , m_values(values)
723 {
724     m_renderSize  = tcu::UVec2(VIEWPORT_WIDTH, VIEWPORT_HEIGHT);
725     m_colorFormat = vk::mapTextureFormat(
726         glu::mapGLInternalFormat(m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO ? GL_RGBA32UI : GL_RGBA8));
727 
728     setSampleCount(getVkSampleCount(definitions.numSamples));
729 }
730 
~TriangleDerivateCaseInstance(void)731 TriangleDerivateCaseInstance::~TriangleDerivateCaseInstance(void)
732 {
733 }
734 
getSurfaceThreshold(void) const735 tcu::Vec4 TriangleDerivateCaseInstance::getSurfaceThreshold(void) const
736 {
737     switch (m_definitions.surfaceType)
738     {
739     case SURFACETYPE_UNORM_FBO:
740         return tcu::IVec4(1).asFloat() / 255.0f;
741     case SURFACETYPE_FLOAT_FBO:
742         return tcu::Vec4(0.0f);
743     default:
744         DE_ASSERT(false);
745         return tcu::Vec4(0.0f);
746     }
747 }
748 
setupDefaultInputs(void)749 void TriangleDerivateCaseInstance::setupDefaultInputs(void)
750 {
751     const int numVertices   = 4;
752     const float positions[] = {-1.0f, -1.0f, 0.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f,
753                                1.0f,  -1.0f, 0.0f, 1.0f, 1.0f,  1.0f, 0.0f, 1.0f};
754     const float coords[]    = {m_values.coordMin.x(),
755                                m_values.coordMin.y(),
756                                m_values.coordMin.z(),
757                                m_values.coordMax.w(),
758                                m_values.coordMin.x(),
759                                m_values.coordMax.y(),
760                                (m_values.coordMin.z() + m_values.coordMax.z()) * 0.5f,
761                                (m_values.coordMin.w() + m_values.coordMax.w()) * 0.5f,
762                                m_values.coordMax.x(),
763                                m_values.coordMin.y(),
764                                (m_values.coordMin.z() + m_values.coordMax.z()) * 0.5f,
765                                (m_values.coordMin.w() + m_values.coordMax.w()) * 0.5f,
766                                m_values.coordMax.x(),
767                                m_values.coordMax.y(),
768                                m_values.coordMax.z(),
769                                m_values.coordMin.w()};
770 
771     addAttribute(0u, vk::VK_FORMAT_R32G32B32A32_SFLOAT, 4 * (uint32_t)sizeof(float), numVertices, positions);
772     if (m_definitions.coordDataType != glu::TYPE_LAST)
773         addAttribute(1u, vk::VK_FORMAT_R32G32B32A32_SFLOAT, 4 * (uint32_t)sizeof(float), numVertices, coords);
774 }
775 
iterate(void)776 tcu::TestStatus TriangleDerivateCaseInstance::iterate(void)
777 {
778     tcu::TestLog &log           = m_context.getTestContext().getLog();
779     const uint32_t numVertices  = 4;
780     const uint32_t numTriangles = 2;
781     const uint16_t indices[]    = {0, 2, 1, 2, 3, 1};
782     tcu::TextureLevel resultImage;
783 
784     setup();
785 
786     render(numVertices, numTriangles, indices);
787 
788     {
789         const tcu::TextureLevel &renderedImage = getResultImage();
790 
791         if (m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO)
792         {
793             const tcu::TextureFormat dataFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::FLOAT);
794 
795             resultImage.setStorage(dataFormat, renderedImage.getWidth(), renderedImage.getHeight());
796             tcu::copy(resultImage.getAccess(), tcu::ConstPixelBufferAccess(dataFormat, renderedImage.getSize(),
797                                                                            renderedImage.getAccess().getDataPtr()));
798         }
799         else
800         {
801             resultImage = renderedImage;
802         }
803     }
804 
805     // Verify
806     {
807         tcu::Surface errorMask(resultImage.getWidth(), resultImage.getHeight());
808         tcu::clear(errorMask.getAccess(), tcu::RGBA::green().toVec());
809 
810         const bool isOk = verify(resultImage.getAccess(), errorMask.getAccess());
811 
812         log << TestLog::ImageSet("Result", "Result images")
813             << TestLog::Image("Rendered", "Rendered image", resultImage);
814 
815         if (!isOk)
816             log << TestLog::Image("ErrorMask", "Error mask", errorMask);
817 
818         log << TestLog::EndImageSet;
819 
820         if (isOk)
821             return tcu::TestStatus::pass("Pass");
822         else
823             return tcu::TestStatus::fail("Image comparison failed");
824     }
825 }
826 
setup(ShaderRenderCaseInstance & instance,const tcu::Vec4 &) const827 void DerivateUniformSetup::setup(ShaderRenderCaseInstance &instance, const tcu::Vec4 &) const
828 {
829     DerivateCaseDefinition definitions =
830         dynamic_cast<TriangleDerivateCaseInstance &>(instance).getDerivateCaseDefinition();
831     DerivateCaseValues values = dynamic_cast<TriangleDerivateCaseInstance &>(instance).getDerivateCaseValues();
832 
833     DE_ASSERT(glu::isDataTypeFloatOrVec(definitions.dataType));
834 
835     instance.addUniform(0u, vk::VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
836                         glu::getDataTypeScalarSize(definitions.dataType) * sizeof(float), values.derivScale.getPtr());
837     instance.addUniform(1u, vk::VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
838                         glu::getDataTypeScalarSize(definitions.dataType) * sizeof(float), values.derivBias.getPtr());
839 
840     if (m_useSampler)
841         instance.useSampler(2u, 0u); // To the uniform binding location 2 bind the texture 0
842 }
843 
844 // TriangleDerivateCase
845 
846 class TriangleDerivateCase : public ShaderRenderCase
847 {
848 public:
849     TriangleDerivateCase(tcu::TestContext &testCtx, const std::string &name, const UniformSetup *uniformSetup);
850     virtual ~TriangleDerivateCase(void);
851 
852     void checkSupport(Context &context) const override;
853 
854 protected:
855     DerivateCaseDefinition m_definitions;
856     DerivateCaseValues m_values;
857 };
858 
TriangleDerivateCase(tcu::TestContext & testCtx,const std::string & name,const UniformSetup * uniformSetup)859 TriangleDerivateCase::TriangleDerivateCase(tcu::TestContext &testCtx, const std::string &name,
860                                            const UniformSetup *uniformSetup)
861     : ShaderRenderCase(testCtx, name, false, (ShaderEvaluator *)DE_NULL, uniformSetup, DE_NULL)
862     , m_definitions()
863 {
864 }
865 
~TriangleDerivateCase(void)866 TriangleDerivateCase::~TriangleDerivateCase(void)
867 {
868 }
869 
checkSupport(Context & context) const870 void TriangleDerivateCase::checkSupport(Context &context) const
871 {
872     ShaderRenderCase::checkSupport(context);
873 
874     const bool subgroupFunc = isSubgroupFunc(m_definitions.func);
875 
876     if (m_definitions.inNonUniformControlFlow || subgroupFunc)
877     {
878         const std::string errorPrefix = m_definitions.inNonUniformControlFlow ?
879                                             "Derivatives in dynamic control flow" :
880                                             "Manual derivatives with subgroup operations";
881 
882         if (!context.contextSupports(vk::ApiVersion(0, 1, 1, 0)))
883             throw tcu::NotSupportedError(errorPrefix + " require Vulkan 1.1");
884 
885         const auto &subgroupProperties = context.getSubgroupProperties();
886 
887         if (subgroupProperties.subgroupSize < 4)
888             throw tcu::NotSupportedError(errorPrefix + " require subgroupSize >= 4");
889 
890         if ((subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) == 0)
891             throw tcu::NotSupportedError(errorPrefix + " tests require VK_SUBGROUP_FEATURE_BALLOT_BIT");
892 
893         if ((subgroupProperties.supportedStages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
894             throw tcu::NotSupportedError(
895                 errorPrefix + " tests require subgroup supported stage including VK_SHADER_STAGE_FRAGMENT_BIT");
896 
897         if (subgroupFunc && (subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_QUAD_BIT) == 0)
898             throw tcu::NotSupportedError(errorPrefix + " tests require VK_SUBGROUP_FEATURE_QUAD_BIT");
899     }
900 }
901 
genVertexSource(glu::DataType coordType,glu::Precision precision)902 static std::string genVertexSource(glu::DataType coordType, glu::Precision precision)
903 {
904     DE_ASSERT(coordType == glu::TYPE_LAST || glu::isDataTypeFloatOrVec(coordType));
905 
906     const std::string vertexTmpl =
907         "#version 450\n"
908         "layout(location = 0) in highp vec4 a_position;\n" +
909         string(coordType != glu::TYPE_LAST ? "layout(location = 1) in ${PRECISION} ${DATATYPE} a_coord;\n"
910                                              "layout(location = 0) out ${PRECISION} ${DATATYPE} v_coord;\n" :
911                                              "") +
912         "out gl_PerVertex {\n"
913         "    vec4 gl_Position;\n"
914         "};\n"
915         "void main (void)\n"
916         "{\n"
917         "    gl_Position = a_position;\n" +
918         string(coordType != glu::TYPE_LAST ? "    v_coord = a_coord;\n" : "") + "}\n";
919 
920     map<string, string> vertexParams;
921 
922     if (coordType != glu::TYPE_LAST)
923     {
924         vertexParams["PRECISION"] = glu::getPrecisionName(precision);
925         vertexParams["DATATYPE"]  = glu::getDataTypeName(coordType);
926     }
927 
928     return tcu::StringTemplate(vertexTmpl).specialize(vertexParams);
929 }
930 
931 // ConstantDerivateCaseInstance
932 
933 class ConstantDerivateCaseInstance : public TriangleDerivateCaseInstance
934 {
935 public:
936     ConstantDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
937                                  const DerivateCaseDefinition &definitions, const DerivateCaseValues &values);
938     virtual ~ConstantDerivateCaseInstance(void);
939 
940     virtual bool verify(const tcu::ConstPixelBufferAccess &result, const tcu::PixelBufferAccess &errorMask);
941 };
942 
ConstantDerivateCaseInstance(Context & context,const UniformSetup & uniformSetup,const DerivateCaseDefinition & definitions,const DerivateCaseValues & values)943 ConstantDerivateCaseInstance::ConstantDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
944                                                            const DerivateCaseDefinition &definitions,
945                                                            const DerivateCaseValues &values)
946     : TriangleDerivateCaseInstance(context, uniformSetup, definitions, values)
947 {
948 }
949 
~ConstantDerivateCaseInstance(void)950 ConstantDerivateCaseInstance::~ConstantDerivateCaseInstance(void)
951 {
952 }
953 
verify(const tcu::ConstPixelBufferAccess & result,const tcu::PixelBufferAccess & errorMask)954 bool ConstantDerivateCaseInstance::verify(const tcu::ConstPixelBufferAccess &result,
955                                           const tcu::PixelBufferAccess &errorMask)
956 {
957     const tcu::Vec4 reference(0.0f); // Derivate of constant argument should always be 0
958     const tcu::Vec4 threshold = getSurfaceThreshold() / abs(m_values.derivScale);
959 
960     return verifyConstantDerivate(m_context.getTestContext().getLog(), result, errorMask, m_definitions.dataType,
961                                   reference, threshold, m_values.derivScale, m_values.derivBias);
962 }
963 
964 // ConstantDerivateCase
965 
966 class ConstantDerivateCase : public TriangleDerivateCase
967 {
968 public:
969     ConstantDerivateCase(tcu::TestContext &testCtx, const std::string &name, DerivateFunc func, glu::DataType type);
970     virtual ~ConstantDerivateCase(void);
971 
972     virtual void initPrograms(vk::SourceCollections &programCollection) const;
973     virtual TestInstance *createInstance(Context &context) const;
974 };
975 
ConstantDerivateCase(tcu::TestContext & testCtx,const std::string & name,DerivateFunc func,glu::DataType type)976 ConstantDerivateCase::ConstantDerivateCase(tcu::TestContext &testCtx, const std::string &name, DerivateFunc func,
977                                            glu::DataType type)
978     : TriangleDerivateCase(testCtx, name, new DerivateUniformSetup(false))
979 {
980     m_definitions.func      = func;
981     m_definitions.dataType  = type;
982     m_definitions.precision = glu::PRECISION_HIGHP;
983 
984     m_values.derivScale = tcu::Vec4(1e3f, 1e3f, 1e3f, 1e3f);
985     m_values.derivBias  = tcu::Vec4(0.5f, 0.5f, 0.5f, 0.5f);
986 }
987 
~ConstantDerivateCase(void)988 ConstantDerivateCase::~ConstantDerivateCase(void)
989 {
990 }
991 
createInstance(Context & context) const992 TestInstance *ConstantDerivateCase::createInstance(Context &context) const
993 {
994     DE_ASSERT(m_uniformSetup != DE_NULL);
995     return new ConstantDerivateCaseInstance(context, *m_uniformSetup, m_definitions, m_values);
996 }
997 
initPrograms(vk::SourceCollections & programCollection) const998 void ConstantDerivateCase::initPrograms(vk::SourceCollections &programCollection) const
999 {
1000     const char *fragmentTmpl = "#version 450\n"
1001                                "layout(location = 0) out mediump vec4 o_color;\n"
1002                                "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1003                                "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; }; \n"
1004                                "void main (void)\n"
1005                                "{\n"
1006                                "    ${PRECISION} ${DATATYPE} res = ${FUNC}(${VALUE}) * u_scale + u_bias;\n"
1007                                "    o_color = ${CAST_TO_OUTPUT};\n"
1008                                "}\n";
1009 
1010     map<string, string> fragmentParams;
1011     fragmentParams["PRECISION"] = glu::getPrecisionName(m_definitions.precision);
1012     fragmentParams["DATATYPE"]  = glu::getDataTypeName(m_definitions.dataType);
1013     fragmentParams["FUNC"]      = getDerivateFuncName(m_definitions.func);
1014     fragmentParams["VALUE"]     = m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "vec4(1.0, 7.2, -1e5, 0.0)" :
1015                                   m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? "vec3(1e2, 8.0, 0.01)" :
1016                                   m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? "vec2(-0.0, 2.7)" :
1017                                                                                    /* TYPE_FLOAT */ "7.7";
1018     fragmentParams["CAST_TO_OUTPUT"] =
1019         m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "res" :
1020         m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? "vec4(res, 1.0)" :
1021         m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? "vec4(res, 0.0, 1.0)" :
1022                                                          /* TYPE_FLOAT */ "vec4(res, 0.0, 0.0, 1.0)";
1023 
1024     std::string fragmentSrc = tcu::StringTemplate(fragmentTmpl).specialize(fragmentParams);
1025     programCollection.glslSources.add("vert")
1026         << glu::VertexSource(genVertexSource(m_definitions.coordDataType, m_definitions.coordPrecision));
1027     programCollection.glslSources.add("frag") << glu::FragmentSource(fragmentSrc);
1028 }
1029 
1030 // Linear cases
1031 
1032 class LinearDerivateUniformSetup : public DerivateUniformSetup
1033 {
1034 public:
1035     LinearDerivateUniformSetup(bool useSampler, BaseUniformType usedDefaultUniform);
1036     virtual ~LinearDerivateUniformSetup(void);
1037 
1038     virtual void setup(ShaderRenderCaseInstance &instance, const tcu::Vec4 &constCoords) const;
1039 
1040 private:
1041     const BaseUniformType m_usedDefaultUniform;
1042 };
1043 
LinearDerivateUniformSetup(bool useSampler,BaseUniformType usedDefaultUniform)1044 LinearDerivateUniformSetup::LinearDerivateUniformSetup(bool useSampler, BaseUniformType usedDefaultUniform)
1045     : DerivateUniformSetup(useSampler)
1046     , m_usedDefaultUniform(usedDefaultUniform)
1047 {
1048 }
1049 
~LinearDerivateUniformSetup(void)1050 LinearDerivateUniformSetup::~LinearDerivateUniformSetup(void)
1051 {
1052 }
1053 
setup(ShaderRenderCaseInstance & instance,const tcu::Vec4 & constCoords) const1054 void LinearDerivateUniformSetup::setup(ShaderRenderCaseInstance &instance, const tcu::Vec4 &constCoords) const
1055 {
1056     DerivateUniformSetup::setup(instance, constCoords);
1057 
1058     if (m_usedDefaultUniform != U_LAST)
1059         switch (m_usedDefaultUniform)
1060         {
1061         case UB_TRUE:
1062         case UI_ONE:
1063         case UI_TWO:
1064             instance.useUniform(2u, m_usedDefaultUniform);
1065             break;
1066         default:
1067             DE_ASSERT(false);
1068             break;
1069         }
1070 }
1071 
1072 class LinearDerivateCaseInstance : public TriangleDerivateCaseInstance
1073 {
1074 public:
1075     LinearDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
1076                                const DerivateCaseDefinition &definitions, const DerivateCaseValues &values);
1077     virtual ~LinearDerivateCaseInstance(void);
1078 
1079     virtual bool verify(const tcu::ConstPixelBufferAccess &result, const tcu::PixelBufferAccess &errorMask);
1080 };
1081 
LinearDerivateCaseInstance(Context & context,const UniformSetup & uniformSetup,const DerivateCaseDefinition & definitions,const DerivateCaseValues & values)1082 LinearDerivateCaseInstance::LinearDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
1083                                                        const DerivateCaseDefinition &definitions,
1084                                                        const DerivateCaseValues &values)
1085     : TriangleDerivateCaseInstance(context, uniformSetup, definitions, values)
1086 {
1087 }
1088 
~LinearDerivateCaseInstance(void)1089 LinearDerivateCaseInstance::~LinearDerivateCaseInstance(void)
1090 {
1091 }
1092 
verify(const tcu::ConstPixelBufferAccess & result,const tcu::PixelBufferAccess & errorMask)1093 bool LinearDerivateCaseInstance::verify(const tcu::ConstPixelBufferAccess &result,
1094                                         const tcu::PixelBufferAccess &errorMask)
1095 {
1096     const tcu::Vec4 xScale           = tcu::Vec4(1.0f, 0.0f, 0.5f, -0.5f);
1097     const tcu::Vec4 yScale           = tcu::Vec4(0.0f, 1.0f, 0.5f, -0.5f);
1098     const tcu::Vec4 surfaceThreshold = getSurfaceThreshold() / abs(m_values.derivScale);
1099 
1100     if (isDfdxFunc(m_definitions.func) || isDfdyFunc(m_definitions.func))
1101     {
1102         const bool isX        = isDfdxFunc(m_definitions.func);
1103         const float div       = isX ? float(result.getWidth()) : float(result.getHeight());
1104         const tcu::Vec4 scale = isX ? xScale : yScale;
1105         tcu::Vec4 reference   = ((m_values.coordMax - m_values.coordMin) / div);
1106         const tcu::Vec4 opThreshold =
1107             getDerivateThreshold(m_definitions.precision, m_values.coordMin, m_values.coordMax, reference);
1108         const tcu::Vec4 threshold = max(surfaceThreshold, opThreshold);
1109         const int numComps        = glu::getDataTypeFloatScalars(m_definitions.dataType);
1110 
1111         /* adjust the reference value for the correct dfdx or dfdy sample adjacency */
1112         reference = reference * scale;
1113 
1114         m_context.getTestContext().getLog()
1115             << tcu::TestLog::Message << "Verifying result image.\n"
1116             << "\tValid derivative is " << LogVecComps(reference, numComps) << " with threshold "
1117             << LogVecComps(threshold, numComps) << tcu::TestLog::EndMessage;
1118 
1119         // short circuit if result is strictly within the normal value error bounds.
1120         // This improves performance significantly.
1121         if (verifyConstantDerivate(m_context.getTestContext().getLog(), result, errorMask, m_definitions.dataType,
1122                                    reference, threshold, m_values.derivScale, m_values.derivBias, LOG_NOTHING,
1123                                    m_definitions.demoteToHelperInvocation))
1124         {
1125             m_context.getTestContext().getLog()
1126                 << tcu::TestLog::Message << "No incorrect derivatives found, result valid." << tcu::TestLog::EndMessage;
1127 
1128             return true;
1129         }
1130 
1131         // some pixels exceed error bounds calculated for normal values. Verify that these
1132         // potentially invalid pixels are in fact valid due to (for example) subnorm flushing.
1133 
1134         m_context.getTestContext().getLog()
1135             << tcu::TestLog::Message
1136             << "Initial verification failed, verifying image by calculating accurate error bounds for each result "
1137                "pixel.\n"
1138             << "\tVerifying each result derivative is within its range of legal result values."
1139             << tcu::TestLog::EndMessage;
1140 
1141         {
1142             const tcu::UVec2 viewportSize(VIEWPORT_WIDTH, VIEWPORT_HEIGHT);
1143             const float w             = float(viewportSize.x());
1144             const float h             = float(viewportSize.y());
1145             const tcu::Vec4 valueRamp = (m_values.coordMax - m_values.coordMin);
1146             Linear2DFunctionEvaluator function;
1147 
1148             function.matrix.setRow(0, tcu::Vec3(valueRamp.x() / w, 0.0f, m_values.coordMin.x()));
1149             function.matrix.setRow(1, tcu::Vec3(0.0f, valueRamp.y() / h, m_values.coordMin.y()));
1150             function.matrix.setRow(
1151                 2,
1152                 tcu::Vec3(valueRamp.z() / w, valueRamp.z() / h, m_values.coordMin.z() + m_values.coordMin.z()) / 2.0f);
1153             function.matrix.setRow(
1154                 3, tcu::Vec3(-valueRamp.w() / w, -valueRamp.w() / h, m_values.coordMax.w() + m_values.coordMax.w()) /
1155                        2.0f);
1156 
1157             return reverifyConstantDerivateWithFlushRelaxations(
1158                 m_context.getTestContext().getLog(), result, errorMask, m_definitions.dataType, m_definitions.precision,
1159                 m_values.derivScale, m_values.derivBias, surfaceThreshold, m_definitions.func, function);
1160         }
1161     }
1162     else
1163     {
1164         DE_ASSERT(isFwidthFunc(m_definitions.func));
1165         const float w = float(result.getWidth());
1166         const float h = float(result.getHeight());
1167 
1168         const tcu::Vec4 dx        = ((m_values.coordMax - m_values.coordMin) / w) * xScale;
1169         const tcu::Vec4 dy        = ((m_values.coordMax - m_values.coordMin) / h) * yScale;
1170         const tcu::Vec4 reference = tcu::abs(dx) + tcu::abs(dy);
1171         const tcu::Vec4 dxThreshold =
1172             getDerivateThreshold(m_definitions.precision, m_values.coordMin * xScale, m_values.coordMax * xScale, dx);
1173         const tcu::Vec4 dyThreshold =
1174             getDerivateThreshold(m_definitions.precision, m_values.coordMin * yScale, m_values.coordMax * yScale, dy);
1175         const tcu::Vec4 threshold = max(surfaceThreshold, max(dxThreshold, dyThreshold));
1176 
1177         return verifyConstantDerivate(m_context.getTestContext().getLog(), result, errorMask, m_definitions.dataType,
1178                                       reference, threshold, m_values.derivScale, m_values.derivBias);
1179     }
1180 }
1181 
1182 // LinearDerivateCase
1183 
1184 class LinearDerivateCase : public TriangleDerivateCase
1185 {
1186 public:
1187     LinearDerivateCase(tcu::TestContext &testCtx, const std::string &name, DerivateFunc func, glu::DataType type,
1188                        glu::Precision precision, bool inNonUniformControlFlow, SurfaceType surfaceType, int numSamples,
1189                        const std::string &fragmentSrcTmpl, BaseUniformType usedDefaultUniform,
1190                        bool demoteToHelperInvocaiton);
1191     virtual ~LinearDerivateCase(void);
1192 
1193     virtual void initPrograms(vk::SourceCollections &programCollection) const;
1194     virtual TestInstance *createInstance(Context &context) const;
checkSupport(Context & context) const1195     virtual void checkSupport(Context &context) const
1196     {
1197         TriangleDerivateCase::checkSupport(context);
1198         if (m_definitions.demoteToHelperInvocation)
1199         {
1200             context.requireDeviceFunctionality("VK_EXT_shader_demote_to_helper_invocation");
1201         }
1202     }
1203 
1204 private:
1205     const std::string m_fragmentTmpl;
1206 };
1207 
LinearDerivateCase(tcu::TestContext & testCtx,const std::string & name,DerivateFunc func,glu::DataType type,glu::Precision precision,bool inNonUniformControlFlow,SurfaceType surfaceType,int numSamples,const std::string & fragmentSrcTmpl,BaseUniformType usedDefaultUniform,bool demoteToHelperInvocaiton)1208 LinearDerivateCase::LinearDerivateCase(tcu::TestContext &testCtx, const std::string &name, DerivateFunc func,
1209                                        glu::DataType type, glu::Precision precision, bool inNonUniformControlFlow,
1210                                        SurfaceType surfaceType, int numSamples, const std::string &fragmentSrcTmpl,
1211                                        BaseUniformType usedDefaultUniform, bool demoteToHelperInvocaiton)
1212     : TriangleDerivateCase(testCtx, name, new LinearDerivateUniformSetup(false, usedDefaultUniform))
1213     , m_fragmentTmpl(fragmentSrcTmpl)
1214 {
1215     m_definitions.func                     = func;
1216     m_definitions.dataType                 = type;
1217     m_definitions.precision                = precision;
1218     m_definitions.inNonUniformControlFlow  = inNonUniformControlFlow;
1219     m_definitions.coordDataType            = m_definitions.dataType;
1220     m_definitions.coordPrecision           = m_definitions.precision;
1221     m_definitions.surfaceType              = surfaceType;
1222     m_definitions.numSamples               = numSamples;
1223     m_definitions.demoteToHelperInvocation = demoteToHelperInvocaiton;
1224 
1225     const tcu::UVec2 viewportSize(VIEWPORT_WIDTH, VIEWPORT_HEIGHT);
1226     const float w = float(viewportSize.x());
1227     const float h = float(viewportSize.y());
1228 
1229     switch (m_definitions.precision)
1230     {
1231     case glu::PRECISION_HIGHP:
1232         m_values.coordMin = tcu::Vec4(-97.f, 0.2f, 71.f, 74.f);
1233         m_values.coordMax = tcu::Vec4(-13.2f, -77.f, 44.f, 76.f);
1234         break;
1235 
1236     case glu::PRECISION_MEDIUMP:
1237         m_values.coordMin = tcu::Vec4(-37.0f, 47.f, -7.f, 0.0f);
1238         m_values.coordMax = tcu::Vec4(-1.0f, 12.f, 7.f, 19.f);
1239         break;
1240 
1241     case glu::PRECISION_LOWP:
1242         m_values.coordMin = tcu::Vec4(0.0f, -1.0f, 0.0f, 1.0f);
1243         m_values.coordMax = tcu::Vec4(1.0f, 1.0f, -1.0f, -1.0f);
1244         break;
1245 
1246     default:
1247         DE_ASSERT(false);
1248     }
1249 
1250     if (m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO)
1251     {
1252         // No scale or bias used for accuracy.
1253         m_values.derivScale = tcu::Vec4(1.0f);
1254         m_values.derivBias  = tcu::Vec4(0.0f);
1255     }
1256     else
1257     {
1258         // Compute scale - bias that normalizes to 0..1 range.
1259         const tcu::Vec4 dx = (m_values.coordMax - m_values.coordMin) / tcu::Vec4(w, w, w * 0.5f, -w * 0.5f);
1260         const tcu::Vec4 dy = (m_values.coordMax - m_values.coordMin) / tcu::Vec4(h, h, h * 0.5f, -h * 0.5f);
1261 
1262         if (isDfdxFunc(m_definitions.func))
1263             m_values.derivScale = 0.5f / dx;
1264         else if (isDfdyFunc(m_definitions.func))
1265             m_values.derivScale = 0.5f / dy;
1266         else if (isFwidthFunc(m_definitions.func))
1267             m_values.derivScale = 0.5f / (tcu::abs(dx) + tcu::abs(dy));
1268         else
1269             DE_ASSERT(false);
1270 
1271         m_values.derivBias = tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f);
1272     }
1273 }
1274 
~LinearDerivateCase(void)1275 LinearDerivateCase::~LinearDerivateCase(void)
1276 {
1277 }
1278 
createInstance(Context & context) const1279 TestInstance *LinearDerivateCase::createInstance(Context &context) const
1280 {
1281     DE_ASSERT(m_uniformSetup != DE_NULL);
1282     if (m_fragmentTmpl.find("gl_SubgroupInvocationID") != std::string::npos)
1283     {
1284         if (!subgroups::areQuadOperationsSupportedForStages(context, VK_SHADER_STAGE_FRAGMENT_BIT))
1285             throw tcu::NotSupportedError("test requires VK_SUBGROUP_FEATURE_QUAD_BIT");
1286 
1287         if (subgroups::getSubgroupSize(context) < 4)
1288             throw tcu::NotSupportedError("test requires subgroupSize >= 4");
1289     }
1290     return new LinearDerivateCaseInstance(context, *m_uniformSetup, m_definitions, m_values);
1291 }
1292 
initPrograms(vk::SourceCollections & programCollection) const1293 void LinearDerivateCase::initPrograms(vk::SourceCollections &programCollection) const
1294 {
1295     const SpirvVersion spirvVersion = (m_definitions.inNonUniformControlFlow || isSubgroupFunc(m_definitions.func)) ?
1296                                           vk::SPIRV_VERSION_1_3 :
1297                                           vk::SPIRV_VERSION_1_0;
1298     const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, spirvVersion, 0u);
1299 
1300     const bool packToInt = m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO;
1301     map<string, string> fragmentParams;
1302 
1303     fragmentParams["OUTPUT_TYPE"] = glu::getDataTypeName(packToInt ? glu::TYPE_UINT_VEC4 : glu::TYPE_FLOAT_VEC4);
1304     fragmentParams["OUTPUT_PREC"] = glu::getPrecisionName(packToInt ? glu::PRECISION_HIGHP : m_definitions.precision);
1305     fragmentParams["PRECISION"]   = glu::getPrecisionName(m_definitions.precision);
1306     fragmentParams["DATATYPE"]    = glu::getDataTypeName(m_definitions.dataType);
1307     fragmentParams["FUNC"]        = getDerivateFuncName(m_definitions.func);
1308 
1309     if (packToInt)
1310     {
1311         fragmentParams["CAST_TO_OUTPUT"] = m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ?
1312                                                "floatBitsToUint(res)" :
1313                                            m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ?
1314                                                "floatBitsToUint(vec4(res, 1.0))" :
1315                                            m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ?
1316                                                "floatBitsToUint(vec4(res, 0.0, 1.0))" :
1317                                                /* TYPE_FLOAT */ "floatBitsToUint(vec4(res, 0.0, 0.0, 1.0))";
1318     }
1319     else
1320     {
1321         fragmentParams["CAST_TO_OUTPUT"] =
1322             m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "res" :
1323             m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? "vec4(res, 1.0)" :
1324             m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? "vec4(res, 0.0, 1.0)" :
1325                                                              /* TYPE_FLOAT */ "vec4(res, 0.0, 0.0, 1.0)";
1326     }
1327 
1328     std::string fragmentSrc = tcu::StringTemplate(m_fragmentTmpl).specialize(fragmentParams);
1329     programCollection.glslSources.add("vert")
1330         << glu::VertexSource(genVertexSource(m_definitions.coordDataType, m_definitions.coordPrecision));
1331     programCollection.glslSources.add("frag") << glu::FragmentSource(fragmentSrc) << buildOptions;
1332 }
1333 
1334 // TextureDerivateCaseInstance
1335 
1336 class TextureDerivateCaseInstance : public TriangleDerivateCaseInstance
1337 {
1338 public:
1339     TextureDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
1340                                 const DerivateCaseDefinition &definitions, const DerivateCaseValues &values,
1341                                 const TextureCaseValues &textureValues);
1342     virtual ~TextureDerivateCaseInstance(void);
1343 
1344     virtual bool verify(const tcu::ConstPixelBufferAccess &result, const tcu::PixelBufferAccess &errorMask);
1345 
1346 private:
1347     const TextureCaseValues &m_textureValues;
1348 };
1349 
TextureDerivateCaseInstance(Context & context,const UniformSetup & uniformSetup,const DerivateCaseDefinition & definitions,const DerivateCaseValues & values,const TextureCaseValues & textureValues)1350 TextureDerivateCaseInstance::TextureDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
1351                                                          const DerivateCaseDefinition &definitions,
1352                                                          const DerivateCaseValues &values,
1353                                                          const TextureCaseValues &textureValues)
1354     : TriangleDerivateCaseInstance(context, uniformSetup, definitions, values)
1355     , m_textureValues(textureValues)
1356 {
1357     de::MovePtr<tcu::Texture2D> texture;
1358 
1359     // Lowp and mediump cases use RGBA16F format, while highp uses RGBA32F.
1360     {
1361         const tcu::UVec2 viewportSize(VIEWPORT_WIDTH, VIEWPORT_HEIGHT);
1362         const tcu::TextureFormat format =
1363             glu::mapGLInternalFormat(m_definitions.precision == glu::PRECISION_HIGHP ? GL_RGBA32F : GL_RGBA16F);
1364 
1365         texture = de::MovePtr<tcu::Texture2D>(new tcu::Texture2D(format, viewportSize.x(), viewportSize.y()));
1366         texture->allocLevel(0);
1367     }
1368 
1369     // Fill with gradients.
1370     {
1371         const tcu::PixelBufferAccess level0 = texture->getLevel(0);
1372         for (int y = 0; y < level0.getHeight(); y++)
1373         {
1374             for (int x = 0; x < level0.getWidth(); x++)
1375             {
1376                 const float xf    = (float(x) + 0.5f) / float(level0.getWidth());
1377                 const float yf    = (float(y) + 0.5f) / float(level0.getHeight());
1378                 const tcu::Vec4 s = tcu::Vec4(xf, yf, (xf + yf) / 2.0f, 1.0f - (xf + yf) / 2.0f);
1379 
1380                 level0.setPixel(m_textureValues.texValueMin +
1381                                     (m_textureValues.texValueMax - m_textureValues.texValueMin) * s,
1382                                 x, y);
1383             }
1384         }
1385     }
1386 
1387     de::SharedPtr<TextureBinding> testTexture(new TextureBinding(
1388         texture.release(),
1389         tcu::Sampler(tcu::Sampler::CLAMP_TO_EDGE, tcu::Sampler::CLAMP_TO_EDGE, tcu::Sampler::CLAMP_TO_EDGE,
1390                      tcu::Sampler::NEAREST, tcu::Sampler::NEAREST, 0.0f, true, tcu::Sampler::COMPAREMODE_NONE, 0,
1391                      tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), true)));
1392     m_textures.push_back(testTexture);
1393 }
1394 
~TextureDerivateCaseInstance(void)1395 TextureDerivateCaseInstance::~TextureDerivateCaseInstance(void)
1396 {
1397 }
1398 
verify(const tcu::ConstPixelBufferAccess & result,const tcu::PixelBufferAccess & errorMask)1399 bool TextureDerivateCaseInstance::verify(const tcu::ConstPixelBufferAccess &result,
1400                                          const tcu::PixelBufferAccess &errorMask)
1401 {
1402     // \note Edges are ignored in comparison
1403     if (result.getWidth() < 2 || result.getHeight() < 2)
1404         throw tcu::NotSupportedError("Too small viewport");
1405 
1406     tcu::ConstPixelBufferAccess compareArea =
1407         tcu::getSubregion(result, 1, 1, result.getWidth() - 2, result.getHeight() - 2);
1408     tcu::PixelBufferAccess maskArea =
1409         tcu::getSubregion(errorMask, 1, 1, errorMask.getWidth() - 2, errorMask.getHeight() - 2);
1410     const tcu::Vec4 xScale = tcu::Vec4(1.0f, 0.0f, 0.5f, -0.5f);
1411     const tcu::Vec4 yScale = tcu::Vec4(0.0f, 1.0f, 0.5f, -0.5f);
1412     const float w          = float(result.getWidth());
1413     const float h          = float(result.getHeight());
1414 
1415     const tcu::Vec4 surfaceThreshold = getSurfaceThreshold() / abs(m_values.derivScale);
1416 
1417     if (isDfdxFunc(m_definitions.func) || isDfdyFunc(m_definitions.func))
1418     {
1419         const bool isX              = isDfdxFunc(m_definitions.func);
1420         const float div             = isX ? w : h;
1421         const tcu::Vec4 scale       = isX ? xScale : yScale;
1422         tcu::Vec4 reference         = ((m_textureValues.texValueMax - m_textureValues.texValueMin) / div);
1423         const tcu::Vec4 opThreshold = getDerivateThreshold(m_definitions.precision, m_textureValues.texValueMin,
1424                                                            m_textureValues.texValueMax, reference);
1425         const tcu::Vec4 threshold   = max(surfaceThreshold, opThreshold);
1426         const int numComps          = glu::getDataTypeFloatScalars(m_definitions.dataType);
1427 
1428         /* adjust the reference value for the correct dfdx or dfdy sample adjacency */
1429         reference = reference * scale;
1430 
1431         m_context.getTestContext().getLog()
1432             << tcu::TestLog::Message << "Verifying result image.\n"
1433             << "\tValid derivative is " << LogVecComps(reference, numComps) << " with threshold "
1434             << LogVecComps(threshold, numComps) << tcu::TestLog::EndMessage;
1435 
1436         // short circuit if result is strictly within the normal value error bounds.
1437         // This improves performance significantly.
1438         if (verifyConstantDerivate(m_context.getTestContext().getLog(), compareArea, maskArea, m_definitions.dataType,
1439                                    reference, threshold, m_values.derivScale, m_values.derivBias, LOG_NOTHING))
1440         {
1441             m_context.getTestContext().getLog()
1442                 << tcu::TestLog::Message << "No incorrect derivatives found, result valid." << tcu::TestLog::EndMessage;
1443 
1444             return true;
1445         }
1446 
1447         // some pixels exceed error bounds calculated for normal values. Verify that these
1448         // potentially invalid pixels are in fact valid due to (for example) subnorm flushing.
1449 
1450         m_context.getTestContext().getLog()
1451             << tcu::TestLog::Message
1452             << "Initial verification failed, verifying image by calculating accurate error bounds for each result "
1453                "pixel.\n"
1454             << "\tVerifying each result derivative is within its range of legal result values."
1455             << tcu::TestLog::EndMessage;
1456 
1457         {
1458             const tcu::Vec4 valueRamp = (m_textureValues.texValueMax - m_textureValues.texValueMin);
1459             Linear2DFunctionEvaluator function;
1460 
1461             function.matrix.setRow(0, tcu::Vec3(valueRamp.x() / w, 0.0f, m_textureValues.texValueMin.x()));
1462             function.matrix.setRow(1, tcu::Vec3(0.0f, valueRamp.y() / h, m_textureValues.texValueMin.y()));
1463             function.matrix.setRow(2, tcu::Vec3(valueRamp.z() / w, valueRamp.z() / h,
1464                                                 m_textureValues.texValueMin.z() + m_textureValues.texValueMin.z()) /
1465                                           2.0f);
1466             function.matrix.setRow(3, tcu::Vec3(-valueRamp.w() / w, -valueRamp.w() / h,
1467                                                 m_textureValues.texValueMax.w() + m_textureValues.texValueMax.w()) /
1468                                           2.0f);
1469 
1470             return reverifyConstantDerivateWithFlushRelaxations(
1471                 m_context.getTestContext().getLog(), compareArea, maskArea, m_definitions.dataType,
1472                 m_definitions.precision, m_values.derivScale, m_values.derivBias, surfaceThreshold, m_definitions.func,
1473                 function);
1474         }
1475     }
1476     else
1477     {
1478         DE_ASSERT(isFwidthFunc(m_definitions.func));
1479         const tcu::Vec4 dx          = ((m_textureValues.texValueMax - m_textureValues.texValueMin) / w) * xScale;
1480         const tcu::Vec4 dy          = ((m_textureValues.texValueMax - m_textureValues.texValueMin) / h) * yScale;
1481         const tcu::Vec4 reference   = tcu::abs(dx) + tcu::abs(dy);
1482         const tcu::Vec4 dxThreshold = getDerivateThreshold(
1483             m_definitions.precision, m_textureValues.texValueMin * xScale, m_textureValues.texValueMax * xScale, dx);
1484         const tcu::Vec4 dyThreshold = getDerivateThreshold(
1485             m_definitions.precision, m_textureValues.texValueMin * yScale, m_textureValues.texValueMax * yScale, dy);
1486         const tcu::Vec4 threshold = max(surfaceThreshold, max(dxThreshold, dyThreshold));
1487 
1488         return verifyConstantDerivate(m_context.getTestContext().getLog(), compareArea, maskArea,
1489                                       m_definitions.dataType, reference, threshold, m_values.derivScale,
1490                                       m_values.derivBias);
1491     }
1492 }
1493 
1494 // TextureDerivateCase
1495 
1496 class TextureDerivateCase : public TriangleDerivateCase
1497 {
1498 public:
1499     TextureDerivateCase(tcu::TestContext &testCtx, const std::string &name, DerivateFunc func, glu::DataType type,
1500                         glu::Precision precision, SurfaceType surfaceType, int numSamples);
1501     virtual ~TextureDerivateCase(void);
1502 
1503     virtual void initPrograms(vk::SourceCollections &programCollection) const;
1504     virtual TestInstance *createInstance(Context &context) const;
1505 
1506 private:
1507     TextureCaseValues m_textureValues;
1508 };
1509 
TextureDerivateCase(tcu::TestContext & testCtx,const std::string & name,DerivateFunc func,glu::DataType type,glu::Precision precision,SurfaceType surfaceType,int numSamples)1510 TextureDerivateCase::TextureDerivateCase(tcu::TestContext &testCtx, const std::string &name, DerivateFunc func,
1511                                          glu::DataType type, glu::Precision precision, SurfaceType surfaceType,
1512                                          int numSamples)
1513     : TriangleDerivateCase(testCtx, name, new DerivateUniformSetup(true))
1514 {
1515     m_definitions.dataType       = type;
1516     m_definitions.func           = func;
1517     m_definitions.precision      = precision;
1518     m_definitions.coordDataType  = glu::TYPE_FLOAT_VEC2;
1519     m_definitions.coordPrecision = glu::PRECISION_HIGHP;
1520     m_definitions.surfaceType    = surfaceType;
1521     m_definitions.numSamples     = numSamples;
1522 
1523     // Texture size matches viewport and nearest sampling is used. Thus texture sampling
1524     // is equal to just interpolating the texture value range.
1525 
1526     // Determine value range for texture.
1527 
1528     switch (m_definitions.precision)
1529     {
1530     case glu::PRECISION_HIGHP:
1531         m_textureValues.texValueMin = tcu::Vec4(-97.f, 0.2f, 71.f, 74.f);
1532         m_textureValues.texValueMax = tcu::Vec4(-13.2f, -77.f, 44.f, 76.f);
1533         break;
1534 
1535     case glu::PRECISION_MEDIUMP:
1536         m_textureValues.texValueMin = tcu::Vec4(-37.0f, 47.f, -7.f, 0.0f);
1537         m_textureValues.texValueMax = tcu::Vec4(-1.0f, 12.f, 7.f, 19.f);
1538         break;
1539 
1540     case glu::PRECISION_LOWP:
1541         m_textureValues.texValueMin = tcu::Vec4(0.0f, -1.0f, 0.0f, 1.0f);
1542         m_textureValues.texValueMax = tcu::Vec4(1.0f, 1.0f, -1.0f, -1.0f);
1543         break;
1544 
1545     default:
1546         DE_ASSERT(false);
1547     }
1548 
1549     // Texture coordinates
1550     m_values.coordMin = tcu::Vec4(0.0f);
1551     m_values.coordMax = tcu::Vec4(1.0f);
1552 
1553     if (m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO)
1554     {
1555         // No scale or bias used for accuracy.
1556         m_values.derivScale = tcu::Vec4(1.0f);
1557         m_values.derivBias  = tcu::Vec4(0.0f);
1558     }
1559     else
1560     {
1561         // Compute scale - bias that normalizes to 0..1 range.
1562         const tcu::UVec2 viewportSize(VIEWPORT_WIDTH, VIEWPORT_HEIGHT);
1563         const float w = float(viewportSize.x());
1564         const float h = float(viewportSize.y());
1565         const tcu::Vec4 dx =
1566             (m_textureValues.texValueMax - m_textureValues.texValueMin) / tcu::Vec4(w, w, w * 0.5f, -w * 0.5f);
1567         const tcu::Vec4 dy =
1568             (m_textureValues.texValueMax - m_textureValues.texValueMin) / tcu::Vec4(h, h, h * 0.5f, -h * 0.5f);
1569 
1570         if (isDfdxFunc(m_definitions.func))
1571             m_values.derivScale = 0.5f / dx;
1572         else if (isDfdyFunc(m_definitions.func))
1573             m_values.derivScale = 0.5f / dy;
1574         else if (isFwidthFunc(m_definitions.func))
1575             m_values.derivScale = 0.5f / (tcu::abs(dx) + tcu::abs(dy));
1576         else
1577             DE_ASSERT(false);
1578 
1579         m_values.derivBias = tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f);
1580     }
1581 }
1582 
~TextureDerivateCase(void)1583 TextureDerivateCase::~TextureDerivateCase(void)
1584 {
1585 }
1586 
createInstance(Context & context) const1587 TestInstance *TextureDerivateCase::createInstance(Context &context) const
1588 {
1589     DE_ASSERT(m_uniformSetup != DE_NULL);
1590     return new TextureDerivateCaseInstance(context, *m_uniformSetup, m_definitions, m_values, m_textureValues);
1591 }
1592 
initPrograms(vk::SourceCollections & programCollection) const1593 void TextureDerivateCase::initPrograms(vk::SourceCollections &programCollection) const
1594 {
1595     // Generate shader
1596     {
1597         const char *fragmentTmpl = "#version 450\n"
1598                                    "layout(location = 0) in highp vec2 v_coord;\n"
1599                                    "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1600                                    "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1601                                    "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1602                                    "layout(binding = 2) uniform ${PRECISION} sampler2D u_sampler;\n"
1603                                    "void main (void)\n"
1604                                    "{\n"
1605                                    "    ${PRECISION} vec4 tex = texture(u_sampler, v_coord);\n"
1606                                    "    ${PRECISION} ${DATATYPE} res = ${FUNC}(tex${SWIZZLE}) * u_scale + u_bias;\n"
1607                                    "    o_color = ${CAST_TO_OUTPUT};\n"
1608                                    "}\n";
1609 
1610         const bool packToInt = m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO;
1611         map<string, string> fragmentParams;
1612 
1613         fragmentParams["OUTPUT_TYPE"] = glu::getDataTypeName(packToInt ? glu::TYPE_UINT_VEC4 : glu::TYPE_FLOAT_VEC4);
1614         fragmentParams["OUTPUT_PREC"] =
1615             glu::getPrecisionName(packToInt ? glu::PRECISION_HIGHP : m_definitions.precision);
1616         fragmentParams["PRECISION"] = glu::getPrecisionName(m_definitions.precision);
1617         fragmentParams["DATATYPE"]  = glu::getDataTypeName(m_definitions.dataType);
1618         fragmentParams["FUNC"]      = getDerivateFuncName(m_definitions.func);
1619         fragmentParams["SWIZZLE"]   = m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "" :
1620                                       m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? ".xyz" :
1621                                       m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? ".xy" :
1622                                                                                        /* TYPE_FLOAT */ ".x";
1623 
1624         if (packToInt)
1625         {
1626             fragmentParams["CAST_TO_OUTPUT"] = m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ?
1627                                                    "floatBitsToUint(res)" :
1628                                                m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ?
1629                                                    "floatBitsToUint(vec4(res, 1.0))" :
1630                                                m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ?
1631                                                    "floatBitsToUint(vec4(res, 0.0, 1.0))" :
1632                                                    /* TYPE_FLOAT */ "floatBitsToUint(vec4(res, 0.0, 0.0, 1.0))";
1633         }
1634         else
1635         {
1636             fragmentParams["CAST_TO_OUTPUT"] =
1637                 m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "res" :
1638                 m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? "vec4(res, 1.0)" :
1639                 m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? "vec4(res, 0.0, 1.0)" :
1640                                                                  /* TYPE_FLOAT */ "vec4(res, 0.0, 0.0, 1.0)";
1641         }
1642 
1643         std::string fragmentSrc = tcu::StringTemplate(fragmentTmpl).specialize(fragmentParams);
1644         programCollection.glslSources.add("vert")
1645             << glu::VertexSource(genVertexSource(m_definitions.coordDataType, m_definitions.coordPrecision));
1646         programCollection.glslSources.add("frag") << glu::FragmentSource(fragmentSrc);
1647     }
1648 }
1649 
1650 // ShaderDerivateTests
1651 
1652 class ShaderDerivateTests : public tcu::TestCaseGroup
1653 {
1654 public:
1655     ShaderDerivateTests(tcu::TestContext &testCtx);
1656     virtual ~ShaderDerivateTests(void);
1657 
1658     virtual void init(void);
1659 
1660 private:
1661     ShaderDerivateTests(const ShaderDerivateTests &);            // not allowed!
1662     ShaderDerivateTests &operator=(const ShaderDerivateTests &); // not allowed!
1663 };
1664 
ShaderDerivateTests(tcu::TestContext & testCtx)1665 ShaderDerivateTests::ShaderDerivateTests(tcu::TestContext &testCtx) : TestCaseGroup(testCtx, "derivate")
1666 {
1667 }
1668 
~ShaderDerivateTests(void)1669 ShaderDerivateTests::~ShaderDerivateTests(void)
1670 {
1671 }
1672 
1673 struct FunctionSpec
1674 {
1675     std::string name;
1676     DerivateFunc function;
1677     glu::DataType dataType;
1678     glu::Precision precision;
1679 
FunctionSpecvkt::sr::__anon71e082ae0111::FunctionSpec1680     FunctionSpec(const std::string &name_, DerivateFunc function_, glu::DataType dataType_, glu::Precision precision_)
1681         : name(name_)
1682         , function(function_)
1683         , dataType(dataType_)
1684         , precision(precision_)
1685     {
1686     }
1687 };
1688 
init(void)1689 void ShaderDerivateTests::init(void)
1690 {
1691     static const struct
1692     {
1693         const char *name;
1694         const char *description;
1695         const char *source;
1696         BaseUniformType usedDefaultUniform;
1697         bool inNonUniformControlFlow;
1698         bool demoteToHelperInvocation;
1699     } s_linearDerivateCases[] = {
1700         {"linear", "Basic derivate of linearly interpolated argument",
1701 
1702          "#version 450\n"
1703          "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1704          "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1705          "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1706          "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1707          "void main (void)\n"
1708          "{\n"
1709          "    ${PRECISION} ${DATATYPE} res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1710          "    o_color = ${CAST_TO_OUTPUT};\n"
1711          "}\n",
1712 
1713          U_LAST, false, false},
1714         {"in_function", "Derivate of linear function argument",
1715 
1716          "#version 450\n"
1717          "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1718          "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1719          "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1720          "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1721          "\n"
1722          "${PRECISION} ${DATATYPE} computeRes (${PRECISION} ${DATATYPE} value)\n"
1723          "{\n"
1724          "    return ${FUNC}(v_coord) * u_scale + u_bias;\n"
1725          "}\n"
1726          "\n"
1727          "void main (void)\n"
1728          "{\n"
1729          "    ${PRECISION} ${DATATYPE} res = computeRes(v_coord);\n"
1730          "    o_color = ${CAST_TO_OUTPUT};\n"
1731          "}\n",
1732 
1733          U_LAST, false, false},
1734         {"static_if", "Derivate of linearly interpolated value in static if",
1735 
1736          "#version 450\n"
1737          "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1738          "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1739          "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1740          "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1741          "void main (void)\n"
1742          "{\n"
1743          "    ${PRECISION} ${DATATYPE} res;\n"
1744          "    if (false)\n"
1745          "        res = ${FUNC}(-v_coord) * u_scale + u_bias;\n"
1746          "    else\n"
1747          "        res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1748          "    o_color = ${CAST_TO_OUTPUT};\n"
1749          "}\n",
1750 
1751          U_LAST, false, false},
1752         {"static_loop", "Derivate of linearly interpolated value in static loop",
1753 
1754          "#version 450\n"
1755          "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1756          "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1757          "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1758          "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1759          "void main (void)\n"
1760          "{\n"
1761          "    ${PRECISION} ${DATATYPE} res = ${DATATYPE}(0.0);\n"
1762          "    for (int i = 0; i < 2; i++)\n"
1763          "        res += ${FUNC}(v_coord * float(i));\n"
1764          "    res = res * u_scale + u_bias;\n"
1765          "    o_color = ${CAST_TO_OUTPUT};\n"
1766          "}\n",
1767 
1768          U_LAST, false, false},
1769         {"static_switch", "Derivate of linearly interpolated value in static switch",
1770 
1771          "#version 450\n"
1772          "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1773          "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1774          "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1775          "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1776          "void main (void)\n"
1777          "{\n"
1778          "    ${PRECISION} ${DATATYPE} res;\n"
1779          "    switch (1)\n"
1780          "    {\n"
1781          "        case 0: res = ${FUNC}(-v_coord) * u_scale + u_bias;    break;\n"
1782          "        case 1: res = ${FUNC}(v_coord) * u_scale + u_bias;    break;\n"
1783          "    }\n"
1784          "    o_color = ${CAST_TO_OUTPUT};\n"
1785          "}\n",
1786 
1787          U_LAST, false, false},
1788         {"uniform_if", "Derivate of linearly interpolated value in uniform if",
1789 
1790          "#version 450\n"
1791          "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1792          "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1793          "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1794          "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1795          "layout(binding = 2, std140) uniform Ui_true { bool ub_true; };\n"
1796          "void main (void)\n"
1797          "{\n"
1798          "    ${PRECISION} ${DATATYPE} res;\n"
1799          "    if (ub_true)"
1800          "        res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1801          "    else\n"
1802          "        res = ${FUNC}(-v_coord) * u_scale + u_bias;\n"
1803          "    o_color = ${CAST_TO_OUTPUT};\n"
1804          "}\n",
1805 
1806          UB_TRUE, false, false},
1807         {"uniform_loop", "Derivate of linearly interpolated value in uniform loop",
1808 
1809          "#version 450\n"
1810          "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1811          "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1812          "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1813          "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1814          "layout(binding = 2, std140) uniform Ui_two { int ui_two; };\n"
1815          "void main (void)\n"
1816          "{\n"
1817          "    ${PRECISION} ${DATATYPE} res = ${DATATYPE}(0.0);\n"
1818          "    for (int i = 0; i < ui_two; i++)\n"
1819          "        res += ${FUNC}(v_coord * float(i));\n"
1820          "    res = res * u_scale + u_bias;\n"
1821          "    o_color = ${CAST_TO_OUTPUT};\n"
1822          "}\n",
1823 
1824          UI_TWO, false, false},
1825         {"uniform_switch", "Derivate of linearly interpolated value in uniform switch",
1826 
1827          "#version 450\n"
1828          "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1829          "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1830          "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1831          "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1832          "layout(binding = 2, std140) uniform Ui_one { int ui_one; };\n"
1833          "void main (void)\n"
1834          "{\n"
1835          "    ${PRECISION} ${DATATYPE} res;\n"
1836          "    switch (ui_one)\n"
1837          "    {\n"
1838          "        case 0: res = ${FUNC}(-v_coord) * u_scale + u_bias;    break;\n"
1839          "        case 1: res = ${FUNC}(v_coord) * u_scale + u_bias;    break;\n"
1840          "    }\n"
1841          "    o_color = ${CAST_TO_OUTPUT};\n"
1842          "}\n",
1843 
1844          UI_ONE, false, false},
1845         {"dynamic_if", "Derivate of linearly interpolated value in static if",
1846 
1847          "#version 450\n"
1848          "#extension GL_KHR_shader_subgroup_ballot : require\n"
1849          "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1850          "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1851          "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1852          "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1853          "layout(binding = 2, std140) uniform Ui_one { int ui_one; };\n"
1854          "void main (void)\n"
1855          "{\n"
1856          "    ${PRECISION} ${DATATYPE} res;\n"
1857          "    bool non_uniform = ((uint(gl_FragCoord.x * 0.4) + uint(gl_FragCoord.y * 0.3)) & 2) != 0;\n"
1858          "    uvec4 quad_ballot = uvec4(0);\n"
1859          "    quad_ballot[gl_SubgroupInvocationID >> 5] = 0xf << (gl_SubgroupInvocationID & 0x1c);\n"
1860          "    bool quad_uniform = (subgroupBallot(non_uniform) & quad_ballot) == quad_ballot;\n"
1861          "    if (quad_uniform)\n"
1862          "        res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1863          "    else\n"
1864          "        res = ${FUNC}(v_coord * float(ui_one)) * u_scale + u_bias;\n"
1865          "    o_color = ${CAST_TO_OUTPUT};\n"
1866          "}\n",
1867 
1868          UI_ONE, true, false},
1869         {"dynamic_loop", "Derivate of linearly interpolated value in uniform loop",
1870 
1871          "#version 450\n"
1872          "#extension GL_KHR_shader_subgroup_ballot : require\n"
1873          "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1874          "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1875          "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1876          "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1877          "layout(binding = 2, std140) uniform Ui_one { int ui_one; };\n"
1878          "void main (void)\n"
1879          "{\n"
1880          "    ${PRECISION} ${DATATYPE} res = ${DATATYPE}(0.0);\n"
1881          "    bool non_uniform = ((uint(gl_FragCoord.x * 0.4) + uint(gl_FragCoord.y * 0.3)) & 2) != 0;\n"
1882          "    uvec4 quad_ballot = uvec4(0);\n"
1883          "    quad_ballot[gl_SubgroupInvocationID >> 5] = 0xf << (gl_SubgroupInvocationID & 0x1c);\n"
1884          "    bool quad_uniform = (subgroupBallot(non_uniform) & quad_ballot) == quad_ballot;\n"
1885          "    for (int i = 0; i < ui_one + int(quad_uniform); i++)\n"
1886          "        res = ${FUNC}(v_coord * float(i - int(quad_uniform) + 1));\n"
1887          "    res = res * u_scale + u_bias;\n"
1888          "    o_color = ${CAST_TO_OUTPUT};\n"
1889          "}\n",
1890 
1891          UI_ONE, true, false},
1892         {"dynamic_switch", "Derivate of linearly interpolated value in uniform switch",
1893 
1894          "#version 450\n"
1895          "#extension GL_KHR_shader_subgroup_ballot : require\n"
1896          "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1897          "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1898          "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1899          "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1900          "layout(binding = 2, std140) uniform Ui_one { int ui_one; };\n"
1901          "void main (void)\n"
1902          "{\n"
1903          "    ${PRECISION} ${DATATYPE} res;\n"
1904          "    bool non_uniform = ((uint(gl_FragCoord.x * 0.4) + uint(gl_FragCoord.y * 0.3)) & 2) != 0;\n"
1905          "    uvec4 quad_ballot = uvec4(0);\n"
1906          "    quad_ballot[gl_SubgroupInvocationID >> 5] = 0xf << (gl_SubgroupInvocationID & 0x1c);\n"
1907          "    bool quad_uniform = (subgroupBallot(non_uniform) & quad_ballot) == quad_ballot;\n"
1908          "    switch (int(quad_uniform))\n"
1909          "    {\n"
1910          "        case 0: res = ${FUNC}(v_coord) * u_scale + u_bias;    break;\n"
1911          "        case 1: res = ${FUNC}(v_coord * float(ui_one)) * u_scale + u_bias;    break;\n"
1912          "    }\n"
1913          "    o_color = ${CAST_TO_OUTPUT};\n"
1914          "}\n",
1915 
1916          UI_ONE, true, false},
1917         {"output_store", "Store variable to output and read it before using in a derivative",
1918 
1919          "#version 450\n"
1920          "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1921          "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1922          "layout(location = 1) out ${PRECISION} ${DATATYPE} intermediateStore;\n"
1923          "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1924          "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1925          "void main (void)\n"
1926          "{\n"
1927          "    intermediateStore = v_coord;\n"
1928          "    ${PRECISION} ${DATATYPE} res = ${FUNC}(intermediateStore) * u_scale + u_bias;\n"
1929          "    o_color = ${CAST_TO_OUTPUT};\n"
1930          "}\n",
1931 
1932          U_LAST, false, true},
1933         {"private_store", "Store variable to global and read it before using in a derivative",
1934 
1935          "#version 450\n"
1936          "#extension GL_EXT_demote_to_helper_invocation : enable\n"
1937          "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1938          "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1939          "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1940          "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1941          "${PRECISION} ${DATATYPE} intermediateStore;\n"
1942          "void main (void)\n"
1943          "{\n"
1944          "    intermediateStore = v_coord;\n"
1945          "    if (mod(gl_FragCoord.y, 2.0f) == 1.0f) demote;\n"
1946          "    ${PRECISION} ${DATATYPE} res = ${FUNC}(intermediateStore) * u_scale + u_bias;\n"
1947          "    o_color = ${CAST_TO_OUTPUT};\n"
1948          "}\n",
1949 
1950          U_LAST, false, true},
1951     };
1952 
1953     const char *dFdxSubgroupSource =
1954         "#version 450\n"
1955         "#extension GL_KHR_shader_subgroup_ballot : require\n"
1956         "#extension GL_KHR_shader_subgroup_quad : require\n"
1957         "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1958         "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1959         "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1960         "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1961         "${DATATYPE} dFdxSubgroup(${DATATYPE} f)\n"
1962         "{\n"
1963         "    ${DATATYPE} left, right;\n"
1964         "    if ((gl_SubgroupInvocationID & 2) == 0) {\n"
1965         "        left = subgroupQuadBroadcast(f, 0);\n"
1966         "        right = subgroupQuadBroadcast(f, 1);\n"
1967         "    } else {\n"
1968         "        left = subgroupQuadBroadcast(f, 2);\n"
1969         "        right = subgroupQuadBroadcast(f, 3);\n"
1970         "    }\n"
1971         "    return right - left;\n"
1972         "}\n"
1973         "\n"
1974         "void main (void)\n"
1975         "{\n"
1976         "    uvec4 quad_ballot = uvec4(0);\n"
1977         "    ${PRECISION} ${DATATYPE} res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1978         "    o_color = ${CAST_TO_OUTPUT};\n"
1979         "}\n";
1980 
1981     const char *dFdySubgroupSource =
1982         "#version 450\n"
1983         "#extension GL_KHR_shader_subgroup_quad : require\n"
1984         "#extension GL_KHR_shader_subgroup_ballot : require\n"
1985         "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1986         "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1987         "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1988         "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1989         "${DATATYPE} dFdySubgroup(${DATATYPE} f)\n"
1990         "{\n"
1991         "    ${DATATYPE} top, bottom;\n"
1992         "    if ((gl_SubgroupInvocationID & 1) == 0) {\n"
1993         "        top = subgroupQuadBroadcast(f, 0);\n"
1994         "        bottom = subgroupQuadBroadcast(f, 2);\n"
1995         "    } else {\n"
1996         "        top = subgroupQuadBroadcast(f, 1);\n"
1997         "        bottom = subgroupQuadBroadcast(f, 3);\n"
1998         "    }\n"
1999         "    return bottom - top;\n"
2000         "}\n"
2001         "\n"
2002         "void main (void)\n"
2003         "{\n"
2004         "    uvec4 quad_ballot = uvec4(0);\n"
2005         "    quad_ballot[gl_SubgroupInvocationID >> 5] = 0xf << (gl_SubgroupInvocationID & 0x1c);\n"
2006         "    ${PRECISION} ${DATATYPE} res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
2007         "    o_color = ${CAST_TO_OUTPUT};\n"
2008         "}\n";
2009 
2010     static const struct
2011     {
2012         const char *name;
2013         SurfaceType surfaceType;
2014         int numSamples;
2015     } s_fboConfigs[] = {
2016         {"fbo", SURFACETYPE_UNORM_FBO, 0},
2017         {"fbo_msaa2", SURFACETYPE_UNORM_FBO, 2},
2018         {"fbo_msaa4", SURFACETYPE_UNORM_FBO, 4},
2019         {"fbo_float", SURFACETYPE_FLOAT_FBO, 0},
2020     };
2021 
2022     static const struct
2023     {
2024         const char *name;
2025         SurfaceType surfaceType;
2026         int numSamples;
2027     } s_textureConfigs[] = {
2028         {"basic", SURFACETYPE_UNORM_FBO, 0},
2029         {"msaa4", SURFACETYPE_UNORM_FBO, 4},
2030         {"float", SURFACETYPE_FLOAT_FBO, 0},
2031     };
2032 
2033     // .dfdx[fine|coarse], .dfdy[fine|coarse], .fwidth[fine|coarse]
2034     for (int funcNdx = 0; funcNdx < DERIVATE_LAST; funcNdx++)
2035     {
2036         const DerivateFunc function = DerivateFunc(funcNdx);
2037         de::MovePtr<tcu::TestCaseGroup> functionGroup(
2038             new tcu::TestCaseGroup(m_testCtx, getDerivateFuncCaseName(function)));
2039 
2040         // .constant - no precision variants and no subgroup derivatives, checks that derivate of constant arguments is 0
2041         if (!isSubgroupFunc(function))
2042         {
2043             // Derivate of constant argument
2044             de::MovePtr<tcu::TestCaseGroup> constantGroup(new tcu::TestCaseGroup(m_testCtx, "constant"));
2045 
2046             for (int vecSize = 1; vecSize <= 4; vecSize++)
2047             {
2048                 const glu::DataType dataType = vecSize > 1 ? glu::getDataTypeFloatVec(vecSize) : glu::TYPE_FLOAT;
2049                 constantGroup->addChild(
2050                     new ConstantDerivateCase(m_testCtx, glu::getDataTypeName(dataType), function, dataType));
2051             }
2052 
2053             functionGroup->addChild(constantGroup.release());
2054         }
2055 
2056         // Cases based on LinearDerivateCase; subgroup derivatives are handled separately
2057         if (!isSubgroupFunc(function))
2058         {
2059             for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(s_linearDerivateCases); caseNdx++)
2060             {
2061                 de::MovePtr<tcu::TestCaseGroup> linearCaseGroup(
2062                     new tcu::TestCaseGroup(m_testCtx, s_linearDerivateCases[caseNdx].name));
2063                 const char *source = s_linearDerivateCases[caseNdx].source;
2064 
2065                 for (int vecSize = 1; vecSize <= 4; vecSize++)
2066                 {
2067                     for (int precNdx = 0; precNdx < glu::PRECISION_LAST; precNdx++)
2068                     {
2069                         const glu::DataType dataType =
2070                             vecSize > 1 ? glu::getDataTypeFloatVec(vecSize) : glu::TYPE_FLOAT;
2071                         const glu::Precision precision = glu::Precision(precNdx);
2072                         const SurfaceType surfaceType  = SURFACETYPE_UNORM_FBO;
2073                         const int numSamples           = 0;
2074                         std::ostringstream caseName;
2075 
2076                         if (caseNdx != 0 && precision == glu::PRECISION_LOWP)
2077                             continue; // Skip as lowp doesn't actually produce any bits when rendered to default FB.
2078 
2079                         caseName << glu::getDataTypeName(dataType) << "_" << glu::getPrecisionName(precision);
2080 
2081                         linearCaseGroup->addChild(new LinearDerivateCase(
2082                             m_testCtx, caseName.str(), function, dataType, precision,
2083                             s_linearDerivateCases[caseNdx].inNonUniformControlFlow, surfaceType, numSamples, source,
2084                             s_linearDerivateCases[caseNdx].usedDefaultUniform,
2085                             s_linearDerivateCases[caseNdx].demoteToHelperInvocation));
2086                     }
2087                 }
2088 
2089                 functionGroup->addChild(linearCaseGroup.release());
2090             }
2091         }
2092 
2093         // Fbo cases
2094         for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(s_fboConfigs); caseNdx++)
2095         {
2096             // Derivate usage when rendering into FBO
2097             de::MovePtr<tcu::TestCaseGroup> fboGroup(new tcu::TestCaseGroup(m_testCtx, s_fboConfigs[caseNdx].name));
2098             // use source from subgroup source or source from .linear group
2099             const char *source            = function == DERIVATE_DFDXSUBGROUP ? dFdxSubgroupSource :
2100                                             function == DERIVATE_DFDYSUBGROUP ? dFdySubgroupSource :
2101                                                                                 s_linearDerivateCases[0].source;
2102             const SurfaceType surfaceType = s_fboConfigs[caseNdx].surfaceType;
2103             const int numSamples          = s_fboConfigs[caseNdx].numSamples;
2104 
2105             for (int vecSize = 1; vecSize <= 4; vecSize++)
2106             {
2107                 for (int precNdx = 0; precNdx < glu::PRECISION_LAST; precNdx++)
2108                 {
2109                     const glu::DataType dataType   = vecSize > 1 ? glu::getDataTypeFloatVec(vecSize) : glu::TYPE_FLOAT;
2110                     const glu::Precision precision = glu::Precision(precNdx);
2111                     std::ostringstream caseName;
2112 
2113                     if (surfaceType != SURFACETYPE_FLOAT_FBO && precision == glu::PRECISION_LOWP)
2114                         continue; // Skip as lowp doesn't actually produce any bits when rendered to U8 RT.
2115 
2116                     caseName << glu::getDataTypeName(dataType) << "_" << glu::getPrecisionName(precision);
2117 
2118                     fboGroup->addChild(new LinearDerivateCase(m_testCtx, caseName.str(), function, dataType, precision,
2119                                                               false, surfaceType, numSamples, source, U_LAST, false));
2120                 }
2121             }
2122 
2123             functionGroup->addChild(fboGroup.release());
2124         }
2125 
2126         // .texture
2127         if (!isSubgroupFunc(function))
2128         {
2129             de::MovePtr<tcu::TestCaseGroup> textureGroup(new tcu::TestCaseGroup(m_testCtx, "texture"));
2130 
2131             for (int texCaseNdx = 0; texCaseNdx < DE_LENGTH_OF_ARRAY(s_textureConfigs); texCaseNdx++)
2132             {
2133                 de::MovePtr<tcu::TestCaseGroup> caseGroup(
2134                     new tcu::TestCaseGroup(m_testCtx, s_textureConfigs[texCaseNdx].name));
2135                 const SurfaceType surfaceType = s_textureConfigs[texCaseNdx].surfaceType;
2136                 const int numSamples          = s_textureConfigs[texCaseNdx].numSamples;
2137 
2138                 for (int vecSize = 1; vecSize <= 4; vecSize++)
2139                 {
2140                     for (int precNdx = 0; precNdx < glu::PRECISION_LAST; precNdx++)
2141                     {
2142                         const glu::DataType dataType =
2143                             vecSize > 1 ? glu::getDataTypeFloatVec(vecSize) : glu::TYPE_FLOAT;
2144                         const glu::Precision precision = glu::Precision(precNdx);
2145                         std::ostringstream caseName;
2146 
2147                         if (surfaceType != SURFACETYPE_FLOAT_FBO && precision == glu::PRECISION_LOWP)
2148                             continue; // Skip as lowp doesn't actually produce any bits when rendered to U8 RT.
2149 
2150                         caseName << glu::getDataTypeName(dataType) << "_" << glu::getPrecisionName(precision);
2151 
2152                         caseGroup->addChild(new TextureDerivateCase(m_testCtx, caseName.str(), function, dataType,
2153                                                                     precision, surfaceType, numSamples));
2154                     }
2155                 }
2156 
2157                 textureGroup->addChild(caseGroup.release());
2158             }
2159 
2160             functionGroup->addChild(textureGroup.release());
2161         }
2162 
2163         addChild(functionGroup.release());
2164     }
2165 }
2166 
2167 } // namespace
2168 
createDerivateTests(tcu::TestContext & testCtx)2169 tcu::TestCaseGroup *createDerivateTests(tcu::TestContext &testCtx)
2170 {
2171     return new ShaderDerivateTests(testCtx);
2172 }
2173 
2174 } // namespace sr
2175 } // namespace vkt
2176