1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2016 The Khronos Group Inc.
6 * Copyright (c) 2016 Samsung Electronics Co., Ltd.
7 * Copyright (c) 2016 The Android Open Source Project
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 *//*!
22 * \file
23 * \brief Shader derivate function tests.
24 *
25 * \todo [2013-06-25 pyry] Missing features:
26 * - lines and points
27 * - projected coordinates
 *  - continuous non-trivial functions (sin, exp)
 *  - non-continuous functions (step)
30 *//*--------------------------------------------------------------------*/
31
32 #include "vktShaderRenderDerivateTests.hpp"
33 #include "vktShaderRender.hpp"
34 #include "subgroups/vktSubgroupsTestsUtils.hpp"
35 #include "vkImageUtil.hpp"
36 #include "vkQueryUtil.hpp"
37
38 #include "gluTextureUtil.hpp"
39
40 #include "tcuStringTemplate.hpp"
41 #include "tcuSurface.hpp"
42 #include "tcuTestLog.hpp"
43 #include "tcuVectorUtil.hpp"
44 #include "tcuTextureUtil.hpp"
45 #include "tcuRGBA.hpp"
46 #include "tcuFloat.hpp"
47 #include "tcuInterval.hpp"
48
49 #include "deUniquePtr.hpp"
50 #include "glwEnums.hpp"
51
52 #include <sstream>
53 #include <string>
54
55 namespace vkt
56 {
57 namespace sr
58 {
59 namespace
60 {
61
62 using namespace vk;
63
64 using std::map;
65 using std::ostringstream;
66 using std::string;
67 using std::vector;
68 using tcu::TestLog;
69
// Fixed rendering and logging constants shared by the derivative tests.
enum
{
    VIEWPORT_WIDTH      = 99,  // render target width, used for m_renderSize
    VIEWPORT_HEIGHT     = 133, // render target height, used for m_renderSize
    MAX_FAILED_MESSAGES = 10   // cap on per-pixel failure messages written to the log
};
76
// Derivative functions exercised by the tests, grouped into the dFdx, dFdy and
// fwidth families. The *SUBGROUP variants are the ones checkSupport() gates on
// subgroup quad operations. DERIVATE_LAST is the sentinel used for "not set".
enum DerivateFunc
{
    DERIVATE_DFDX = 0,
    DERIVATE_DFDXFINE,
    DERIVATE_DFDXCOARSE,
    DERIVATE_DFDXSUBGROUP,

    DERIVATE_DFDY,
    DERIVATE_DFDYFINE,
    DERIVATE_DFDYCOARSE,
    DERIVATE_DFDYSUBGROUP,

    DERIVATE_FWIDTH,
    DERIVATE_FWIDTHFINE,
    DERIVATE_FWIDTHCOARSE,

    DERIVATE_LAST
};
95
// Kind of color attachment the derivative values are written to.
enum SurfaceType
{
    SURFACETYPE_UNORM_FBO = 0, // rendered with the GL_RGBA8 color format
    SURFACETYPE_FLOAT_FBO,     // \note Uses RGBA32UI fbo actually, since FP rendertargets are not in core spec.

    SURFACETYPE_LAST
};
103
104 // Utilities
105
getDerivateFuncName(DerivateFunc func)106 static const char *getDerivateFuncName(DerivateFunc func)
107 {
108 switch (func)
109 {
110 case DERIVATE_DFDX:
111 return "dFdx";
112 case DERIVATE_DFDXFINE:
113 return "dFdxFine";
114 case DERIVATE_DFDXCOARSE:
115 return "dFdxCoarse";
116 case DERIVATE_DFDXSUBGROUP:
117 return "dFdxSubgroup";
118 case DERIVATE_DFDY:
119 return "dFdy";
120 case DERIVATE_DFDYFINE:
121 return "dFdyFine";
122 case DERIVATE_DFDYCOARSE:
123 return "dFdyCoarse";
124 case DERIVATE_DFDYSUBGROUP:
125 return "dFdySubgroup";
126 case DERIVATE_FWIDTH:
127 return "fwidth";
128 case DERIVATE_FWIDTHFINE:
129 return "fwidthFine";
130 case DERIVATE_FWIDTHCOARSE:
131 return "fwidthCoarse";
132 default:
133 DE_ASSERT(false);
134 return DE_NULL;
135 }
136 }
137
getDerivateFuncCaseName(DerivateFunc func)138 static const char *getDerivateFuncCaseName(DerivateFunc func)
139 {
140 switch (func)
141 {
142 case DERIVATE_DFDX:
143 return "dfdx";
144 case DERIVATE_DFDXFINE:
145 return "dfdxfine";
146 case DERIVATE_DFDXCOARSE:
147 return "dfdxcoarse";
148 case DERIVATE_DFDXSUBGROUP:
149 return "dfdxsubgroup";
150 case DERIVATE_DFDY:
151 return "dfdy";
152 case DERIVATE_DFDYFINE:
153 return "dfdyfine";
154 case DERIVATE_DFDYCOARSE:
155 return "dfdycoarse";
156 case DERIVATE_DFDYSUBGROUP:
157 return "dfdysubgroup";
158 case DERIVATE_FWIDTH:
159 return "fwidth";
160 case DERIVATE_FWIDTHFINE:
161 return "fwidthfine";
162 case DERIVATE_FWIDTHCOARSE:
163 return "fwidthcoarse";
164 default:
165 DE_ASSERT(false);
166 return DE_NULL;
167 }
168 }
169
isDfdxFunc(DerivateFunc func)170 static inline bool isDfdxFunc(DerivateFunc func)
171 {
172 return func == DERIVATE_DFDX || func == DERIVATE_DFDXFINE || func == DERIVATE_DFDXCOARSE ||
173 func == DERIVATE_DFDXSUBGROUP;
174 }
175
isDfdyFunc(DerivateFunc func)176 static inline bool isDfdyFunc(DerivateFunc func)
177 {
178 return func == DERIVATE_DFDY || func == DERIVATE_DFDYFINE || func == DERIVATE_DFDYCOARSE ||
179 func == DERIVATE_DFDYSUBGROUP;
180 }
181
isFwidthFunc(DerivateFunc func)182 static inline bool isFwidthFunc(DerivateFunc func)
183 {
184 return func == DERIVATE_FWIDTH || func == DERIVATE_FWIDTHFINE || func == DERIVATE_FWIDTHCOARSE;
185 }
186
isSubgroupFunc(DerivateFunc func)187 static inline bool isSubgroupFunc(DerivateFunc func)
188 {
189 return func == DERIVATE_DFDXSUBGROUP || func == DERIVATE_DFDYSUBGROUP;
190 }
191
getDerivateMask(glu::DataType type)192 static inline tcu::BVec4 getDerivateMask(glu::DataType type)
193 {
194 switch (type)
195 {
196 case glu::TYPE_FLOAT:
197 return tcu::BVec4(true, false, false, false);
198 case glu::TYPE_FLOAT_VEC2:
199 return tcu::BVec4(true, true, false, false);
200 case glu::TYPE_FLOAT_VEC3:
201 return tcu::BVec4(true, true, true, false);
202 case glu::TYPE_FLOAT_VEC4:
203 return tcu::BVec4(true, true, true, true);
204 default:
205 DE_ASSERT(false);
206 return tcu::BVec4(true);
207 }
208 }
209
isSkippedPixel(const tcu::ConstPixelBufferAccess & surface,int x,int y)210 static inline bool isSkippedPixel(const tcu::ConstPixelBufferAccess &surface, int x, int y)
211 {
212 const tcu::Vec4 skipValue(0.7843f, 0.2039f, 0.4706f, 0.0f);
213 const tcu::Vec4 value = surface.getPixel(x, y);
214 return tcu::allEqual(tcu::lessThanEqual(tcu::abs(value - skipValue), tcu::Vec4(0.01f)), tcu::BVec4(true));
215 }
216
// Decodes the derivative stored at (x, y) by undoing the bias and then the
// scale that were applied when the value was written: (pixel - bias) / scale.
static inline tcu::Vec4 readDerivate(const tcu::ConstPixelBufferAccess &surface, const tcu::Vec4 &derivScale,
                                     const tcu::Vec4 &derivBias, int x, int y)
{
    const tcu::Vec4 encoded = surface.getPixel(x, y);
    return (encoded - derivBias) / derivScale;
}
222
// Returns the raw exponent bit field of each component of v.
static inline tcu::UVec4 getCompExpBits(const tcu::Vec4 &v)
{
    tcu::UVec4 expBits;

    for (int comp = 0; comp < 4; ++comp)
        expBits[comp] = tcu::Float32(v[comp]).exponentBits();

    return expBits;
}
228
computeFloatingPointError(const float value,const int numAccurateBits)229 float computeFloatingPointError(const float value, const int numAccurateBits)
230 {
231 const int numGarbageBits = 23 - numAccurateBits;
232 const uint32_t mask = (1u << numGarbageBits) - 1u;
233 const int exp = tcu::Float32(value).exponent();
234
235 return tcu::Float32::construct(+1, exp, (1u << 23) | mask).asFloat() -
236 tcu::Float32::construct(+1, exp, 1u << 23).asFloat();
237 }
238
getNumMantissaBits(const glu::Precision precision)239 static int getNumMantissaBits(const glu::Precision precision)
240 {
241 switch (precision)
242 {
243 case glu::PRECISION_HIGHP:
244 return 23;
245 case glu::PRECISION_MEDIUMP:
246 return 10;
247 case glu::PRECISION_LOWP:
248 return 6;
249 default:
250 DE_ASSERT(false);
251 return 0;
252 }
253 }
254
getMinExponent(const glu::Precision precision)255 static int getMinExponent(const glu::Precision precision)
256 {
257 switch (precision)
258 {
259 case glu::PRECISION_HIGHP:
260 return -126;
261 case glu::PRECISION_MEDIUMP:
262 return -14;
263 case glu::PRECISION_LOWP:
264 return -8;
265 default:
266 DE_ASSERT(false);
267 return 0;
268 }
269 }
270
getSingleULPForExponent(int exp,int numMantissaBits)271 static float getSingleULPForExponent(int exp, int numMantissaBits)
272 {
273 if (numMantissaBits > 0)
274 {
275 DE_ASSERT(numMantissaBits <= 23);
276
277 const int ulpBitNdx = 23 - numMantissaBits;
278 return tcu::Float32::construct(+1, exp, (1 << 23) | (1 << ulpBitNdx)).asFloat() -
279 tcu::Float32::construct(+1, exp, (1 << 23)).asFloat();
280 }
281 else
282 {
283 DE_ASSERT(numMantissaBits == 0);
284 return tcu::Float32::construct(+1, exp, (1 << 23)).asFloat();
285 }
286 }
287
getSingleULPForValue(float value,int numMantissaBits)288 static float getSingleULPForValue(float value, int numMantissaBits)
289 {
290 const int exp = tcu::Float32(value).exponent();
291 return getSingleULPForExponent(exp, numMantissaBits);
292 }
293
convertFloatFlushToZeroRtn(float value,int minExponent,int numAccurateBits)294 static float convertFloatFlushToZeroRtn(float value, int minExponent, int numAccurateBits)
295 {
296 if (value == 0.0f)
297 {
298 return 0.0f;
299 }
300 else
301 {
302 const tcu::Float32 inputFloat = tcu::Float32(value);
303 const int numTruncatedBits = 23 - numAccurateBits;
304 const uint32_t truncMask = (1u << numTruncatedBits) - 1u;
305
306 if (value > 0.0f)
307 {
308 if (value > 0.0f && tcu::Float32(value).exponent() < minExponent)
309 {
310 // flush to zero if possible
311 return 0.0f;
312 }
313 else
314 {
315 // just mask away non-representable bits
316 return tcu::Float32::construct(+1, inputFloat.exponent(), inputFloat.mantissa() & ~truncMask).asFloat();
317 }
318 }
319 else
320 {
321 if (inputFloat.mantissa() & truncMask)
322 {
323 // decrement one ulp if truncated bits are non-zero (i.e. if value is not representable)
324 return tcu::Float32::construct(-1, inputFloat.exponent(), inputFloat.mantissa() & ~truncMask)
325 .asFloat() -
326 getSingleULPForExponent(inputFloat.exponent(), numAccurateBits);
327 }
328 else
329 {
330 // value is representable, no need to do anything
331 return value;
332 }
333 }
334 }
335 }
336
convertFloatFlushToZeroRtp(float value,int minExponent,int numAccurateBits)337 static float convertFloatFlushToZeroRtp(float value, int minExponent, int numAccurateBits)
338 {
339 return -convertFloatFlushToZeroRtn(-value, minExponent, numAccurateBits);
340 }
341
addErrorUlp(float value,float numUlps,int numMantissaBits)342 static float addErrorUlp(float value, float numUlps, int numMantissaBits)
343 {
344 return value + numUlps * getSingleULPForValue(value, numMantissaBits);
345 }
346
enum
{
    INTERPOLATION_LOST_BITS = 3, // number of mantissa bits allowed to be lost in varying interpolation
};
351
getDerivateThreshold(const glu::Precision precision,const tcu::Vec4 & valueMin,const tcu::Vec4 & valueMax,const tcu::Vec4 & expectedDerivate)352 static inline tcu::Vec4 getDerivateThreshold(const glu::Precision precision, const tcu::Vec4 &valueMin,
353 const tcu::Vec4 &valueMax, const tcu::Vec4 &expectedDerivate)
354 {
355 const int baseBits = getNumMantissaBits(precision);
356 const tcu::UVec4 derivExp = getCompExpBits(expectedDerivate);
357 const tcu::UVec4 maxValueExp = max(getCompExpBits(valueMin), getCompExpBits(valueMax));
358 const tcu::UVec4 numBitsLost = maxValueExp - min(maxValueExp, derivExp);
359 const tcu::IVec4 numAccurateBits =
360 max(baseBits - numBitsLost.asInt() - (int)INTERPOLATION_LOST_BITS, tcu::IVec4(0));
361
362 return tcu::Vec4(computeFloatingPointError(expectedDerivate[0], numAccurateBits[0]),
363 computeFloatingPointError(expectedDerivate[1], numAccurateBits[1]),
364 computeFloatingPointError(expectedDerivate[2], numAccurateBits[2]),
365 computeFloatingPointError(expectedDerivate[3], numAccurateBits[3]));
366 }
367
// Stream helper that prints only the first numComps components of a vector.
// Holds a reference to the vector, so it must not outlive the vector it wraps.
struct LogVecComps
{
    const tcu::Vec4 &v;
    int numComps;

    LogVecComps(const tcu::Vec4 &v_, int numComps_) : v(v_), numComps(numComps_)
    {
    }
};
377
operator <<(std::ostream & str,const LogVecComps & v)378 std::ostream &operator<<(std::ostream &str, const LogVecComps &v)
379 {
380 DE_ASSERT(de::inRange(v.numComps, 1, 4));
381 if (v.numComps == 1)
382 return str << v.v[0];
383 else if (v.numComps == 2)
384 return str << v.v.toWidth<2>();
385 else if (v.numComps == 3)
386 return str << v.v.toWidth<3>();
387 else
388 return str << v.v;
389 }
390
// Controls whether verifyConstantDerivate() writes messages to the test log.
enum VerificationLogging
{
    LOG_ALL = 0, // log the expectation, failing pixels and the summary
    LOG_NOTHING  // verify silently
};
396
verifyConstantDerivate(tcu::TestLog & log,const tcu::ConstPixelBufferAccess & result,const tcu::PixelBufferAccess & errorMask,glu::DataType dataType,const tcu::Vec4 & reference,const tcu::Vec4 & threshold,const tcu::Vec4 & scale,const tcu::Vec4 & bias,VerificationLogging logPolicy=LOG_ALL,bool demoteToHelperInvocation=false)397 static bool verifyConstantDerivate(tcu::TestLog &log, const tcu::ConstPixelBufferAccess &result,
398 const tcu::PixelBufferAccess &errorMask, glu::DataType dataType,
399 const tcu::Vec4 &reference, const tcu::Vec4 &threshold, const tcu::Vec4 &scale,
400 const tcu::Vec4 &bias, VerificationLogging logPolicy = LOG_ALL,
401 bool demoteToHelperInvocation = false)
402 {
403 const int numComps = glu::getDataTypeFloatScalars(dataType);
404 const tcu::BVec4 mask = tcu::logicalNot(getDerivateMask(dataType));
405 int numFailedPixels = 0;
406
407 if (logPolicy == LOG_ALL)
408 log << TestLog::Message << "Expecting " << LogVecComps(reference, numComps) << " with threshold "
409 << LogVecComps(threshold, numComps) << TestLog::EndMessage;
410
411 for (int y = 0; y < result.getHeight(); y++)
412 {
413 for (int x = 0; x < result.getWidth(); x++)
414 {
415 if (isSkippedPixel(result, x, y))
416 continue;
417
418 if (demoteToHelperInvocation && deMod(y, 2) == 1)
419 continue;
420
421 const tcu::Vec4 resDerivate = readDerivate(result, scale, bias, x, y);
422 const bool isOk =
423 tcu::allEqual(tcu::logicalOr(tcu::lessThanEqual(tcu::abs(reference - resDerivate), threshold), mask),
424 tcu::BVec4(true));
425
426 if (!isOk)
427 {
428 if (numFailedPixels < MAX_FAILED_MESSAGES && logPolicy == LOG_ALL)
429 log << TestLog::Message << "FAIL: got " << LogVecComps(resDerivate, numComps)
430 << ", diff = " << LogVecComps(tcu::abs(reference - resDerivate), numComps) << ", at x = " << x
431 << ", y = " << y << TestLog::EndMessage;
432 numFailedPixels += 1;
433 errorMask.setPixel(tcu::RGBA::red().toVec(), x, y);
434 }
435 }
436 }
437
438 if (numFailedPixels >= MAX_FAILED_MESSAGES && logPolicy == LOG_ALL)
439 log << TestLog::Message << "..." << TestLog::EndMessage;
440
441 if (numFailedPixels > 0 && logPolicy == LOG_ALL)
442 log << TestLog::Message << "FAIL: found " << numFailedPixels << " failed pixels" << TestLog::EndMessage;
443
444 return numFailedPixels == 0;
445 }
446
// Evaluates a function of screen position that is linear in x and y,
// expressed as a 4x3 matrix applied to the homogeneous screen coordinate.
struct Linear2DFunctionEvaluator
{
    tcu::Matrix<float, 4, 3> matrix;

    //      .-----.
    //      | s_x |
    //  M x | s_y |
    //      | 1.0 |
    //      '-----'
    tcu::Vec4 evaluateAt(float screenX, float screenY) const;
};
458
evaluateAt(float screenX,float screenY) const459 tcu::Vec4 Linear2DFunctionEvaluator::evaluateAt(float screenX, float screenY) const
460 {
461 const tcu::Vec3 position(screenX, screenY, 1.0f);
462 return matrix * position;
463 }
464
reverifyConstantDerivateWithFlushRelaxations(tcu::TestLog & log,const tcu::ConstPixelBufferAccess & result,const tcu::PixelBufferAccess & errorMask,glu::DataType dataType,glu::Precision precision,const tcu::Vec4 & derivScale,const tcu::Vec4 & derivBias,const tcu::Vec4 & surfaceThreshold,DerivateFunc derivateFunc,const Linear2DFunctionEvaluator & function)465 static bool reverifyConstantDerivateWithFlushRelaxations(tcu::TestLog &log, const tcu::ConstPixelBufferAccess &result,
466 const tcu::PixelBufferAccess &errorMask,
467 glu::DataType dataType, glu::Precision precision,
468 const tcu::Vec4 &derivScale, const tcu::Vec4 &derivBias,
469 const tcu::Vec4 &surfaceThreshold, DerivateFunc derivateFunc,
470 const Linear2DFunctionEvaluator &function)
471 {
472 DE_ASSERT(result.getWidth() == errorMask.getWidth());
473 DE_ASSERT(result.getHeight() == errorMask.getHeight());
474 DE_ASSERT(isDfdxFunc(derivateFunc) || isDfdyFunc(derivateFunc));
475
476 const tcu::IVec4 red(255, 0, 0, 255);
477 const tcu::IVec4 green(0, 255, 0, 255);
478 const float divisionErrorUlps = 2.5f;
479
480 const int numComponents = glu::getDataTypeFloatScalars(dataType);
481 const int numBits = getNumMantissaBits(precision);
482 const int minExponent = getMinExponent(precision);
483
484 const int numVaryingSampleBits = numBits - INTERPOLATION_LOST_BITS;
485 int numFailedPixels = 0;
486
487 tcu::clear(errorMask, green);
488
489 // search for failed pixels
490 for (int y = 0; y < result.getHeight(); ++y)
491 for (int x = 0; x < result.getWidth(); ++x)
492 {
493 if (isSkippedPixel(result, x, y))
494 continue;
495
496 // flushToZero?(f2z?(functionValueCurrent) - f2z?(functionValueBefore))
497 // flushToZero? ( ------------------------------------------------------------------------ +- 2.5 ULP )
498 // dx
499
500 const tcu::Vec4 resultDerivative = readDerivate(result, derivScale, derivBias, x, y);
501
502 // sample at the front of the back pixel and the back of the front pixel to cover the whole area of
503 // legal sample positions. In general case this is NOT OK, but we know that the target funtion is
504 // (mostly*) linear which allows us to take the sample points at arbitrary points. This gets us the
505 // maximum difference possible in exponents which are used in error bound calculations.
506 // * non-linearity may happen around zero or with very high function values due to subnorms not
507 // behaving well.
508 const tcu::Vec4 functionValueForward = (isDfdxFunc(derivateFunc)) ?
509 (function.evaluateAt((float)x + 2.0f, (float)y + 0.5f)) :
510 (function.evaluateAt((float)x + 0.5f, (float)y + 2.0f));
511 const tcu::Vec4 functionValueBackward = (isDfdyFunc(derivateFunc)) ?
512 (function.evaluateAt((float)x - 1.0f, (float)y + 0.5f)) :
513 (function.evaluateAt((float)x + 0.5f, (float)y - 1.0f));
514
515 bool anyComponentFailed = false;
516
517 // check components separately
518 for (int c = 0; c < numComponents; ++c)
519 {
520 // Simulate interpolation. Add allowed interpolation error and round to target precision. Allow one half ULP (i.e. correct rounding)
521 const tcu::Interval forwardComponent(
522 convertFloatFlushToZeroRtn(addErrorUlp((float)functionValueForward[c], -0.5f, numVaryingSampleBits),
523 minExponent, numBits),
524 convertFloatFlushToZeroRtp(addErrorUlp((float)functionValueForward[c], +0.5f, numVaryingSampleBits),
525 minExponent, numBits));
526 const tcu::Interval backwardComponent(
527 convertFloatFlushToZeroRtn(
528 addErrorUlp((float)functionValueBackward[c], -0.5f, numVaryingSampleBits), minExponent,
529 numBits),
530 convertFloatFlushToZeroRtp(
531 addErrorUlp((float)functionValueBackward[c], +0.5f, numVaryingSampleBits), minExponent,
532 numBits));
533 const int maxValueExp = de::max(de::max(tcu::Float32(forwardComponent.lo()).exponent(),
534 tcu::Float32(forwardComponent.hi()).exponent()),
535 de::max(tcu::Float32(backwardComponent.lo()).exponent(),
536 tcu::Float32(backwardComponent.hi()).exponent()));
537
538 // subtraction in numerator will likely cause a cancellation of the most
539 // significant bits. Apply error bounds.
540
541 const tcu::Interval numerator(forwardComponent - backwardComponent);
542 const int numeratorLoExp = tcu::Float32(numerator.lo()).exponent();
543 const int numeratorHiExp = tcu::Float32(numerator.hi()).exponent();
544 const int numeratorLoBitsLost = de::max(
545 0,
546 maxValueExp -
547 numeratorLoExp); //!< must clamp to zero since if forward and backward components have different
548 const int numeratorHiBitsLost = de::max(
549 0, maxValueExp - numeratorHiExp); //!< sign, numerator might have larger exponent than its operands.
550 const int numeratorLoBits = de::max(0, numBits - numeratorLoBitsLost);
551 const int numeratorHiBits = de::max(0, numBits - numeratorHiBitsLost);
552
553 const tcu::Interval numeratorRange(
554 convertFloatFlushToZeroRtn((float)numerator.lo(), minExponent, numeratorLoBits),
555 convertFloatFlushToZeroRtp((float)numerator.hi(), minExponent, numeratorHiBits));
556
557 const tcu::Interval divisionRange =
558 numeratorRange /
559 3.0f; // legal sample area is anywhere within this and neighboring pixels (i.e. size = 3)
560 const tcu::Interval divisionResultRange(
561 convertFloatFlushToZeroRtn(addErrorUlp((float)divisionRange.lo(), -divisionErrorUlps, numBits),
562 minExponent, numBits),
563 convertFloatFlushToZeroRtp(addErrorUlp((float)divisionRange.hi(), +divisionErrorUlps, numBits),
564 minExponent, numBits));
565 const tcu::Interval finalResultRange(divisionResultRange.lo() - surfaceThreshold[c],
566 divisionResultRange.hi() + surfaceThreshold[c]);
567
568 if (resultDerivative[c] >= finalResultRange.lo() && resultDerivative[c] <= finalResultRange.hi())
569 {
570 // value ok
571 }
572 else
573 {
574 if (numFailedPixels < MAX_FAILED_MESSAGES)
575 log << tcu::TestLog::Message << "Error in pixel at " << x << ", " << y << " with component "
576 << c << " (channel " << ("rgba"[c]) << ")\n"
577 << "\tGot pixel value " << result.getPixelInt(x, y) << "\n"
578 << "\t\tdFd" << ((isDfdxFunc(derivateFunc)) ? ('x') : ('y'))
579 << " ~= " << resultDerivative[c] << "\n"
580 << "\t\tdifference to a valid range: "
581 << ((resultDerivative[c] < finalResultRange.lo()) ? ("-") : ("+"))
582 << ((resultDerivative[c] < finalResultRange.lo()) ?
583 (finalResultRange.lo() - resultDerivative[c]) :
584 (resultDerivative[c] - finalResultRange.hi()))
585 << "\n"
586 << "\tDerivative value range:\n"
587 << "\t\tMin: " << finalResultRange.lo() << "\n"
588 << "\t\tMax: " << finalResultRange.hi() << "\n"
589 << tcu::TestLog::EndMessage;
590
591 ++numFailedPixels;
592 anyComponentFailed = true;
593 }
594 }
595
596 if (anyComponentFailed)
597 errorMask.setPixel(red, x, y);
598 }
599
600 if (numFailedPixels >= MAX_FAILED_MESSAGES)
601 log << TestLog::Message << "..." << TestLog::EndMessage;
602
603 if (numFailedPixels > 0)
604 log << TestLog::Message << "FAIL: found " << numFailedPixels << " failed pixels" << TestLog::EndMessage;
605
606 return numFailedPixels == 0;
607 }
608
609 // TestCase utils
610
611 struct DerivateCaseDefinition
612 {
DerivateCaseDefinitionvkt::sr::__anon71e082ae0111::DerivateCaseDefinition613 DerivateCaseDefinition(void)
614 {
615 func = DERIVATE_LAST;
616 dataType = glu::TYPE_LAST;
617 precision = glu::PRECISION_LAST;
618 inNonUniformControlFlow = false;
619 coordDataType = glu::TYPE_LAST;
620 coordPrecision = glu::PRECISION_LAST;
621 surfaceType = SURFACETYPE_UNORM_FBO;
622 numSamples = 0;
623 demoteToHelperInvocation = false;
624 }
625
626 DerivateFunc func;
627 glu::DataType dataType;
628 glu::Precision precision;
629 bool inNonUniformControlFlow;
630
631 glu::DataType coordDataType;
632 glu::Precision coordPrecision;
633
634 SurfaceType surfaceType;
635 int numSamples;
636
637 bool demoteToHelperInvocation;
638 };
639
// Per-case numeric parameters.
struct DerivateCaseValues
{
    tcu::Vec4 coordMin;   // coordinate attribute range, lower bound per component
    tcu::Vec4 coordMax;   // coordinate attribute range, upper bound per component
    tcu::Vec4 derivScale; // uploaded as the uniform at binding 0; undone by readDerivate()
    tcu::Vec4 derivBias;  // uploaded as the uniform at binding 1; undone by readDerivate()
};
647
// Value range for texture-based cases (per-component minimum and maximum).
// NOTE(review): the users of this struct are outside the visible part of the file.
struct TextureCaseValues
{
    tcu::Vec4 texValueMin;
    tcu::Vec4 texValueMax;
};
653
// Uniform setup for derivative cases: uploads the derivative scale and bias
// and, when requested, binds a sampler.
class DerivateUniformSetup : public UniformSetup
{
public:
    DerivateUniformSetup(bool useSampler);
    virtual ~DerivateUniformSetup(void);

    // 'instance' must be a TriangleDerivateCaseInstance; the Vec4 argument is unused.
    virtual void setup(ShaderRenderCaseInstance &instance, const tcu::Vec4 &) const;

private:
    const bool m_useSampler; // whether setup() also binds texture 0 at binding 2
};
665
// Stores whether setup() should bind a sampler in addition to the uniforms.
DerivateUniformSetup::DerivateUniformSetup(bool useSampler) : m_useSampler(useSampler)
{
}
669
// Nothing to release.
DerivateUniformSetup::~DerivateUniformSetup(void)
{
}
673
674 // TriangleDerivateCaseInstance
675
// Base test instance: renders a quad covering clip space as two triangles and
// passes the resulting image to the subclass-provided verify().
class TriangleDerivateCaseInstance : public ShaderRenderCaseInstance
{
public:
    TriangleDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
                                 const DerivateCaseDefinition &definitions, const DerivateCaseValues &values);
    virtual ~TriangleDerivateCaseInstance(void);
    virtual tcu::TestStatus iterate(void);
    // Both accessors return a copy of the referenced struct.
    DerivateCaseDefinition getDerivateCaseDefinition(void)
    {
        return m_definitions;
    }
    DerivateCaseValues getDerivateCaseValues(void)
    {
        return m_values;
    }

protected:
    // Returns true when 'result' is acceptable; failing pixels are marked in errorMask.
    virtual bool verify(const tcu::ConstPixelBufferAccess &result, const tcu::PixelBufferAccess &errorMask) = 0;
    tcu::Vec4 getSurfaceThreshold(void) const;
    virtual void setupDefaultInputs(void);

    // Held by reference: the objects passed to the constructor must outlive this instance.
    const DerivateCaseDefinition &m_definitions;
    const DerivateCaseValues &m_values;
};
700
getVkSampleCount(int numSamples)701 static VkSampleCountFlagBits getVkSampleCount(int numSamples)
702 {
703 switch (numSamples)
704 {
705 case 0:
706 return VK_SAMPLE_COUNT_1_BIT;
707 case 2:
708 return VK_SAMPLE_COUNT_2_BIT;
709 case 4:
710 return VK_SAMPLE_COUNT_4_BIT;
711 default:
712 DE_ASSERT(false);
713 return (VkSampleCountFlagBits)0;
714 }
715 }
716
TriangleDerivateCaseInstance(Context & context,const UniformSetup & uniformSetup,const DerivateCaseDefinition & definitions,const DerivateCaseValues & values)717 TriangleDerivateCaseInstance::TriangleDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
718 const DerivateCaseDefinition &definitions,
719 const DerivateCaseValues &values)
720 : ShaderRenderCaseInstance(context, true, DE_NULL, uniformSetup, DE_NULL)
721 , m_definitions(definitions)
722 , m_values(values)
723 {
724 m_renderSize = tcu::UVec2(VIEWPORT_WIDTH, VIEWPORT_HEIGHT);
725 m_colorFormat = vk::mapTextureFormat(
726 glu::mapGLInternalFormat(m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO ? GL_RGBA32UI : GL_RGBA8));
727
728 setSampleCount(getVkSampleCount(definitions.numSamples));
729 }
730
// Nothing to release; the definition and value structs are only referenced.
TriangleDerivateCaseInstance::~TriangleDerivateCaseInstance(void)
{
}
734
getSurfaceThreshold(void) const735 tcu::Vec4 TriangleDerivateCaseInstance::getSurfaceThreshold(void) const
736 {
737 switch (m_definitions.surfaceType)
738 {
739 case SURFACETYPE_UNORM_FBO:
740 return tcu::IVec4(1).asFloat() / 255.0f;
741 case SURFACETYPE_FLOAT_FBO:
742 return tcu::Vec4(0.0f);
743 default:
744 DE_ASSERT(false);
745 return tcu::Vec4(0.0f);
746 }
747 }
748
setupDefaultInputs(void)749 void TriangleDerivateCaseInstance::setupDefaultInputs(void)
750 {
751 const int numVertices = 4;
752 const float positions[] = {-1.0f, -1.0f, 0.0f, 1.0f, -1.0f, 1.0f, 0.0f, 1.0f,
753 1.0f, -1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f};
754 const float coords[] = {m_values.coordMin.x(),
755 m_values.coordMin.y(),
756 m_values.coordMin.z(),
757 m_values.coordMax.w(),
758 m_values.coordMin.x(),
759 m_values.coordMax.y(),
760 (m_values.coordMin.z() + m_values.coordMax.z()) * 0.5f,
761 (m_values.coordMin.w() + m_values.coordMax.w()) * 0.5f,
762 m_values.coordMax.x(),
763 m_values.coordMin.y(),
764 (m_values.coordMin.z() + m_values.coordMax.z()) * 0.5f,
765 (m_values.coordMin.w() + m_values.coordMax.w()) * 0.5f,
766 m_values.coordMax.x(),
767 m_values.coordMax.y(),
768 m_values.coordMax.z(),
769 m_values.coordMin.w()};
770
771 addAttribute(0u, vk::VK_FORMAT_R32G32B32A32_SFLOAT, 4 * (uint32_t)sizeof(float), numVertices, positions);
772 if (m_definitions.coordDataType != glu::TYPE_LAST)
773 addAttribute(1u, vk::VK_FORMAT_R32G32B32A32_SFLOAT, 4 * (uint32_t)sizeof(float), numVertices, coords);
774 }
775
iterate(void)776 tcu::TestStatus TriangleDerivateCaseInstance::iterate(void)
777 {
778 tcu::TestLog &log = m_context.getTestContext().getLog();
779 const uint32_t numVertices = 4;
780 const uint32_t numTriangles = 2;
781 const uint16_t indices[] = {0, 2, 1, 2, 3, 1};
782 tcu::TextureLevel resultImage;
783
784 setup();
785
786 render(numVertices, numTriangles, indices);
787
788 {
789 const tcu::TextureLevel &renderedImage = getResultImage();
790
791 if (m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO)
792 {
793 const tcu::TextureFormat dataFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::FLOAT);
794
795 resultImage.setStorage(dataFormat, renderedImage.getWidth(), renderedImage.getHeight());
796 tcu::copy(resultImage.getAccess(), tcu::ConstPixelBufferAccess(dataFormat, renderedImage.getSize(),
797 renderedImage.getAccess().getDataPtr()));
798 }
799 else
800 {
801 resultImage = renderedImage;
802 }
803 }
804
805 // Verify
806 {
807 tcu::Surface errorMask(resultImage.getWidth(), resultImage.getHeight());
808 tcu::clear(errorMask.getAccess(), tcu::RGBA::green().toVec());
809
810 const bool isOk = verify(resultImage.getAccess(), errorMask.getAccess());
811
812 log << TestLog::ImageSet("Result", "Result images")
813 << TestLog::Image("Rendered", "Rendered image", resultImage);
814
815 if (!isOk)
816 log << TestLog::Image("ErrorMask", "Error mask", errorMask);
817
818 log << TestLog::EndImageSet;
819
820 if (isOk)
821 return tcu::TestStatus::pass("Pass");
822 else
823 return tcu::TestStatus::fail("Image comparison failed");
824 }
825 }
826
setup(ShaderRenderCaseInstance & instance,const tcu::Vec4 &) const827 void DerivateUniformSetup::setup(ShaderRenderCaseInstance &instance, const tcu::Vec4 &) const
828 {
829 DerivateCaseDefinition definitions =
830 dynamic_cast<TriangleDerivateCaseInstance &>(instance).getDerivateCaseDefinition();
831 DerivateCaseValues values = dynamic_cast<TriangleDerivateCaseInstance &>(instance).getDerivateCaseValues();
832
833 DE_ASSERT(glu::isDataTypeFloatOrVec(definitions.dataType));
834
835 instance.addUniform(0u, vk::VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
836 glu::getDataTypeScalarSize(definitions.dataType) * sizeof(float), values.derivScale.getPtr());
837 instance.addUniform(1u, vk::VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
838 glu::getDataTypeScalarSize(definitions.dataType) * sizeof(float), values.derivBias.getPtr());
839
840 if (m_useSampler)
841 instance.useSampler(2u, 0u); // To the uniform binding location 2 bind the texture 0
842 }
843
844 // TriangleDerivateCase
845
// Base test case for the derivative tests; holds the case definition and
// values and performs the feature checks in checkSupport().
class TriangleDerivateCase : public ShaderRenderCase
{
public:
    TriangleDerivateCase(tcu::TestContext &testCtx, const std::string &name, const UniformSetup *uniformSetup);
    virtual ~TriangleDerivateCase(void);

    // Throws tcu::NotSupportedError when required subgroup features are missing.
    void checkSupport(Context &context) const override;

protected:
    DerivateCaseDefinition m_definitions;
    DerivateCaseValues m_values;
};
858
// Creates the case with a default-constructed definition; no shader evaluator
// is passed to the base class.
TriangleDerivateCase::TriangleDerivateCase(tcu::TestContext &testCtx, const std::string &name,
                                           const UniformSetup *uniformSetup)
    : ShaderRenderCase(testCtx, name, false, (ShaderEvaluator *)DE_NULL, uniformSetup, DE_NULL)
    , m_definitions()
{
}
865
// Nothing to release.
TriangleDerivateCase::~TriangleDerivateCase(void)
{
}
869
checkSupport(Context & context) const870 void TriangleDerivateCase::checkSupport(Context &context) const
871 {
872 ShaderRenderCase::checkSupport(context);
873
874 const bool subgroupFunc = isSubgroupFunc(m_definitions.func);
875
876 if (m_definitions.inNonUniformControlFlow || subgroupFunc)
877 {
878 const std::string errorPrefix = m_definitions.inNonUniformControlFlow ?
879 "Derivatives in dynamic control flow" :
880 "Manual derivatives with subgroup operations";
881
882 if (!context.contextSupports(vk::ApiVersion(0, 1, 1, 0)))
883 throw tcu::NotSupportedError(errorPrefix + " require Vulkan 1.1");
884
885 const auto &subgroupProperties = context.getSubgroupProperties();
886
887 if (subgroupProperties.subgroupSize < 4)
888 throw tcu::NotSupportedError(errorPrefix + " require subgroupSize >= 4");
889
890 if ((subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) == 0)
891 throw tcu::NotSupportedError(errorPrefix + " tests require VK_SUBGROUP_FEATURE_BALLOT_BIT");
892
893 if ((subgroupProperties.supportedStages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
894 throw tcu::NotSupportedError(
895 errorPrefix + " tests require subgroup supported stage including VK_SHADER_STAGE_FRAGMENT_BIT");
896
897 if (subgroupFunc && (subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_QUAD_BIT) == 0)
898 throw tcu::NotSupportedError(errorPrefix + " tests require VK_SUBGROUP_FEATURE_QUAD_BIT");
899 }
900 }
901
genVertexSource(glu::DataType coordType,glu::Precision precision)902 static std::string genVertexSource(glu::DataType coordType, glu::Precision precision)
903 {
904 DE_ASSERT(coordType == glu::TYPE_LAST || glu::isDataTypeFloatOrVec(coordType));
905
906 const std::string vertexTmpl =
907 "#version 450\n"
908 "layout(location = 0) in highp vec4 a_position;\n" +
909 string(coordType != glu::TYPE_LAST ? "layout(location = 1) in ${PRECISION} ${DATATYPE} a_coord;\n"
910 "layout(location = 0) out ${PRECISION} ${DATATYPE} v_coord;\n" :
911 "") +
912 "out gl_PerVertex {\n"
913 " vec4 gl_Position;\n"
914 "};\n"
915 "void main (void)\n"
916 "{\n"
917 " gl_Position = a_position;\n" +
918 string(coordType != glu::TYPE_LAST ? " v_coord = a_coord;\n" : "") + "}\n";
919
920 map<string, string> vertexParams;
921
922 if (coordType != glu::TYPE_LAST)
923 {
924 vertexParams["PRECISION"] = glu::getPrecisionName(precision);
925 vertexParams["DATATYPE"] = glu::getDataTypeName(coordType);
926 }
927
928 return tcu::StringTemplate(vertexTmpl).specialize(vertexParams);
929 }
930
931 // ConstantDerivateCaseInstance
932
933 class ConstantDerivateCaseInstance : public TriangleDerivateCaseInstance
934 {
935 public:
936 ConstantDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
937 const DerivateCaseDefinition &definitions, const DerivateCaseValues &values);
938 virtual ~ConstantDerivateCaseInstance(void);
939
940 virtual bool verify(const tcu::ConstPixelBufferAccess &result, const tcu::PixelBufferAccess &errorMask);
941 };
942
ConstantDerivateCaseInstance(Context & context,const UniformSetup & uniformSetup,const DerivateCaseDefinition & definitions,const DerivateCaseValues & values)943 ConstantDerivateCaseInstance::ConstantDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
944 const DerivateCaseDefinition &definitions,
945 const DerivateCaseValues &values)
946 : TriangleDerivateCaseInstance(context, uniformSetup, definitions, values)
947 {
948 }
949
~ConstantDerivateCaseInstance(void)950 ConstantDerivateCaseInstance::~ConstantDerivateCaseInstance(void)
951 {
952 }
953
verify(const tcu::ConstPixelBufferAccess & result,const tcu::PixelBufferAccess & errorMask)954 bool ConstantDerivateCaseInstance::verify(const tcu::ConstPixelBufferAccess &result,
955 const tcu::PixelBufferAccess &errorMask)
956 {
957 const tcu::Vec4 reference(0.0f); // Derivate of constant argument should always be 0
958 const tcu::Vec4 threshold = getSurfaceThreshold() / abs(m_values.derivScale);
959
960 return verifyConstantDerivate(m_context.getTestContext().getLog(), result, errorMask, m_definitions.dataType,
961 reference, threshold, m_values.derivScale, m_values.derivBias);
962 }
963
964 // ConstantDerivateCase
965
966 class ConstantDerivateCase : public TriangleDerivateCase
967 {
968 public:
969 ConstantDerivateCase(tcu::TestContext &testCtx, const std::string &name, DerivateFunc func, glu::DataType type);
970 virtual ~ConstantDerivateCase(void);
971
972 virtual void initPrograms(vk::SourceCollections &programCollection) const;
973 virtual TestInstance *createInstance(Context &context) const;
974 };
975
ConstantDerivateCase(tcu::TestContext & testCtx,const std::string & name,DerivateFunc func,glu::DataType type)976 ConstantDerivateCase::ConstantDerivateCase(tcu::TestContext &testCtx, const std::string &name, DerivateFunc func,
977 glu::DataType type)
978 : TriangleDerivateCase(testCtx, name, new DerivateUniformSetup(false))
979 {
980 m_definitions.func = func;
981 m_definitions.dataType = type;
982 m_definitions.precision = glu::PRECISION_HIGHP;
983
984 m_values.derivScale = tcu::Vec4(1e3f, 1e3f, 1e3f, 1e3f);
985 m_values.derivBias = tcu::Vec4(0.5f, 0.5f, 0.5f, 0.5f);
986 }
987
~ConstantDerivateCase(void)988 ConstantDerivateCase::~ConstantDerivateCase(void)
989 {
990 }
991
createInstance(Context & context) const992 TestInstance *ConstantDerivateCase::createInstance(Context &context) const
993 {
994 DE_ASSERT(m_uniformSetup != DE_NULL);
995 return new ConstantDerivateCaseInstance(context, *m_uniformSetup, m_definitions, m_values);
996 }
997
initPrograms(vk::SourceCollections & programCollection) const998 void ConstantDerivateCase::initPrograms(vk::SourceCollections &programCollection) const
999 {
1000 const char *fragmentTmpl = "#version 450\n"
1001 "layout(location = 0) out mediump vec4 o_color;\n"
1002 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1003 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; }; \n"
1004 "void main (void)\n"
1005 "{\n"
1006 " ${PRECISION} ${DATATYPE} res = ${FUNC}(${VALUE}) * u_scale + u_bias;\n"
1007 " o_color = ${CAST_TO_OUTPUT};\n"
1008 "}\n";
1009
1010 map<string, string> fragmentParams;
1011 fragmentParams["PRECISION"] = glu::getPrecisionName(m_definitions.precision);
1012 fragmentParams["DATATYPE"] = glu::getDataTypeName(m_definitions.dataType);
1013 fragmentParams["FUNC"] = getDerivateFuncName(m_definitions.func);
1014 fragmentParams["VALUE"] = m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "vec4(1.0, 7.2, -1e5, 0.0)" :
1015 m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? "vec3(1e2, 8.0, 0.01)" :
1016 m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? "vec2(-0.0, 2.7)" :
1017 /* TYPE_FLOAT */ "7.7";
1018 fragmentParams["CAST_TO_OUTPUT"] =
1019 m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "res" :
1020 m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? "vec4(res, 1.0)" :
1021 m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? "vec4(res, 0.0, 1.0)" :
1022 /* TYPE_FLOAT */ "vec4(res, 0.0, 0.0, 1.0)";
1023
1024 std::string fragmentSrc = tcu::StringTemplate(fragmentTmpl).specialize(fragmentParams);
1025 programCollection.glslSources.add("vert")
1026 << glu::VertexSource(genVertexSource(m_definitions.coordDataType, m_definitions.coordPrecision));
1027 programCollection.glslSources.add("frag") << glu::FragmentSource(fragmentSrc);
1028 }
1029
1030 // Linear cases
1031
1032 class LinearDerivateUniformSetup : public DerivateUniformSetup
1033 {
1034 public:
1035 LinearDerivateUniformSetup(bool useSampler, BaseUniformType usedDefaultUniform);
1036 virtual ~LinearDerivateUniformSetup(void);
1037
1038 virtual void setup(ShaderRenderCaseInstance &instance, const tcu::Vec4 &constCoords) const;
1039
1040 private:
1041 const BaseUniformType m_usedDefaultUniform;
1042 };
1043
LinearDerivateUniformSetup(bool useSampler,BaseUniformType usedDefaultUniform)1044 LinearDerivateUniformSetup::LinearDerivateUniformSetup(bool useSampler, BaseUniformType usedDefaultUniform)
1045 : DerivateUniformSetup(useSampler)
1046 , m_usedDefaultUniform(usedDefaultUniform)
1047 {
1048 }
1049
~LinearDerivateUniformSetup(void)1050 LinearDerivateUniformSetup::~LinearDerivateUniformSetup(void)
1051 {
1052 }
1053
setup(ShaderRenderCaseInstance & instance,const tcu::Vec4 & constCoords) const1054 void LinearDerivateUniformSetup::setup(ShaderRenderCaseInstance &instance, const tcu::Vec4 &constCoords) const
1055 {
1056 DerivateUniformSetup::setup(instance, constCoords);
1057
1058 if (m_usedDefaultUniform != U_LAST)
1059 switch (m_usedDefaultUniform)
1060 {
1061 case UB_TRUE:
1062 case UI_ONE:
1063 case UI_TWO:
1064 instance.useUniform(2u, m_usedDefaultUniform);
1065 break;
1066 default:
1067 DE_ASSERT(false);
1068 break;
1069 }
1070 }
1071
1072 class LinearDerivateCaseInstance : public TriangleDerivateCaseInstance
1073 {
1074 public:
1075 LinearDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
1076 const DerivateCaseDefinition &definitions, const DerivateCaseValues &values);
1077 virtual ~LinearDerivateCaseInstance(void);
1078
1079 virtual bool verify(const tcu::ConstPixelBufferAccess &result, const tcu::PixelBufferAccess &errorMask);
1080 };
1081
LinearDerivateCaseInstance(Context & context,const UniformSetup & uniformSetup,const DerivateCaseDefinition & definitions,const DerivateCaseValues & values)1082 LinearDerivateCaseInstance::LinearDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
1083 const DerivateCaseDefinition &definitions,
1084 const DerivateCaseValues &values)
1085 : TriangleDerivateCaseInstance(context, uniformSetup, definitions, values)
1086 {
1087 }
1088
~LinearDerivateCaseInstance(void)1089 LinearDerivateCaseInstance::~LinearDerivateCaseInstance(void)
1090 {
1091 }
1092
verify(const tcu::ConstPixelBufferAccess & result,const tcu::PixelBufferAccess & errorMask)1093 bool LinearDerivateCaseInstance::verify(const tcu::ConstPixelBufferAccess &result,
1094 const tcu::PixelBufferAccess &errorMask)
1095 {
1096 const tcu::Vec4 xScale = tcu::Vec4(1.0f, 0.0f, 0.5f, -0.5f);
1097 const tcu::Vec4 yScale = tcu::Vec4(0.0f, 1.0f, 0.5f, -0.5f);
1098 const tcu::Vec4 surfaceThreshold = getSurfaceThreshold() / abs(m_values.derivScale);
1099
1100 if (isDfdxFunc(m_definitions.func) || isDfdyFunc(m_definitions.func))
1101 {
1102 const bool isX = isDfdxFunc(m_definitions.func);
1103 const float div = isX ? float(result.getWidth()) : float(result.getHeight());
1104 const tcu::Vec4 scale = isX ? xScale : yScale;
1105 tcu::Vec4 reference = ((m_values.coordMax - m_values.coordMin) / div);
1106 const tcu::Vec4 opThreshold =
1107 getDerivateThreshold(m_definitions.precision, m_values.coordMin, m_values.coordMax, reference);
1108 const tcu::Vec4 threshold = max(surfaceThreshold, opThreshold);
1109 const int numComps = glu::getDataTypeFloatScalars(m_definitions.dataType);
1110
1111 /* adjust the reference value for the correct dfdx or dfdy sample adjacency */
1112 reference = reference * scale;
1113
1114 m_context.getTestContext().getLog()
1115 << tcu::TestLog::Message << "Verifying result image.\n"
1116 << "\tValid derivative is " << LogVecComps(reference, numComps) << " with threshold "
1117 << LogVecComps(threshold, numComps) << tcu::TestLog::EndMessage;
1118
1119 // short circuit if result is strictly within the normal value error bounds.
1120 // This improves performance significantly.
1121 if (verifyConstantDerivate(m_context.getTestContext().getLog(), result, errorMask, m_definitions.dataType,
1122 reference, threshold, m_values.derivScale, m_values.derivBias, LOG_NOTHING,
1123 m_definitions.demoteToHelperInvocation))
1124 {
1125 m_context.getTestContext().getLog()
1126 << tcu::TestLog::Message << "No incorrect derivatives found, result valid." << tcu::TestLog::EndMessage;
1127
1128 return true;
1129 }
1130
1131 // some pixels exceed error bounds calculated for normal values. Verify that these
1132 // potentially invalid pixels are in fact valid due to (for example) subnorm flushing.
1133
1134 m_context.getTestContext().getLog()
1135 << tcu::TestLog::Message
1136 << "Initial verification failed, verifying image by calculating accurate error bounds for each result "
1137 "pixel.\n"
1138 << "\tVerifying each result derivative is within its range of legal result values."
1139 << tcu::TestLog::EndMessage;
1140
1141 {
1142 const tcu::UVec2 viewportSize(VIEWPORT_WIDTH, VIEWPORT_HEIGHT);
1143 const float w = float(viewportSize.x());
1144 const float h = float(viewportSize.y());
1145 const tcu::Vec4 valueRamp = (m_values.coordMax - m_values.coordMin);
1146 Linear2DFunctionEvaluator function;
1147
1148 function.matrix.setRow(0, tcu::Vec3(valueRamp.x() / w, 0.0f, m_values.coordMin.x()));
1149 function.matrix.setRow(1, tcu::Vec3(0.0f, valueRamp.y() / h, m_values.coordMin.y()));
1150 function.matrix.setRow(
1151 2,
1152 tcu::Vec3(valueRamp.z() / w, valueRamp.z() / h, m_values.coordMin.z() + m_values.coordMin.z()) / 2.0f);
1153 function.matrix.setRow(
1154 3, tcu::Vec3(-valueRamp.w() / w, -valueRamp.w() / h, m_values.coordMax.w() + m_values.coordMax.w()) /
1155 2.0f);
1156
1157 return reverifyConstantDerivateWithFlushRelaxations(
1158 m_context.getTestContext().getLog(), result, errorMask, m_definitions.dataType, m_definitions.precision,
1159 m_values.derivScale, m_values.derivBias, surfaceThreshold, m_definitions.func, function);
1160 }
1161 }
1162 else
1163 {
1164 DE_ASSERT(isFwidthFunc(m_definitions.func));
1165 const float w = float(result.getWidth());
1166 const float h = float(result.getHeight());
1167
1168 const tcu::Vec4 dx = ((m_values.coordMax - m_values.coordMin) / w) * xScale;
1169 const tcu::Vec4 dy = ((m_values.coordMax - m_values.coordMin) / h) * yScale;
1170 const tcu::Vec4 reference = tcu::abs(dx) + tcu::abs(dy);
1171 const tcu::Vec4 dxThreshold =
1172 getDerivateThreshold(m_definitions.precision, m_values.coordMin * xScale, m_values.coordMax * xScale, dx);
1173 const tcu::Vec4 dyThreshold =
1174 getDerivateThreshold(m_definitions.precision, m_values.coordMin * yScale, m_values.coordMax * yScale, dy);
1175 const tcu::Vec4 threshold = max(surfaceThreshold, max(dxThreshold, dyThreshold));
1176
1177 return verifyConstantDerivate(m_context.getTestContext().getLog(), result, errorMask, m_definitions.dataType,
1178 reference, threshold, m_values.derivScale, m_values.derivBias);
1179 }
1180 }
1181
1182 // LinearDerivateCase
1183
1184 class LinearDerivateCase : public TriangleDerivateCase
1185 {
1186 public:
1187 LinearDerivateCase(tcu::TestContext &testCtx, const std::string &name, DerivateFunc func, glu::DataType type,
1188 glu::Precision precision, bool inNonUniformControlFlow, SurfaceType surfaceType, int numSamples,
1189 const std::string &fragmentSrcTmpl, BaseUniformType usedDefaultUniform,
1190 bool demoteToHelperInvocaiton);
1191 virtual ~LinearDerivateCase(void);
1192
1193 virtual void initPrograms(vk::SourceCollections &programCollection) const;
1194 virtual TestInstance *createInstance(Context &context) const;
checkSupport(Context & context) const1195 virtual void checkSupport(Context &context) const
1196 {
1197 TriangleDerivateCase::checkSupport(context);
1198 if (m_definitions.demoteToHelperInvocation)
1199 {
1200 context.requireDeviceFunctionality("VK_EXT_shader_demote_to_helper_invocation");
1201 }
1202 }
1203
1204 private:
1205 const std::string m_fragmentTmpl;
1206 };
1207
LinearDerivateCase(tcu::TestContext & testCtx,const std::string & name,DerivateFunc func,glu::DataType type,glu::Precision precision,bool inNonUniformControlFlow,SurfaceType surfaceType,int numSamples,const std::string & fragmentSrcTmpl,BaseUniformType usedDefaultUniform,bool demoteToHelperInvocaiton)1208 LinearDerivateCase::LinearDerivateCase(tcu::TestContext &testCtx, const std::string &name, DerivateFunc func,
1209 glu::DataType type, glu::Precision precision, bool inNonUniformControlFlow,
1210 SurfaceType surfaceType, int numSamples, const std::string &fragmentSrcTmpl,
1211 BaseUniformType usedDefaultUniform, bool demoteToHelperInvocaiton)
1212 : TriangleDerivateCase(testCtx, name, new LinearDerivateUniformSetup(false, usedDefaultUniform))
1213 , m_fragmentTmpl(fragmentSrcTmpl)
1214 {
1215 m_definitions.func = func;
1216 m_definitions.dataType = type;
1217 m_definitions.precision = precision;
1218 m_definitions.inNonUniformControlFlow = inNonUniformControlFlow;
1219 m_definitions.coordDataType = m_definitions.dataType;
1220 m_definitions.coordPrecision = m_definitions.precision;
1221 m_definitions.surfaceType = surfaceType;
1222 m_definitions.numSamples = numSamples;
1223 m_definitions.demoteToHelperInvocation = demoteToHelperInvocaiton;
1224
1225 const tcu::UVec2 viewportSize(VIEWPORT_WIDTH, VIEWPORT_HEIGHT);
1226 const float w = float(viewportSize.x());
1227 const float h = float(viewportSize.y());
1228
1229 switch (m_definitions.precision)
1230 {
1231 case glu::PRECISION_HIGHP:
1232 m_values.coordMin = tcu::Vec4(-97.f, 0.2f, 71.f, 74.f);
1233 m_values.coordMax = tcu::Vec4(-13.2f, -77.f, 44.f, 76.f);
1234 break;
1235
1236 case glu::PRECISION_MEDIUMP:
1237 m_values.coordMin = tcu::Vec4(-37.0f, 47.f, -7.f, 0.0f);
1238 m_values.coordMax = tcu::Vec4(-1.0f, 12.f, 7.f, 19.f);
1239 break;
1240
1241 case glu::PRECISION_LOWP:
1242 m_values.coordMin = tcu::Vec4(0.0f, -1.0f, 0.0f, 1.0f);
1243 m_values.coordMax = tcu::Vec4(1.0f, 1.0f, -1.0f, -1.0f);
1244 break;
1245
1246 default:
1247 DE_ASSERT(false);
1248 }
1249
1250 if (m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO)
1251 {
1252 // No scale or bias used for accuracy.
1253 m_values.derivScale = tcu::Vec4(1.0f);
1254 m_values.derivBias = tcu::Vec4(0.0f);
1255 }
1256 else
1257 {
1258 // Compute scale - bias that normalizes to 0..1 range.
1259 const tcu::Vec4 dx = (m_values.coordMax - m_values.coordMin) / tcu::Vec4(w, w, w * 0.5f, -w * 0.5f);
1260 const tcu::Vec4 dy = (m_values.coordMax - m_values.coordMin) / tcu::Vec4(h, h, h * 0.5f, -h * 0.5f);
1261
1262 if (isDfdxFunc(m_definitions.func))
1263 m_values.derivScale = 0.5f / dx;
1264 else if (isDfdyFunc(m_definitions.func))
1265 m_values.derivScale = 0.5f / dy;
1266 else if (isFwidthFunc(m_definitions.func))
1267 m_values.derivScale = 0.5f / (tcu::abs(dx) + tcu::abs(dy));
1268 else
1269 DE_ASSERT(false);
1270
1271 m_values.derivBias = tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f);
1272 }
1273 }
1274
~LinearDerivateCase(void)1275 LinearDerivateCase::~LinearDerivateCase(void)
1276 {
1277 }
1278
createInstance(Context & context) const1279 TestInstance *LinearDerivateCase::createInstance(Context &context) const
1280 {
1281 DE_ASSERT(m_uniformSetup != DE_NULL);
1282 if (m_fragmentTmpl.find("gl_SubgroupInvocationID") != std::string::npos)
1283 {
1284 if (!subgroups::areQuadOperationsSupportedForStages(context, VK_SHADER_STAGE_FRAGMENT_BIT))
1285 throw tcu::NotSupportedError("test requires VK_SUBGROUP_FEATURE_QUAD_BIT");
1286
1287 if (subgroups::getSubgroupSize(context) < 4)
1288 throw tcu::NotSupportedError("test requires subgroupSize >= 4");
1289 }
1290 return new LinearDerivateCaseInstance(context, *m_uniformSetup, m_definitions, m_values);
1291 }
1292
initPrograms(vk::SourceCollections & programCollection) const1293 void LinearDerivateCase::initPrograms(vk::SourceCollections &programCollection) const
1294 {
1295 const SpirvVersion spirvVersion = (m_definitions.inNonUniformControlFlow || isSubgroupFunc(m_definitions.func)) ?
1296 vk::SPIRV_VERSION_1_3 :
1297 vk::SPIRV_VERSION_1_0;
1298 const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, spirvVersion, 0u);
1299
1300 const bool packToInt = m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO;
1301 map<string, string> fragmentParams;
1302
1303 fragmentParams["OUTPUT_TYPE"] = glu::getDataTypeName(packToInt ? glu::TYPE_UINT_VEC4 : glu::TYPE_FLOAT_VEC4);
1304 fragmentParams["OUTPUT_PREC"] = glu::getPrecisionName(packToInt ? glu::PRECISION_HIGHP : m_definitions.precision);
1305 fragmentParams["PRECISION"] = glu::getPrecisionName(m_definitions.precision);
1306 fragmentParams["DATATYPE"] = glu::getDataTypeName(m_definitions.dataType);
1307 fragmentParams["FUNC"] = getDerivateFuncName(m_definitions.func);
1308
1309 if (packToInt)
1310 {
1311 fragmentParams["CAST_TO_OUTPUT"] = m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ?
1312 "floatBitsToUint(res)" :
1313 m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ?
1314 "floatBitsToUint(vec4(res, 1.0))" :
1315 m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ?
1316 "floatBitsToUint(vec4(res, 0.0, 1.0))" :
1317 /* TYPE_FLOAT */ "floatBitsToUint(vec4(res, 0.0, 0.0, 1.0))";
1318 }
1319 else
1320 {
1321 fragmentParams["CAST_TO_OUTPUT"] =
1322 m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "res" :
1323 m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? "vec4(res, 1.0)" :
1324 m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? "vec4(res, 0.0, 1.0)" :
1325 /* TYPE_FLOAT */ "vec4(res, 0.0, 0.0, 1.0)";
1326 }
1327
1328 std::string fragmentSrc = tcu::StringTemplate(m_fragmentTmpl).specialize(fragmentParams);
1329 programCollection.glslSources.add("vert")
1330 << glu::VertexSource(genVertexSource(m_definitions.coordDataType, m_definitions.coordPrecision));
1331 programCollection.glslSources.add("frag") << glu::FragmentSource(fragmentSrc) << buildOptions;
1332 }
1333
1334 // TextureDerivateCaseInstance
1335
1336 class TextureDerivateCaseInstance : public TriangleDerivateCaseInstance
1337 {
1338 public:
1339 TextureDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
1340 const DerivateCaseDefinition &definitions, const DerivateCaseValues &values,
1341 const TextureCaseValues &textureValues);
1342 virtual ~TextureDerivateCaseInstance(void);
1343
1344 virtual bool verify(const tcu::ConstPixelBufferAccess &result, const tcu::PixelBufferAccess &errorMask);
1345
1346 private:
1347 const TextureCaseValues &m_textureValues;
1348 };
1349
TextureDerivateCaseInstance(Context & context,const UniformSetup & uniformSetup,const DerivateCaseDefinition & definitions,const DerivateCaseValues & values,const TextureCaseValues & textureValues)1350 TextureDerivateCaseInstance::TextureDerivateCaseInstance(Context &context, const UniformSetup &uniformSetup,
1351 const DerivateCaseDefinition &definitions,
1352 const DerivateCaseValues &values,
1353 const TextureCaseValues &textureValues)
1354 : TriangleDerivateCaseInstance(context, uniformSetup, definitions, values)
1355 , m_textureValues(textureValues)
1356 {
1357 de::MovePtr<tcu::Texture2D> texture;
1358
1359 // Lowp and mediump cases use RGBA16F format, while highp uses RGBA32F.
1360 {
1361 const tcu::UVec2 viewportSize(VIEWPORT_WIDTH, VIEWPORT_HEIGHT);
1362 const tcu::TextureFormat format =
1363 glu::mapGLInternalFormat(m_definitions.precision == glu::PRECISION_HIGHP ? GL_RGBA32F : GL_RGBA16F);
1364
1365 texture = de::MovePtr<tcu::Texture2D>(new tcu::Texture2D(format, viewportSize.x(), viewportSize.y()));
1366 texture->allocLevel(0);
1367 }
1368
1369 // Fill with gradients.
1370 {
1371 const tcu::PixelBufferAccess level0 = texture->getLevel(0);
1372 for (int y = 0; y < level0.getHeight(); y++)
1373 {
1374 for (int x = 0; x < level0.getWidth(); x++)
1375 {
1376 const float xf = (float(x) + 0.5f) / float(level0.getWidth());
1377 const float yf = (float(y) + 0.5f) / float(level0.getHeight());
1378 const tcu::Vec4 s = tcu::Vec4(xf, yf, (xf + yf) / 2.0f, 1.0f - (xf + yf) / 2.0f);
1379
1380 level0.setPixel(m_textureValues.texValueMin +
1381 (m_textureValues.texValueMax - m_textureValues.texValueMin) * s,
1382 x, y);
1383 }
1384 }
1385 }
1386
1387 de::SharedPtr<TextureBinding> testTexture(new TextureBinding(
1388 texture.release(),
1389 tcu::Sampler(tcu::Sampler::CLAMP_TO_EDGE, tcu::Sampler::CLAMP_TO_EDGE, tcu::Sampler::CLAMP_TO_EDGE,
1390 tcu::Sampler::NEAREST, tcu::Sampler::NEAREST, 0.0f, true, tcu::Sampler::COMPAREMODE_NONE, 0,
1391 tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f), true)));
1392 m_textures.push_back(testTexture);
1393 }
1394
~TextureDerivateCaseInstance(void)1395 TextureDerivateCaseInstance::~TextureDerivateCaseInstance(void)
1396 {
1397 }
1398
verify(const tcu::ConstPixelBufferAccess & result,const tcu::PixelBufferAccess & errorMask)1399 bool TextureDerivateCaseInstance::verify(const tcu::ConstPixelBufferAccess &result,
1400 const tcu::PixelBufferAccess &errorMask)
1401 {
1402 // \note Edges are ignored in comparison
1403 if (result.getWidth() < 2 || result.getHeight() < 2)
1404 throw tcu::NotSupportedError("Too small viewport");
1405
1406 tcu::ConstPixelBufferAccess compareArea =
1407 tcu::getSubregion(result, 1, 1, result.getWidth() - 2, result.getHeight() - 2);
1408 tcu::PixelBufferAccess maskArea =
1409 tcu::getSubregion(errorMask, 1, 1, errorMask.getWidth() - 2, errorMask.getHeight() - 2);
1410 const tcu::Vec4 xScale = tcu::Vec4(1.0f, 0.0f, 0.5f, -0.5f);
1411 const tcu::Vec4 yScale = tcu::Vec4(0.0f, 1.0f, 0.5f, -0.5f);
1412 const float w = float(result.getWidth());
1413 const float h = float(result.getHeight());
1414
1415 const tcu::Vec4 surfaceThreshold = getSurfaceThreshold() / abs(m_values.derivScale);
1416
1417 if (isDfdxFunc(m_definitions.func) || isDfdyFunc(m_definitions.func))
1418 {
1419 const bool isX = isDfdxFunc(m_definitions.func);
1420 const float div = isX ? w : h;
1421 const tcu::Vec4 scale = isX ? xScale : yScale;
1422 tcu::Vec4 reference = ((m_textureValues.texValueMax - m_textureValues.texValueMin) / div);
1423 const tcu::Vec4 opThreshold = getDerivateThreshold(m_definitions.precision, m_textureValues.texValueMin,
1424 m_textureValues.texValueMax, reference);
1425 const tcu::Vec4 threshold = max(surfaceThreshold, opThreshold);
1426 const int numComps = glu::getDataTypeFloatScalars(m_definitions.dataType);
1427
1428 /* adjust the reference value for the correct dfdx or dfdy sample adjacency */
1429 reference = reference * scale;
1430
1431 m_context.getTestContext().getLog()
1432 << tcu::TestLog::Message << "Verifying result image.\n"
1433 << "\tValid derivative is " << LogVecComps(reference, numComps) << " with threshold "
1434 << LogVecComps(threshold, numComps) << tcu::TestLog::EndMessage;
1435
1436 // short circuit if result is strictly within the normal value error bounds.
1437 // This improves performance significantly.
1438 if (verifyConstantDerivate(m_context.getTestContext().getLog(), compareArea, maskArea, m_definitions.dataType,
1439 reference, threshold, m_values.derivScale, m_values.derivBias, LOG_NOTHING))
1440 {
1441 m_context.getTestContext().getLog()
1442 << tcu::TestLog::Message << "No incorrect derivatives found, result valid." << tcu::TestLog::EndMessage;
1443
1444 return true;
1445 }
1446
1447 // some pixels exceed error bounds calculated for normal values. Verify that these
1448 // potentially invalid pixels are in fact valid due to (for example) subnorm flushing.
1449
1450 m_context.getTestContext().getLog()
1451 << tcu::TestLog::Message
1452 << "Initial verification failed, verifying image by calculating accurate error bounds for each result "
1453 "pixel.\n"
1454 << "\tVerifying each result derivative is within its range of legal result values."
1455 << tcu::TestLog::EndMessage;
1456
1457 {
1458 const tcu::Vec4 valueRamp = (m_textureValues.texValueMax - m_textureValues.texValueMin);
1459 Linear2DFunctionEvaluator function;
1460
1461 function.matrix.setRow(0, tcu::Vec3(valueRamp.x() / w, 0.0f, m_textureValues.texValueMin.x()));
1462 function.matrix.setRow(1, tcu::Vec3(0.0f, valueRamp.y() / h, m_textureValues.texValueMin.y()));
1463 function.matrix.setRow(2, tcu::Vec3(valueRamp.z() / w, valueRamp.z() / h,
1464 m_textureValues.texValueMin.z() + m_textureValues.texValueMin.z()) /
1465 2.0f);
1466 function.matrix.setRow(3, tcu::Vec3(-valueRamp.w() / w, -valueRamp.w() / h,
1467 m_textureValues.texValueMax.w() + m_textureValues.texValueMax.w()) /
1468 2.0f);
1469
1470 return reverifyConstantDerivateWithFlushRelaxations(
1471 m_context.getTestContext().getLog(), compareArea, maskArea, m_definitions.dataType,
1472 m_definitions.precision, m_values.derivScale, m_values.derivBias, surfaceThreshold, m_definitions.func,
1473 function);
1474 }
1475 }
1476 else
1477 {
1478 DE_ASSERT(isFwidthFunc(m_definitions.func));
1479 const tcu::Vec4 dx = ((m_textureValues.texValueMax - m_textureValues.texValueMin) / w) * xScale;
1480 const tcu::Vec4 dy = ((m_textureValues.texValueMax - m_textureValues.texValueMin) / h) * yScale;
1481 const tcu::Vec4 reference = tcu::abs(dx) + tcu::abs(dy);
1482 const tcu::Vec4 dxThreshold = getDerivateThreshold(
1483 m_definitions.precision, m_textureValues.texValueMin * xScale, m_textureValues.texValueMax * xScale, dx);
1484 const tcu::Vec4 dyThreshold = getDerivateThreshold(
1485 m_definitions.precision, m_textureValues.texValueMin * yScale, m_textureValues.texValueMax * yScale, dy);
1486 const tcu::Vec4 threshold = max(surfaceThreshold, max(dxThreshold, dyThreshold));
1487
1488 return verifyConstantDerivate(m_context.getTestContext().getLog(), compareArea, maskArea,
1489 m_definitions.dataType, reference, threshold, m_values.derivScale,
1490 m_values.derivBias);
1491 }
1492 }
1493
1494 // TextureDerivateCase
1495
1496 class TextureDerivateCase : public TriangleDerivateCase
1497 {
1498 public:
1499 TextureDerivateCase(tcu::TestContext &testCtx, const std::string &name, DerivateFunc func, glu::DataType type,
1500 glu::Precision precision, SurfaceType surfaceType, int numSamples);
1501 virtual ~TextureDerivateCase(void);
1502
1503 virtual void initPrograms(vk::SourceCollections &programCollection) const;
1504 virtual TestInstance *createInstance(Context &context) const;
1505
1506 private:
1507 TextureCaseValues m_textureValues;
1508 };
1509
TextureDerivateCase(tcu::TestContext & testCtx,const std::string & name,DerivateFunc func,glu::DataType type,glu::Precision precision,SurfaceType surfaceType,int numSamples)1510 TextureDerivateCase::TextureDerivateCase(tcu::TestContext &testCtx, const std::string &name, DerivateFunc func,
1511 glu::DataType type, glu::Precision precision, SurfaceType surfaceType,
1512 int numSamples)
1513 : TriangleDerivateCase(testCtx, name, new DerivateUniformSetup(true))
1514 {
1515 m_definitions.dataType = type;
1516 m_definitions.func = func;
1517 m_definitions.precision = precision;
1518 m_definitions.coordDataType = glu::TYPE_FLOAT_VEC2;
1519 m_definitions.coordPrecision = glu::PRECISION_HIGHP;
1520 m_definitions.surfaceType = surfaceType;
1521 m_definitions.numSamples = numSamples;
1522
1523 // Texture size matches viewport and nearest sampling is used. Thus texture sampling
1524 // is equal to just interpolating the texture value range.
1525
1526 // Determine value range for texture.
1527
1528 switch (m_definitions.precision)
1529 {
1530 case glu::PRECISION_HIGHP:
1531 m_textureValues.texValueMin = tcu::Vec4(-97.f, 0.2f, 71.f, 74.f);
1532 m_textureValues.texValueMax = tcu::Vec4(-13.2f, -77.f, 44.f, 76.f);
1533 break;
1534
1535 case glu::PRECISION_MEDIUMP:
1536 m_textureValues.texValueMin = tcu::Vec4(-37.0f, 47.f, -7.f, 0.0f);
1537 m_textureValues.texValueMax = tcu::Vec4(-1.0f, 12.f, 7.f, 19.f);
1538 break;
1539
1540 case glu::PRECISION_LOWP:
1541 m_textureValues.texValueMin = tcu::Vec4(0.0f, -1.0f, 0.0f, 1.0f);
1542 m_textureValues.texValueMax = tcu::Vec4(1.0f, 1.0f, -1.0f, -1.0f);
1543 break;
1544
1545 default:
1546 DE_ASSERT(false);
1547 }
1548
1549 // Texture coordinates
1550 m_values.coordMin = tcu::Vec4(0.0f);
1551 m_values.coordMax = tcu::Vec4(1.0f);
1552
1553 if (m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO)
1554 {
1555 // No scale or bias used for accuracy.
1556 m_values.derivScale = tcu::Vec4(1.0f);
1557 m_values.derivBias = tcu::Vec4(0.0f);
1558 }
1559 else
1560 {
1561 // Compute scale - bias that normalizes to 0..1 range.
1562 const tcu::UVec2 viewportSize(VIEWPORT_WIDTH, VIEWPORT_HEIGHT);
1563 const float w = float(viewportSize.x());
1564 const float h = float(viewportSize.y());
1565 const tcu::Vec4 dx =
1566 (m_textureValues.texValueMax - m_textureValues.texValueMin) / tcu::Vec4(w, w, w * 0.5f, -w * 0.5f);
1567 const tcu::Vec4 dy =
1568 (m_textureValues.texValueMax - m_textureValues.texValueMin) / tcu::Vec4(h, h, h * 0.5f, -h * 0.5f);
1569
1570 if (isDfdxFunc(m_definitions.func))
1571 m_values.derivScale = 0.5f / dx;
1572 else if (isDfdyFunc(m_definitions.func))
1573 m_values.derivScale = 0.5f / dy;
1574 else if (isFwidthFunc(m_definitions.func))
1575 m_values.derivScale = 0.5f / (tcu::abs(dx) + tcu::abs(dy));
1576 else
1577 DE_ASSERT(false);
1578
1579 m_values.derivBias = tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f);
1580 }
1581 }
1582
~TextureDerivateCase(void)1583 TextureDerivateCase::~TextureDerivateCase(void)
1584 {
1585 }
1586
createInstance(Context & context) const1587 TestInstance *TextureDerivateCase::createInstance(Context &context) const
1588 {
1589 DE_ASSERT(m_uniformSetup != DE_NULL);
1590 return new TextureDerivateCaseInstance(context, *m_uniformSetup, m_definitions, m_values, m_textureValues);
1591 }
1592
initPrograms(vk::SourceCollections & programCollection) const1593 void TextureDerivateCase::initPrograms(vk::SourceCollections &programCollection) const
1594 {
1595 // Generate shader
1596 {
1597 const char *fragmentTmpl = "#version 450\n"
1598 "layout(location = 0) in highp vec2 v_coord;\n"
1599 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1600 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1601 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1602 "layout(binding = 2) uniform ${PRECISION} sampler2D u_sampler;\n"
1603 "void main (void)\n"
1604 "{\n"
1605 " ${PRECISION} vec4 tex = texture(u_sampler, v_coord);\n"
1606 " ${PRECISION} ${DATATYPE} res = ${FUNC}(tex${SWIZZLE}) * u_scale + u_bias;\n"
1607 " o_color = ${CAST_TO_OUTPUT};\n"
1608 "}\n";
1609
1610 const bool packToInt = m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO;
1611 map<string, string> fragmentParams;
1612
1613 fragmentParams["OUTPUT_TYPE"] = glu::getDataTypeName(packToInt ? glu::TYPE_UINT_VEC4 : glu::TYPE_FLOAT_VEC4);
1614 fragmentParams["OUTPUT_PREC"] =
1615 glu::getPrecisionName(packToInt ? glu::PRECISION_HIGHP : m_definitions.precision);
1616 fragmentParams["PRECISION"] = glu::getPrecisionName(m_definitions.precision);
1617 fragmentParams["DATATYPE"] = glu::getDataTypeName(m_definitions.dataType);
1618 fragmentParams["FUNC"] = getDerivateFuncName(m_definitions.func);
1619 fragmentParams["SWIZZLE"] = m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "" :
1620 m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? ".xyz" :
1621 m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? ".xy" :
1622 /* TYPE_FLOAT */ ".x";
1623
1624 if (packToInt)
1625 {
1626 fragmentParams["CAST_TO_OUTPUT"] = m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ?
1627 "floatBitsToUint(res)" :
1628 m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ?
1629 "floatBitsToUint(vec4(res, 1.0))" :
1630 m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ?
1631 "floatBitsToUint(vec4(res, 0.0, 1.0))" :
1632 /* TYPE_FLOAT */ "floatBitsToUint(vec4(res, 0.0, 0.0, 1.0))";
1633 }
1634 else
1635 {
1636 fragmentParams["CAST_TO_OUTPUT"] =
1637 m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "res" :
1638 m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? "vec4(res, 1.0)" :
1639 m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? "vec4(res, 0.0, 1.0)" :
1640 /* TYPE_FLOAT */ "vec4(res, 0.0, 0.0, 1.0)";
1641 }
1642
1643 std::string fragmentSrc = tcu::StringTemplate(fragmentTmpl).specialize(fragmentParams);
1644 programCollection.glslSources.add("vert")
1645 << glu::VertexSource(genVertexSource(m_definitions.coordDataType, m_definitions.coordPrecision));
1646 programCollection.glslSources.add("frag") << glu::FragmentSource(fragmentSrc);
1647 }
1648 }
1649
1650 // ShaderDerivateTests
1651
1652 class ShaderDerivateTests : public tcu::TestCaseGroup
1653 {
1654 public:
1655 ShaderDerivateTests(tcu::TestContext &testCtx);
1656 virtual ~ShaderDerivateTests(void);
1657
1658 virtual void init(void);
1659
1660 private:
1661 ShaderDerivateTests(const ShaderDerivateTests &); // not allowed!
1662 ShaderDerivateTests &operator=(const ShaderDerivateTests &); // not allowed!
1663 };
1664
ShaderDerivateTests(tcu::TestContext & testCtx)1665 ShaderDerivateTests::ShaderDerivateTests(tcu::TestContext &testCtx) : TestCaseGroup(testCtx, "derivate")
1666 {
1667 }
1668
~ShaderDerivateTests(void)1669 ShaderDerivateTests::~ShaderDerivateTests(void)
1670 {
1671 }
1672
1673 struct FunctionSpec
1674 {
1675 std::string name;
1676 DerivateFunc function;
1677 glu::DataType dataType;
1678 glu::Precision precision;
1679
FunctionSpecvkt::sr::__anon71e082ae0111::FunctionSpec1680 FunctionSpec(const std::string &name_, DerivateFunc function_, glu::DataType dataType_, glu::Precision precision_)
1681 : name(name_)
1682 , function(function_)
1683 , dataType(dataType_)
1684 , precision(precision_)
1685 {
1686 }
1687 };
1688
init(void)1689 void ShaderDerivateTests::init(void)
1690 {
1691 static const struct
1692 {
1693 const char *name;
1694 const char *description;
1695 const char *source;
1696 BaseUniformType usedDefaultUniform;
1697 bool inNonUniformControlFlow;
1698 bool demoteToHelperInvocation;
1699 } s_linearDerivateCases[] = {
1700 {"linear", "Basic derivate of linearly interpolated argument",
1701
1702 "#version 450\n"
1703 "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1704 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1705 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1706 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1707 "void main (void)\n"
1708 "{\n"
1709 " ${PRECISION} ${DATATYPE} res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1710 " o_color = ${CAST_TO_OUTPUT};\n"
1711 "}\n",
1712
1713 U_LAST, false, false},
1714 {"in_function", "Derivate of linear function argument",
1715
1716 "#version 450\n"
1717 "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1718 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1719 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1720 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1721 "\n"
1722 "${PRECISION} ${DATATYPE} computeRes (${PRECISION} ${DATATYPE} value)\n"
1723 "{\n"
1724 " return ${FUNC}(v_coord) * u_scale + u_bias;\n"
1725 "}\n"
1726 "\n"
1727 "void main (void)\n"
1728 "{\n"
1729 " ${PRECISION} ${DATATYPE} res = computeRes(v_coord);\n"
1730 " o_color = ${CAST_TO_OUTPUT};\n"
1731 "}\n",
1732
1733 U_LAST, false, false},
1734 {"static_if", "Derivate of linearly interpolated value in static if",
1735
1736 "#version 450\n"
1737 "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1738 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1739 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1740 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1741 "void main (void)\n"
1742 "{\n"
1743 " ${PRECISION} ${DATATYPE} res;\n"
1744 " if (false)\n"
1745 " res = ${FUNC}(-v_coord) * u_scale + u_bias;\n"
1746 " else\n"
1747 " res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1748 " o_color = ${CAST_TO_OUTPUT};\n"
1749 "}\n",
1750
1751 U_LAST, false, false},
1752 {"static_loop", "Derivate of linearly interpolated value in static loop",
1753
1754 "#version 450\n"
1755 "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1756 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1757 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1758 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1759 "void main (void)\n"
1760 "{\n"
1761 " ${PRECISION} ${DATATYPE} res = ${DATATYPE}(0.0);\n"
1762 " for (int i = 0; i < 2; i++)\n"
1763 " res += ${FUNC}(v_coord * float(i));\n"
1764 " res = res * u_scale + u_bias;\n"
1765 " o_color = ${CAST_TO_OUTPUT};\n"
1766 "}\n",
1767
1768 U_LAST, false, false},
1769 {"static_switch", "Derivate of linearly interpolated value in static switch",
1770
1771 "#version 450\n"
1772 "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1773 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1774 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1775 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1776 "void main (void)\n"
1777 "{\n"
1778 " ${PRECISION} ${DATATYPE} res;\n"
1779 " switch (1)\n"
1780 " {\n"
1781 " case 0: res = ${FUNC}(-v_coord) * u_scale + u_bias; break;\n"
1782 " case 1: res = ${FUNC}(v_coord) * u_scale + u_bias; break;\n"
1783 " }\n"
1784 " o_color = ${CAST_TO_OUTPUT};\n"
1785 "}\n",
1786
1787 U_LAST, false, false},
1788 {"uniform_if", "Derivate of linearly interpolated value in uniform if",
1789
1790 "#version 450\n"
1791 "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1792 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1793 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1794 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1795 "layout(binding = 2, std140) uniform Ui_true { bool ub_true; };\n"
1796 "void main (void)\n"
1797 "{\n"
1798 " ${PRECISION} ${DATATYPE} res;\n"
1799 " if (ub_true)"
1800 " res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1801 " else\n"
1802 " res = ${FUNC}(-v_coord) * u_scale + u_bias;\n"
1803 " o_color = ${CAST_TO_OUTPUT};\n"
1804 "}\n",
1805
1806 UB_TRUE, false, false},
1807 {"uniform_loop", "Derivate of linearly interpolated value in uniform loop",
1808
1809 "#version 450\n"
1810 "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1811 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1812 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1813 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1814 "layout(binding = 2, std140) uniform Ui_two { int ui_two; };\n"
1815 "void main (void)\n"
1816 "{\n"
1817 " ${PRECISION} ${DATATYPE} res = ${DATATYPE}(0.0);\n"
1818 " for (int i = 0; i < ui_two; i++)\n"
1819 " res += ${FUNC}(v_coord * float(i));\n"
1820 " res = res * u_scale + u_bias;\n"
1821 " o_color = ${CAST_TO_OUTPUT};\n"
1822 "}\n",
1823
1824 UI_TWO, false, false},
1825 {"uniform_switch", "Derivate of linearly interpolated value in uniform switch",
1826
1827 "#version 450\n"
1828 "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1829 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1830 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1831 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1832 "layout(binding = 2, std140) uniform Ui_one { int ui_one; };\n"
1833 "void main (void)\n"
1834 "{\n"
1835 " ${PRECISION} ${DATATYPE} res;\n"
1836 " switch (ui_one)\n"
1837 " {\n"
1838 " case 0: res = ${FUNC}(-v_coord) * u_scale + u_bias; break;\n"
1839 " case 1: res = ${FUNC}(v_coord) * u_scale + u_bias; break;\n"
1840 " }\n"
1841 " o_color = ${CAST_TO_OUTPUT};\n"
1842 "}\n",
1843
1844 UI_ONE, false, false},
1845 {"dynamic_if", "Derivate of linearly interpolated value in static if",
1846
1847 "#version 450\n"
1848 "#extension GL_KHR_shader_subgroup_ballot : require\n"
1849 "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1850 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1851 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1852 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1853 "layout(binding = 2, std140) uniform Ui_one { int ui_one; };\n"
1854 "void main (void)\n"
1855 "{\n"
1856 " ${PRECISION} ${DATATYPE} res;\n"
1857 " bool non_uniform = ((uint(gl_FragCoord.x * 0.4) + uint(gl_FragCoord.y * 0.3)) & 2) != 0;\n"
1858 " uvec4 quad_ballot = uvec4(0);\n"
1859 " quad_ballot[gl_SubgroupInvocationID >> 5] = 0xf << (gl_SubgroupInvocationID & 0x1c);\n"
1860 " bool quad_uniform = (subgroupBallot(non_uniform) & quad_ballot) == quad_ballot;\n"
1861 " if (quad_uniform)\n"
1862 " res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1863 " else\n"
1864 " res = ${FUNC}(v_coord * float(ui_one)) * u_scale + u_bias;\n"
1865 " o_color = ${CAST_TO_OUTPUT};\n"
1866 "}\n",
1867
1868 UI_ONE, true, false},
1869 {"dynamic_loop", "Derivate of linearly interpolated value in uniform loop",
1870
1871 "#version 450\n"
1872 "#extension GL_KHR_shader_subgroup_ballot : require\n"
1873 "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1874 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1875 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1876 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1877 "layout(binding = 2, std140) uniform Ui_one { int ui_one; };\n"
1878 "void main (void)\n"
1879 "{\n"
1880 " ${PRECISION} ${DATATYPE} res = ${DATATYPE}(0.0);\n"
1881 " bool non_uniform = ((uint(gl_FragCoord.x * 0.4) + uint(gl_FragCoord.y * 0.3)) & 2) != 0;\n"
1882 " uvec4 quad_ballot = uvec4(0);\n"
1883 " quad_ballot[gl_SubgroupInvocationID >> 5] = 0xf << (gl_SubgroupInvocationID & 0x1c);\n"
1884 " bool quad_uniform = (subgroupBallot(non_uniform) & quad_ballot) == quad_ballot;\n"
1885 " for (int i = 0; i < ui_one + int(quad_uniform); i++)\n"
1886 " res = ${FUNC}(v_coord * float(i - int(quad_uniform) + 1));\n"
1887 " res = res * u_scale + u_bias;\n"
1888 " o_color = ${CAST_TO_OUTPUT};\n"
1889 "}\n",
1890
1891 UI_ONE, true, false},
1892 {"dynamic_switch", "Derivate of linearly interpolated value in uniform switch",
1893
1894 "#version 450\n"
1895 "#extension GL_KHR_shader_subgroup_ballot : require\n"
1896 "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1897 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1898 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1899 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1900 "layout(binding = 2, std140) uniform Ui_one { int ui_one; };\n"
1901 "void main (void)\n"
1902 "{\n"
1903 " ${PRECISION} ${DATATYPE} res;\n"
1904 " bool non_uniform = ((uint(gl_FragCoord.x * 0.4) + uint(gl_FragCoord.y * 0.3)) & 2) != 0;\n"
1905 " uvec4 quad_ballot = uvec4(0);\n"
1906 " quad_ballot[gl_SubgroupInvocationID >> 5] = 0xf << (gl_SubgroupInvocationID & 0x1c);\n"
1907 " bool quad_uniform = (subgroupBallot(non_uniform) & quad_ballot) == quad_ballot;\n"
1908 " switch (int(quad_uniform))\n"
1909 " {\n"
1910 " case 0: res = ${FUNC}(v_coord) * u_scale + u_bias; break;\n"
1911 " case 1: res = ${FUNC}(v_coord * float(ui_one)) * u_scale + u_bias; break;\n"
1912 " }\n"
1913 " o_color = ${CAST_TO_OUTPUT};\n"
1914 "}\n",
1915
1916 UI_ONE, true, false},
1917 {"output_store", "Store variable to output and read it before using in a derivative",
1918
1919 "#version 450\n"
1920 "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1921 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1922 "layout(location = 1) out ${PRECISION} ${DATATYPE} intermediateStore;\n"
1923 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1924 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1925 "void main (void)\n"
1926 "{\n"
1927 " intermediateStore = v_coord;\n"
1928 " ${PRECISION} ${DATATYPE} res = ${FUNC}(intermediateStore) * u_scale + u_bias;\n"
1929 " o_color = ${CAST_TO_OUTPUT};\n"
1930 "}\n",
1931
1932 U_LAST, false, true},
1933 {"private_store", "Store variable to global and read it before using in a derivative",
1934
1935 "#version 450\n"
1936 "#extension GL_EXT_demote_to_helper_invocation : enable\n"
1937 "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1938 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1939 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1940 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1941 "${PRECISION} ${DATATYPE} intermediateStore;\n"
1942 "void main (void)\n"
1943 "{\n"
1944 " intermediateStore = v_coord;\n"
1945 " if (mod(gl_FragCoord.y, 2.0f) == 1.0f) demote;\n"
1946 " ${PRECISION} ${DATATYPE} res = ${FUNC}(intermediateStore) * u_scale + u_bias;\n"
1947 " o_color = ${CAST_TO_OUTPUT};\n"
1948 "}\n",
1949
1950 U_LAST, false, true},
1951 };
1952
1953 const char *dFdxSubgroupSource =
1954 "#version 450\n"
1955 "#extension GL_KHR_shader_subgroup_ballot : require\n"
1956 "#extension GL_KHR_shader_subgroup_quad : require\n"
1957 "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1958 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1959 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1960 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1961 "${DATATYPE} dFdxSubgroup(${DATATYPE} f)\n"
1962 "{\n"
1963 " ${DATATYPE} left, right;\n"
1964 " if ((gl_SubgroupInvocationID & 2) == 0) {\n"
1965 " left = subgroupQuadBroadcast(f, 0);\n"
1966 " right = subgroupQuadBroadcast(f, 1);\n"
1967 " } else {\n"
1968 " left = subgroupQuadBroadcast(f, 2);\n"
1969 " right = subgroupQuadBroadcast(f, 3);\n"
1970 " }\n"
1971 " return right - left;\n"
1972 "}\n"
1973 "\n"
1974 "void main (void)\n"
1975 "{\n"
1976 " uvec4 quad_ballot = uvec4(0);\n"
1977 " ${PRECISION} ${DATATYPE} res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1978 " o_color = ${CAST_TO_OUTPUT};\n"
1979 "}\n";
1980
1981 const char *dFdySubgroupSource =
1982 "#version 450\n"
1983 "#extension GL_KHR_shader_subgroup_quad : require\n"
1984 "#extension GL_KHR_shader_subgroup_ballot : require\n"
1985 "layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1986 "layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1987 "layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1988 "layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1989 "${DATATYPE} dFdySubgroup(${DATATYPE} f)\n"
1990 "{\n"
1991 " ${DATATYPE} top, bottom;\n"
1992 " if ((gl_SubgroupInvocationID & 1) == 0) {\n"
1993 " top = subgroupQuadBroadcast(f, 0);\n"
1994 " bottom = subgroupQuadBroadcast(f, 2);\n"
1995 " } else {\n"
1996 " top = subgroupQuadBroadcast(f, 1);\n"
1997 " bottom = subgroupQuadBroadcast(f, 3);\n"
1998 " }\n"
1999 " return bottom - top;\n"
2000 "}\n"
2001 "\n"
2002 "void main (void)\n"
2003 "{\n"
2004 " uvec4 quad_ballot = uvec4(0);\n"
2005 " quad_ballot[gl_SubgroupInvocationID >> 5] = 0xf << (gl_SubgroupInvocationID & 0x1c);\n"
2006 " ${PRECISION} ${DATATYPE} res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
2007 " o_color = ${CAST_TO_OUTPUT};\n"
2008 "}\n";
2009
2010 static const struct
2011 {
2012 const char *name;
2013 SurfaceType surfaceType;
2014 int numSamples;
2015 } s_fboConfigs[] = {
2016 {"fbo", SURFACETYPE_UNORM_FBO, 0},
2017 {"fbo_msaa2", SURFACETYPE_UNORM_FBO, 2},
2018 {"fbo_msaa4", SURFACETYPE_UNORM_FBO, 4},
2019 {"fbo_float", SURFACETYPE_FLOAT_FBO, 0},
2020 };
2021
2022 static const struct
2023 {
2024 const char *name;
2025 SurfaceType surfaceType;
2026 int numSamples;
2027 } s_textureConfigs[] = {
2028 {"basic", SURFACETYPE_UNORM_FBO, 0},
2029 {"msaa4", SURFACETYPE_UNORM_FBO, 4},
2030 {"float", SURFACETYPE_FLOAT_FBO, 0},
2031 };
2032
2033 // .dfdx[fine|coarse], .dfdy[fine|coarse], .fwidth[fine|coarse]
2034 for (int funcNdx = 0; funcNdx < DERIVATE_LAST; funcNdx++)
2035 {
2036 const DerivateFunc function = DerivateFunc(funcNdx);
2037 de::MovePtr<tcu::TestCaseGroup> functionGroup(
2038 new tcu::TestCaseGroup(m_testCtx, getDerivateFuncCaseName(function)));
2039
2040 // .constant - no precision variants and no subgroup derivatives, checks that derivate of constant arguments is 0
2041 if (!isSubgroupFunc(function))
2042 {
2043 // Derivate of constant argument
2044 de::MovePtr<tcu::TestCaseGroup> constantGroup(new tcu::TestCaseGroup(m_testCtx, "constant"));
2045
2046 for (int vecSize = 1; vecSize <= 4; vecSize++)
2047 {
2048 const glu::DataType dataType = vecSize > 1 ? glu::getDataTypeFloatVec(vecSize) : glu::TYPE_FLOAT;
2049 constantGroup->addChild(
2050 new ConstantDerivateCase(m_testCtx, glu::getDataTypeName(dataType), function, dataType));
2051 }
2052
2053 functionGroup->addChild(constantGroup.release());
2054 }
2055
2056 // Cases based on LinearDerivateCase; subgroup derivatives are handled separately
2057 if (!isSubgroupFunc(function))
2058 {
2059 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(s_linearDerivateCases); caseNdx++)
2060 {
2061 de::MovePtr<tcu::TestCaseGroup> linearCaseGroup(
2062 new tcu::TestCaseGroup(m_testCtx, s_linearDerivateCases[caseNdx].name));
2063 const char *source = s_linearDerivateCases[caseNdx].source;
2064
2065 for (int vecSize = 1; vecSize <= 4; vecSize++)
2066 {
2067 for (int precNdx = 0; precNdx < glu::PRECISION_LAST; precNdx++)
2068 {
2069 const glu::DataType dataType =
2070 vecSize > 1 ? glu::getDataTypeFloatVec(vecSize) : glu::TYPE_FLOAT;
2071 const glu::Precision precision = glu::Precision(precNdx);
2072 const SurfaceType surfaceType = SURFACETYPE_UNORM_FBO;
2073 const int numSamples = 0;
2074 std::ostringstream caseName;
2075
2076 if (caseNdx != 0 && precision == glu::PRECISION_LOWP)
2077 continue; // Skip as lowp doesn't actually produce any bits when rendered to default FB.
2078
2079 caseName << glu::getDataTypeName(dataType) << "_" << glu::getPrecisionName(precision);
2080
2081 linearCaseGroup->addChild(new LinearDerivateCase(
2082 m_testCtx, caseName.str(), function, dataType, precision,
2083 s_linearDerivateCases[caseNdx].inNonUniformControlFlow, surfaceType, numSamples, source,
2084 s_linearDerivateCases[caseNdx].usedDefaultUniform,
2085 s_linearDerivateCases[caseNdx].demoteToHelperInvocation));
2086 }
2087 }
2088
2089 functionGroup->addChild(linearCaseGroup.release());
2090 }
2091 }
2092
2093 // Fbo cases
2094 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(s_fboConfigs); caseNdx++)
2095 {
2096 // Derivate usage when rendering into FBO
2097 de::MovePtr<tcu::TestCaseGroup> fboGroup(new tcu::TestCaseGroup(m_testCtx, s_fboConfigs[caseNdx].name));
2098 // use source from subgroup source or source from .linear group
2099 const char *source = function == DERIVATE_DFDXSUBGROUP ? dFdxSubgroupSource :
2100 function == DERIVATE_DFDYSUBGROUP ? dFdySubgroupSource :
2101 s_linearDerivateCases[0].source;
2102 const SurfaceType surfaceType = s_fboConfigs[caseNdx].surfaceType;
2103 const int numSamples = s_fboConfigs[caseNdx].numSamples;
2104
2105 for (int vecSize = 1; vecSize <= 4; vecSize++)
2106 {
2107 for (int precNdx = 0; precNdx < glu::PRECISION_LAST; precNdx++)
2108 {
2109 const glu::DataType dataType = vecSize > 1 ? glu::getDataTypeFloatVec(vecSize) : glu::TYPE_FLOAT;
2110 const glu::Precision precision = glu::Precision(precNdx);
2111 std::ostringstream caseName;
2112
2113 if (surfaceType != SURFACETYPE_FLOAT_FBO && precision == glu::PRECISION_LOWP)
2114 continue; // Skip as lowp doesn't actually produce any bits when rendered to U8 RT.
2115
2116 caseName << glu::getDataTypeName(dataType) << "_" << glu::getPrecisionName(precision);
2117
2118 fboGroup->addChild(new LinearDerivateCase(m_testCtx, caseName.str(), function, dataType, precision,
2119 false, surfaceType, numSamples, source, U_LAST, false));
2120 }
2121 }
2122
2123 functionGroup->addChild(fboGroup.release());
2124 }
2125
2126 // .texture
2127 if (!isSubgroupFunc(function))
2128 {
2129 de::MovePtr<tcu::TestCaseGroup> textureGroup(new tcu::TestCaseGroup(m_testCtx, "texture"));
2130
2131 for (int texCaseNdx = 0; texCaseNdx < DE_LENGTH_OF_ARRAY(s_textureConfigs); texCaseNdx++)
2132 {
2133 de::MovePtr<tcu::TestCaseGroup> caseGroup(
2134 new tcu::TestCaseGroup(m_testCtx, s_textureConfigs[texCaseNdx].name));
2135 const SurfaceType surfaceType = s_textureConfigs[texCaseNdx].surfaceType;
2136 const int numSamples = s_textureConfigs[texCaseNdx].numSamples;
2137
2138 for (int vecSize = 1; vecSize <= 4; vecSize++)
2139 {
2140 for (int precNdx = 0; precNdx < glu::PRECISION_LAST; precNdx++)
2141 {
2142 const glu::DataType dataType =
2143 vecSize > 1 ? glu::getDataTypeFloatVec(vecSize) : glu::TYPE_FLOAT;
2144 const glu::Precision precision = glu::Precision(precNdx);
2145 std::ostringstream caseName;
2146
2147 if (surfaceType != SURFACETYPE_FLOAT_FBO && precision == glu::PRECISION_LOWP)
2148 continue; // Skip as lowp doesn't actually produce any bits when rendered to U8 RT.
2149
2150 caseName << glu::getDataTypeName(dataType) << "_" << glu::getPrecisionName(precision);
2151
2152 caseGroup->addChild(new TextureDerivateCase(m_testCtx, caseName.str(), function, dataType,
2153 precision, surfaceType, numSamples));
2154 }
2155 }
2156
2157 textureGroup->addChild(caseGroup.release());
2158 }
2159
2160 functionGroup->addChild(textureGroup.release());
2161 }
2162
2163 addChild(functionGroup.release());
2164 }
2165 }
2166
2167 } // namespace
2168
createDerivateTests(tcu::TestContext & testCtx)2169 tcu::TestCaseGroup *createDerivateTests(tcu::TestContext &testCtx)
2170 {
2171 return new ShaderDerivateTests(testCtx);
2172 }
2173
2174 } // namespace sr
2175 } // namespace vkt
2176