xref: /aosp_15_r20/external/deqp/framework/referencerenderer/rrFragmentOperations.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program Reference Renderer
3  * -----------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Reference implementation for per-fragment operations.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "rrFragmentOperations.hpp"
25 #include "tcuVectorUtil.hpp"
26 #include "tcuTextureUtil.hpp"
27 #include <limits>
28 
29 using de::clamp;
30 using de::max;
31 using de::min;
32 using tcu::clamp;
33 using tcu::IVec2;
34 using tcu::IVec4;
35 using tcu::max;
36 using tcu::min;
37 using tcu::UVec4;
38 using tcu::Vec3;
39 using tcu::Vec4;
40 
41 namespace rr
42 {
43 
// Merge two bit patterns: every bit selected by mask is taken from newValue,
// every remaining bit is kept from oldValue.
static inline int maskedBitReplace(int oldValue, int newValue, uint32_t mask)
{
    const uint32_t keptBits     = oldValue & ~mask;
    const uint32_t replacedBits = newValue & mask;

    return keptBits | replacedBits;
}
49 
isInsideRect(const IVec2 & point,const WindowRectangle & rect)50 static inline bool isInsideRect(const IVec2 &point, const WindowRectangle &rect)
51 {
52     return de::inBounds(point.x(), rect.left, rect.left + rect.width) &&
53            de::inBounds(point.y(), rect.bottom, rect.bottom + rect.height);
54 }
55 
// Divide the RGB components of v by its alpha and return the result with alpha unchanged.
// When alpha is not strictly positive the RGB components are expected to be zero (asserted)
// and an all-zero vector is returned.
static inline Vec4 unpremultiply(const Vec4 &v)
{
    const float alpha = v.w();

    if (!(alpha > 0.0f))
    {
        DE_ASSERT(v.x() == 0.0f && v.y() == 0.0f && v.z() == 0.0f);
        return Vec4(0.0f, 0.0f, 0.0f, 0.0f);
    }

    return Vec4(v.x() / alpha, v.y() / alpha, v.z() / alpha, alpha);
}
66 
clearMultisampleColorBuffer(const tcu::PixelBufferAccess & dst,const Vec4 & v,const WindowRectangle & r)67 void clearMultisampleColorBuffer(const tcu::PixelBufferAccess &dst, const Vec4 &v, const WindowRectangle &r)
68 {
69     tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);
70 }
clearMultisampleColorBuffer(const tcu::PixelBufferAccess & dst,const IVec4 & v,const WindowRectangle & r)71 void clearMultisampleColorBuffer(const tcu::PixelBufferAccess &dst, const IVec4 &v, const WindowRectangle &r)
72 {
73     tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);
74 }
clearMultisampleColorBuffer(const tcu::PixelBufferAccess & dst,const UVec4 & v,const WindowRectangle & r)75 void clearMultisampleColorBuffer(const tcu::PixelBufferAccess &dst, const UVec4 &v, const WindowRectangle &r)
76 {
77     tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v.cast<int>());
78 }
clearMultisampleDepthBuffer(const tcu::PixelBufferAccess & dst,float v,const WindowRectangle & r)79 void clearMultisampleDepthBuffer(const tcu::PixelBufferAccess &dst, float v, const WindowRectangle &r)
80 {
81     tcu::clearDepth(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);
82 }
clearMultisampleStencilBuffer(const tcu::PixelBufferAccess & dst,int v,const WindowRectangle & r)83 void clearMultisampleStencilBuffer(const tcu::PixelBufferAccess &dst, int v, const WindowRectangle &r)
84 {
85     tcu::clearStencil(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);
86 }
87 
FragmentProcessor(void)88 FragmentProcessor::FragmentProcessor(void) : m_sampleRegister()
89 {
90 }
91 
executeScissorTest(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const WindowRectangle & scissorRect)92 void FragmentProcessor::executeScissorTest(int fragNdxOffset, int numSamplesPerFragment, const Fragment *inputFragments,
93                                            const WindowRectangle &scissorRect)
94 {
95     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
96     {
97         if (m_sampleRegister[regSampleNdx].isAlive)
98         {
99             int fragNdx = fragNdxOffset + regSampleNdx / numSamplesPerFragment;
100 
101             if (!isInsideRect(inputFragments[fragNdx].pixelCoord, scissorRect))
102                 m_sampleRegister[regSampleNdx].isAlive = false;
103         }
104     }
105 }
106 
executeStencilCompare(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const StencilState & stencilState,int numStencilBits,const tcu::ConstPixelBufferAccess & stencilBuffer)107 void FragmentProcessor::executeStencilCompare(int fragNdxOffset, int numSamplesPerFragment,
108                                               const Fragment *inputFragments, const StencilState &stencilState,
109                                               int numStencilBits, const tcu::ConstPixelBufferAccess &stencilBuffer)
110 {
111 #define SAMPLE_REGISTER_STENCIL_COMPARE(COMPARE_EXPRESSION)                                              \
112     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                      \
113     {                                                                                                    \
114         if (m_sampleRegister[regSampleNdx].isAlive)                                                      \
115         {                                                                                                \
116             int fragSampleNdx    = regSampleNdx % numSamplesPerFragment;                                 \
117             const Fragment &frag = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment]; \
118             int stencilBufferValue =                                                                     \
119                 stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());    \
120             int maskedRef = stencilState.compMask & clampedStencilRef;                                   \
121             int maskedBuf = stencilState.compMask & stencilBufferValue;                                  \
122             DE_UNREF(maskedRef);                                                                         \
123             DE_UNREF(maskedBuf);                                                                         \
124                                                                                                          \
125             m_sampleRegister[regSampleNdx].stencilPassed = (COMPARE_EXPRESSION);                         \
126         }                                                                                                \
127     }
128 
129     int clampedStencilRef = de::clamp(stencilState.ref, 0, (1 << numStencilBits) - 1);
130 
131     switch (stencilState.func)
132     {
133     case TESTFUNC_NEVER:
134         SAMPLE_REGISTER_STENCIL_COMPARE(false) break;
135     case TESTFUNC_ALWAYS:
136         SAMPLE_REGISTER_STENCIL_COMPARE(true) break;
137     case TESTFUNC_LESS:
138         SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef < maskedBuf) break;
139     case TESTFUNC_LEQUAL:
140         SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <= maskedBuf) break;
141     case TESTFUNC_GREATER:
142         SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef > maskedBuf) break;
143     case TESTFUNC_GEQUAL:
144         SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >= maskedBuf) break;
145     case TESTFUNC_EQUAL:
146         SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef == maskedBuf) break;
147     case TESTFUNC_NOTEQUAL:
148         SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef != maskedBuf) break;
149     default:
150         DE_ASSERT(false);
151     }
152 
153 #undef SAMPLE_REGISTER_STENCIL_COMPARE
154 }
155 
executeStencilSFail(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const StencilState & stencilState,int numStencilBits,const tcu::PixelBufferAccess & stencilBuffer)156 void FragmentProcessor::executeStencilSFail(int fragNdxOffset, int numSamplesPerFragment,
157                                             const Fragment *inputFragments, const StencilState &stencilState,
158                                             int numStencilBits, const tcu::PixelBufferAccess &stencilBuffer)
159 {
160 #define SAMPLE_REGISTER_SFAIL(SFAIL_EXPRESSION)                                                                  \
161     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                              \
162     {                                                                                                            \
163         if (m_sampleRegister[regSampleNdx].isAlive && !m_sampleRegister[regSampleNdx].stencilPassed)             \
164         {                                                                                                        \
165             int fragSampleNdx    = regSampleNdx % numSamplesPerFragment;                                         \
166             const Fragment &frag = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment];         \
167             int stencilBufferValue =                                                                             \
168                 stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());            \
169                                                                                                                  \
170             stencilBuffer.setPixStencil(                                                                         \
171                 maskedBitReplace(stencilBufferValue, (SFAIL_EXPRESSION), stencilState.writeMask), fragSampleNdx, \
172                 frag.pixelCoord.x(), frag.pixelCoord.y());                                                       \
173             m_sampleRegister[regSampleNdx].isAlive = false;                                                      \
174         }                                                                                                        \
175     }
176 
177     int clampedStencilRef = de::clamp(stencilState.ref, 0, (1 << numStencilBits) - 1);
178 
179     switch (stencilState.sFail)
180     {
181     case STENCILOP_KEEP:
182         SAMPLE_REGISTER_SFAIL(stencilBufferValue) break;
183     case STENCILOP_ZERO:
184         SAMPLE_REGISTER_SFAIL(0) break;
185     case STENCILOP_REPLACE:
186         SAMPLE_REGISTER_SFAIL(clampedStencilRef) break;
187     case STENCILOP_INCR:
188         SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue + 1, 0, (1 << numStencilBits) - 1)) break;
189     case STENCILOP_DECR:
190         SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue - 1, 0, (1 << numStencilBits) - 1)) break;
191     case STENCILOP_INCR_WRAP:
192         SAMPLE_REGISTER_SFAIL((stencilBufferValue + 1) & ((1 << numStencilBits) - 1)) break;
193     case STENCILOP_DECR_WRAP:
194         SAMPLE_REGISTER_SFAIL((stencilBufferValue - 1) & ((1 << numStencilBits) - 1)) break;
195     case STENCILOP_INVERT:
196         SAMPLE_REGISTER_SFAIL((~stencilBufferValue) & ((1 << numStencilBits) - 1)) break;
197     default:
198         DE_ASSERT(false);
199     }
200 
201 #undef SAMPLE_REGISTER_SFAIL
202 }
203 
executeDepthBoundsTest(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const float minDepthBound,const float maxDepthBound,const tcu::ConstPixelBufferAccess & depthBuffer)204 void FragmentProcessor::executeDepthBoundsTest(int fragNdxOffset, int numSamplesPerFragment,
205                                                const Fragment *inputFragments, const float minDepthBound,
206                                                const float maxDepthBound,
207                                                const tcu::ConstPixelBufferAccess &depthBuffer)
208 {
209     if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT ||
210         depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
211     {
212         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; ++regSampleNdx)
213         {
214             if (m_sampleRegister[regSampleNdx].isAlive)
215             {
216                 const int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
217                 const Fragment &frag    = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment];
218                 const float depthBufferValue =
219                     depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
220 
221                 if (!de::inRange(depthBufferValue, minDepthBound, maxDepthBound))
222                     m_sampleRegister[regSampleNdx].isAlive = false;
223             }
224         }
225     }
226     else
227     {
228         /* Convert float bounds to target buffer format for comparison */
229 
230         uint32_t minDepthBoundUint, maxDepthBoundUint;
231         {
232             uint32_t buffer[2];
233             DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize());
234 
235             tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);
236             access.setPixDepth(minDepthBound, 0, 0, 0);
237             minDepthBoundUint = access.getPixelUint(0, 0, 0).x();
238         }
239         {
240             uint32_t buffer[2];
241 
242             tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);
243             access.setPixDepth(maxDepthBound, 0, 0, 0);
244             maxDepthBoundUint = access.getPixelUint(0, 0, 0).x();
245         }
246 
247         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; ++regSampleNdx)
248         {
249             if (m_sampleRegister[regSampleNdx].isAlive)
250             {
251                 const int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
252                 const Fragment &frag    = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment];
253                 const uint32_t depthBufferValue =
254                     depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x();
255 
256                 if (!de::inRange(depthBufferValue, minDepthBoundUint, maxDepthBoundUint))
257                     m_sampleRegister[regSampleNdx].isAlive = false;
258             }
259         }
260     }
261 }
262 
executeDepthCompare(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,TestFunc depthFunc,const tcu::ConstPixelBufferAccess & depthBuffer)263 void FragmentProcessor::executeDepthCompare(int fragNdxOffset, int numSamplesPerFragment,
264                                             const Fragment *inputFragments, TestFunc depthFunc,
265                                             const tcu::ConstPixelBufferAccess &depthBuffer)
266 {
267 #define SAMPLE_REGISTER_DEPTH_COMPARE_F(COMPARE_EXPRESSION)                                                            \
268     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                    \
269     {                                                                                                                  \
270         if (m_sampleRegister[regSampleNdx].isAlive)                                                                    \
271         {                                                                                                              \
272             int fragSampleNdx      = regSampleNdx % numSamplesPerFragment;                                             \
273             const Fragment &frag   = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment];             \
274             float depthBufferValue = depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
275             float sampleDepthFloat = frag.sampleDepths[fragSampleNdx];                                                 \
276             float sampleDepth      = de::clamp(sampleDepthFloat, 0.0f, 1.0f);                                          \
277                                                                                                                        \
278             m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION);                                         \
279                                                                                                                        \
280             DE_UNREF(depthBufferValue);                                                                                \
281             DE_UNREF(sampleDepth);                                                                                     \
282         }                                                                                                              \
283     }
284 
285 #define SAMPLE_REGISTER_DEPTH_COMPARE_UI(COMPARE_EXPRESSION)                                             \
286     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                      \
287     {                                                                                                    \
288         if (m_sampleRegister[regSampleNdx].isAlive)                                                      \
289         {                                                                                                \
290             int fragSampleNdx    = regSampleNdx % numSamplesPerFragment;                                 \
291             const Fragment &frag = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment]; \
292             uint32_t depthBufferValue =                                                                  \
293                 depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x();   \
294             float sampleDepthFloat = frag.sampleDepths[fragSampleNdx];                                   \
295                                                                                                          \
296             /* Convert input float to target buffer format for comparison */                             \
297                                                                                                          \
298             uint32_t buffer[2];                                                                          \
299                                                                                                          \
300             DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize());                 \
301                                                                                                          \
302             tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);                    \
303             access.setPixDepth(sampleDepthFloat, 0, 0, 0);                                               \
304             uint32_t sampleDepth = access.getPixelUint(0, 0, 0).x();                                     \
305                                                                                                          \
306             m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION);                           \
307                                                                                                          \
308             DE_UNREF(depthBufferValue);                                                                  \
309             DE_UNREF(sampleDepth);                                                                       \
310         }                                                                                                \
311     }
312 
313     if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT ||
314         depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
315     {
316 
317         switch (depthFunc)
318         {
319         case TESTFUNC_NEVER:
320             SAMPLE_REGISTER_DEPTH_COMPARE_F(false) break;
321         case TESTFUNC_ALWAYS:
322             SAMPLE_REGISTER_DEPTH_COMPARE_F(true) break;
323         case TESTFUNC_LESS:
324             SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth < depthBufferValue) break;
325         case TESTFUNC_LEQUAL:
326             SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <= depthBufferValue) break;
327         case TESTFUNC_GREATER:
328             SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth > depthBufferValue) break;
329         case TESTFUNC_GEQUAL:
330             SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >= depthBufferValue) break;
331         case TESTFUNC_EQUAL:
332             SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth == depthBufferValue) break;
333         case TESTFUNC_NOTEQUAL:
334             SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth != depthBufferValue) break;
335         default:
336             DE_ASSERT(false);
337         }
338     }
339     else
340     {
341         switch (depthFunc)
342         {
343         case TESTFUNC_NEVER:
344             SAMPLE_REGISTER_DEPTH_COMPARE_UI(false) break;
345         case TESTFUNC_ALWAYS:
346             SAMPLE_REGISTER_DEPTH_COMPARE_UI(true) break;
347         case TESTFUNC_LESS:
348             SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth < depthBufferValue) break;
349         case TESTFUNC_LEQUAL:
350             SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <= depthBufferValue) break;
351         case TESTFUNC_GREATER:
352             SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth > depthBufferValue) break;
353         case TESTFUNC_GEQUAL:
354             SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >= depthBufferValue) break;
355         case TESTFUNC_EQUAL:
356             SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth == depthBufferValue) break;
357         case TESTFUNC_NOTEQUAL:
358             SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth != depthBufferValue) break;
359         default:
360             DE_ASSERT(false);
361         }
362     }
363 
364 #undef SAMPLE_REGISTER_DEPTH_COMPARE_F
365 #undef SAMPLE_REGISTER_DEPTH_COMPARE_UI
366 }
367 
executeDepthWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::PixelBufferAccess & depthBuffer)368 void FragmentProcessor::executeDepthWrite(int fragNdxOffset, int numSamplesPerFragment, const Fragment *inputFragments,
369                                           const tcu::PixelBufferAccess &depthBuffer)
370 {
371     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
372     {
373         if (m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed)
374         {
375             int fragSampleNdx        = regSampleNdx % numSamplesPerFragment;
376             const Fragment &frag     = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment];
377             const float clampedDepth = de::clamp(frag.sampleDepths[fragSampleNdx], 0.0f, 1.0f);
378 
379             depthBuffer.setPixDepth(clampedDepth, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
380         }
381     }
382 }
383 
executeStencilDpFailAndPass(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const StencilState & stencilState,int numStencilBits,const tcu::PixelBufferAccess & stencilBuffer)384 void FragmentProcessor::executeStencilDpFailAndPass(int fragNdxOffset, int numSamplesPerFragment,
385                                                     const Fragment *inputFragments, const StencilState &stencilState,
386                                                     int numStencilBits, const tcu::PixelBufferAccess &stencilBuffer)
387 {
388 #define SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, EXPRESSION)                                                     \
389     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)                                 \
390     {                                                                                                               \
391         if (m_sampleRegister[regSampleNdx].isAlive && (CONDITION))                                                  \
392         {                                                                                                           \
393             int fragSampleNdx    = regSampleNdx % numSamplesPerFragment;                                            \
394             const Fragment &frag = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment];            \
395             int stencilBufferValue =                                                                                \
396                 stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());               \
397                                                                                                                     \
398             stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (EXPRESSION), stencilState.writeMask), \
399                                         fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());                   \
400         }                                                                                                           \
401     }
402 
403 #define SWITCH_DPFAIL_OR_DPPASS(OP_NAME, CONDITION)                                                                  \
404     switch (stencilState.OP_NAME)                                                                                    \
405     {                                                                                                                \
406     case STENCILOP_KEEP:                                                                                             \
407         SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, stencilBufferValue) break;                                       \
408     case STENCILOP_ZERO:                                                                                             \
409         SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, 0) break;                                                        \
410     case STENCILOP_REPLACE:                                                                                          \
411         SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, clampedStencilRef) break;                                        \
412     case STENCILOP_INCR:                                                                                             \
413         SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue + 1, 0, (1 << numStencilBits) - 1)) \
414         break;                                                                                                       \
415     case STENCILOP_DECR:                                                                                             \
416         SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue - 1, 0, (1 << numStencilBits) - 1)) \
417         break;                                                                                                       \
418     case STENCILOP_INCR_WRAP:                                                                                        \
419         SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue + 1) & ((1 << numStencilBits) - 1)) break;   \
420     case STENCILOP_DECR_WRAP:                                                                                        \
421         SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue - 1) & ((1 << numStencilBits) - 1)) break;   \
422     case STENCILOP_INVERT:                                                                                           \
423         SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (~stencilBufferValue) & ((1 << numStencilBits) - 1)) break;      \
424     default:                                                                                                         \
425         DE_ASSERT(false);                                                                                            \
426     }
427 
428     int clampedStencilRef = de::clamp(stencilState.ref, 0, (1 << numStencilBits) - 1);
429 
430     SWITCH_DPFAIL_OR_DPPASS(dpFail, !m_sampleRegister[regSampleNdx].depthPassed)
431     SWITCH_DPFAIL_OR_DPPASS(dpPass, m_sampleRegister[regSampleNdx].depthPassed)
432 
433 #undef SWITCH_DPFAIL_OR_DPPASS
434 #undef SAMPLE_REGISTER_DPFAIL_OR_DPPASS
435 }
436 
executeBlendFactorComputeRGB(const Vec4 & blendColor,const BlendState & blendRGBState)437 void FragmentProcessor::executeBlendFactorComputeRGB(const Vec4 &blendColor, const BlendState &blendRGBState)
438 {
439 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION)                 \
440     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)  \
441     {                                                                                \
442         if (m_sampleRegister[regSampleNdx].isAlive)                                  \
443         {                                                                            \
444             const Vec4 &src  = m_sampleRegister[regSampleNdx].clampedBlendSrcColor;  \
445             const Vec4 &src1 = m_sampleRegister[regSampleNdx].clampedBlendSrc1Color; \
446             const Vec4 &dst  = m_sampleRegister[regSampleNdx].clampedBlendDstColor;  \
447             DE_UNREF(src);                                                           \
448             DE_UNREF(src1);                                                          \
449             DE_UNREF(dst);                                                           \
450                                                                                      \
451             m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION);        \
452         }                                                                            \
453     }
454 
455 #define SWITCH_SRC_OR_DST_FACTOR_RGB(FUNC_NAME, FACTOR_NAME)                                       \
456     switch (blendRGBState.FUNC_NAME)                                                               \
457     {                                                                                              \
458     case BLENDFUNC_ZERO:                                                                           \
459         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(0.0f)) break;                               \
460     case BLENDFUNC_ONE:                                                                            \
461         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f)) break;                               \
462     case BLENDFUNC_SRC_COLOR:                                                                      \
463         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.swizzle(0, 1, 2)) break;                     \
464     case BLENDFUNC_ONE_MINUS_SRC_COLOR:                                                            \
465         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src.swizzle(0, 1, 2)) break;        \
466     case BLENDFUNC_DST_COLOR:                                                                      \
467         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.swizzle(0, 1, 2)) break;                     \
468     case BLENDFUNC_ONE_MINUS_DST_COLOR:                                                            \
469         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - dst.swizzle(0, 1, 2)) break;        \
470     case BLENDFUNC_SRC_ALPHA:                                                                      \
471         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src.w())) break;                            \
472     case BLENDFUNC_ONE_MINUS_SRC_ALPHA:                                                            \
473         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src.w())) break;                     \
474     case BLENDFUNC_DST_ALPHA:                                                                      \
475         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(dst.w())) break;                            \
476     case BLENDFUNC_ONE_MINUS_DST_ALPHA:                                                            \
477         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - dst.w())) break;                     \
478     case BLENDFUNC_CONSTANT_COLOR:                                                                 \
479         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.swizzle(0, 1, 2)) break;              \
480     case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR:                                                       \
481         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - blendColor.swizzle(0, 1, 2)) break; \
482     case BLENDFUNC_CONSTANT_ALPHA:                                                                 \
483         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(blendColor.w())) break;                     \
484     case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA:                                                       \
485         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - blendColor.w())) break;              \
486     case BLENDFUNC_SRC_ALPHA_SATURATE:                                                             \
487         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(de::min(src.w(), 1.0f - dst.w()))) break;   \
488     case BLENDFUNC_SRC1_COLOR:                                                                     \
489         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.swizzle(0, 1, 2)) break;                    \
490     case BLENDFUNC_ONE_MINUS_SRC1_COLOR:                                                           \
491         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src1.swizzle(0, 1, 2)) break;       \
492     case BLENDFUNC_SRC1_ALPHA:                                                                     \
493         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src1.w())) break;                           \
494     case BLENDFUNC_ONE_MINUS_SRC1_ALPHA:                                                           \
495         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src1.w())) break;                    \
496     default:                                                                                       \
497         DE_ASSERT(false);                                                                          \
498     }
499 
500     SWITCH_SRC_OR_DST_FACTOR_RGB(srcFunc, blendSrcFactorRGB)
501     SWITCH_SRC_OR_DST_FACTOR_RGB(dstFunc, blendDstFactorRGB)
502 
503 #undef SWITCH_SRC_OR_DST_FACTOR_RGB
504 #undef SAMPLE_REGISTER_BLEND_FACTOR
505 }
506 
executeBlendFactorComputeA(const Vec4 & blendColor,const BlendState & blendAState)507 void FragmentProcessor::executeBlendFactorComputeA(const Vec4 &blendColor, const BlendState &blendAState)
508 {
509 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION)                 \
510     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)  \
511     {                                                                                \
512         if (m_sampleRegister[regSampleNdx].isAlive)                                  \
513         {                                                                            \
514             const Vec4 &src  = m_sampleRegister[regSampleNdx].clampedBlendSrcColor;  \
515             const Vec4 &src1 = m_sampleRegister[regSampleNdx].clampedBlendSrc1Color; \
516             const Vec4 &dst  = m_sampleRegister[regSampleNdx].clampedBlendDstColor;  \
517             DE_UNREF(src);                                                           \
518             DE_UNREF(src1);                                                          \
519             DE_UNREF(dst);                                                           \
520                                                                                      \
521             m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION);        \
522         }                                                                            \
523     }
524 
525 #define SWITCH_SRC_OR_DST_FACTOR_A(FUNC_NAME, FACTOR_NAME)                      \
526     switch (blendAState.FUNC_NAME)                                              \
527     {                                                                           \
528     case BLENDFUNC_ZERO:                                                        \
529         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 0.0f) break;                  \
530     case BLENDFUNC_ONE:                                                         \
531         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f) break;                  \
532     case BLENDFUNC_SRC_COLOR:                                                   \
533         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w()) break;               \
534     case BLENDFUNC_ONE_MINUS_SRC_COLOR:                                         \
535         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w()) break;        \
536     case BLENDFUNC_DST_COLOR:                                                   \
537         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w()) break;               \
538     case BLENDFUNC_ONE_MINUS_DST_COLOR:                                         \
539         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w()) break;        \
540     case BLENDFUNC_SRC_ALPHA:                                                   \
541         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w()) break;               \
542     case BLENDFUNC_ONE_MINUS_SRC_ALPHA:                                         \
543         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w()) break;        \
544     case BLENDFUNC_DST_ALPHA:                                                   \
545         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w()) break;               \
546     case BLENDFUNC_ONE_MINUS_DST_ALPHA:                                         \
547         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w()) break;        \
548     case BLENDFUNC_CONSTANT_COLOR:                                              \
549         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w()) break;        \
550     case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR:                                    \
551         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w()) break; \
552     case BLENDFUNC_CONSTANT_ALPHA:                                              \
553         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w()) break;        \
554     case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA:                                    \
555         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w()) break; \
556     case BLENDFUNC_SRC_ALPHA_SATURATE:                                          \
557         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f) break;                  \
558     case BLENDFUNC_SRC1_COLOR:                                                  \
559         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w()) break;              \
560     case BLENDFUNC_ONE_MINUS_SRC1_COLOR:                                        \
561         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w()) break;       \
562     case BLENDFUNC_SRC1_ALPHA:                                                  \
563         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w()) break;              \
564     case BLENDFUNC_ONE_MINUS_SRC1_ALPHA:                                        \
565         SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w()) break;       \
566     default:                                                                    \
567         DE_ASSERT(false);                                                       \
568     }
569 
570     SWITCH_SRC_OR_DST_FACTOR_A(srcFunc, blendSrcFactorA)
571     SWITCH_SRC_OR_DST_FACTOR_A(dstFunc, blendDstFactorA)
572 
573 #undef SWITCH_SRC_OR_DST_FACTOR_A
574 #undef SAMPLE_REGISTER_BLEND_FACTOR
575 }
576 
executeBlend(const BlendState & blendRGBState,const BlendState & blendAState)577 void FragmentProcessor::executeBlend(const BlendState &blendRGBState, const BlendState &blendAState)
578 {
579 #define SAMPLE_REGISTER_BLENDED_COLOR(COLOR_NAME, COLOR_EXPRESSION)                 \
580     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
581     {                                                                               \
582         if (m_sampleRegister[regSampleNdx].isAlive)                                 \
583         {                                                                           \
584             SampleData &sample   = m_sampleRegister[regSampleNdx];                  \
585             const Vec4 &srcColor = sample.clampedBlendSrcColor;                     \
586             const Vec4 &dstColor = sample.clampedBlendDstColor;                     \
587                                                                                     \
588             sample.COLOR_NAME = (COLOR_EXPRESSION);                                 \
589         }                                                                           \
590     }
591 
592     switch (blendRGBState.equation)
593     {
594     case BLENDEQUATION_ADD:
595         SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0, 1, 2) * sample.blendSrcFactorRGB +
596                                                       dstColor.swizzle(0, 1, 2) * sample.blendDstFactorRGB)
597         break;
598     case BLENDEQUATION_SUBTRACT:
599         SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0, 1, 2) * sample.blendSrcFactorRGB -
600                                                       dstColor.swizzle(0, 1, 2) * sample.blendDstFactorRGB)
601         break;
602     case BLENDEQUATION_REVERSE_SUBTRACT:
603         SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, dstColor.swizzle(0, 1, 2) * sample.blendDstFactorRGB -
604                                                       srcColor.swizzle(0, 1, 2) * sample.blendSrcFactorRGB)
605         break;
606     case BLENDEQUATION_MIN:
607         SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, min(srcColor.swizzle(0, 1, 2), dstColor.swizzle(0, 1, 2))) break;
608     case BLENDEQUATION_MAX:
609         SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, max(srcColor.swizzle(0, 1, 2), dstColor.swizzle(0, 1, 2))) break;
610     default:
611         DE_ASSERT(false);
612     }
613 
614     switch (blendAState.equation)
615     {
616     case BLENDEQUATION_ADD:
617         SAMPLE_REGISTER_BLENDED_COLOR(blendedA,
618                                       srcColor.w() * sample.blendSrcFactorA + dstColor.w() * sample.blendDstFactorA)
619         break;
620     case BLENDEQUATION_SUBTRACT:
621         SAMPLE_REGISTER_BLENDED_COLOR(blendedA,
622                                       srcColor.w() * sample.blendSrcFactorA - dstColor.w() * sample.blendDstFactorA)
623         break;
624     case BLENDEQUATION_REVERSE_SUBTRACT:
625         SAMPLE_REGISTER_BLENDED_COLOR(blendedA,
626                                       dstColor.w() * sample.blendDstFactorA - srcColor.w() * sample.blendSrcFactorA)
627         break;
628     case BLENDEQUATION_MIN:
629         SAMPLE_REGISTER_BLENDED_COLOR(blendedA, min(srcColor.w(), dstColor.w())) break;
630     case BLENDEQUATION_MAX:
631         SAMPLE_REGISTER_BLENDED_COLOR(blendedA, max(srcColor.w(), dstColor.w())) break;
632     default:
633         DE_ASSERT(false);
634     }
635 #undef SAMPLE_REGISTER_BLENDED_COLOR
636 }
637 
638 namespace advblend
639 {
640 
multiply(float src,float dst)641 inline float multiply(float src, float dst)
642 {
643     return src * dst;
644 }
screen(float src,float dst)645 inline float screen(float src, float dst)
646 {
647     return src + dst - src * dst;
648 }
darken(float src,float dst)649 inline float darken(float src, float dst)
650 {
651     return de::min(src, dst);
652 }
lighten(float src,float dst)653 inline float lighten(float src, float dst)
654 {
655     return de::max(src, dst);
656 }
difference(float src,float dst)657 inline float difference(float src, float dst)
658 {
659     return de::abs(dst - src);
660 }
exclusion(float src,float dst)661 inline float exclusion(float src, float dst)
662 {
663     return src + dst - 2.0f * src * dst;
664 }
665 
overlay(float src,float dst)666 inline float overlay(float src, float dst)
667 {
668     if (dst <= 0.5f)
669         return 2.0f * src * dst;
670     else
671         return 1.0f - 2.0f * (1.0f - src) * (1.0f - dst);
672 }
673 
colordodge(float src,float dst)674 inline float colordodge(float src, float dst)
675 {
676     if (dst <= 0.0f)
677         return 0.0f;
678     else if (src < 1.0f)
679         return de::min(1.0f, dst / (1.0f - src));
680     else
681         return 1.0f;
682 }
683 
colorburn(float src,float dst)684 inline float colorburn(float src, float dst)
685 {
686     if (dst >= 1.0f)
687         return 1.0f;
688     else if (src > 0.0f)
689         return 1.0f - de::min(1.0f, (1.0f - dst) / src);
690     else
691         return 0.0f;
692 }
693 
hardlight(float src,float dst)694 inline float hardlight(float src, float dst)
695 {
696     if (src <= 0.5f)
697         return 2.0f * src * dst;
698     else
699         return 1.0f - 2.0f * (1.0f - src) * (1.0f - dst);
700 }
701 
softlight(float src,float dst)702 inline float softlight(float src, float dst)
703 {
704     if (src <= 0.5f)
705         return dst - (1.0f - 2.0f * src) * dst * (1.0f - dst);
706     else if (dst <= 0.25f)
707         return dst + (2.0f * src - 1.0f) * dst * ((16.0f * dst - 12.0f) * dst + 3.0f);
708     else
709         return dst + (2.0f * src - 1.0f) * (deFloatSqrt(dst) - dst);
710 }
711 
minComp(const Vec3 & v)712 inline float minComp(const Vec3 &v)
713 {
714     return de::min(de::min(v.x(), v.y()), v.z());
715 }
716 
maxComp(const Vec3 & v)717 inline float maxComp(const Vec3 &v)
718 {
719     return de::max(de::max(v.x(), v.y()), v.z());
720 }
721 
luminosity(const Vec3 & rgb)722 inline float luminosity(const Vec3 &rgb)
723 {
724     return dot(rgb, Vec3(0.3f, 0.59f, 0.11f));
725 }
726 
saturation(const Vec3 & rgb)727 inline float saturation(const Vec3 &rgb)
728 {
729     return maxComp(rgb) - minComp(rgb);
730 }
731 
setLum(const Vec3 & cbase,const Vec3 & clum)732 Vec3 setLum(const Vec3 &cbase, const Vec3 &clum)
733 {
734     const float lbase = luminosity(cbase);
735     const float llum  = luminosity(clum);
736     const float ldiff = llum - lbase;
737     const Vec3 color  = cbase + Vec3(ldiff);
738     const float minC  = minComp(color);
739     const float maxC  = maxComp(color);
740 
741     if (minC < 0.0f)
742         return llum + ((color - llum) * llum / (llum != minC ? (llum - minC) : 1.0f));
743     else if (maxC > 1.0f)
744         return llum + ((color - llum) * (1.0f - llum) / (llum != maxC ? (maxC - llum) : 1.0f));
745     else
746         return color;
747 }
748 
setLumSat(const Vec3 & cbase,const Vec3 & csat,const Vec3 & clum)749 Vec3 setLumSat(const Vec3 &cbase, const Vec3 &csat, const Vec3 &clum)
750 {
751     const float minbase = minComp(cbase);
752     const float sbase   = saturation(cbase);
753     const float ssat    = saturation(csat);
754     Vec3 color          = Vec3(0.0f);
755 
756     if (sbase > 0.0f)
757         color = (cbase - minbase) * ssat / sbase;
758 
759     return setLum(color, clum);
760 }
761 
762 } // namespace advblend
763 
executeAdvancedBlend(BlendEquationAdvanced equation)764 void FragmentProcessor::executeAdvancedBlend(BlendEquationAdvanced equation)
765 {
766     using namespace advblend;
767 
768 #define SAMPLE_REGISTER_ADV_BLEND(FUNCTION_NAME)                                               \
769     do                                                                                         \
770     {                                                                                          \
771         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)        \
772         {                                                                                      \
773             if (m_sampleRegister[regSampleNdx].isAlive)                                        \
774             {                                                                                  \
775                 SampleData &sample   = m_sampleRegister[regSampleNdx];                         \
776                 const Vec4 &srcColor = sample.clampedBlendSrcColor;                            \
777                 const Vec4 &dstColor = sample.clampedBlendDstColor;                            \
778                 const Vec3 &bias     = sample.blendSrcFactorRGB;                               \
779                 const float p0       = sample.blendSrcFactorA;                                 \
780                 const float r        = FUNCTION_NAME(srcColor[0], dstColor[0]) * p0 + bias[0]; \
781                 const float g        = FUNCTION_NAME(srcColor[1], dstColor[1]) * p0 + bias[1]; \
782                 const float b        = FUNCTION_NAME(srcColor[2], dstColor[2]) * p0 + bias[2]; \
783                                                                                                \
784                 sample.blendedRGB = Vec3(r, g, b);                                             \
785             }                                                                                  \
786         }                                                                                      \
787     } while (0)
788 
789 #define SAMPLE_REGISTER_ADV_BLEND_HSL(COLOR_EXPRESSION)                                 \
790     do                                                                                  \
791     {                                                                                   \
792         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
793         {                                                                               \
794             if (m_sampleRegister[regSampleNdx].isAlive)                                 \
795             {                                                                           \
796                 SampleData &sample  = m_sampleRegister[regSampleNdx];                   \
797                 const Vec3 srcColor = sample.clampedBlendSrcColor.swizzle(0, 1, 2);     \
798                 const Vec3 dstColor = sample.clampedBlendDstColor.swizzle(0, 1, 2);     \
799                 const Vec3 &bias    = sample.blendSrcFactorRGB;                         \
800                 const float p0      = sample.blendSrcFactorA;                           \
801                                                                                         \
802                 sample.blendedRGB = (COLOR_EXPRESSION)*p0 + bias;                       \
803             }                                                                           \
804         }                                                                               \
805     } while (0)
806 
807     // Pre-compute factors & compute alpha \todo [2014-03-18 pyry] Re-using variable names.
808     // \note clampedBlend*Color contains clamped & unpremultiplied colors
809     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
810     {
811         if (m_sampleRegister[regSampleNdx].isAlive)
812         {
813             SampleData &sample   = m_sampleRegister[regSampleNdx];
814             const Vec4 &srcColor = sample.clampedBlendSrcColor;
815             const Vec4 &dstColor = sample.clampedBlendDstColor;
816             const float srcA     = srcColor.w();
817             const float dstA     = dstColor.w();
818             const float p0       = srcA * dstA;
819             const float p1       = srcA * (1.0f - dstA);
820             const float p2       = dstA * (1.0f - srcA);
821             const Vec3 bias(srcColor[0] * p1 + dstColor[0] * p2, srcColor[1] * p1 + dstColor[1] * p2,
822                             srcColor[2] * p1 + dstColor[2] * p2);
823 
824             sample.blendSrcFactorRGB = bias;
825             sample.blendSrcFactorA   = p0;
826             sample.blendedA          = p0 + p1 + p2;
827         }
828     }
829 
830     switch (equation)
831     {
832     case BLENDEQUATION_ADVANCED_MULTIPLY:
833         SAMPLE_REGISTER_ADV_BLEND(multiply);
834         break;
835     case BLENDEQUATION_ADVANCED_SCREEN:
836         SAMPLE_REGISTER_ADV_BLEND(screen);
837         break;
838     case BLENDEQUATION_ADVANCED_OVERLAY:
839         SAMPLE_REGISTER_ADV_BLEND(overlay);
840         break;
841     case BLENDEQUATION_ADVANCED_DARKEN:
842         SAMPLE_REGISTER_ADV_BLEND(darken);
843         break;
844     case BLENDEQUATION_ADVANCED_LIGHTEN:
845         SAMPLE_REGISTER_ADV_BLEND(lighten);
846         break;
847     case BLENDEQUATION_ADVANCED_COLORDODGE:
848         SAMPLE_REGISTER_ADV_BLEND(colordodge);
849         break;
850     case BLENDEQUATION_ADVANCED_COLORBURN:
851         SAMPLE_REGISTER_ADV_BLEND(colorburn);
852         break;
853     case BLENDEQUATION_ADVANCED_HARDLIGHT:
854         SAMPLE_REGISTER_ADV_BLEND(hardlight);
855         break;
856     case BLENDEQUATION_ADVANCED_SOFTLIGHT:
857         SAMPLE_REGISTER_ADV_BLEND(softlight);
858         break;
859     case BLENDEQUATION_ADVANCED_DIFFERENCE:
860         SAMPLE_REGISTER_ADV_BLEND(difference);
861         break;
862     case BLENDEQUATION_ADVANCED_EXCLUSION:
863         SAMPLE_REGISTER_ADV_BLEND(exclusion);
864         break;
865     case BLENDEQUATION_ADVANCED_HSL_HUE:
866         SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(srcColor, dstColor, dstColor));
867         break;
868     case BLENDEQUATION_ADVANCED_HSL_SATURATION:
869         SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(dstColor, srcColor, dstColor));
870         break;
871     case BLENDEQUATION_ADVANCED_HSL_COLOR:
872         SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(srcColor, dstColor));
873         break;
874     case BLENDEQUATION_ADVANCED_HSL_LUMINOSITY:
875         SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(dstColor, srcColor));
876         break;
877     default:
878         DE_ASSERT(false);
879     }
880 
881 #undef SAMPLE_REGISTER_ADV_BLEND
882 #undef SAMPLE_REGISTER_ADV_BLEND_HSL
883 }
884 
executeColorWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,bool isSRGB,const tcu::PixelBufferAccess & colorBuffer)885 void FragmentProcessor::executeColorWrite(int fragNdxOffset, int numSamplesPerFragment, const Fragment *inputFragments,
886                                           bool isSRGB, const tcu::PixelBufferAccess &colorBuffer)
887 {
888     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
889     {
890         if (m_sampleRegister[regSampleNdx].isAlive)
891         {
892             int fragSampleNdx    = regSampleNdx % numSamplesPerFragment;
893             const Fragment &frag = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment];
894             Vec4 combinedColor;
895 
896             combinedColor.xyz() = m_sampleRegister[regSampleNdx].blendedRGB;
897             combinedColor.w()   = m_sampleRegister[regSampleNdx].blendedA;
898 
899             if (isSRGB)
900                 combinedColor = tcu::linearToSRGB(combinedColor);
901 
902             colorBuffer.setPixel(combinedColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
903         }
904     }
905 }
906 
executeRGBA8ColorWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::PixelBufferAccess & colorBuffer)907 void FragmentProcessor::executeRGBA8ColorWrite(int fragNdxOffset, int numSamplesPerFragment,
908                                                const Fragment *inputFragments,
909                                                const tcu::PixelBufferAccess &colorBuffer)
910 {
911     const int fragStride   = 4;
912     const int xStride      = colorBuffer.getRowPitch();
913     const int yStride      = colorBuffer.getSlicePitch();
914     uint8_t *const basePtr = (uint8_t *)colorBuffer.getDataPtr();
915 
916     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
917     {
918         if (m_sampleRegister[regSampleNdx].isAlive)
919         {
920             const int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
921             const Fragment &frag    = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment];
922             uint8_t *dstPtr =
923                 basePtr + fragSampleNdx * fragStride + frag.pixelCoord.x() * xStride + frag.pixelCoord.y() * yStride;
924 
925             dstPtr[0] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.x());
926             dstPtr[1] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.y());
927             dstPtr[2] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.z());
928             dstPtr[3] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedA);
929         }
930     }
931 }
932 
executeMaskedColorWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const Vec4 & colorMaskFactor,const Vec4 & colorMaskNegationFactor,bool isSRGB,const tcu::PixelBufferAccess & colorBuffer)933 void FragmentProcessor::executeMaskedColorWrite(int fragNdxOffset, int numSamplesPerFragment,
934                                                 const Fragment *inputFragments, const Vec4 &colorMaskFactor,
935                                                 const Vec4 &colorMaskNegationFactor, bool isSRGB,
936                                                 const tcu::PixelBufferAccess &colorBuffer)
937 {
938     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
939     {
940         if (m_sampleRegister[regSampleNdx].isAlive)
941         {
942             int fragSampleNdx    = regSampleNdx % numSamplesPerFragment;
943             const Fragment &frag = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment];
944             Vec4 originalColor   = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
945             Vec4 newColor;
946 
947             newColor.xyz() = m_sampleRegister[regSampleNdx].blendedRGB;
948             newColor.w()   = m_sampleRegister[regSampleNdx].blendedA;
949 
950             if (isSRGB)
951                 newColor = tcu::linearToSRGB(newColor);
952 
953             newColor = colorMaskFactor * newColor + colorMaskNegationFactor * originalColor;
954 
955             colorBuffer.setPixel(newColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
956         }
957     }
958 }
959 
executeSignedValueWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::BVec4 & colorMask,const tcu::PixelBufferAccess & colorBuffer)960 void FragmentProcessor::executeSignedValueWrite(int fragNdxOffset, int numSamplesPerFragment,
961                                                 const Fragment *inputFragments, const tcu::BVec4 &colorMask,
962                                                 const tcu::PixelBufferAccess &colorBuffer)
963 {
964     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
965     {
966         if (m_sampleRegister[regSampleNdx].isAlive)
967         {
968             int fragSampleNdx    = regSampleNdx % numSamplesPerFragment;
969             const Fragment &frag = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment];
970             const IVec4 originalValue =
971                 colorBuffer.getPixelInt(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
972 
973             colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].signedValue, originalValue, colorMask),
974                                  fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
975         }
976     }
977 }
978 
executeUnsignedValueWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::BVec4 & colorMask,const tcu::PixelBufferAccess & colorBuffer)979 void FragmentProcessor::executeUnsignedValueWrite(int fragNdxOffset, int numSamplesPerFragment,
980                                                   const Fragment *inputFragments, const tcu::BVec4 &colorMask,
981                                                   const tcu::PixelBufferAccess &colorBuffer)
982 {
983     for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
984     {
985         if (m_sampleRegister[regSampleNdx].isAlive)
986         {
987             int fragSampleNdx    = regSampleNdx % numSamplesPerFragment;
988             const Fragment &frag = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment];
989             const UVec4 originalValue =
990                 colorBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
991 
992             colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].unsignedValue, originalValue, colorMask),
993                                  fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
994         }
995     }
996 }
997 
render(const rr::MultisamplePixelBufferAccess & msColorBuffer,const rr::MultisamplePixelBufferAccess & msDepthBuffer,const rr::MultisamplePixelBufferAccess & msStencilBuffer,const Fragment * inputFragments,int numFragments,FaceType fragmentFacing,const FragmentOperationState & state)998 void FragmentProcessor::render(const rr::MultisamplePixelBufferAccess &msColorBuffer,
999                                const rr::MultisamplePixelBufferAccess &msDepthBuffer,
1000                                const rr::MultisamplePixelBufferAccess &msStencilBuffer, const Fragment *inputFragments,
1001                                int numFragments, FaceType fragmentFacing, const FragmentOperationState &state)
1002 {
1003     DE_ASSERT(fragmentFacing < FACETYPE_LAST);
1004     DE_ASSERT(state.numStencilBits < 32); // code bitshifts numStencilBits, avoid undefined behavior
1005 
1006     const tcu::PixelBufferAccess &colorBuffer   = msColorBuffer.raw();
1007     const tcu::PixelBufferAccess &depthBuffer   = msDepthBuffer.raw();
1008     const tcu::PixelBufferAccess &stencilBuffer = msStencilBuffer.raw();
1009 
1010     bool hasDepth   = depthBuffer.getWidth() > 0 && depthBuffer.getHeight() > 0 && depthBuffer.getDepth() > 0;
1011     bool hasStencil = stencilBuffer.getWidth() > 0 && stencilBuffer.getHeight() > 0 && stencilBuffer.getDepth() > 0;
1012     bool doDepthBoundsTest = hasDepth && state.depthBoundsTestEnabled;
1013     bool doDepthTest       = hasDepth && state.depthTestEnabled;
1014     bool doStencilTest     = hasStencil && state.stencilTestEnabled;
1015 
1016     tcu::TextureChannelClass colorbufferClass = tcu::getTextureChannelClass(msColorBuffer.raw().getFormat().type);
1017     rr::GenericVecType fragmentDataType =
1018         (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER) ?
1019             (rr::GENERICVECTYPE_INT32) :
1020             ((colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER) ? (rr::GENERICVECTYPE_UINT32) :
1021                                                                                (rr::GENERICVECTYPE_FLOAT));
1022 
1023     DE_ASSERT((!hasDepth || colorBuffer.getWidth() == depthBuffer.getWidth()) &&
1024               (!hasStencil || colorBuffer.getWidth() == stencilBuffer.getWidth()));
1025     DE_ASSERT((!hasDepth || colorBuffer.getHeight() == depthBuffer.getHeight()) &&
1026               (!hasStencil || colorBuffer.getHeight() == stencilBuffer.getHeight()));
1027     DE_ASSERT((!hasDepth || colorBuffer.getDepth() == depthBuffer.getDepth()) &&
1028               (!hasStencil || colorBuffer.getDepth() == stencilBuffer.getDepth()));
1029 
1030     // Combined formats must be separated beforehand
1031     DE_ASSERT(!hasDepth || (!tcu::isCombinedDepthStencilType(depthBuffer.getFormat().type) &&
1032                             depthBuffer.getFormat().order == tcu::TextureFormat::D));
1033     DE_ASSERT(!hasStencil || (!tcu::isCombinedDepthStencilType(stencilBuffer.getFormat().type) &&
1034                               stencilBuffer.getFormat().order == tcu::TextureFormat::S));
1035 
1036     int numSamplesPerFragment = colorBuffer.getWidth();
1037     int totalNumSamples       = numFragments * numSamplesPerFragment;
1038     int numSampleGroups =
1039         (totalNumSamples - 1) / SAMPLE_REGISTER_SIZE + 1; // \note totalNumSamples/SAMPLE_REGISTER_SIZE rounded up.
1040     const StencilState &stencilState = state.stencilStates[fragmentFacing];
1041     Vec4 colorMaskFactor(state.colorMask[0] ? 1.0f : 0.0f, state.colorMask[1] ? 1.0f : 0.0f,
1042                          state.colorMask[2] ? 1.0f : 0.0f, state.colorMask[3] ? 1.0f : 0.0f);
1043     Vec4 colorMaskNegationFactor(state.colorMask[0] ? 0.0f : 1.0f, state.colorMask[1] ? 0.0f : 1.0f,
1044                                  state.colorMask[2] ? 0.0f : 1.0f, state.colorMask[3] ? 0.0f : 1.0f);
1045     bool sRGBTarget = state.sRGBEnabled && tcu::isSRGB(colorBuffer.getFormat());
1046 
1047     DE_ASSERT(SAMPLE_REGISTER_SIZE % numSamplesPerFragment == 0);
1048 
1049     // Divide the fragments' samples into groups of size SAMPLE_REGISTER_SIZE, and perform
1050     // the per-sample operations for one group at a time.
1051 
1052     for (int sampleGroupNdx = 0; sampleGroupNdx < numSampleGroups; sampleGroupNdx++)
1053     {
1054         // The index of the fragment of the sample at the beginning of m_sampleRegisters.
1055         int groupFirstFragNdx = (sampleGroupNdx * SAMPLE_REGISTER_SIZE) / numSamplesPerFragment;
1056 
1057         // Initialize sample data in the sample register.
1058 
1059         for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
1060         {
1061             int fragNdx       = groupFirstFragNdx + regSampleNdx / numSamplesPerFragment;
1062             int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
1063 
1064             if (fragNdx < numFragments)
1065             {
1066                 m_sampleRegister[regSampleNdx].isAlive =
1067                     (inputFragments[fragNdx].coverage & (1u << fragSampleNdx)) != 0;
1068                 m_sampleRegister[regSampleNdx].depthPassed =
1069                     true; // \note This will stay true if depth test is disabled.
1070             }
1071             else
1072                 m_sampleRegister[regSampleNdx].isAlive = false;
1073         }
1074 
1075         // Scissor test.
1076 
1077         if (state.scissorTestEnabled)
1078             executeScissorTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.scissorRectangle);
1079 
1080         // Depth bounds test.
1081 
1082         if (doDepthBoundsTest)
1083             executeDepthBoundsTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.minDepthBound,
1084                                    state.maxDepthBound, depthBuffer);
1085 
1086         // Stencil test.
1087 
1088         if (doStencilTest)
1089         {
1090             executeStencilCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState,
1091                                   state.numStencilBits, stencilBuffer);
1092             executeStencilSFail(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState,
1093                                 state.numStencilBits, stencilBuffer);
1094         }
1095 
1096         // Depth test.
1097         // \note Current value of isAlive is needed for dpPass and dpFail, so it's only updated after them and not right after depth test.
1098 
1099         if (doDepthTest)
1100         {
1101             executeDepthCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.depthFunc, depthBuffer);
1102 
1103             if (state.depthMask)
1104                 executeDepthWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, depthBuffer);
1105         }
1106 
1107         // Do dpFail and dpPass stencil writes.
1108 
1109         if (doStencilTest)
1110             executeStencilDpFailAndPass(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState,
1111                                         state.numStencilBits, stencilBuffer);
1112 
1113         // Kill the samples that failed depth test.
1114 
1115         if (doDepthTest)
1116         {
1117             for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
1118                 m_sampleRegister[regSampleNdx].isAlive =
1119                     m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed;
1120         }
1121 
1122         // Paint fragments to target
1123 
1124         switch (fragmentDataType)
1125         {
1126         case rr::GENERICVECTYPE_FLOAT:
1127         {
1128             // Select min/max clamping values for blending factors and operands
1129             Vec4 minClampValue;
1130             Vec4 maxClampValue;
1131 
1132             if (colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT)
1133             {
1134                 minClampValue = Vec4(0.0f);
1135                 maxClampValue = Vec4(1.0f);
1136             }
1137             else if (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT)
1138             {
1139                 minClampValue = Vec4(-1.0f);
1140                 maxClampValue = Vec4(1.0f);
1141             }
1142             else
1143             {
1144                 // No clamping
1145                 minClampValue = Vec4(-std::numeric_limits<float>::infinity());
1146                 maxClampValue = Vec4(std::numeric_limits<float>::infinity());
1147             }
1148 
1149             // Blend calculation - only if using blend.
1150             if (state.blendMode == BLENDMODE_STANDARD)
1151             {
1152                 // Put dst color to register, doing srgb-to-linear conversion if needed.
1153                 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
1154                 {
1155                     if (m_sampleRegister[regSampleNdx].isAlive)
1156                     {
1157                         int fragSampleNdx    = regSampleNdx % numSamplesPerFragment;
1158                         const Fragment &frag = inputFragments[groupFirstFragNdx + regSampleNdx / numSamplesPerFragment];
1159                         Vec4 dstColor = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
1160 
1161                         m_sampleRegister[regSampleNdx].clampedBlendSrcColor =
1162                             clamp(frag.value.get<float>(), minClampValue, maxClampValue);
1163                         m_sampleRegister[regSampleNdx].clampedBlendSrc1Color =
1164                             clamp(frag.value1.get<float>(), minClampValue, maxClampValue);
1165                         m_sampleRegister[regSampleNdx].clampedBlendDstColor =
1166                             clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue);
1167                     }
1168                 }
1169 
1170                 // Calculate blend factors to register.
1171                 executeBlendFactorComputeRGB(state.blendColor, state.blendRGBState);
1172                 executeBlendFactorComputeA(state.blendColor, state.blendAState);
1173 
1174                 // Compute blended color.
1175                 executeBlend(state.blendRGBState, state.blendAState);
1176             }
1177             else if (state.blendMode == BLENDMODE_ADVANCED)
1178             {
1179                 // Unpremultiply colors for blending, and do sRGB->linear if necessary
1180                 // \todo [2014-03-17 pyry] Re-consider clampedBlend*Color var names
1181                 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
1182                 {
1183                     if (m_sampleRegister[regSampleNdx].isAlive)
1184                     {
1185                         int fragSampleNdx    = regSampleNdx % numSamplesPerFragment;
1186                         const Fragment &frag = inputFragments[groupFirstFragNdx + regSampleNdx / numSamplesPerFragment];
1187                         const Vec4 srcColor  = frag.value.get<float>();
1188                         const Vec4 dstColor =
1189                             colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
1190 
1191                         m_sampleRegister[regSampleNdx].clampedBlendSrcColor =
1192                             unpremultiply(clamp(srcColor, minClampValue, maxClampValue));
1193                         m_sampleRegister[regSampleNdx].clampedBlendDstColor = unpremultiply(
1194                             clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue));
1195                     }
1196                 }
1197 
1198                 executeAdvancedBlend(state.blendEquationAdvaced);
1199             }
1200             else
1201             {
1202                 // Not using blend - just put values to register as-is.
1203                 DE_ASSERT(state.blendMode == BLENDMODE_NONE);
1204 
1205                 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
1206                 {
1207                     if (m_sampleRegister[regSampleNdx].isAlive)
1208                     {
1209                         const Fragment &frag = inputFragments[groupFirstFragNdx + regSampleNdx / numSamplesPerFragment];
1210 
1211                         m_sampleRegister[regSampleNdx].blendedRGB = frag.value.get<float>().xyz();
1212                         m_sampleRegister[regSampleNdx].blendedA   = frag.value.get<float>().w();
1213                     }
1214                 }
1215             }
1216 
1217             // Clamp result values in sample register
1218             if (colorbufferClass != tcu::TEXTURECHANNELCLASS_FLOATING_POINT)
1219             {
1220                 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
1221                 {
1222                     if (m_sampleRegister[regSampleNdx].isAlive)
1223                     {
1224                         m_sampleRegister[regSampleNdx].blendedRGB =
1225                             clamp(m_sampleRegister[regSampleNdx].blendedRGB, minClampValue.swizzle(0, 1, 2),
1226                                   maxClampValue.swizzle(0, 1, 2));
1227                         m_sampleRegister[regSampleNdx].blendedA =
1228                             clamp(m_sampleRegister[regSampleNdx].blendedA, minClampValue.w(), maxClampValue.w());
1229                     }
1230                 }
1231             }
1232 
1233             // Finally, write the colors to the color buffer.
1234 
1235             if (state.colorMask[0] && state.colorMask[1] && state.colorMask[2] && state.colorMask[3])
1236             {
1237                 if (colorBuffer.getFormat() ==
1238                     tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8))
1239                     executeRGBA8ColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorBuffer);
1240                 else
1241                     executeColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, sRGBTarget,
1242                                       colorBuffer);
1243             }
1244             else if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
1245                 executeMaskedColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorMaskFactor,
1246                                         colorMaskNegationFactor, sRGBTarget, colorBuffer);
1247             break;
1248         }
1249         case rr::GENERICVECTYPE_INT32:
1250             // Write fragments
1251             for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
1252             {
1253                 if (m_sampleRegister[regSampleNdx].isAlive)
1254                 {
1255                     const Fragment &frag = inputFragments[groupFirstFragNdx + regSampleNdx / numSamplesPerFragment];
1256 
1257                     m_sampleRegister[regSampleNdx].signedValue = frag.value.get<int32_t>();
1258                 }
1259             }
1260 
1261             if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
1262                 executeSignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask,
1263                                         colorBuffer);
1264             break;
1265 
1266         case rr::GENERICVECTYPE_UINT32:
1267             // Write fragments
1268             for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
1269             {
1270                 if (m_sampleRegister[regSampleNdx].isAlive)
1271                 {
1272                     const Fragment &frag = inputFragments[groupFirstFragNdx + regSampleNdx / numSamplesPerFragment];
1273 
1274                     m_sampleRegister[regSampleNdx].unsignedValue = frag.value.get<uint32_t>();
1275                 }
1276             }
1277 
1278             if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
1279                 executeUnsignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask,
1280                                           colorBuffer);
1281             break;
1282 
1283         default:
1284             DE_ASSERT(false);
1285         }
1286     }
1287 }
1288 
1289 } // namespace rr
1290