xref: /aosp_15_r20/external/deqp/modules/gles3/performance/es3pShaderOperatorTests.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.0 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Shader operator performance tests.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es3pShaderOperatorTests.hpp"
25 #include "glsCalibration.hpp"
26 #include "gluShaderUtil.hpp"
27 #include "gluShaderProgram.hpp"
28 #include "gluPixelTransfer.hpp"
29 #include "tcuTestLog.hpp"
30 #include "tcuRenderTarget.hpp"
31 #include "tcuCommandLine.hpp"
32 #include "tcuSurface.hpp"
33 #include "deStringUtil.hpp"
34 #include "deSharedPtr.hpp"
35 #include "deClock.h"
36 #include "deMath.h"
37 
38 #include "glwEnums.hpp"
39 #include "glwFunctions.hpp"
40 
41 #include <map>
42 #include <algorithm>
43 #include <limits>
44 #include <set>
45 
46 namespace deqp
47 {
48 namespace gles3
49 {
50 namespace Performance
51 {
52 
53 using namespace gls;
54 using namespace glu;
55 using de::SharedPtr;
56 using tcu::TestLog;
57 using tcu::Vec2;
58 using tcu::Vec4;
59 
60 using std::string;
61 using std::vector;
62 
63 #define MEASUREMENT_FAIL() \
64     throw tcu::InternalError("Unable to get sensible measurements for estimation", DE_NULL, __FILE__, __LINE__)
65 
66 // Number of measurements in OperatorPerformanceCase for each workload size, unless specified otherwise by a command line argument.
67 static const int DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD = 3;
68 // How many different workload sizes are used by OperatorPerformanceCase.
69 static const int NUM_WORKLOADS = 8;
70 // Maximum workload size that can be attempted. In a sensible case, this most likely won't be reached.
71 static const int MAX_WORKLOAD_SIZE = 1 << 29;
72 
73 // BinaryOpCase-specific constants for shader generation.
74 static const int BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS = 4;
75 static const int BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT  = 2;
76 static const int BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT    = 4;
77 
78 // FunctionCase-specific constants for shader generation.
79 static const int FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS = 4;
80 
81 static const char *const s_swizzles[][4] = {{"x", "yx", "yzx", "wzyx"},
82                                             {"y", "zy", "wyz", "xwzy"},
83                                             {"z", "wy", "zxy", "yzwx"},
84                                             {"w", "xw", "yxw", "zyxw"}};
85 
86 template <int N>
mean(const vector<tcu::Vector<float,N>> & data)87 static tcu::Vector<float, N> mean(const vector<tcu::Vector<float, N>> &data)
88 {
89     tcu::Vector<float, N> sum(0.0f);
90     for (int i = 0; i < (int)data.size(); i++)
91         sum += data[i];
92     return sum / tcu::Vector<float, N>((float)data.size());
93 }
94 
uniformNfv(const glw::Functions & gl,int n,int location,int count,const float * data)95 static void uniformNfv(const glw::Functions &gl, int n, int location, int count, const float *data)
96 {
97     switch (n)
98     {
99     case 1:
100         gl.uniform1fv(location, count, data);
101         break;
102     case 2:
103         gl.uniform2fv(location, count, data);
104         break;
105     case 3:
106         gl.uniform3fv(location, count, data);
107         break;
108     case 4:
109         gl.uniform4fv(location, count, data);
110         break;
111     default:
112         DE_ASSERT(false);
113     }
114 }
115 
uniformNiv(const glw::Functions & gl,int n,int location,int count,const int * data)116 static void uniformNiv(const glw::Functions &gl, int n, int location, int count, const int *data)
117 {
118     switch (n)
119     {
120     case 1:
121         gl.uniform1iv(location, count, data);
122         break;
123     case 2:
124         gl.uniform2iv(location, count, data);
125         break;
126     case 3:
127         gl.uniform3iv(location, count, data);
128         break;
129     case 4:
130         gl.uniform4iv(location, count, data);
131         break;
132     default:
133         DE_ASSERT(false);
134     }
135 }
136 
uniformMatrixNfv(const glw::Functions & gl,int n,int location,int count,const float * data)137 static void uniformMatrixNfv(const glw::Functions &gl, int n, int location, int count, const float *data)
138 {
139     switch (n)
140     {
141     case 2:
142         gl.uniformMatrix2fv(location, count, GL_FALSE, &data[0]);
143         break;
144     case 3:
145         gl.uniformMatrix3fv(location, count, GL_FALSE, &data[0]);
146         break;
147     case 4:
148         gl.uniformMatrix4fv(location, count, GL_FALSE, &data[0]);
149         break;
150     default:
151         DE_ASSERT(false);
152     }
153 }
154 
getDataTypeFloatOrVec(int size)155 static glu::DataType getDataTypeFloatOrVec(int size)
156 {
157     return size == 1 ? glu::TYPE_FLOAT : glu::getDataTypeFloatVec(size);
158 }
159 
getIterationCountOrDefault(const tcu::CommandLine & cmdLine,int def)160 static int getIterationCountOrDefault(const tcu::CommandLine &cmdLine, int def)
161 {
162     const int cmdLineVal = cmdLine.getTestIterationCount();
163     return cmdLineVal > 0 ? cmdLineVal : def;
164 }
165 
lineParamsString(const LineParameters & params)166 static string lineParamsString(const LineParameters &params)
167 {
168     return "y = " + de::toString(params.offset) + " + " + de::toString(params.coefficient) + "*x";
169 }
170 
171 namespace
172 {
173 
174 /*--------------------------------------------------------------------*//*!
175  * \brief Abstract class for measuring shader operator performance.
176  *
177  * This class draws multiple times with different workload sizes (set
178  * via a uniform, by subclass). Time for each frame is measured, and the
179  * slope of the workload size vs frame time data is estimated. This slope
180  * tells us the estimated increase in frame time caused by a workload
181  * increase of 1 unit (what 1 workload unit means is up to subclass).
182  *
183  * Generally, the shaders contain not just the operation we're interested
184  * in (e.g. addition) but also some other stuff (e.g. loop overhead). To
185  * eliminate this cost, we actually do the stuff described in the above
186  * paragraph with multiple programs (usually two), which contain different
187  * kinds of workload (e.g. different loop contents). Then we can (in
188  * theory) compute the cost of just one operation in a subclass-dependent
189  * manner.
190  *
191  * At this point, the result tells us the increase in frame time caused
192  * by the addition of one operation. Dividing this by the amount of
193  * draw calls in a frame, and further by the amount of vertices or
194  * fragments in a draw call, we get the time cost of one operation.
195  *
196  * In reality, there sometimes isn't just a trivial linear dependence
197  * between workload size and frame time. Instead, there tends to be some
198  * amount of initial "free" operations. That is, it may be that all
199  * workload sizes below some positive integer C yield the same frame time,
200  * and only workload sizes beyond C increase the frame time in a supposedly
201  * linear manner. Graphically, this means that there graph consists of two
202  * parts: a horizontal left part, and a linearly increasing right part; the
203  * right part starts where the left parts ends. The principal task of these
204  * tests is to look at the slope of the increasing right part. Additionally
205  * an estimate for the amount of initial free operations is calculated.
206  * Note that it is also normal to get graphs where the horizontal left part
207  * is of zero width, i.e. there are no free operations.
208  *//*--------------------------------------------------------------------*/
209 class OperatorPerformanceCase : public tcu::TestCase
210 {
211 public:
212     enum CaseType
213     {
214         CASETYPE_VERTEX = 0,
215         CASETYPE_FRAGMENT,
216 
217         CASETYPE_LAST
218     };
219 
220     struct InitialCalibration
221     {
222         int initialNumCalls;
InitialCalibrationdeqp::gles3::Performance::__anond3a910f80111::OperatorPerformanceCase::InitialCalibration223         InitialCalibration(void) : initialNumCalls(1)
224         {
225         }
226     };
227 
228     typedef SharedPtr<InitialCalibration> InitialCalibrationStorage;
229 
230     OperatorPerformanceCase(tcu::TestContext &testCtx, glu::RenderContext &renderCtx, const char *name,
231                             const char *description, CaseType caseType, int numWorkloads,
232                             const InitialCalibrationStorage &initialCalibrationStorage);
233     ~OperatorPerformanceCase(void);
234 
235     void init(void);
236     void deinit(void);
237 
238     IterateResult iterate(void);
239 
240     struct AttribSpec
241     {
AttribSpecdeqp::gles3::Performance::__anond3a910f80111::OperatorPerformanceCase::AttribSpec242         AttribSpec(const char *name_, const tcu::Vec4 &p00_, const tcu::Vec4 &p01_, const tcu::Vec4 &p10_,
243                    const tcu::Vec4 &p11_)
244             : name(name_)
245             , p00(p00_)
246             , p01(p01_)
247             , p10(p10_)
248             , p11(p11_)
249         {
250         }
251 
AttribSpecdeqp::gles3::Performance::__anond3a910f80111::OperatorPerformanceCase::AttribSpec252         AttribSpec(void)
253         {
254         }
255 
256         std::string name;
257         tcu::Vec4 p00; //!< Bottom left.
258         tcu::Vec4 p01; //!< Bottom right.
259         tcu::Vec4 p10; //!< Top left.
260         tcu::Vec4 p11; //!< Top right.
261     };
262 
263 protected:
264     struct ProgramContext
265     {
266         string vertShaderSource;
267         string fragShaderSource;
268         vector<AttribSpec> attributes;
269 
270         string description;
271 
ProgramContextdeqp::gles3::Performance::__anond3a910f80111::OperatorPerformanceCase::ProgramContext272         ProgramContext(void)
273         {
274         }
ProgramContextdeqp::gles3::Performance::__anond3a910f80111::OperatorPerformanceCase::ProgramContext275         ProgramContext(const string &vs, const string &fs, const vector<AttribSpec> &attrs, const string &desc)
276             : vertShaderSource(vs)
277             , fragShaderSource(fs)
278             , attributes(attrs)
279             , description(desc)
280         {
281         }
282     };
283 
284     virtual vector<ProgramContext> generateProgramData(void) const = 0;
285     //! Sets program-specific uniforms that don't depend on the workload size.
286     virtual void setGeneralUniforms(uint32_t program) const = 0;
287     //! Sets the uniform(s) that specifies the workload size in the shader.
288     virtual void setWorkloadSizeUniform(uint32_t program, int workload) const = 0;
289     //! Computes the cost of a single operation, given the workload costs per program.
290     virtual float computeSingleOperationTime(const vector<float> &perProgramWorkloadCosts) const = 0;
291     //! Logs a human-readable description of what computeSingleOperationTime does.
292     virtual void logSingleOperationCalculationInfo(void) const = 0;
293 
294     glu::RenderContext &m_renderCtx;
295 
296     CaseType m_caseType;
297 
298 private:
299     enum State
300     {
301         STATE_CALIBRATING = 0, //!< Calibrate draw call count, using first program in m_programs, with workload size 1.
302         STATE_FIND_HIGH_WORKLOAD, //!< Find an appropriate lower bound for the highest workload size we intend to use (one with high-enough frame time compared to workload size 1) for each program.
303         STATE_MEASURING,          //!< Do actual measurements, for each program in m_programs.
304         STATE_REPORTING,          //!< Measurements are done; calculate results and log.
305         STATE_FINISHED,           //!< All done.
306 
307         STATE_LAST
308     };
309 
310     struct WorkloadRecord
311     {
312         int workloadSize;
313         vector<float> frameTimes; //!< In microseconds.
314 
WorkloadRecorddeqp::gles3::Performance::__anond3a910f80111::OperatorPerformanceCase::WorkloadRecord315         WorkloadRecord(int workloadSize_) : workloadSize(workloadSize_)
316         {
317         }
operator <deqp::gles3::Performance::__anond3a910f80111::OperatorPerformanceCase::WorkloadRecord318         bool operator<(const WorkloadRecord &other) const
319         {
320             return this->workloadSize < other.workloadSize;
321         }
addFrameTimedeqp::gles3::Performance::__anond3a910f80111::OperatorPerformanceCase::WorkloadRecord322         void addFrameTime(float time)
323         {
324             frameTimes.push_back(time);
325         }
getMedianTimedeqp::gles3::Performance::__anond3a910f80111::OperatorPerformanceCase::WorkloadRecord326         float getMedianTime(void) const
327         {
328             vector<float> times = frameTimes;
329             std::sort(times.begin(), times.end());
330             return times.size() % 2 == 0 ? (times[times.size() / 2 - 1] + times[times.size() / 2]) * 0.5f :
331                                            times[times.size() / 2];
332         }
333     };
334 
335     void prepareProgram(int progNdx); //!< Sets attributes and uniforms for m_programs[progNdx].
336     void prepareWorkload(
337         int progNdx,
338         int workload); //!< Calls setWorkloadSizeUniform and draws, in case the implementation does some draw-time compilation.
339     void prepareNextRound(void); //!< Increases workload and/or updates m_state.
340     void render(int numDrawCalls);
341     uint64_t renderAndMeasure(int numDrawCalls);
342     void adjustAndLogGridAndViewport(
343         void); //!< Log grid and viewport sizes, after possibly reducing them to reduce draw time.
344 
345     vector<Vec2> getWorkloadMedianDataPoints(
346         int progNdx) const; //!< [ Vec2(r.workloadSize, r.getMedianTime()) for r in m_workloadRecords[progNdx] ]
347 
348     const int m_numMeasurementsPerWorkload;
349     const int m_numWorkloads; //!< How many different workload sizes are used for measurement for each program.
350 
351     int m_workloadNdx; //!< Runs from 0 to m_numWorkloads-1.
352 
353     int m_workloadMeasurementNdx;
354     vector<vector<WorkloadRecord>>
355         m_workloadRecordsFindHigh; //!< The measurements done during STATE_FIND_HIGH_WORKLOAD.
356     vector<vector<WorkloadRecord>>
357         m_workloadRecords; //!< The measurements of each program in m_programs. Generated during STATE_MEASURING, into index specified by m_measureProgramNdx.
358 
359     State m_state;
360     int m_measureProgramNdx; //!< When m_state is STATE_FIND_HIGH_WORKLOAD or STATE_MEASURING, this tells which program in m_programs is being measured.
361 
362     vector<int>
363         m_highWorkloadSizes; //!< The first workload size encountered during STATE_FIND_HIGH_WORKLOAD that was determined suitable, for each program.
364 
365     TheilSenCalibrator m_calibrator;
366     InitialCalibrationStorage m_initialCalibrationStorage;
367 
368     int m_viewportWidth;
369     int m_viewportHeight;
370     int m_gridSizeX;
371     int m_gridSizeY;
372 
373     vector<ProgramContext> m_programData;
374     vector<SharedPtr<ShaderProgram>> m_programs;
375 
376     std::vector<uint32_t> m_attribBuffers;
377 };
378 
//! Linear interpolation over a triangle: starts from \p v0 and moves towards \p v2 by \p x
//! and towards \p v1 by \p y.
static inline float triangleInterpolate(float v0, float v1, float v2, float x, float y)
{
    return v0 + x * (v2 - v0) + y * (v1 - v0);
}
383 
triQuadInterpolate(float x,float y,const tcu::Vec4 & quad)384 static inline float triQuadInterpolate(float x, float y, const tcu::Vec4 &quad)
385 {
386     // \note Top left fill rule.
387     if (x + y < 1.0f)
388         return triangleInterpolate(quad.x(), quad.y(), quad.z(), x, y);
389     else
390         return triangleInterpolate(quad.w(), quad.z(), quad.y(), 1.0f - x, 1.0f - y);
391 }
392 
//! Number of vertices needed for a grid of gridSizeX x gridSizeY cells, where every cell is drawn
//! as two separate triangles (no vertex sharing).
static inline int getNumVertices(int gridSizeX, int gridSizeY)
{
    const int trianglesPerCell    = 2;
    const int verticesPerTriangle = 3;
    const int numCells            = gridSizeX * gridSizeY;

    return numCells * trianglesPerCell * verticesPerTriangle;
}
397 
generateVertices(std::vector<float> & dst,int gridSizeX,int gridSizeY,const OperatorPerformanceCase::AttribSpec & spec)398 static void generateVertices(std::vector<float> &dst, int gridSizeX, int gridSizeY,
399                              const OperatorPerformanceCase::AttribSpec &spec)
400 {
401     const int numComponents = 4;
402 
403     DE_ASSERT(gridSizeX >= 1 && gridSizeY >= 1);
404     dst.resize(getNumVertices(gridSizeX, gridSizeY) * numComponents);
405 
406     {
407         int dstNdx = 0;
408 
409         for (int baseY = 0; baseY < gridSizeY; baseY++)
410             for (int baseX = 0; baseX < gridSizeX; baseX++)
411             {
412                 const float xf0 = (float)(baseX + 0) / (float)gridSizeX;
413                 const float yf0 = (float)(baseY + 0) / (float)gridSizeY;
414                 const float xf1 = (float)(baseX + 1) / (float)gridSizeX;
415                 const float yf1 = (float)(baseY + 1) / (float)gridSizeY;
416 
417 #define ADD_VERTEX(XF, YF)                                    \
418     for (int compNdx = 0; compNdx < numComponents; compNdx++) \
419     dst[dstNdx++] = triQuadInterpolate(                       \
420         (XF), (YF), tcu::Vec4(spec.p00[compNdx], spec.p01[compNdx], spec.p10[compNdx], spec.p11[compNdx]))
421 
422                 ADD_VERTEX(xf0, yf0);
423                 ADD_VERTEX(xf1, yf0);
424                 ADD_VERTEX(xf0, yf1);
425 
426                 ADD_VERTEX(xf1, yf0);
427                 ADD_VERTEX(xf1, yf1);
428                 ADD_VERTEX(xf0, yf1);
429 
430 #undef ADD_VERTEX
431             }
432     }
433 }
434 
intersectionX(const gls::LineParameters & a,const gls::LineParameters & b)435 static float intersectionX(const gls::LineParameters &a, const gls::LineParameters &b)
436 {
437     return (a.offset - b.offset) / (b.coefficient - a.coefficient);
438 }
439 
numDistinctX(const vector<Vec2> & data)440 static int numDistinctX(const vector<Vec2> &data)
441 {
442     std::set<float> xs;
443     for (int i = 0; i < (int)data.size(); i++)
444         xs.insert(data[i].x());
445     return (int)xs.size();
446 }
447 
simpleLinearRegression(const vector<Vec2> & data)448 static gls::LineParameters simpleLinearRegression(const vector<Vec2> &data)
449 {
450     const Vec2 mid = mean(data);
451 
452     float slopeNumerator   = 0.0f;
453     float slopeDenominator = 0.0f;
454 
455     for (int i = 0; i < (int)data.size(); i++)
456     {
457         const Vec2 diff = data[i] - mid;
458 
459         slopeNumerator += diff.x() * diff.y();
460         slopeDenominator += diff.x() * diff.x();
461     }
462 
463     const float slope  = slopeNumerator / slopeDenominator;
464     const float offset = mid.y() - slope * mid.x();
465 
466     return gls::LineParameters(offset, slope);
467 }
468 
simpleLinearRegressionError(const vector<Vec2> & data)469 static float simpleLinearRegressionError(const vector<Vec2> &data)
470 {
471     if (numDistinctX(data) <= 2)
472         return 0.0f;
473     else
474     {
475         const gls::LineParameters estimator = simpleLinearRegression(data);
476         float error                         = 0.0f;
477 
478         for (int i = 0; i < (int)data.size(); i++)
479         {
480             const float estY = estimator.offset + estimator.coefficient * data[i].x();
481             const float diff = estY - data[i].y();
482             error += diff * diff;
483         }
484 
485         return error / (float)data.size();
486     }
487 }
488 
verticalVariance(const vector<Vec2> & data)489 static float verticalVariance(const vector<Vec2> &data)
490 {
491     if (numDistinctX(data) <= 2)
492         return 0.0f;
493     else
494     {
495         const float meanY = mean(data).y();
496         float error       = 0.0f;
497 
498         for (int i = 0; i < (int)data.size(); i++)
499         {
500             const float diff = meanY - data[i].y();
501             error += diff * diff;
502         }
503 
504         return error / (float)data.size();
505     }
506 }
507 
508 /*--------------------------------------------------------------------*//*!
509  * \brief Find the x coord that divides the input data into two slopes.
510  *
511  * The operator performance measurements tend to produce results where
512  * we get small operation counts "for free" (e.g. because the operations
513  * are performed during some memory transfer overhead or something),
514  * resulting in a curve with two parts: an initial horizontal line segment,
515  * and a rising line.
516  *
517  * This function finds the x coordinate that divides the input data into
518  * two parts such that the sum of the mean square errors for the
519  * least-squares estimated lines for the two parts is minimized, under the
520  * additional condition that the left line is horizontal.
521  *
522  * This function returns a number X s.t. { pt | pt is in data, pt.x >= X }
523  * is the right line, and the rest of data is the left line.
524  *//*--------------------------------------------------------------------*/
findSlopePivotX(const vector<Vec2> & data)525 static float findSlopePivotX(const vector<Vec2> &data)
526 {
527     std::set<float> xCoords;
528     for (int i = 0; i < (int)data.size(); i++)
529         xCoords.insert(data[i].x());
530 
531     float lowestError = std::numeric_limits<float>::infinity();
532     float bestPivotX  = -std::numeric_limits<float>::infinity();
533 
534     for (std::set<float>::const_iterator pivotX = xCoords.begin(); pivotX != xCoords.end(); ++pivotX)
535     {
536         vector<Vec2> leftData;
537         vector<Vec2> rightData;
538         for (int i = 0; i < (int)data.size(); i++)
539         {
540             if (data[i].x() < *pivotX)
541                 leftData.push_back(data[i]);
542             else
543                 rightData.push_back(data[i]);
544         }
545 
546         if (numDistinctX(rightData) < 3) // We don't trust the right data if there's too little of it.
547             break;
548 
549         {
550             const float totalError = verticalVariance(leftData) + simpleLinearRegressionError(rightData);
551 
552             if (totalError < lowestError)
553             {
554                 lowestError = totalError;
555                 bestPivotX  = *pivotX;
556             }
557         }
558     }
559 
560     DE_ASSERT(lowestError < std::numeric_limits<float>::infinity());
561 
562     return bestPivotX;
563 }
564 
565 struct SegmentedEstimator
566 {
567     float pivotX; //!< Value returned by findSlopePivotX, or -infinity if only single line.
568     gls::LineParameters left;
569     gls::LineParameters right;
SegmentedEstimatordeqp::gles3::Performance::__anond3a910f80111::SegmentedEstimator570     SegmentedEstimator(const gls::LineParameters &l, const gls::LineParameters &r, float pivotX_)
571         : pivotX(pivotX_)
572         , left(l)
573         , right(r)
574     {
575     }
576 };
577 
578 /*--------------------------------------------------------------------*//*!
579  * \brief Compute line estimators for (potentially) two-segment data.
580  *
581  * Splits the given data into left and right parts (using findSlopePivotX)
582  * and returns the line estimates for them.
583  *
584  * Sometimes, however (especially in fragment shader cases) the data is
585  * in fact not segmented, but a straight line. This function attempts to
586  * detect if this the case, and if so, sets left.offset = right.offset and
587  * left.slope = 0, meaning essentially that the initial "flat" part of the
588  * data has zero width.
589  *//*--------------------------------------------------------------------*/
computeSegmentedEstimator(const vector<Vec2> & data)590 static SegmentedEstimator computeSegmentedEstimator(const vector<Vec2> &data)
591 {
592     const float pivotX = findSlopePivotX(data);
593     vector<Vec2> leftData;
594     vector<Vec2> rightData;
595 
596     for (int i = 0; i < (int)data.size(); i++)
597     {
598         if (data[i].x() < pivotX)
599             leftData.push_back(data[i]);
600         else
601             rightData.push_back(data[i]);
602     }
603 
604     {
605         const gls::LineParameters leftLine  = gls::theilSenLinearRegression(leftData);
606         const gls::LineParameters rightLine = gls::theilSenLinearRegression(rightData);
607 
608         if (numDistinctX(leftData) < 2 || leftLine.coefficient > rightLine.coefficient * 0.5f)
609         {
610             // Left data doesn't seem credible; assume the data is just a single line.
611             const gls::LineParameters entireLine = gls::theilSenLinearRegression(data);
612             return SegmentedEstimator(gls::LineParameters(entireLine.offset, 0.0f), entireLine,
613                                       -std::numeric_limits<float>::infinity());
614         }
615         else
616             return SegmentedEstimator(leftLine, rightLine, pivotX);
617     }
618 }
619 
OperatorPerformanceCase(tcu::TestContext & testCtx,glu::RenderContext & renderCtx,const char * name,const char * description,CaseType caseType,int numWorkloads,const InitialCalibrationStorage & initialCalibrationStorage)620 OperatorPerformanceCase::OperatorPerformanceCase(tcu::TestContext &testCtx, glu::RenderContext &renderCtx,
621                                                  const char *name, const char *description, CaseType caseType,
622                                                  int numWorkloads,
623                                                  const InitialCalibrationStorage &initialCalibrationStorage)
624     : tcu::TestCase(testCtx, tcu::NODETYPE_PERFORMANCE, name, description)
625     , m_renderCtx(renderCtx)
626     , m_caseType(caseType)
627     , m_numMeasurementsPerWorkload(
628           getIterationCountOrDefault(m_testCtx.getCommandLine(), DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD))
629     , m_numWorkloads(numWorkloads)
630     , m_workloadNdx(-1)
631     , m_workloadMeasurementNdx(-1)
632     , m_state(STATE_LAST)
633     , m_measureProgramNdx(-1)
634     , m_initialCalibrationStorage(initialCalibrationStorage)
635     , m_viewportWidth(caseType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getWidth())
636     , m_viewportHeight(caseType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getHeight())
637     , m_gridSizeX(caseType == CASETYPE_FRAGMENT ? 1 : 100)
638     , m_gridSizeY(caseType == CASETYPE_FRAGMENT ? 1 : 100)
639 {
640     DE_ASSERT(m_numWorkloads > 0);
641 }
642 
~OperatorPerformanceCase(void)643 OperatorPerformanceCase::~OperatorPerformanceCase(void)
644 {
645     if (!m_attribBuffers.empty())
646     {
647         m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
648         m_attribBuffers.clear();
649     }
650 }
651 
logRenderTargetInfo(TestLog & log,const tcu::RenderTarget & renderTarget)652 static void logRenderTargetInfo(TestLog &log, const tcu::RenderTarget &renderTarget)
653 {
654     log << TestLog::Section("RenderTarget", "Render target") << TestLog::Message << "size: " << renderTarget.getWidth()
655         << "x" << renderTarget.getHeight() << TestLog::EndMessage << TestLog::Message << "bits:"
656         << " R" << renderTarget.getPixelFormat().redBits << " G" << renderTarget.getPixelFormat().greenBits << " B"
657         << renderTarget.getPixelFormat().blueBits << " A" << renderTarget.getPixelFormat().alphaBits << " D"
658         << renderTarget.getDepthBits() << " S" << renderTarget.getStencilBits() << TestLog::EndMessage;
659 
660     if (renderTarget.getNumSamples() != 0)
661         log << TestLog::Message << renderTarget.getNumSamples() << "x MSAA" << TestLog::EndMessage;
662     else
663         log << TestLog::Message << "No MSAA" << TestLog::EndMessage;
664 
665     log << TestLog::EndSection;
666 }
667 
getWorkloadMedianDataPoints(int progNdx) const668 vector<Vec2> OperatorPerformanceCase::getWorkloadMedianDataPoints(int progNdx) const
669 {
670     const vector<WorkloadRecord> &records = m_workloadRecords[progNdx];
671     vector<Vec2> result;
672 
673     for (int i = 0; i < (int)records.size(); i++)
674         result.push_back(Vec2((float)records[i].workloadSize, records[i].getMedianTime()));
675 
676     return result;
677 }
678 
prepareProgram(int progNdx)679 void OperatorPerformanceCase::prepareProgram(int progNdx)
680 {
681     DE_ASSERT(progNdx < (int)m_programs.size());
682     DE_ASSERT(m_programData.size() == m_programs.size());
683 
684     const glw::Functions &gl     = m_renderCtx.getFunctions();
685     const ShaderProgram &program = *m_programs[progNdx];
686 
687     vector<AttribSpec> attributes = m_programData[progNdx].attributes;
688 
689     attributes.push_back(AttribSpec("a_position", Vec4(-1.0f, -1.0f, 0.0f, 1.0f), Vec4(1.0f, -1.0f, 0.0f, 1.0f),
690                                     Vec4(-1.0f, 1.0f, 0.0f, 1.0f), Vec4(1.0f, 1.0f, 0.0f, 1.0f)));
691 
692     DE_ASSERT(program.isOk());
693 
694     // Generate vertices.
695     if (!m_attribBuffers.empty())
696         gl.deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
697     m_attribBuffers.resize(attributes.size(), 0);
698     gl.genBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
699     GLU_EXPECT_NO_ERROR(gl.getError(), "glGenBuffers()");
700 
701     for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++)
702     {
703         std::vector<float> vertices;
704         generateVertices(vertices, m_gridSizeX, m_gridSizeY, attributes[attribNdx]);
705 
706         gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
707         gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertices.size() * sizeof(float)), &vertices[0],
708                       GL_STATIC_DRAW);
709         GLU_EXPECT_NO_ERROR(gl.getError(), "Upload buffer data");
710     }
711 
712     // Setup attribute bindings.
713     for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++)
714     {
715         int location = gl.getAttribLocation(program.getProgram(), attributes[attribNdx].name.c_str());
716 
717         if (location >= 0)
718         {
719             gl.enableVertexAttribArray(location);
720             gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
721             gl.vertexAttribPointer(location, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
722         }
723     }
724     GLU_EXPECT_NO_ERROR(gl.getError(), "Setup vertex input state");
725 
726     gl.useProgram(program.getProgram());
727     setGeneralUniforms(program.getProgram());
728     gl.viewport(0, 0, m_viewportWidth, m_viewportHeight);
729 }
730 
prepareWorkload(int progNdx,int workload)731 void OperatorPerformanceCase::prepareWorkload(int progNdx, int workload)
732 {
733     setWorkloadSizeUniform(m_programs[progNdx]->getProgram(), workload);
734     render(m_calibrator.getCallCount());
735 }
736 
prepareNextRound(void)737 void OperatorPerformanceCase::prepareNextRound(void)
738 {
739     DE_ASSERT(m_state == STATE_CALIBRATING || m_state == STATE_FIND_HIGH_WORKLOAD || m_state == STATE_MEASURING);
740 
741     TestLog &log = m_testCtx.getLog();
742 
743     if (m_state == STATE_CALIBRATING && m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED)
744     {
745         m_measureProgramNdx = 0;
746         m_state             = STATE_FIND_HIGH_WORKLOAD;
747     }
748 
749     if (m_state == STATE_CALIBRATING)
750         prepareWorkload(0, 1);
751     else if (m_state == STATE_FIND_HIGH_WORKLOAD)
752     {
753         vector<WorkloadRecord> &records = m_workloadRecordsFindHigh[m_measureProgramNdx];
754 
755         if (records.empty() || records.back().getMedianTime() < 2.0f * records[0].getMedianTime())
756         {
757             int workloadSize;
758 
759             if (records.empty())
760                 workloadSize = 1;
761             else
762             {
763                 workloadSize = records.back().workloadSize * 2;
764 
765                 if (workloadSize > MAX_WORKLOAD_SIZE)
766                 {
767                     log << TestLog::Message << "Even workload size " << records.back().workloadSize
768                         << " doesn't give high enough frame time for program " << m_measureProgramNdx
769                         << ". Can't get sensible result." << TestLog::EndMessage;
770                     MEASUREMENT_FAIL();
771                 }
772             }
773 
774             records.push_back(WorkloadRecord(workloadSize));
775             prepareWorkload(0, workloadSize);
776             m_workloadMeasurementNdx = 0;
777         }
778         else
779         {
780             m_highWorkloadSizes[m_measureProgramNdx] = records.back().workloadSize;
781             m_measureProgramNdx++;
782 
783             if (m_measureProgramNdx >= (int)m_programs.size())
784             {
785                 m_state             = STATE_MEASURING;
786                 m_workloadNdx       = -1;
787                 m_measureProgramNdx = 0;
788             }
789 
790             prepareProgram(m_measureProgramNdx);
791             prepareNextRound();
792         }
793     }
794     else
795     {
796         m_workloadNdx++;
797 
798         if (m_workloadNdx < m_numWorkloads)
799         {
800             DE_ASSERT(m_numWorkloads > 1);
801             const int highWorkload = m_highWorkloadSizes[m_measureProgramNdx];
802             const int workload     = highWorkload > m_numWorkloads ?
803                                          1 + m_workloadNdx * (highWorkload - 1) / (m_numWorkloads - 1) :
804                                          1 + m_workloadNdx;
805 
806             prepareWorkload(m_measureProgramNdx, workload);
807 
808             m_workloadMeasurementNdx = 0;
809 
810             m_workloadRecords[m_measureProgramNdx].push_back(WorkloadRecord(workload));
811         }
812         else
813         {
814             m_measureProgramNdx++;
815 
816             if (m_measureProgramNdx < (int)m_programs.size())
817             {
818                 m_workloadNdx            = -1;
819                 m_workloadMeasurementNdx = 0;
820                 prepareProgram(m_measureProgramNdx);
821                 prepareNextRound();
822             }
823             else
824                 m_state = STATE_REPORTING;
825         }
826     }
827 }
828 
init(void)829 void OperatorPerformanceCase::init(void)
830 {
831     TestLog &log             = m_testCtx.getLog();
832     const glw::Functions &gl = m_renderCtx.getFunctions();
833 
834     // Validate that we have sane grid and viewport setup.
835     DE_ASSERT(de::inBounds(m_gridSizeX, 1, 256) && de::inBounds(m_gridSizeY, 1, 256));
836     TCU_CHECK(de::inRange(m_viewportWidth, 1, m_renderCtx.getRenderTarget().getWidth()) &&
837               de::inRange(m_viewportHeight, 1, m_renderCtx.getRenderTarget().getHeight()));
838 
839     logRenderTargetInfo(log, m_renderCtx.getRenderTarget());
840 
841     log << TestLog::Message << "Using additive blending." << TestLog::EndMessage;
842     gl.enable(GL_BLEND);
843     gl.blendEquation(GL_FUNC_ADD);
844     gl.blendFunc(GL_ONE, GL_ONE);
845 
846     // Generate programs.
847     DE_ASSERT(m_programs.empty());
848     m_programData = generateProgramData();
849     DE_ASSERT(!m_programData.empty());
850 
851     for (int progNdx = 0; progNdx < (int)m_programData.size(); progNdx++)
852     {
853         const string &vert = m_programData[progNdx].vertShaderSource;
854         const string &frag = m_programData[progNdx].fragShaderSource;
855 
856         m_programs.push_back(
857             SharedPtr<ShaderProgram>(new ShaderProgram(m_renderCtx, glu::makeVtxFragSources(vert, frag))));
858 
859         if (!m_programs.back()->isOk())
860         {
861             log << *m_programs.back();
862             TCU_FAIL("Compile failed");
863         }
864     }
865 
866     // Log all programs.
867     for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
868         log << TestLog::Section("Program" + de::toString(progNdx), "Program " + de::toString(progNdx))
869             << TestLog::Message << m_programData[progNdx].description << TestLog::EndMessage << *m_programs[progNdx]
870             << TestLog::EndSection;
871 
872     m_highWorkloadSizes.resize(m_programData.size());
873     m_workloadRecordsFindHigh.resize(m_programData.size());
874     m_workloadRecords.resize(m_programData.size());
875 
876     m_calibrator.clear(
877         CalibratorParameters(m_initialCalibrationStorage->initialNumCalls, 10 /* calibrate iteration frames */,
878                              2000.0f /* calibrate iteration shortcut threshold (ms) */,
879                              16 /* max calibrate iterations */, 1000.0f / 30.0f /* frame time (ms) */,
880                              1000.0f / 60.0f /* frame time cap (ms) */, 1000.0f /* target measure duration (ms) */));
881     m_state = STATE_CALIBRATING;
882 
883     prepareProgram(0);
884     prepareNextRound();
885 }
886 
deinit(void)887 void OperatorPerformanceCase::deinit(void)
888 {
889     if (!m_attribBuffers.empty())
890     {
891         m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
892         m_attribBuffers.clear();
893     }
894 
895     m_programs.clear();
896 }
897 
render(int numDrawCalls)898 void OperatorPerformanceCase::render(int numDrawCalls)
899 {
900     const glw::Functions &gl = m_renderCtx.getFunctions();
901     const int numVertices    = getNumVertices(m_gridSizeX, m_gridSizeY);
902 
903     for (int callNdx = 0; callNdx < numDrawCalls; callNdx++)
904         gl.drawArrays(GL_TRIANGLES, 0, numVertices);
905 
906     glu::readPixels(m_renderCtx, 0, 0,
907                     tcu::Surface(1, 1).getAccess()); // \note Serves as a more reliable replacement for glFinish().
908 }
909 
renderAndMeasure(int numDrawCalls)910 uint64_t OperatorPerformanceCase::renderAndMeasure(int numDrawCalls)
911 {
912     const uint64_t startTime = deGetMicroseconds();
913     render(numDrawCalls);
914     return deGetMicroseconds() - startTime;
915 }
916 
adjustAndLogGridAndViewport(void)917 void OperatorPerformanceCase::adjustAndLogGridAndViewport(void)
918 {
919     TestLog &log = m_testCtx.getLog();
920 
921     // If call count is just 1, and the target frame time still wasn't reached, reduce grid or viewport size.
922     if (m_calibrator.getCallCount() == 1)
923     {
924         const gls::MeasureState &calibratorMeasure = m_calibrator.getMeasureState();
925         const float drawCallTime = (float)calibratorMeasure.getTotalTime() / (float)calibratorMeasure.frameTimes.size();
926         const float targetDrawCallTime = m_calibrator.getParameters().targetFrameTimeUs;
927         const float targetRatio        = targetDrawCallTime / drawCallTime;
928 
929         if (targetRatio < 0.95f)
930         {
931             // Reduce grid or viewport size assuming draw call time scales proportionally.
932             if (m_caseType == CASETYPE_VERTEX)
933             {
934                 const float targetRatioSqrt = deFloatSqrt(targetRatio);
935                 m_gridSizeX                 = (int)(targetRatioSqrt * (float)m_gridSizeX);
936                 m_gridSizeY                 = (int)(targetRatioSqrt * (float)m_gridSizeY);
937                 TCU_CHECK_MSG(m_gridSizeX >= 1 && m_gridSizeY >= 1,
938                               "Can't decrease grid size enough to achieve low-enough draw times");
939                 log << TestLog::Message
940                     << "Note: triangle grid size reduced from original; it's now smaller than during calibration."
941                     << TestLog::EndMessage;
942             }
943             else
944             {
945                 const float targetRatioSqrt = deFloatSqrt(targetRatio);
946                 m_viewportWidth             = (int)(targetRatioSqrt * (float)m_viewportWidth);
947                 m_viewportHeight            = (int)(targetRatioSqrt * (float)m_viewportHeight);
948                 TCU_CHECK_MSG(m_viewportWidth >= 1 && m_viewportHeight >= 1,
949                               "Can't decrease viewport size enough to achieve low-enough draw times");
950                 log << TestLog::Message
951                     << "Note: viewport size reduced from original; it's now smaller than during calibration."
952                     << TestLog::EndMessage;
953             }
954         }
955     }
956 
957     prepareProgram(0);
958 
959     // Log grid and viewport sizes.
960     log << TestLog::Message << "Grid size: " << m_gridSizeX << "x" << m_gridSizeY << TestLog::EndMessage;
961     log << TestLog::Message << "Viewport: " << m_viewportWidth << "x" << m_viewportHeight << TestLog::EndMessage;
962 }
963 
iterate(void)964 OperatorPerformanceCase::IterateResult OperatorPerformanceCase::iterate(void)
965 {
966     const TheilSenCalibrator::State calibratorState = m_calibrator.getState();
967 
968     if (calibratorState != TheilSenCalibrator::STATE_FINISHED)
969     {
970         if (calibratorState == TheilSenCalibrator::STATE_RECOMPUTE_PARAMS)
971             m_calibrator.recomputeParameters();
972         else if (calibratorState == TheilSenCalibrator::STATE_MEASURE)
973             m_calibrator.recordIteration(renderAndMeasure(m_calibrator.getCallCount()));
974         else
975             DE_ASSERT(false);
976 
977         if (m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED)
978         {
979             logCalibrationInfo(m_testCtx.getLog(), m_calibrator);
980             adjustAndLogGridAndViewport();
981             prepareNextRound();
982             m_initialCalibrationStorage->initialNumCalls = m_calibrator.getCallCount();
983         }
984     }
985     else if (m_state == STATE_FIND_HIGH_WORKLOAD || m_state == STATE_MEASURING)
986     {
987         if (m_workloadMeasurementNdx < m_numMeasurementsPerWorkload)
988         {
989             vector<WorkloadRecord> &records = m_state == STATE_FIND_HIGH_WORKLOAD ?
990                                                   m_workloadRecordsFindHigh[m_measureProgramNdx] :
991                                                   m_workloadRecords[m_measureProgramNdx];
992             records.back().addFrameTime((float)renderAndMeasure(m_calibrator.getCallCount()));
993             m_workloadMeasurementNdx++;
994         }
995         else
996             prepareNextRound();
997     }
998     else
999     {
1000         DE_ASSERT(m_state == STATE_REPORTING);
1001 
1002         TestLog &log            = m_testCtx.getLog();
1003         const int drawCallCount = m_calibrator.getCallCount();
1004 
1005         {
1006             // Compute per-program estimators for measurements.
1007             vector<SegmentedEstimator> estimators;
1008             for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
1009                 estimators.push_back(computeSegmentedEstimator(getWorkloadMedianDataPoints(progNdx)));
1010 
1011             // Log measurements and their estimators for all programs.
1012             for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
1013             {
1014                 const SegmentedEstimator &estimator = estimators[progNdx];
1015                 const string progNdxStr             = de::toString(progNdx);
1016                 vector<WorkloadRecord> records      = m_workloadRecords[progNdx];
1017                 std::sort(records.begin(), records.end());
1018 
1019                 {
1020                     const tcu::ScopedLogSection section(log, "Program" + progNdxStr + "Measurements",
1021                                                         "Measurements for program " + progNdxStr);
1022 
1023                     // Sample list of individual frame times.
1024 
1025                     log << TestLog::SampleList("Program" + progNdxStr + "IndividualFrameTimes",
1026                                                "Individual frame times")
1027                         << TestLog::SampleInfo
1028                         << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1029                         << TestLog::ValueInfo("FrameTime", "Frame time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
1030                         << TestLog::EndSampleInfo;
1031 
1032                     for (int i = 0; i < (int)records.size(); i++)
1033                         for (int j = 0; j < (int)records[i].frameTimes.size(); j++)
1034                             log << TestLog::Sample << records[i].workloadSize << records[i].frameTimes[j]
1035                                 << TestLog::EndSample;
1036 
1037                     log << TestLog::EndSampleList;
1038 
1039                     // Sample list of median frame times.
1040 
1041                     log << TestLog::SampleList("Program" + progNdxStr + "MedianFrameTimes", "Median frame times")
1042                         << TestLog::SampleInfo
1043                         << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR)
1044                         << TestLog::ValueInfo("MedianFrameTime", "Median frame time", "us",
1045                                               QP_SAMPLE_VALUE_TAG_RESPONSE)
1046                         << TestLog::EndSampleInfo;
1047 
1048                     for (int i = 0; i < (int)records.size(); i++)
1049                         log << TestLog::Sample << records[i].workloadSize << records[i].getMedianTime()
1050                             << TestLog::EndSample;
1051 
1052                     log << TestLog::EndSampleList;
1053 
1054                     log << TestLog::Float("Program" + progNdxStr + "WorkloadCostEstimate", "Workload cost estimate",
1055                                           "us / workload", QP_KEY_TAG_TIME, estimator.right.coefficient);
1056 
1057                     if (estimator.pivotX > -std::numeric_limits<float>::infinity())
1058                         log << TestLog::Message << "Note: the data points with x coordinate greater than or equal to "
1059                             << estimator.pivotX
1060                             << " seem to form a rising line, and the rest of data points seem to form a "
1061                                "near-horizontal line"
1062                             << TestLog::EndMessage << TestLog::Message << "Note: the left line is estimated to be "
1063                             << lineParamsString(estimator.left) << " and the right line "
1064                             << lineParamsString(estimator.right) << TestLog::EndMessage;
1065                     else
1066                         log << TestLog::Message
1067                             << "Note: the data seem to form a single line: " << lineParamsString(estimator.right)
1068                             << TestLog::EndMessage;
1069                 }
1070             }
1071 
1072             for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
1073             {
1074                 if (estimators[progNdx].right.coefficient <= 0.0f)
1075                 {
1076                     log << TestLog::Message << "Slope of measurements for program " << progNdx
1077                         << " isn't positive. Can't get sensible result." << TestLog::EndMessage;
1078                     MEASUREMENT_FAIL();
1079                 }
1080             }
1081 
1082             // \note For each estimator, .right.coefficient is the increase in draw time (in microseconds) when
1083             // incrementing shader workload size by 1, when D draw calls are done, with a vertex/fragment count
1084             // of R.
1085             //
1086             // The measurements of any single program can't tell us the final result (time of single operation),
1087             // so we use computeSingleOperationTime to compute it from multiple programs' measurements in a
1088             // subclass-defined manner.
1089             //
1090             // After that, microseconds per operation can be calculated as singleOperationTime / (D * R).
1091 
1092             {
1093                 vector<float> perProgramSlopes;
1094                 for (int i = 0; i < (int)m_programs.size(); i++)
1095                     perProgramSlopes.push_back(estimators[i].right.coefficient);
1096 
1097                 logSingleOperationCalculationInfo();
1098 
1099                 const float maxSlope            = *std::max_element(perProgramSlopes.begin(), perProgramSlopes.end());
1100                 const float usecsPerFramePerOp  = computeSingleOperationTime(perProgramSlopes);
1101                 const int vertexOrFragmentCount = m_caseType == CASETYPE_VERTEX ?
1102                                                       getNumVertices(m_gridSizeX, m_gridSizeY) :
1103                                                       m_viewportWidth * m_viewportHeight;
1104                 const double usecsPerDrawCallPerOp = usecsPerFramePerOp / (double)drawCallCount;
1105                 const double usecsPerSingleOp      = usecsPerDrawCallPerOp / (double)vertexOrFragmentCount;
1106                 const double megaOpsPerSecond = (double)(drawCallCount * vertexOrFragmentCount) / usecsPerFramePerOp;
1107                 const int numFreeOps          = de::max(
1108                     0, (int)deFloatFloor(intersectionX(
1109                            estimators[0].left, LineParameters(estimators[0].right.offset, usecsPerFramePerOp))));
1110 
1111                 log << TestLog::Integer("VertexOrFragmentCount",
1112                                         "R = " + string(m_caseType == CASETYPE_VERTEX ? "Vertex" : "Fragment") +
1113                                             " count",
1114                                         "", QP_KEY_TAG_NONE, vertexOrFragmentCount)
1115 
1116                     << TestLog::Integer("DrawCallsPerFrame", "D = Draw calls per frame", "", QP_KEY_TAG_NONE,
1117                                         drawCallCount)
1118 
1119                     << TestLog::Integer("VerticesOrFragmentsPerFrame",
1120                                         "R*D = " + string(m_caseType == CASETYPE_VERTEX ? "Vertices" : "Fragments") +
1121                                             " per frame",
1122                                         "", QP_KEY_TAG_NONE, vertexOrFragmentCount * drawCallCount)
1123 
1124                     << TestLog::Float("TimePerFramePerOp",
1125                                       "Estimated cost of R*D " +
1126                                           string(m_caseType == CASETYPE_VERTEX ? "vertices" : "fragments") +
1127                                           " (i.e. one frame) with one shader operation",
1128                                       "us", QP_KEY_TAG_TIME, (float)usecsPerFramePerOp)
1129 
1130                     << TestLog::Float("TimePerDrawcallPerOp",
1131                                       "Estimated cost of one draw call with one shader operation", "us",
1132                                       QP_KEY_TAG_TIME, (float)usecsPerDrawCallPerOp)
1133 
1134                     << TestLog::Float("TimePerSingleOp", "Estimated cost of a single shader operation", "us",
1135                                       QP_KEY_TAG_TIME, (float)usecsPerSingleOp);
1136 
1137                 // \note Sometimes, when the operation is free or very cheap, it can happen that the shader with the operation runs,
1138                 //         for some reason, a bit faster than the shader without the operation, and thus we get a negative result. The
1139                 //         following threshold values for accepting a negative or almost-zero result are rather quick and dirty.
1140                 if (usecsPerFramePerOp <= -0.1f * maxSlope)
1141                 {
1142                     log << TestLog::Message << "Got strongly negative result." << TestLog::EndMessage;
1143                     MEASUREMENT_FAIL();
1144                 }
1145                 else if (usecsPerFramePerOp <= 0.001 * maxSlope)
1146                 {
1147                     log << TestLog::Message << "Cost of operation seems to be approximately zero."
1148                         << TestLog::EndMessage;
1149                     m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1150                 }
1151                 else
1152                 {
1153                     log << TestLog::Float("OpsPerSecond", "Operations per second", "Million/s", QP_KEY_TAG_PERFORMANCE,
1154                                           (float)megaOpsPerSecond)
1155 
1156                         << TestLog::Integer("NumFreeOps", "Estimated number of \"free\" operations", "",
1157                                             QP_KEY_TAG_PERFORMANCE, numFreeOps);
1158 
1159                     m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString((float)megaOpsPerSecond, 2).c_str());
1160                 }
1161 
1162                 m_state = STATE_FINISHED;
1163             }
1164         }
1165 
1166         return STOP;
1167     }
1168 
1169     return CONTINUE;
1170 }
1171 
1172 // Binary operator case.
1173 class BinaryOpCase : public OperatorPerformanceCase
1174 {
1175 public:
1176     BinaryOpCase(Context &context, const char *name, const char *description, const char *op, glu::DataType type,
1177                  glu::Precision precision, bool useSwizzle, bool isVertex,
1178                  const InitialCalibrationStorage &initialCalibration);
1179 
1180 protected:
1181     vector<ProgramContext> generateProgramData(void) const;
1182     void setGeneralUniforms(uint32_t program) const;
1183     void setWorkloadSizeUniform(uint32_t program, int numOperations) const;
1184     float computeSingleOperationTime(const vector<float> &perProgramOperationCosts) const;
1185     void logSingleOperationCalculationInfo(void) const;
1186 
1187 private:
1188     enum ProgramID
1189     {
1190         // \note 0-based sequential numbering is relevant, because these are also used as vector indices.
1191         // \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow.
1192         PROGRAM_WITH_BIGGER_LOOP = 0,
1193         PROGRAM_WITH_SMALLER_LOOP,
1194 
1195         PROGRAM_LAST
1196     };
1197 
1198     ProgramContext generateSingleProgramData(ProgramID) const;
1199 
1200     const string m_op;
1201     const glu::DataType m_type;
1202     const glu::Precision m_precision;
1203     const bool m_useSwizzle;
1204 };
1205 
BinaryOpCase(Context & context,const char * name,const char * description,const char * op,glu::DataType type,glu::Precision precision,bool useSwizzle,bool isVertex,const InitialCalibrationStorage & initialCalibration)1206 BinaryOpCase::BinaryOpCase(Context &context, const char *name, const char *description, const char *op,
1207                            glu::DataType type, glu::Precision precision, bool useSwizzle, bool isVertex,
1208                            const InitialCalibrationStorage &initialCalibration)
1209     : OperatorPerformanceCase(context.getTestContext(), context.getRenderContext(), name, description,
1210                               isVertex ? CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration)
1211     , m_op(op)
1212     , m_type(type)
1213     , m_precision(precision)
1214     , m_useSwizzle(useSwizzle)
1215 {
1216 }
1217 
generateSingleProgramData(ProgramID programID) const1218 BinaryOpCase::ProgramContext BinaryOpCase::generateSingleProgramData(ProgramID programID) const
1219 {
1220     DE_ASSERT(glu::isDataTypeFloatOrVec(m_type) || glu::isDataTypeIntOrIVec(m_type));
1221 
1222     const bool isVertexCase     = m_caseType == CASETYPE_VERTEX;
1223     const char *const precision = glu::getPrecisionName(m_precision);
1224     const char *const inputPrecision =
1225         glu::isDataTypeIntOrIVec(m_type) && m_precision == glu::PRECISION_LOWP ? "mediump" : precision;
1226     const char *const typeName = getDataTypeName(m_type);
1227 
1228     std::ostringstream vtx;
1229     std::ostringstream frag;
1230     std::ostringstream &op = isVertexCase ? vtx : frag;
1231 
1232     vtx << "#version 300 es\n";
1233     frag << "#version 300 es\n"
1234          << "layout (location = 0) out mediump vec4 o_color;\n";
1235 
1236     // Attributes.
1237     vtx << "in highp vec4 a_position;\n";
1238     for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS + 1; i++)
1239         vtx << "in " << inputPrecision << " vec4 a_in" << i << ";\n";
1240 
1241     if (isVertexCase)
1242     {
1243         vtx << "out mediump vec4 v_color;\n";
1244         frag << "in mediump vec4 v_color;\n";
1245     }
1246     else
1247     {
1248         for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS + 1; i++)
1249         {
1250             vtx << "out " << inputPrecision << " vec4 v_in" << i << ";\n";
1251             frag << "in " << inputPrecision << " vec4 v_in" << i << ";\n";
1252         }
1253     }
1254 
1255     op << "uniform mediump int u_numLoopIterations;\n";
1256     if (isVertexCase)
1257         op << "uniform mediump float u_zero;\n";
1258 
1259     vtx << "\n";
1260     vtx << "void main()\n";
1261     vtx << "{\n";
1262 
1263     if (!isVertexCase)
1264         vtx << "\tgl_Position = a_position;\n";
1265 
1266     frag << "\n";
1267     frag << "void main()\n";
1268     frag << "{\n";
1269 
1270     // Expression inputs.
1271     const char *const prefix = isVertexCase ? "a_" : "v_";
1272     for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS + 1; i++)
1273     {
1274         const int inSize = getDataTypeScalarSize(m_type);
1275         const bool isInt = de::inRange<int>(m_type, TYPE_INT, TYPE_INT_VEC4);
1276         const bool cast  = isInt || (!m_useSwizzle && m_type != TYPE_FLOAT_VEC4);
1277 
1278         op << "\t" << precision << " " << typeName << " in" << i << " = ";
1279 
1280         if (cast)
1281             op << typeName << "(";
1282 
1283         op << prefix << "in" << i;
1284 
1285         if (m_useSwizzle)
1286             op << "." << s_swizzles[i % DE_LENGTH_OF_ARRAY(s_swizzles)][inSize - 1];
1287 
1288         if (cast)
1289             op << ")";
1290 
1291         op << ";\n";
1292     }
1293 
1294     // Operation accumulation variables.
1295     for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1296     {
1297         op << "\t" << precision << " " << typeName << " acc" << i << "a"
1298            << " = in" << i + 0 << ";\n";
1299         op << "\t" << precision << " " << typeName << " acc" << i << "b"
1300            << " = in" << i + 1 << ";\n";
1301     }
1302 
1303     // Loop, with expressions in it.
1304     op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n";
1305     op << "\t{\n";
1306     {
1307         const int unrollAmount = programID == PROGRAM_WITH_SMALLER_LOOP ?
1308                                      BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT :
1309                                      BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
1310         for (int unrollNdx = 0; unrollNdx < unrollAmount; unrollNdx++)
1311         {
1312             for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1313             {
1314                 if (i > 0 || unrollNdx > 0)
1315                     op << "\n";
1316                 op << "\t\tacc" << i << "a = acc" << i << "b " << m_op << " acc" << i << "a"
1317                    << ";\n";
1318                 op << "\t\tacc" << i << "b = acc" << i << "a " << m_op << " acc" << i << "b"
1319                    << ";\n";
1320             }
1321         }
1322     }
1323     op << "\t}\n";
1324     op << "\n";
1325 
1326     // Result variable (sum of accumulation variables).
1327     op << "\t" << precision << " " << typeName << " res =";
1328     for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1329         op << (i > 0 ? " " + m_op : "") << " acc" << i << "b";
1330     op << ";\n";
1331 
1332     // Convert to color.
1333     op << "\tmediump vec4 color = ";
1334     if (m_type == TYPE_FLOAT_VEC4)
1335         op << "res";
1336     else
1337     {
1338         int size = getDataTypeScalarSize(m_type);
1339         op << "vec4(res";
1340 
1341         for (int i = size; i < 4; i++)
1342             op << ", " << (i == 3 ? "1.0" : "0.0");
1343 
1344         op << ")";
1345     }
1346     op << ";\n";
1347     op << "\t" << (isVertexCase ? "v_color" : "o_color") << " = color;\n";
1348 
1349     if (isVertexCase)
1350     {
1351         vtx << "    gl_Position = a_position + u_zero*color;\n";
1352         frag << "    o_color = v_color;\n";
1353     }
1354     else
1355     {
1356         for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS + 1; i++)
1357             vtx << "    v_in" << i << " = a_in" << i << ";\n";
1358     }
1359 
1360     vtx << "}\n";
1361     frag << "}\n";
1362 
1363     {
1364         vector<AttribSpec> attributes;
1365         for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS + 1; i++)
1366             attributes.push_back(
1367                 AttribSpec(("a_in" + de::toString(i)).c_str(),
1368                            Vec4(2.0f, 2.0f, 2.0f, 1.0f).swizzle((i + 0) % 4, (i + 1) % 4, (i + 2) % 4, (i + 3) % 4),
1369                            Vec4(1.0f, 2.0f, 1.0f, 2.0f).swizzle((i + 0) % 4, (i + 1) % 4, (i + 2) % 4, (i + 3) % 4),
1370                            Vec4(2.0f, 1.0f, 2.0f, 2.0f).swizzle((i + 0) % 4, (i + 1) % 4, (i + 2) % 4, (i + 3) % 4),
1371                            Vec4(1.0f, 1.0f, 2.0f, 1.0f).swizzle((i + 0) % 4, (i + 1) % 4, (i + 2) % 4, (i + 3) % 4)));
1372 
1373         {
1374             string description = "This is the program with the ";
1375 
1376             description += programID == PROGRAM_WITH_SMALLER_LOOP ? "smaller" :
1377                            programID == PROGRAM_WITH_BIGGER_LOOP  ? "bigger" :
1378                                                                     DE_NULL;
1379 
1380             description += " loop.\n"
1381                            "Note: workload size for this program means the number of loop iterations.";
1382 
1383             return ProgramContext(vtx.str(), frag.str(), attributes, description);
1384         }
1385     }
1386 }
1387 
generateProgramData(void) const1388 vector<BinaryOpCase::ProgramContext> BinaryOpCase::generateProgramData(void) const
1389 {
1390     vector<ProgramContext> progData;
1391     for (int i = 0; i < PROGRAM_LAST; i++)
1392         progData.push_back(generateSingleProgramData((ProgramID)i));
1393     return progData;
1394 }
1395 
setGeneralUniforms(uint32_t program) const1396 void BinaryOpCase::setGeneralUniforms(uint32_t program) const
1397 {
1398     const glw::Functions &gl = m_renderCtx.getFunctions();
1399     gl.uniform1f(gl.getUniformLocation(program, "u_zero"), 0.0f);
1400 }
1401 
setWorkloadSizeUniform(uint32_t program,int numLoopIterations) const1402 void BinaryOpCase::setWorkloadSizeUniform(uint32_t program, int numLoopIterations) const
1403 {
1404     const glw::Functions &gl = m_renderCtx.getFunctions();
1405     gl.uniform1i(gl.getUniformLocation(program, "u_numLoopIterations"), numLoopIterations);
1406 }
1407 
computeSingleOperationTime(const vector<float> & perProgramOperationCosts) const1408 float BinaryOpCase::computeSingleOperationTime(const vector<float> &perProgramOperationCosts) const
1409 {
1410     DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST);
1411 
1412     const int baseNumOpsInsideLoop           = 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS;
1413     const int numOpsInsideLoopInSmallProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT;
1414     const int numOpsInsideLoopInBigProgram   = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
1415     DE_STATIC_ASSERT(numOpsInsideLoopInBigProgram > numOpsInsideLoopInSmallProgram);
1416     const int opDiff = numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram;
1417     const float programOperationCostDiff =
1418         perProgramOperationCosts[PROGRAM_WITH_BIGGER_LOOP] - perProgramOperationCosts[PROGRAM_WITH_SMALLER_LOOP];
1419 
1420     return programOperationCostDiff / (float)opDiff;
1421 }
1422 
logSingleOperationCalculationInfo(void) const1423 void BinaryOpCase::logSingleOperationCalculationInfo(void) const
1424 {
1425     const int baseNumOpsInsideLoop           = 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS;
1426     const int numOpsInsideLoopInSmallProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT;
1427     const int numOpsInsideLoopInBigProgram   = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
1428     const int opDiff                         = numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram;
1429     const char *const opName                 = m_op == "+" ? "addition" :
1430                                                m_op == "-" ? "subtraction" :
1431                                                m_op == "*" ? "multiplication" :
1432                                                m_op == "/" ? "division" :
1433                                                              DE_NULL;
1434     DE_ASSERT(opName != DE_NULL);
1435 
1436     m_testCtx.getLog()
1437         << TestLog::Message << "Note: the bigger program contains " << opDiff << " more " << opName
1438         << " operations in one loop iteration than the small program; "
1439         << "cost of one operation is calculated as (cost_of_bigger_workload - cost_of_smaller_workload) / " << opDiff
1440         << TestLog::EndMessage;
1441 }
1442 
1443 // Built-in function case.
1444 class FunctionCase : public OperatorPerformanceCase
1445 {
1446 public:
1447     enum
1448     {
1449         MAX_PARAMS = 3
1450     };
1451 
1452     FunctionCase(
1453         Context &context, const char *name, const char *description, const char *func, glu::DataType returnType,
1454         const glu::DataType paramTypes[MAX_PARAMS], const Vec4 &attribute,
1455         int modifyParamNdx, //!< Add a compile-time constant (2.0) to the parameter at this index. This is ignored if negative.
1456         bool useNearlyConstantINputs, //!< Function inputs shouldn't be much bigger than 'attribute'.
1457         glu::Precision precision, bool isVertex, const InitialCalibrationStorage &initialCalibration);
1458 
1459 protected:
1460     vector<ProgramContext> generateProgramData(void) const;
1461     void setGeneralUniforms(uint32_t program) const;
1462     void setWorkloadSizeUniform(uint32_t program, int numOperations) const;
1463     float computeSingleOperationTime(const vector<float> &perProgramOperationCosts) const;
1464     void logSingleOperationCalculationInfo(void) const;
1465 
1466 private:
1467     enum ProgramID
1468     {
1469         // \note 0-based sequential numbering is relevant, because these are also used as vector indices.
1470         // \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow.
1471         PROGRAM_WITH_FUNCTION_CALLS = 0,
1472         PROGRAM_WITHOUT_FUNCTION_CALLS,
1473 
1474         PROGRAM_LAST
1475     };
1476 
1477     //! Forms a "sum" expression from aExpr and bExpr; for booleans, this is "equal(a,b)", otherwise actual sum.
1478     static string sumExpr(const string &aExpr, const string &bExpr, glu::DataType type);
1479     //! Forms an expression used to increment an input value in the shader. If type is boolean, this is just
1480     //! baseExpr; otherwise, baseExpr is modified by multiplication or division by a loop index,
1481     //! to prevent simple compiler optimizations. See m_useNearlyConstantInputs for more explanation.
1482     static string incrementExpr(const string &baseExpr, glu::DataType type, bool divide);
1483 
1484     ProgramContext generateSingleProgramData(ProgramID) const;
1485 
1486     const string m_func;
1487     const glu::DataType m_returnType;
1488     glu::DataType m_paramTypes[MAX_PARAMS];
1489     // \note m_modifyParamNdx, if not negative, specifies the index of the parameter to which a
1490     //         compile-time constant (2.0) is added. This is a quick and dirty way to deal with
1491     //         functions like clamp or smoothstep that require that a certain parameter is
1492     //         greater than a certain other parameter.
1493     const int m_modifyParamNdx;
1494     // \note m_useNearlyConstantInputs determines whether the inputs given to the function
1495     //         should increase (w.r.t m_attribute) only by very small amounts. This is relevant
1496     //         for functions like asin, which requires its inputs to be in a specific range.
1497     //         In practice, this affects whether expressions used to increment the input
1498     //         variables use division instead of multiplication; normally, multiplication is used,
1499     //         but it's hard to keep the increments very small that way, and division shouldn't
1500     //         be the default, since for many functions (probably not asin, luckily), division
1501     //         is too heavy and dominates time-wise.
1502     const bool m_useNearlyConstantInputs;
1503     const Vec4 m_attribute;
1504     const glu::Precision m_precision;
1505 };
1506 
FunctionCase(Context & context,const char * name,const char * description,const char * func,glu::DataType returnType,const glu::DataType paramTypes[MAX_PARAMS],const Vec4 & attribute,int modifyParamNdx,bool useNearlyConstantInputs,glu::Precision precision,bool isVertex,const InitialCalibrationStorage & initialCalibration)1507 FunctionCase::FunctionCase(Context &context, const char *name, const char *description, const char *func,
1508                            glu::DataType returnType, const glu::DataType paramTypes[MAX_PARAMS], const Vec4 &attribute,
1509                            int modifyParamNdx, bool useNearlyConstantInputs, glu::Precision precision, bool isVertex,
1510                            const InitialCalibrationStorage &initialCalibration)
1511     : OperatorPerformanceCase(context.getTestContext(), context.getRenderContext(), name, description,
1512                               isVertex ? CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration)
1513     , m_func(func)
1514     , m_returnType(returnType)
1515     , m_modifyParamNdx(modifyParamNdx)
1516     , m_useNearlyConstantInputs(useNearlyConstantInputs)
1517     , m_attribute(attribute)
1518     , m_precision(precision)
1519 {
1520     for (int i = 0; i < MAX_PARAMS; i++)
1521         m_paramTypes[i] = paramTypes[i];
1522 }
1523 
// Combines two shader expressions into one "sum" expression of the given type:
//   - scalar bool:    "(a == b)"
//   - boolean vector: "equal(a, b)"
//   - anything else:  "(a + b)"
string FunctionCase::sumExpr(const string &aExpr, const string &bExpr, glu::DataType type)
{
    if (!glu::isDataTypeBoolOrBVec(type))
        return "(" + aExpr + " + " + bExpr + ")";

    if (type == glu::TYPE_BOOL)
        return "(" + aExpr + " == " + bExpr + ")";

    return "equal(" + aExpr + ", " + bExpr + ")";
}
1536 
// Builds the expression by which a shader variable is incremented in each loop iteration.
//
// Boolean types use baseExpr unchanged. Other types multiply (or, when divide is true,
// divide) baseExpr by the loop index plus one: "(i+1)" for integer types and
// "float(i+1)" for the rest.
string FunctionCase::incrementExpr(const string &baseExpr, glu::DataType type, bool divide)
{
    if (glu::isDataTypeBoolOrBVec(type))
        return baseExpr;

    const char operatorChar   = divide ? '/' : '*';
    const char *const operand = glu::isDataTypeIntOrIVec(type) ? "(i+1))" : "float(i+1))";

    return "(" + baseExpr + operatorChar + operand;
}
1545 
// Builds the vertex and fragment shader sources, the attribute specifications and the
// description for one of the two FunctionCase programs.
//
// Both programs share the same structure: per-calculation input variables are initialized
// from the a_in*/v_in* inputs, incremented inside a loop of u_numLoopIterations iterations,
// and summed together with the accumulated per-iteration "eval" values into the output color.
// The only difference is the "eval" value itself: PROGRAM_WITH_FUNCTION_CALLS evaluates
// m_func(...) on the inputs, PROGRAM_WITHOUT_FUNCTION_CALLS uses the constant <returnType>(1).
//
// The measured code is written to the vertex shader for CASETYPE_VERTEX cases and to the
// fragment shader otherwise; the other shader only passes data through.
generateSingleProgramData(ProgramID programID) const1546 FunctionCase::ProgramContext FunctionCase::generateSingleProgramData(ProgramID programID) const
1547 {
1548     const bool isVertexCase           = m_caseType == CASETYPE_VERTEX;
1549     const char *const precision       = glu::getPrecisionName(m_precision);
1550     const char *const returnTypeName  = getDataTypeName(m_returnType);
         // Boolean types take no precision qualifier, hence the "maybe" strings are empty for them.
1551     const string returnPrecisionMaybe = glu::isDataTypeBoolOrBVec(m_returnType) ? "" : string() + precision + " ";
1552     const char *inputPrecision        = DE_NULL;
1553     const bool isMatrixReturn         = isDataTypeMatrix(m_returnType);
1554     int numParams                     = 0;
1555     const char *paramTypeNames[MAX_PARAMS];
1556     string paramPrecisionsMaybe[MAX_PARAMS];
1557
1558     for (int i = 0; i < MAX_PARAMS; i++)
1559     {
1560         paramTypeNames[i]       = getDataTypeName(m_paramTypes[i]);
1561         paramPrecisionsMaybe[i] = glu::isDataTypeBoolOrBVec(m_paramTypes[i]) ? "" : string() + precision + " ";
1562
             // With lowp integer parameters the shader inputs are declared mediump instead of lowp.
1563         if (inputPrecision == DE_NULL && isDataTypeIntOrIVec(m_paramTypes[i]) && m_precision == glu::PRECISION_LOWP)
1564             inputPrecision = "mediump";
1565
             // numParams ends up as the index of the last valid parameter type plus one.
1566         if (m_paramTypes[i] != TYPE_INVALID)
1567             numParams = i + 1;
1568     }
1569
1570     DE_ASSERT(numParams > 0);
1571
1572     if (inputPrecision == DE_NULL)
1573         inputPrecision = precision;
1574
         // Calculation calcNdx reads its paramNdx-th input from attribute (calcNdx + paramNdx),
         // so the highest attribute index used is NUM_INDEPENDENT_CALCULATIONS + numParams - 2.
1575     int numAttributes = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS + numParams - 1;
1576     std::ostringstream vtx;
1577     std::ostringstream frag;
         // "op" is the shader that contains the measured code.
1578     std::ostringstream &op = isVertexCase ? vtx : frag;
1579
1580     vtx << "#version 300 es\n";
1581     frag << "#version 300 es\n"
1582          << "layout (location = 0) out mediump vec4 o_color;\n";
1583
1584     // Attributes.
1585     vtx << "in highp vec4 a_position;\n";
1586     for (int i = 0; i < numAttributes; i++)
1587         vtx << "in " << inputPrecision << " vec4 a_in" << i << ";\n";
1588
1589     if (isVertexCase)
1590     {
1591         vtx << "out mediump vec4 v_color;\n";
1592         frag << "in mediump vec4 v_color;\n";
1593     }
1594     else
1595     {
             // Fragment case: the vertex shader forwards every attribute to the fragment shader.
1596         for (int i = 0; i < numAttributes; i++)
1597         {
1598             vtx << "out " << inputPrecision << " vec4 v_in" << i << ";\n";
1599             frag << "in " << inputPrecision << " vec4 v_in" << i << ";\n";
1600         }
1601     }
1602
1603     op << "uniform mediump int u_numLoopIterations;\n";
1604     if (isVertexCase)
1605         op << "uniform mediump float u_zero;\n";
1606
         // One increment uniform per parameter: u_incA, u_incB, ...
1607     for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
1608         op << "uniform " << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " u_inc"
1609            << (char)('A' + paramNdx) << ";\n";
1610
1611     vtx << "\n";
1612     vtx << "void main()\n";
1613     vtx << "{\n";
1614
1615     if (!isVertexCase)
1616         vtx << "\tgl_Position = a_position;\n";
1617
1618     frag << "\n";
1619     frag << "void main()\n";
1620     frag << "{\n";
1621
1622     // Function call input and return value accumulation variables.
1623     {
1624         const char *const inPrefix = isVertexCase ? "a_" : "v_";
1625
1626         for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++)
1627         {
1628             for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
1629             {
1630                 const glu::DataType paramType = m_paramTypes[paramNdx];
                     // Inputs are vec4; any other parameter type is built with a constructor call.
1631                 const bool mustCast           = paramType != glu::TYPE_FLOAT_VEC4;
1632
1633                 op << "\t" << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " in" << calcNdx
1634                    << (char)('a' + paramNdx) << " = ";
1635
1636                 if (mustCast)
1637                     op << paramTypeNames[paramNdx] << "(";
1638
1639                 if (glu::isDataTypeMatrix(paramType))
1640                 {
                         // Matrix: the same input, swizzled down to the row count, is passed once per column.
                         // \note m_modifyParamNdx is not applied to matrix parameters.
1641                     static const char *const swizzles[3] = {"x", "xy", "xyz"};
1642                     const int numRows                    = glu::getDataTypeMatrixNumRows(paramType);
1643                     const int numCols                    = glu::getDataTypeMatrixNumColumns(paramType);
1644                     const string swizzle                 = numRows < 4 ? string() + "." + swizzles[numRows - 1] : "";
1645
1646                     for (int i = 0; i < numCols; i++)
1647                         op << (i > 0 ? ", " : "") << inPrefix << "in" << calcNdx + paramNdx << swizzle;
1648                 }
1649                 else
1650                 {
1651                     op << inPrefix << "in" << calcNdx + paramNdx;
1652
1653                     if (paramNdx == m_modifyParamNdx)
1654                     {
1655                         DE_ASSERT(glu::isDataTypeFloatOrVec(paramType));
1656                         op << " + 2.0";
1657                     }
1658                 }
1659
1660                 if (mustCast)
1661                     op << ")";
1662
1663                 op << ";\n";
1664             }
1665
                 // Accumulator for the results of this calculation, initialized to <returnType>(0).
1666             op << "\t" << returnPrecisionMaybe << returnTypeName << " res" << calcNdx << " = " << returnTypeName
1667                << "(0);\n";
1668         }
1669     }
1670
1671     // Loop with expressions in it.
1672     op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n";
1673     op << "\t{\n";
1674     for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++)
1675     {
1676         if (calcNdx > 0)
1677             op << "\n";
1678
1679         op << "\t\t{\n";
1680
             // Increment every input of this calculation; emitted identically in both programs.
1681         for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1682         {
1683             const string inputName = "in" + de::toString(calcNdx) + (char)('a' + inputNdx);
1684             const string incName   = string() + "u_inc" + (char)('A' + inputNdx);
1685             const string incExpr   = incrementExpr(incName, m_paramTypes[inputNdx], m_useNearlyConstantInputs);
1686
1687             op << "\t\t\t" << inputName << " = " << sumExpr(inputName, incExpr, m_paramTypes[inputNdx]) << ";\n";
1688         }
1689
1690         op << "\t\t\t" << returnPrecisionMaybe << returnTypeName << " eval" << calcNdx << " = ";
1691
             // This is the only part that differs between the two programs.
1692         if (programID == PROGRAM_WITH_FUNCTION_CALLS)
1693         {
1694             op << m_func << "(";
1695
1696             for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
1697             {
1698                 if (paramNdx > 0)
1699                     op << ", ";
1700
1701                 op << "in" << calcNdx << (char)('a' + paramNdx);
1702             }
1703
1704             op << ")";
1705         }
1706         else
1707         {
1708             DE_ASSERT(programID == PROGRAM_WITHOUT_FUNCTION_CALLS);
1709             op << returnTypeName << "(1)";
1710         }
1711
1712         op << ";\n";
1713
1714         {
1715             const string resName  = "res" + de::toString(calcNdx);
1716             const string evalName = "eval" + de::toString(calcNdx);
1717             const string incExpr  = incrementExpr(evalName, m_returnType, m_useNearlyConstantInputs);
1718
1719             op << "\t\t\tres" << calcNdx << " = " << sumExpr(resName, incExpr, m_returnType) << ";\n";
1720         }
1721
1722         op << "\t\t}\n";
1723     }
1724     op << "\t}\n";
1725     op << "\n";
1726
1727     // Result variables.
         // sumIn<X> combines the X-th input of all calculations with sumExpr().
1728     for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1729     {
1730         op << "\t" << paramPrecisionsMaybe[inputNdx] << paramTypeNames[inputNdx] << " sumIn" << (char)('A' + inputNdx)
1731            << " = ";
1732         {
1733             string expr = string() + "in0" + (char)('a' + inputNdx);
1734             for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1735                 expr =
1736                     sumExpr(expr, string() + "in" + de::toString(i) + (char)('a' + inputNdx), m_paramTypes[inputNdx]);
1737             op << expr;
1738         }
1739         op << ";\n";
1740     }
1741
         // sumRes combines the result accumulators of all calculations with sumExpr().
1742     op << "\t" << returnPrecisionMaybe << returnTypeName << " sumRes = ";
1743     {
1744         string expr = "res0";
1745         for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1746             expr = sumExpr(expr, "res" + de::toString(i), m_returnType);
1747         op << expr;
1748     }
1749     op << ";\n";
1750
1751     {
1752         glu::DataType finalResultDataType = glu::TYPE_LAST;
1753
1754         if (glu::isDataTypeMatrix(m_returnType))
1755         {
                 // Matrix return: all parameters are asserted to have the return type, so they are added directly.
1756             finalResultDataType = m_returnType;
1757
1758             op << "\t" << precision << " " << returnTypeName << " finalRes = ";
1759
1760             for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1761             {
1762                 DE_ASSERT(m_paramTypes[inputNdx] == m_returnType);
1763                 op << "sumIn" << (char)('A' + inputNdx) << " + ";
1764             }
1765             op << "sumRes;\n";
1766         }
1767         else
1768         {
                 // Non-matrix return: finalRes is a float type with as many components as the
                 // largest of the return and parameter types; every sum is converted to it.
1769             int numFinalResComponents = glu::getDataTypeScalarSize(m_returnType);
1770             for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1771                 numFinalResComponents =
1772                     de::max(numFinalResComponents, glu::getDataTypeScalarSize(m_paramTypes[inputNdx]));
1773
1774             finalResultDataType = getDataTypeFloatOrVec(numFinalResComponents);
1775
1776             {
1777                 const string finalResType = glu::getDataTypeName(finalResultDataType);
1778                 op << "\t" << precision << " " << finalResType << " finalRes = ";
1779                 for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1780                     op << finalResType << "(sumIn" << (char)('A' + inputNdx) << ") + ";
1781                 op << finalResType << "(sumRes);\n";
1782             }
1783         }
1784
1785         // Convert to color.
1786         op << "\tmediump vec4 color = ";
1787         if (finalResultDataType == TYPE_FLOAT_VEC4)
1788             op << "finalRes";
1789         else
1790         {
1791             int size = isMatrixReturn ? getDataTypeMatrixNumRows(finalResultDataType) :
1792                                         getDataTypeScalarSize(finalResultDataType);
1793
1794             op << "vec4(";
1795
                 // A matrix is reduced to a single column vector by summing its columns.
1796             if (isMatrixReturn)
1797             {
1798                 for (int i = 0; i < getDataTypeMatrixNumColumns(finalResultDataType); i++)
1799                 {
1800                     if (i > 0)
1801                         op << " + ";
1802                     op << "finalRes[" << i << "]";
1803                 }
1804             }
1805             else
1806                 op << "finalRes";
1807
                 // Pad up to four components: 0.0 for the middle components, 1.0 for the last one.
1808             for (int i = size; i < 4; i++)
1809                 op << ", " << (i == 3 ? "1.0" : "0.0");
1810
1811             op << ")";
1812         }
1813         op << ";\n";
1814         op << "\t" << (isVertexCase ? "v_color" : "o_color") << " = color;\n";
1815
1816         if (isVertexCase)
1817         {
                 // u_zero is set to 0.0 by setGeneralUniforms(), so this adds nothing to the position
                 // while making gl_Position depend on the computed color.
1818             vtx << "    gl_Position = a_position + u_zero*color;\n";
1819             frag << "    o_color = v_color;\n";
1820         }
1821         else
1822         {
1823             for (int i = 0; i < numAttributes; i++)
1824                 vtx << "    v_in" << i << " = a_in" << i << ";\n";
1825         }
1826
1827         vtx << "}\n";
1828         frag << "}\n";
1829     }
1830
1831     {
             // Each attribute gets four values, all swizzles of m_attribute; both the starting
             // component and the per-value rotation depend on the attribute index.
1832         vector<AttribSpec> attributes;
1833         for (int i = 0; i < numAttributes; i++)
1834             attributes.push_back(AttribSpec(("a_in" + de::toString(i)).c_str(),
1835                                             m_attribute.swizzle((i + 0) % 4, (i + 1) % 4, (i + 2) % 4, (i + 3) % 4),
1836                                             m_attribute.swizzle((i + 1) % 4, (i + 2) % 4, (i + 3) % 4, (i + 0) % 4),
1837                                             m_attribute.swizzle((i + 2) % 4, (i + 3) % 4, (i + 0) % 4, (i + 1) % 4),
1838                                             m_attribute.swizzle((i + 3) % 4, (i + 0) % 4, (i + 1) % 4, (i + 2) % 4)));
1839
1840         {
1841             string description = "This is the program ";
1842
                 // NOTE(review): for any other programID this would append DE_NULL to a std::string;
                 // presumably unreachable, since generateProgramData() only passes values below PROGRAM_LAST.
1843             description += programID == PROGRAM_WITHOUT_FUNCTION_CALLS ? "without" :
1844                            programID == PROGRAM_WITH_FUNCTION_CALLS    ? "with" :
1845                                                                          DE_NULL;
1846
1847             description += " '" + m_func +
1848                            "' function calls.\n"
1849                            "Note: workload size for this program means the number of loop iterations.";
1850
1851             return ProgramContext(vtx.str(), frag.str(), attributes, description);
1852         }
1853     }
1854 }
1855 
generateProgramData(void) const1856 vector<FunctionCase::ProgramContext> FunctionCase::generateProgramData(void) const
1857 {
1858     vector<ProgramContext> progData;
1859     for (int i = 0; i < PROGRAM_LAST; i++)
1860         progData.push_back(generateSingleProgramData((ProgramID)i));
1861     return progData;
1862 }
1863 
setGeneralUniforms(uint32_t program) const1864 void FunctionCase::setGeneralUniforms(uint32_t program) const
1865 {
1866     const glw::Functions &gl = m_renderCtx.getFunctions();
1867 
1868     gl.uniform1f(gl.getUniformLocation(program, "u_zero"), 0.0f);
1869 
1870     for (int paramNdx = 0; paramNdx < MAX_PARAMS; paramNdx++)
1871     {
1872         if (m_paramTypes[paramNdx] != glu::TYPE_INVALID)
1873         {
1874             const glu::DataType paramType = m_paramTypes[paramNdx];
1875             const int scalarSize          = glu::getDataTypeScalarSize(paramType);
1876             const int location = gl.getUniformLocation(program, (string() + "u_inc" + (char)('A' + paramNdx)).c_str());
1877 
1878             if (glu::isDataTypeFloatOrVec(paramType))
1879             {
1880                 float values[4];
1881                 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1882                     values[i] = (float)paramNdx * 0.01f + (float)i * 0.001f; // Arbitrary small values.
1883                 uniformNfv(gl, scalarSize, location, 1, &values[0]);
1884             }
1885             else if (glu::isDataTypeIntOrIVec(paramType))
1886             {
1887                 int values[4];
1888                 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1889                     values[i] = paramNdx * 100 + i; // Arbitrary values.
1890                 uniformNiv(gl, scalarSize, location, 1, &values[0]);
1891             }
1892             else if (glu::isDataTypeBoolOrBVec(paramType))
1893             {
1894                 int values[4];
1895                 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1896                     values[i] = (paramNdx >> i) & 1; // Arbitrary values.
1897                 uniformNiv(gl, scalarSize, location, 1, &values[0]);
1898             }
1899             else if (glu::isDataTypeMatrix(paramType))
1900             {
1901                 const int size = glu::getDataTypeMatrixNumRows(paramType);
1902                 DE_ASSERT(size == glu::getDataTypeMatrixNumColumns(paramType));
1903                 float values[4 * 4];
1904                 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1905                     values[i] = (float)paramNdx * 0.01f + (float)i * 0.001f; // Arbitrary values.
1906                 uniformMatrixNfv(gl, size, location, 1, &values[0]);
1907             }
1908             else
1909                 DE_ASSERT(false);
1910         }
1911     }
1912 }
1913 
setWorkloadSizeUniform(uint32_t program,int numLoopIterations) const1914 void FunctionCase::setWorkloadSizeUniform(uint32_t program, int numLoopIterations) const
1915 {
1916     const glw::Functions &gl = m_renderCtx.getFunctions();
1917     const int loc            = gl.getUniformLocation(program, "u_numLoopIterations");
1918 
1919     gl.uniform1i(loc, numLoopIterations);
1920 }
1921 
computeSingleOperationTime(const vector<float> & perProgramOperationCosts) const1922 float FunctionCase::computeSingleOperationTime(const vector<float> &perProgramOperationCosts) const
1923 {
1924     DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST);
1925     const int numFunctionCalls           = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS;
1926     const float programOperationCostDiff = perProgramOperationCosts[PROGRAM_WITH_FUNCTION_CALLS] -
1927                                            perProgramOperationCosts[PROGRAM_WITHOUT_FUNCTION_CALLS];
1928 
1929     return programOperationCostDiff / (float)numFunctionCalls;
1930 }
1931 
logSingleOperationCalculationInfo(void) const1932 void FunctionCase::logSingleOperationCalculationInfo(void) const
1933 {
1934     const int numFunctionCalls = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS;
1935 
1936     m_testCtx.getLog() << TestLog::Message << "Note: program " << (int)PROGRAM_WITH_FUNCTION_CALLS << " contains "
1937                        << numFunctionCalls << " calls to '" << m_func << "' in one loop iteration; "
1938                        << "cost of one operation is calculated as "
1939                        << "(cost_of_workload_with_calls - cost_of_workload_without_calls) / " << numFunctionCalls
1940                        << TestLog::EndMessage;
1941 }
1942 
1943 } // namespace
1944 
ShaderOperatorTests(Context & context)1945 ShaderOperatorTests::ShaderOperatorTests(Context &context)
1946     : TestCaseGroup(context, "operator", "Operator Performance Tests")
1947 {
1948 }
1949 
~ShaderOperatorTests(void)1950 ShaderOperatorTests::~ShaderOperatorTests(void)
1951 {
1952 }
1953 
init(void)1954 void ShaderOperatorTests::init(void)
1955 {
1956     // Binary operator cases
1957 
1958     static const DataType binaryOpTypes[] = {
1959         TYPE_FLOAT, TYPE_FLOAT_VEC2, TYPE_FLOAT_VEC3, TYPE_FLOAT_VEC4,
1960         TYPE_INT,   TYPE_INT_VEC2,   TYPE_INT_VEC3,   TYPE_INT_VEC4,
1961     };
1962     static const Precision precisions[] = {PRECISION_LOWP, PRECISION_MEDIUMP, PRECISION_HIGHP};
1963     static const struct
1964     {
1965         const char *name;
1966         const char *op;
1967         bool swizzle;
1968     } binaryOps[] = {{"add", "+", false}, {"sub", "-", true}, {"mul", "*", false}, {"div", "/", true}};
1969 
1970     tcu::TestCaseGroup *const binaryOpsGroup =
1971         new tcu::TestCaseGroup(m_testCtx, "binary_operator", "Binary Operator Performance Tests");
1972     addChild(binaryOpsGroup);
1973 
1974     for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(binaryOps); opNdx++)
1975     {
1976         tcu::TestCaseGroup *const opGroup = new tcu::TestCaseGroup(m_testCtx, binaryOps[opNdx].name, "");
1977         binaryOpsGroup->addChild(opGroup);
1978 
1979         for (int isFrag = 0; isFrag <= 1; isFrag++)
1980         {
1981             const BinaryOpCase::InitialCalibrationStorage shaderGroupCalibrationStorage(
1982                 new BinaryOpCase::InitialCalibration);
1983             const bool isVertex = isFrag == 0;
1984             tcu::TestCaseGroup *const shaderGroup =
1985                 new tcu::TestCaseGroup(m_testCtx, isVertex ? "vertex" : "fragment", "");
1986             opGroup->addChild(shaderGroup);
1987 
1988             for (int typeNdx = 0; typeNdx < DE_LENGTH_OF_ARRAY(binaryOpTypes); typeNdx++)
1989             {
1990                 for (int precNdx = 0; precNdx < DE_LENGTH_OF_ARRAY(precisions); precNdx++)
1991                 {
1992                     const DataType type       = binaryOpTypes[typeNdx];
1993                     const Precision precision = precisions[precNdx];
1994                     const char *const op      = binaryOps[opNdx].op;
1995                     const bool useSwizzle     = binaryOps[opNdx].swizzle;
1996                     std::ostringstream name;
1997 
1998                     name << getPrecisionName(precision) << "_" << getDataTypeName(type);
1999 
2000                     shaderGroup->addChild(new BinaryOpCase(m_context, name.str().c_str(), "", op, type, precision,
2001                                                            useSwizzle, isVertex, shaderGroupCalibrationStorage));
2002                 }
2003             }
2004         }
2005     }
2006 
2007     // Built-in function cases.
2008 
2009     // Non-specific (i.e. includes gentypes) parameter types for the functions.
2010     enum ValueType
2011     {
2012         VALUE_NONE          = 0,
2013         VALUE_FLOAT         = (1 << 0),  // float scalar
2014         VALUE_FLOAT_VEC     = (1 << 1),  // float vector
2015         VALUE_FLOAT_VEC34   = (1 << 2),  // float vector of size 3 or 4
2016         VALUE_FLOAT_GENTYPE = (1 << 3),  // float scalar/vector
2017         VALUE_VEC3          = (1 << 4),  // vec3 only
2018         VALUE_VEC4          = (1 << 5),  // vec4 only
2019         VALUE_MATRIX        = (1 << 6),  // matrix
2020         VALUE_BOOL          = (1 << 7),  // boolean scalar
2021         VALUE_BOOL_VEC      = (1 << 8),  // boolean vector
2022         VALUE_BOOL_VEC4     = (1 << 9),  // bvec4 only
2023         VALUE_BOOL_GENTYPE  = (1 << 10), // boolean scalar/vector
2024         VALUE_INT           = (1 << 11), // int scalar
2025         VALUE_INT_VEC       = (1 << 12), // int vector
2026         VALUE_INT_VEC4      = (1 << 13), // ivec4 only
2027         VALUE_INT_GENTYPE   = (1 << 14), // int scalar/vector
2028 
2029         // Shorthands.
2030         N   = VALUE_NONE,
2031         F   = VALUE_FLOAT,
2032         FV  = VALUE_FLOAT_VEC,
2033         VL  = VALUE_FLOAT_VEC34, // L for "large"
2034         GT  = VALUE_FLOAT_GENTYPE,
2035         V3  = VALUE_VEC3,
2036         V4  = VALUE_VEC4,
2037         M   = VALUE_MATRIX,
2038         B   = VALUE_BOOL,
2039         BV  = VALUE_BOOL_VEC,
2040         B4  = VALUE_BOOL_VEC4,
2041         BGT = VALUE_BOOL_GENTYPE,
2042         I   = VALUE_INT,
2043         IV  = VALUE_INT_VEC,
2044         I4  = VALUE_INT_VEC4,
2045         IGT = VALUE_INT_GENTYPE,
2046 
2047         VALUE_ANY_FLOAT =
2048             VALUE_FLOAT | VALUE_FLOAT_VEC | VALUE_FLOAT_GENTYPE | VALUE_VEC3 | VALUE_VEC4 | VALUE_FLOAT_VEC34,
2049         VALUE_ANY_INT  = VALUE_INT | VALUE_INT_VEC | VALUE_INT_GENTYPE | VALUE_INT_VEC4,
2050         VALUE_ANY_BOOL = VALUE_BOOL | VALUE_BOOL_VEC | VALUE_BOOL_GENTYPE | VALUE_BOOL_VEC4,
2051 
2052         VALUE_ANY_GENTYPE = VALUE_FLOAT_VEC | VALUE_FLOAT_GENTYPE | VALUE_FLOAT_VEC34 | VALUE_BOOL_VEC |
2053                             VALUE_BOOL_GENTYPE | VALUE_INT_VEC | VALUE_INT_GENTYPE | VALUE_MATRIX
2054     };
2055     enum PrecisionMask
2056     {
2057         PRECMASK_NA      = 0, //!< Precision not applicable (booleans)
2058         PRECMASK_LOWP    = (1 << PRECISION_LOWP),
2059         PRECMASK_MEDIUMP = (1 << PRECISION_MEDIUMP),
2060         PRECMASK_HIGHP   = (1 << PRECISION_HIGHP),
2061 
2062         PRECMASK_MEDIUMP_HIGHP = (1 << PRECISION_MEDIUMP) | (1 << PRECISION_HIGHP),
2063         PRECMASK_ALL           = (1 << PRECISION_LOWP) | (1 << PRECISION_MEDIUMP) | (1 << PRECISION_HIGHP)
2064     };
2065 
2066     static const DataType floatTypes[]  = {TYPE_FLOAT, TYPE_FLOAT_VEC2, TYPE_FLOAT_VEC3, TYPE_FLOAT_VEC4};
2067     static const DataType intTypes[]    = {TYPE_INT, TYPE_INT_VEC2, TYPE_INT_VEC3, TYPE_INT_VEC4};
2068     static const DataType boolTypes[]   = {TYPE_BOOL, TYPE_BOOL_VEC2, TYPE_BOOL_VEC3, TYPE_BOOL_VEC4};
2069     static const DataType matrixTypes[] = {TYPE_FLOAT_MAT2, TYPE_FLOAT_MAT3, TYPE_FLOAT_MAT4};
2070 
2071     tcu::TestCaseGroup *const angleAndTrigonometryGroup = new tcu::TestCaseGroup(
2072         m_testCtx, "angle_and_trigonometry", "Built-In Angle and Trigonometry Function Performance Tests");
2073     tcu::TestCaseGroup *const exponentialGroup =
2074         new tcu::TestCaseGroup(m_testCtx, "exponential", "Built-In Exponential Function Performance Tests");
2075     tcu::TestCaseGroup *const commonFunctionsGroup =
2076         new tcu::TestCaseGroup(m_testCtx, "common_functions", "Built-In Common Function Performance Tests");
2077     tcu::TestCaseGroup *const geometricFunctionsGroup =
2078         new tcu::TestCaseGroup(m_testCtx, "geometric", "Built-In Geometric Function Performance Tests");
2079     tcu::TestCaseGroup *const matrixFunctionsGroup =
2080         new tcu::TestCaseGroup(m_testCtx, "matrix", "Built-In Matrix Function Performance Tests");
2081     tcu::TestCaseGroup *const floatCompareGroup = new tcu::TestCaseGroup(
2082         m_testCtx, "float_compare", "Built-In Floating Point Comparison Function Performance Tests");
2083     tcu::TestCaseGroup *const intCompareGroup =
2084         new tcu::TestCaseGroup(m_testCtx, "int_compare", "Built-In Integer Comparison Function Performance Tests");
2085     tcu::TestCaseGroup *const boolCompareGroup =
2086         new tcu::TestCaseGroup(m_testCtx, "bool_compare", "Built-In Boolean Comparison Function Performance Tests");
2087 
2088     addChild(angleAndTrigonometryGroup);
2089     addChild(exponentialGroup);
2090     addChild(commonFunctionsGroup);
2091     addChild(geometricFunctionsGroup);
2092     addChild(matrixFunctionsGroup);
2093     addChild(floatCompareGroup);
2094     addChild(intCompareGroup);
2095     addChild(boolCompareGroup);
2096 
2097     // Some attributes to be used as parameters for the functions.
2098     const Vec4 attrPos    = Vec4(2.3f, 1.9f, 0.8f, 0.7f);
2099     const Vec4 attrNegPos = Vec4(-1.3f, 2.5f, -3.5f, 4.3f);
2100     const Vec4 attrSmall  = Vec4(-0.9f, 0.8f, -0.4f, 0.2f);
2101     const Vec4 attrBig    = Vec4(1.3f, 2.4f, 3.0f, 4.0f);
2102 
2103     // \todo The following functions and variants are missing, and should be added in the future:
2104     //         - modf (has an output parameter, not currently handled by test code)
2105     //         - functions with uint/uvec* return or parameter types
2106     //         - non-matrix <-> matrix functions (outerProduct etc.)
2107     // \note Remember to update test spec when these are added.
2108 
2109     // Function name, return type and parameter type information; also, what attribute should be used in the test.
2110     // \note Different versions of the same function (i.e. with the same group name) can be defined by putting them successively in this array.
2111     // \note In order to reduce case count and thus total execution time, we don't test all input type combinations for every function.
2112     static const struct
2113     {
2114         tcu::TestCaseGroup *parentGroup;
2115         const char *groupName;
2116         const char *func;
2117         const ValueType types[FunctionCase::MAX_PARAMS + 1]; // Return type and parameter types, in that order.
2118         const Vec4 &attribute;
2119         int modifyParamNdx;
2120         bool useNearlyConstantInputs;
2121         bool booleanCase;
2122         PrecisionMask precMask;
2123     } functionCaseGroups[] = {
2124         {angleAndTrigonometryGroup, "radians", "radians", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2125         {angleAndTrigonometryGroup, "degrees", "degrees", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2126         {angleAndTrigonometryGroup, "sin", "sin", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2127         {angleAndTrigonometryGroup, "cos", "cos", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2128         {angleAndTrigonometryGroup, "tan", "tan", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2129         {angleAndTrigonometryGroup, "asin", "asin", {F, F, N, N}, attrSmall, -1, true, false, PRECMASK_ALL},
2130         {angleAndTrigonometryGroup, "acos", "acos", {F, F, N, N}, attrSmall, -1, true, false, PRECMASK_ALL},
2131         {angleAndTrigonometryGroup, "atan2", "atan", {F, F, F, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2132         {angleAndTrigonometryGroup, "atan", "atan", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2133         {angleAndTrigonometryGroup, "sinh", "sinh", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2134         {angleAndTrigonometryGroup, "cosh", "cosh", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2135         {angleAndTrigonometryGroup, "tanh", "tanh", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2136         {angleAndTrigonometryGroup, "asinh", "asinh", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2137         {angleAndTrigonometryGroup, "acosh", "acosh", {F, F, N, N}, attrBig, -1, false, false, PRECMASK_ALL},
2138         {angleAndTrigonometryGroup, "atanh", "atanh", {F, F, N, N}, attrSmall, -1, true, false, PRECMASK_ALL},
2139 
2140         {exponentialGroup, "pow", "pow", {F, F, F, N}, attrPos, -1, false, false, PRECMASK_ALL},
2141         {exponentialGroup, "exp", "exp", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2142         {exponentialGroup, "log", "log", {F, F, N, N}, attrPos, -1, false, false, PRECMASK_ALL},
2143         {exponentialGroup, "exp2", "exp2", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2144         {exponentialGroup, "log2", "log2", {F, F, N, N}, attrPos, -1, false, false, PRECMASK_ALL},
2145         {exponentialGroup, "sqrt", "sqrt", {F, F, N, N}, attrPos, -1, false, false, PRECMASK_ALL},
2146         {exponentialGroup, "inversesqrt", "inversesqrt", {F, F, N, N}, attrPos, -1, false, false, PRECMASK_ALL},
2147 
2148         {commonFunctionsGroup, "abs", "abs", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2149         {commonFunctionsGroup, "abs", "abs", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2150         {commonFunctionsGroup, "sign", "sign", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2151         {commonFunctionsGroup, "sign", "sign", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2152         {commonFunctionsGroup, "floor", "floor", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2153         {commonFunctionsGroup, "floor", "floor", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2154         {commonFunctionsGroup, "trunc", "trunc", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2155         {commonFunctionsGroup, "trunc", "trunc", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2156         {commonFunctionsGroup, "round", "round", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2157         {commonFunctionsGroup, "round", "round", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2158         {commonFunctionsGroup,
2159          "roundEven",
2160          "roundEven",
2161          {F, F, N, N},
2162          attrNegPos,
2163          -1,
2164          false,
2165          false,
2166          PRECMASK_MEDIUMP_HIGHP},
2167         {commonFunctionsGroup, "roundEven", "roundEven", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2168         {commonFunctionsGroup, "ceil", "ceil", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2169         {commonFunctionsGroup, "ceil", "ceil", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2170         {commonFunctionsGroup, "fract", "fract", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2171         {commonFunctionsGroup, "fract", "fract", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2172         {commonFunctionsGroup, "mod", "mod", {GT, GT, GT, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2173         {commonFunctionsGroup, "min", "min", {F, F, F, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2174         {commonFunctionsGroup, "min", "min", {V4, V4, V4, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2175         {commonFunctionsGroup, "max", "max", {F, F, F, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2176         {commonFunctionsGroup, "max", "max", {V4, V4, V4, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2177         {commonFunctionsGroup, "clamp", "clamp", {F, F, F, F}, attrSmall, 2, false, false, PRECMASK_MEDIUMP_HIGHP},
2178         {commonFunctionsGroup, "clamp", "clamp", {V4, V4, V4, V4}, attrSmall, 2, false, false, PRECMASK_ALL},
2179         {commonFunctionsGroup, "mix", "mix", {F, F, F, F}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2180         {commonFunctionsGroup, "mix", "mix", {V4, V4, V4, V4}, attrNegPos, -1, false, false, PRECMASK_ALL},
2181         {commonFunctionsGroup, "mix", "mix", {F, F, F, B}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2182         {commonFunctionsGroup, "mix", "mix", {V4, V4, V4, B4}, attrNegPos, -1, false, false, PRECMASK_ALL},
2183         {commonFunctionsGroup, "step", "step", {F, F, F, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2184         {commonFunctionsGroup, "step", "step", {V4, V4, V4, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2185         {commonFunctionsGroup,
2186          "smoothstep",
2187          "smoothstep",
2188          {F, F, F, F},
2189          attrSmall,
2190          1,
2191          false,
2192          false,
2193          PRECMASK_MEDIUMP_HIGHP},
2194         {commonFunctionsGroup, "smoothstep", "smoothstep", {V4, V4, V4, V4}, attrSmall, 1, false, false, PRECMASK_ALL},
2195         {commonFunctionsGroup, "isnan", "isnan", {B, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2196         {commonFunctionsGroup, "isnan", "isnan", {B4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2197         {commonFunctionsGroup, "isinf", "isinf", {B, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2198         {commonFunctionsGroup, "isinf", "isinf", {B4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2199         {commonFunctionsGroup,
2200          "floatBitsToInt",
2201          "floatBitsToInt",
2202          {I, F, N, N},
2203          attrNegPos,
2204          -1,
2205          false,
2206          false,
2207          PRECMASK_MEDIUMP_HIGHP},
2208         {commonFunctionsGroup,
2209          "floatBitsToInt",
2210          "floatBitsToInt",
2211          {I4, V4, N, N},
2212          attrNegPos,
2213          -1,
2214          false,
2215          false,
2216          PRECMASK_ALL},
2217         {commonFunctionsGroup,
2218          "intBitsToFloat",
2219          "intBitsToFloat",
2220          {F, I, N, N},
2221          attrNegPos,
2222          -1,
2223          false,
2224          false,
2225          PRECMASK_MEDIUMP_HIGHP},
2226         {commonFunctionsGroup,
2227          "intBitsToFloat",
2228          "intBitsToFloat",
2229          {V4, I4, N, N},
2230          attrNegPos,
2231          -1,
2232          false,
2233          false,
2234          PRECMASK_ALL},
2235 
2236         {geometricFunctionsGroup, "length", "length", {F, VL, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2237         {geometricFunctionsGroup, "distance", "distance", {F, VL, VL, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2238         {geometricFunctionsGroup, "dot", "dot", {F, VL, VL, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2239         {geometricFunctionsGroup, "cross", "cross", {V3, V3, V3, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2240         {geometricFunctionsGroup, "normalize", "normalize", {VL, VL, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2241         {geometricFunctionsGroup,
2242          "faceforward",
2243          "faceforward",
2244          {VL, VL, VL, VL},
2245          attrNegPos,
2246          -1,
2247          false,
2248          false,
2249          PRECMASK_ALL},
2250         {geometricFunctionsGroup, "reflect", "reflect", {VL, VL, VL, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2251         {geometricFunctionsGroup, "refract", "refract", {VL, VL, VL, F}, attrNegPos, -1, false, false, PRECMASK_ALL},
2252 
2253         {matrixFunctionsGroup,
2254          "matrixCompMult",
2255          "matrixCompMult",
2256          {M, M, M, N},
2257          attrNegPos,
2258          -1,
2259          false,
2260          false,
2261          PRECMASK_ALL},
2262         {matrixFunctionsGroup, "transpose", "transpose", {M, M, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2263         {matrixFunctionsGroup, "inverse", "inverse", {M, M, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2264 
2265         {floatCompareGroup, "lessThan", "lessThan", {BV, FV, FV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2266         {floatCompareGroup,
2267          "lessThanEqual",
2268          "lessThanEqual",
2269          {BV, FV, FV, N},
2270          attrNegPos,
2271          -1,
2272          false,
2273          false,
2274          PRECMASK_ALL},
2275         {floatCompareGroup, "greaterThan", "greaterThan", {BV, FV, FV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2276         {floatCompareGroup,
2277          "greaterThanEqual",
2278          "greaterThanEqual",
2279          {BV, FV, FV, N},
2280          attrNegPos,
2281          -1,
2282          false,
2283          false,
2284          PRECMASK_ALL},
2285         {floatCompareGroup, "equal", "equal", {BV, FV, FV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2286         {floatCompareGroup, "notEqual", "notEqual", {BV, FV, FV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2287 
2288         {intCompareGroup, "lessThan", "lessThan", {BV, IV, IV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2289         {intCompareGroup,
2290          "lessThanEqual",
2291          "lessThanEqual",
2292          {BV, IV, IV, N},
2293          attrNegPos,
2294          -1,
2295          false,
2296          false,
2297          PRECMASK_ALL},
2298         {intCompareGroup, "greaterThan", "greaterThan", {BV, IV, IV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2299         {intCompareGroup,
2300          "greaterThanEqual",
2301          "greaterThanEqual",
2302          {BV, IV, IV, N},
2303          attrNegPos,
2304          -1,
2305          false,
2306          false,
2307          PRECMASK_ALL},
2308         {intCompareGroup, "equal", "equal", {BV, IV, IV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2309         {intCompareGroup, "notEqual", "notEqual", {BV, IV, IV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2310 
2311         {boolCompareGroup, "equal", "equal", {BV, BV, BV, N}, attrNegPos, -1, false, true, PRECMASK_MEDIUMP},
2312         {boolCompareGroup, "notEqual", "notEqual", {BV, BV, BV, N}, attrNegPos, -1, false, true, PRECMASK_MEDIUMP},
2313         {boolCompareGroup, "any", "any", {B, BV, N, N}, attrNegPos, -1, false, true, PRECMASK_MEDIUMP},
2314         {boolCompareGroup, "all", "all", {B, BV, N, N}, attrNegPos, -1, false, true, PRECMASK_MEDIUMP},
2315         {boolCompareGroup, "not", "not", {BV, BV, N, N}, attrNegPos, -1, false, true, PRECMASK_MEDIUMP}};
2316 
2317     // vertexSubGroup and fragmentSubGroup are the groups where the various vertex/fragment cases of a single function are added.
2318     // \note These are defined here so that different versions (different entries in the functionCaseGroups array) of the same function can be put in the same group.
2319     tcu::TestCaseGroup *vertexSubGroup   = DE_NULL;
2320     tcu::TestCaseGroup *fragmentSubGroup = DE_NULL;
2321     FunctionCase::InitialCalibrationStorage vertexSubGroupCalibrationStorage;
2322     FunctionCase::InitialCalibrationStorage fragmentSubGroupCalibrationStorage;
2323     for (int funcNdx = 0; funcNdx < DE_LENGTH_OF_ARRAY(functionCaseGroups); funcNdx++)
2324     {
2325         tcu::TestCaseGroup *const parentGroup = functionCaseGroups[funcNdx].parentGroup;
2326         const char *const groupName           = functionCaseGroups[funcNdx].groupName;
2327         const char *const groupFunc           = functionCaseGroups[funcNdx].func;
2328         const ValueType *const funcTypes      = functionCaseGroups[funcNdx].types;
2329         const Vec4 &groupAttribute            = functionCaseGroups[funcNdx].attribute;
2330         const int modifyParamNdx              = functionCaseGroups[funcNdx].modifyParamNdx;
2331         const bool useNearlyConstantInputs    = functionCaseGroups[funcNdx].useNearlyConstantInputs;
2332         const bool booleanCase                = functionCaseGroups[funcNdx].booleanCase;
2333         const PrecisionMask precMask          = functionCaseGroups[funcNdx].precMask;
2334 
2335         // If this is a new function and not just a different version of the previously defined function, create a new group.
2336         if (funcNdx == 0 || parentGroup != functionCaseGroups[funcNdx - 1].parentGroup ||
2337             string(groupName) != functionCaseGroups[funcNdx - 1].groupName)
2338         {
2339             tcu::TestCaseGroup *const funcGroup = new tcu::TestCaseGroup(m_testCtx, groupName, "");
2340             functionCaseGroups[funcNdx].parentGroup->addChild(funcGroup);
2341 
2342             vertexSubGroup   = new tcu::TestCaseGroup(m_testCtx, "vertex", "");
2343             fragmentSubGroup = new tcu::TestCaseGroup(m_testCtx, "fragment", "");
2344 
2345             funcGroup->addChild(vertexSubGroup);
2346             funcGroup->addChild(fragmentSubGroup);
2347 
2348             vertexSubGroupCalibrationStorage =
2349                 FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration);
2350             fragmentSubGroupCalibrationStorage =
2351                 FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration);
2352         }
2353 
2354         DE_ASSERT(vertexSubGroup != DE_NULL);
2355         DE_ASSERT(fragmentSubGroup != DE_NULL);
2356 
2357         // Find the type size range of parameters (e.g. from 2 to 4 in case of vectors).
2358         int genTypeFirstSize = 1;
2359         int genTypeLastSize  = 1;
2360 
2361         // Find the first return value or parameter with a gentype (if any) and set sizes accordingly.
2362         // \note Assumes all gentypes in one signature share the same size range (the first one found decides it), e.g. no "genType func (vec param)"
2363         for (int i = 0; i < FunctionCase::MAX_PARAMS + 1 && genTypeLastSize == 1; i++)
2364         {
2365             switch (funcTypes[i])
2366             {
2367             case VALUE_FLOAT_VEC:
2368             case VALUE_BOOL_VEC:
2369             case VALUE_INT_VEC: // \note Fall-through.
2370                 genTypeFirstSize = 2;
2371                 genTypeLastSize  = 4;
2372                 break;
2373             case VALUE_FLOAT_VEC34:
2374                 genTypeFirstSize = 3;
2375                 genTypeLastSize  = 4;
2376                 break;
2377             case VALUE_FLOAT_GENTYPE:
2378             case VALUE_BOOL_GENTYPE:
2379             case VALUE_INT_GENTYPE: // \note Fall-through.
2380                 genTypeFirstSize = 1;
2381                 genTypeLastSize  = 4;
2382                 break;
2383             case VALUE_MATRIX:
2384                 genTypeFirstSize = 2;
2385                 genTypeLastSize  = 4;
2386                 break;
2387             // If none of the above, keep looping.
2388             default:
2389                 break;
2390             }
2391         }
2392 
2393         // Create a case for each possible size of the gentype.
2394         for (int curSize = genTypeFirstSize; curSize <= genTypeLastSize; curSize++)
2395         {
2396             // Determine specific types for return value and the parameters, according to curSize. Non-gentypes not affected by curSize.
2397             DataType types[FunctionCase::MAX_PARAMS + 1];
2398             for (int i = 0; i < FunctionCase::MAX_PARAMS + 1; i++)
2399             {
2400                 if (funcTypes[i] == VALUE_NONE)
2401                     types[i] = TYPE_INVALID;
2402                 else
2403                 {
2404                     int isFloat      = funcTypes[i] & VALUE_ANY_FLOAT;
2405                     int isBool       = funcTypes[i] & VALUE_ANY_BOOL;
2406                     int isInt        = funcTypes[i] & VALUE_ANY_INT;
2407                     int isMat        = funcTypes[i] == VALUE_MATRIX;
2408                     int inSize       = (funcTypes[i] & VALUE_ANY_GENTYPE) ? curSize :
2409                                        funcTypes[i] == VALUE_VEC3         ? 3 :
2410                                        funcTypes[i] == VALUE_VEC4         ? 4 :
2411                                        funcTypes[i] == VALUE_BOOL_VEC4    ? 4 :
2412                                        funcTypes[i] == VALUE_INT_VEC4     ? 4 :
2413                                                                             1;
2414                     int typeArrayNdx = isMat ? inSize - 2 : inSize - 1; // \note No matrices of size 1.
2415 
2416                     types[i] = isFloat ? floatTypes[typeArrayNdx] :
2417                                isBool  ? boolTypes[typeArrayNdx] :
2418                                isInt   ? intTypes[typeArrayNdx] :
2419                                isMat   ? matrixTypes[typeArrayNdx] :
2420                                          TYPE_LAST;
2421                 }
2422 
2423                 DE_ASSERT(types[i] != TYPE_LAST);
2424             }
2425 
2426             // Array for just the parameter types.
2427             DataType paramTypes[FunctionCase::MAX_PARAMS];
2428             for (int i = 0; i < FunctionCase::MAX_PARAMS; i++)
2429                 paramTypes[i] = types[i + 1];
2430 
2431             for (int prec = (int)PRECISION_LOWP; prec < (int)PRECISION_LAST; prec++)
2432             {
2433                 if ((precMask & (1 << prec)) == 0)
2434                     continue;
2435 
2436                 const string precisionPrefix = booleanCase ? "" : (string(getPrecisionName((Precision)prec)) + "_");
2437                 std::ostringstream caseName;
2438 
2439                 caseName << precisionPrefix;
2440 
2441                 // Write the name of each distinct parameter data type into the test case name.
2442                 for (int i = 1; i < FunctionCase::MAX_PARAMS + 1 && types[i] != TYPE_INVALID; i++)
2443                 {
2444                     if (i == 1 || types[i] != types[i - 1])
2445                     {
2446                         if (i > 1)
2447                             caseName << "_";
2448 
2449                         caseName << getDataTypeName(types[i]);
2450                     }
2451                 }
2452 
2453                 for (int fragI = 0; fragI <= 1; fragI++)
2454                 {
2455                     const bool vert                 = fragI == 0;
2456                     tcu::TestCaseGroup *const group = vert ? vertexSubGroup : fragmentSubGroup;
2457                     group->addChild(
2458                         new FunctionCase(m_context, caseName.str().c_str(), "", groupFunc, types[0], paramTypes,
2459                                          groupAttribute, modifyParamNdx, useNearlyConstantInputs, (Precision)prec, vert,
2460                                          vert ? vertexSubGroupCalibrationStorage : fragmentSubGroupCalibrationStorage));
2461                 }
2462             }
2463         }
2464     }
2465 }
2466 
2467 } // namespace Performance
2468 } // namespace gles3
2469 } // namespace deqp
2470