/*-------------------------------------------------------------------------
 * drawElements Quality Program OpenGL ES 3.0 Module
 * -------------------------------------------------
 *
 * Copyright 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Shader operator performance tests.
 *//*--------------------------------------------------------------------*/

#include "es3pShaderOperatorTests.hpp"
#include "glsCalibration.hpp"
#include "gluShaderUtil.hpp"
#include "gluShaderProgram.hpp"
#include "gluPixelTransfer.hpp"
#include "tcuTestLog.hpp"
#include "tcuRenderTarget.hpp"
#include "tcuCommandLine.hpp"
#include "tcuSurface.hpp"
#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deClock.h"
#include "deMath.h"

#include "glwEnums.hpp"
#include "glwFunctions.hpp"

#include <map>
#include <algorithm>
#include <limits>
#include <set>

namespace deqp
{
namespace gles3
{
namespace Performance
{

using namespace gls;
using namespace glu;
using de::SharedPtr;
using tcu::TestLog;
using tcu::Vec2;
using tcu::Vec4;

using std::string;
using std::vector;

#define MEASUREMENT_FAIL() \
    throw tcu::InternalError("Unable to get sensible measurements for estimation", DE_NULL, __FILE__, __LINE__)

// Number of measurements in OperatorPerformanceCase for each workload size, unless specified otherwise by a command line argument.
static const int DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD = 3;
// How many different workload sizes are used by OperatorPerformanceCase.
static const int NUM_WORKLOADS = 8;
// Maximum workload size that can be attempted. In a sensible case, this most likely won't be reached.
static const int MAX_WORKLOAD_SIZE = 1 << 29;

// BinaryOpCase-specific constants for shader generation.
static const int BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS = 4;
static const int BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT = 2;
static const int BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT = 4;

// FunctionCase-specific constants for shader generation.
static const int FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS = 4;

static const char *const s_swizzles[][4] = {{"x", "yx", "yzx", "wzyx"},
                                            {"y", "zy", "wyz", "xwzy"},
                                            {"z", "wy", "zxy", "yzwx"},
                                            {"w", "xw", "yxw", "zyxw"}};

template <int N>
static tcu::Vector<float, N> mean(const vector<tcu::Vector<float, N>> &data)
{
    tcu::Vector<float, N> sum(0.0f);
    for (int i = 0; i < (int)data.size(); i++)
        sum += data[i];
    return sum / tcu::Vector<float, N>((float)data.size());
}

static void uniformNfv(const glw::Functions &gl, int n, int location, int count, const float *data)
{
    switch (n)
    {
    case 1:
        gl.uniform1fv(location, count, data);
        break;
    case 2:
        gl.uniform2fv(location, count, data);
        break;
    case 3:
        gl.uniform3fv(location, count, data);
        break;
    case 4:
        gl.uniform4fv(location, count, data);
        break;
    default:
        DE_ASSERT(false);
    }
}

static void uniformNiv(const glw::Functions &gl, int n, int location, int count, const int *data)
{
    switch (n)
    {
    case 1:
        gl.uniform1iv(location, count, data);
        break;
    case 2:
        gl.uniform2iv(location, count, data);
        break;
    case 3:
        gl.uniform3iv(location, count, data);
        break;
    case 4:
        gl.uniform4iv(location, count, data);
        break;
    default:
        DE_ASSERT(false);
    }
}

static void uniformMatrixNfv(const glw::Functions &gl, int n, int location, int count, const float *data)
{
    switch (n)
    {
    case 2:
        gl.uniformMatrix2fv(location, count, GL_FALSE, &data[0]);
        break;
    case 3:
        gl.uniformMatrix3fv(location, count, GL_FALSE, &data[0]);
        break;
    case 4:
        gl.uniformMatrix4fv(location, count, GL_FALSE, &data[0]);
        break;
    default:
        DE_ASSERT(false);
    }
}

static glu::DataType getDataTypeFloatOrVec(int size)
{
    return size == 1 ? glu::TYPE_FLOAT : glu::getDataTypeFloatVec(size);
}

static int getIterationCountOrDefault(const tcu::CommandLine &cmdLine, int def)
{
    const int cmdLineVal = cmdLine.getTestIterationCount();
    return cmdLineVal > 0 ? cmdLineVal : def;
}

static string lineParamsString(const LineParameters &params)
{
    return "y = " + de::toString(params.offset) + " + " + de::toString(params.coefficient) + "*x";
}

namespace
{

/*--------------------------------------------------------------------*//*!
 * \brief Abstract class for measuring shader operator performance.
 *
 * This class draws multiple times with different workload sizes (set
 * via a uniform, by the subclass). The time for each frame is measured,
 * and the slope of the workload size vs. frame time data is estimated.
 * This slope tells us the estimated increase in frame time caused by a
 * workload increase of 1 unit (what 1 workload unit means is up to the
 * subclass).
 *
 * Generally, the shaders contain not just the operation we're interested
 * in (e.g. addition) but also some other stuff (e.g. loop overhead). To
 * eliminate this cost, we actually do the stuff described in the above
 * paragraph with multiple programs (usually two), which contain different
 * kinds of workload (e.g. different loop contents). Then we can (in
 * theory) compute the cost of just one operation in a subclass-dependent
 * manner.
 *
 * At this point, the result tells us the increase in frame time caused
 * by the addition of one operation. Dividing this by the number of
 * draw calls in a frame, and further by the number of vertices or
 * fragments in a draw call, we get the time cost of one operation.
 *
 * In reality, there sometimes isn't just a trivial linear dependence
 * between workload size and frame time. Instead, there tends to be some
 * amount of initial "free" operations. That is, it may be that all
 * workload sizes below some positive integer C yield the same frame time,
 * and only workload sizes beyond C increase the frame time in a supposedly
 * linear manner. Graphically, this means that the graph consists of two
 * parts: a horizontal left part, and a linearly increasing right part; the
 * right part starts where the left part ends. The principal task of these
 * tests is to look at the slope of the increasing right part. Additionally,
 * an estimate for the number of initial free operations is calculated.
 * Note that it is also normal to get graphs where the horizontal left part
 * is of zero width, i.e. there are no free operations.
 *//*--------------------------------------------------------------------*/
class OperatorPerformanceCase : public tcu::TestCase
{
public:
    enum CaseType
    {
        CASETYPE_VERTEX = 0,
        CASETYPE_FRAGMENT,

        CASETYPE_LAST
    };

    struct InitialCalibration
    {
        int initialNumCalls;
        InitialCalibration(void) : initialNumCalls(1)
        {
        }
    };

    typedef SharedPtr<InitialCalibration> InitialCalibrationStorage;

    OperatorPerformanceCase(tcu::TestContext &testCtx, glu::RenderContext &renderCtx, const char *name,
                            const char *description, CaseType caseType, int numWorkloads,
                            const InitialCalibrationStorage &initialCalibrationStorage);
    ~OperatorPerformanceCase(void);

    void init(void);
    void deinit(void);

    IterateResult iterate(void);

    struct AttribSpec
    {
        AttribSpec(const char *name_, const tcu::Vec4 &p00_, const tcu::Vec4 &p01_, const tcu::Vec4 &p10_,
                   const tcu::Vec4 &p11_)
            : name(name_)
            , p00(p00_)
            , p01(p01_)
            , p10(p10_)
            , p11(p11_)
        {
        }

        AttribSpec(void)
        {
        }

        std::string name;
        tcu::Vec4 p00; //!< Bottom left.
        tcu::Vec4 p01; //!< Bottom right.
        tcu::Vec4 p10; //!< Top left.
        tcu::Vec4 p11; //!< Top right.
    };

protected:
    struct ProgramContext
    {
        string vertShaderSource;
        string fragShaderSource;
        vector<AttribSpec> attributes;

        string description;

        ProgramContext(void)
        {
        }
        ProgramContext(const string &vs, const string &fs, const vector<AttribSpec> &attrs, const string &desc)
            : vertShaderSource(vs)
            , fragShaderSource(fs)
            , attributes(attrs)
            , description(desc)
        {
        }
    };

    virtual vector<ProgramContext> generateProgramData(void) const = 0;
    //! Sets program-specific uniforms that don't depend on the workload size.
    virtual void setGeneralUniforms(uint32_t program) const = 0;
    //! Sets the uniform(s) that specify the workload size in the shader.
    virtual void setWorkloadSizeUniform(uint32_t program, int workload) const = 0;
    //! Computes the cost of a single operation, given the workload costs per program.
    virtual float computeSingleOperationTime(const vector<float> &perProgramWorkloadCosts) const = 0;
    //! Logs a human-readable description of what computeSingleOperationTime does.
    virtual void logSingleOperationCalculationInfo(void) const = 0;

    glu::RenderContext &m_renderCtx;

    CaseType m_caseType;

private:
    enum State
    {
        STATE_CALIBRATING = 0,    //!< Calibrate draw call count, using the first program in m_programs, with workload size 1.
        STATE_FIND_HIGH_WORKLOAD, //!< Find an appropriate lower bound for the highest workload size we intend to use (one with a high-enough frame time compared to workload size 1) for each program.
        STATE_MEASURING,          //!< Do actual measurements, for each program in m_programs.
        STATE_REPORTING,          //!< Measurements are done; calculate results and log.
        STATE_FINISHED,           //!< All done.

        STATE_LAST
    };

    struct WorkloadRecord
    {
        int workloadSize;
        vector<float> frameTimes; //!< In microseconds.

        WorkloadRecord(int workloadSize_) : workloadSize(workloadSize_)
        {
        }
        bool operator<(const WorkloadRecord &other) const
        {
            return this->workloadSize < other.workloadSize;
        }
        void addFrameTime(float time)
        {
            frameTimes.push_back(time);
        }
        float getMedianTime(void) const
        {
            vector<float> times = frameTimes;
            std::sort(times.begin(), times.end());
            return times.size() % 2 == 0 ? (times[times.size() / 2 - 1] + times[times.size() / 2]) * 0.5f :
                                           times[times.size() / 2];
        }
    };

    void prepareProgram(int progNdx); //!< Sets attributes and uniforms for m_programs[progNdx].
    void prepareWorkload(
        int progNdx,
        int workload); //!< Calls setWorkloadSizeUniform and draws, in case the implementation does some draw-time compilation.
    void prepareNextRound(void); //!< Increases workload and/or updates m_state.
    void render(int numDrawCalls);
    uint64_t renderAndMeasure(int numDrawCalls);
    void adjustAndLogGridAndViewport(
        void); //!< Log grid and viewport sizes, after possibly reducing them to reduce draw time.

    vector<Vec2> getWorkloadMedianDataPoints(
        int progNdx) const; //!< [ Vec2(r.workloadSize, r.getMedianTime()) for r in m_workloadRecords[progNdx] ]

    const int m_numMeasurementsPerWorkload;
    const int m_numWorkloads; //!< How many different workload sizes are used for measurement for each program.

    int m_workloadNdx; //!< Runs from 0 to m_numWorkloads-1.

    int m_workloadMeasurementNdx;
    vector<vector<WorkloadRecord>>
        m_workloadRecordsFindHigh; //!< The measurements done during STATE_FIND_HIGH_WORKLOAD.
    vector<vector<WorkloadRecord>>
        m_workloadRecords; //!< The measurements of each program in m_programs. Generated during STATE_MEASURING, into the index specified by m_measureProgramNdx.

    State m_state;
    int m_measureProgramNdx; //!< When m_state is STATE_FIND_HIGH_WORKLOAD or STATE_MEASURING, this tells which program in m_programs is being measured.

    vector<int>
        m_highWorkloadSizes; //!< The first workload size encountered during STATE_FIND_HIGH_WORKLOAD that was determined suitable, for each program.

    TheilSenCalibrator m_calibrator;
    InitialCalibrationStorage m_initialCalibrationStorage;

    int m_viewportWidth;
    int m_viewportHeight;
    int m_gridSizeX;
    int m_gridSizeY;

    vector<ProgramContext> m_programData;
    vector<SharedPtr<ShaderProgram>> m_programs;

    std::vector<uint32_t> m_attribBuffers;
};
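
// Illustrative example of the estimation described above (hypothetical numbers, not
// produced by this code): if the rising part of the workload-size-vs-frame-time data
// has a slope of 120 us per workload unit, and a frame consists of D = 10 draw calls
// covering R = 4096 fragments each, then one extra operation per fragment costs roughly
// 120 / (10 * 4096) us ~= 3 ns, i.e. about 340 million operations per second.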

static inline float triangleInterpolate(float v0, float v1, float v2, float x, float y)
{
    return v0 + (v2 - v0) * x + (v1 - v0) * y;
}

static inline float triQuadInterpolate(float x, float y, const tcu::Vec4 &quad)
{
    // \note Top left fill rule.
    if (x + y < 1.0f)
        return triangleInterpolate(quad.x(), quad.y(), quad.z(), x, y);
    else
        return triangleInterpolate(quad.w(), quad.z(), quad.y(), 1.0f - x, 1.0f - y);
}

static inline int getNumVertices(int gridSizeX, int gridSizeY)
{
    return gridSizeX * gridSizeY * 2 * 3;
}

static void generateVertices(std::vector<float> &dst, int gridSizeX, int gridSizeY,
                             const OperatorPerformanceCase::AttribSpec &spec)
{
    const int numComponents = 4;

    DE_ASSERT(gridSizeX >= 1 && gridSizeY >= 1);
    dst.resize(getNumVertices(gridSizeX, gridSizeY) * numComponents);

    {
        int dstNdx = 0;

        for (int baseY = 0; baseY < gridSizeY; baseY++)
            for (int baseX = 0; baseX < gridSizeX; baseX++)
            {
                const float xf0 = (float)(baseX + 0) / (float)gridSizeX;
                const float yf0 = (float)(baseY + 0) / (float)gridSizeY;
                const float xf1 = (float)(baseX + 1) / (float)gridSizeX;
                const float yf1 = (float)(baseY + 1) / (float)gridSizeY;

#define ADD_VERTEX(XF, YF)                                         \
    for (int compNdx = 0; compNdx < numComponents; compNdx++)     \
        dst[dstNdx++] = triQuadInterpolate(                       \
            (XF), (YF), tcu::Vec4(spec.p00[compNdx], spec.p01[compNdx], spec.p10[compNdx], spec.p11[compNdx]))

                ADD_VERTEX(xf0, yf0);
                ADD_VERTEX(xf1, yf0);
                ADD_VERTEX(xf0, yf1);

                ADD_VERTEX(xf1, yf0);
                ADD_VERTEX(xf1, yf1);
                ADD_VERTEX(xf0, yf1);

#undef ADD_VERTEX
            }
    }
}
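
// For example, a 2x2 grid yields getNumVertices(2, 2) = 2*2*2*3 = 24 vertices
// (8 triangles); each attribute component is interpolated from the corner values
// p00..p11 over the two triangles of each grid cell by triQuadInterpolate above.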

static float intersectionX(const gls::LineParameters &a, const gls::LineParameters &b)
{
    return (a.offset - b.offset) / (b.coefficient - a.coefficient);
}

static int numDistinctX(const vector<Vec2> &data)
{
    std::set<float> xs;
    for (int i = 0; i < (int)data.size(); i++)
        xs.insert(data[i].x());
    return (int)xs.size();
}

static gls::LineParameters simpleLinearRegression(const vector<Vec2> &data)
{
    const Vec2 mid = mean(data);

    float slopeNumerator = 0.0f;
    float slopeDenominator = 0.0f;

    for (int i = 0; i < (int)data.size(); i++)
    {
        const Vec2 diff = data[i] - mid;

        slopeNumerator += diff.x() * diff.y();
        slopeDenominator += diff.x() * diff.x();
    }

    const float slope = slopeNumerator / slopeDenominator;
    const float offset = mid.y() - slope * mid.x();

    return gls::LineParameters(offset, slope);
}

static float simpleLinearRegressionError(const vector<Vec2> &data)
{
    if (numDistinctX(data) <= 2)
        return 0.0f;
    else
    {
        const gls::LineParameters estimator = simpleLinearRegression(data);
        float error = 0.0f;

        for (int i = 0; i < (int)data.size(); i++)
        {
            const float estY = estimator.offset + estimator.coefficient * data[i].x();
            const float diff = estY - data[i].y();
            error += diff * diff;
        }

        return error / (float)data.size();
    }
}

static float verticalVariance(const vector<Vec2> &data)
{
    if (numDistinctX(data) <= 2)
        return 0.0f;
    else
    {
        const float meanY = mean(data).y();
        float error = 0.0f;

        for (int i = 0; i < (int)data.size(); i++)
        {
            const float diff = meanY - data[i].y();
            error += diff * diff;
        }

        return error / (float)data.size();
    }
}

/*--------------------------------------------------------------------*//*!
 * \brief Find the x coord that divides the input data into two slopes.
 *
 * The operator performance measurements tend to produce results where
 * we get small operation counts "for free" (e.g. because the operations
 * are performed during some memory transfer overhead or something),
 * resulting in a curve with two parts: an initial horizontal line segment,
 * and a rising line.
 *
 * This function finds the x coordinate that divides the input data into
 * two parts such that the sum of the mean square errors for the
 * least-squares estimated lines for the two parts is minimized, under the
 * additional condition that the left line is horizontal.
 *
 * This function returns a number X s.t. { pt | pt is in data, pt.x >= X }
 * is the right line, and the rest of data is the left line.
 *//*--------------------------------------------------------------------*/
static float findSlopePivotX(const vector<Vec2> &data)
{
    std::set<float> xCoords;
    for (int i = 0; i < (int)data.size(); i++)
        xCoords.insert(data[i].x());

    float lowestError = std::numeric_limits<float>::infinity();
    float bestPivotX = -std::numeric_limits<float>::infinity();

    for (std::set<float>::const_iterator pivotX = xCoords.begin(); pivotX != xCoords.end(); ++pivotX)
    {
        vector<Vec2> leftData;
        vector<Vec2> rightData;
        for (int i = 0; i < (int)data.size(); i++)
        {
            if (data[i].x() < *pivotX)
                leftData.push_back(data[i]);
            else
                rightData.push_back(data[i]);
        }

        if (numDistinctX(rightData) < 3) // We don't trust the right data if there's too little of it.
            break;

        {
            const float totalError = verticalVariance(leftData) + simpleLinearRegressionError(rightData);

            if (totalError < lowestError)
            {
                lowestError = totalError;
                bestPivotX = *pivotX;
            }
        }
    }

    DE_ASSERT(lowestError < std::numeric_limits<float>::infinity());

    return bestPivotX;
}
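
// Worked example of the pivot search above (hypothetical data): for the points
// (1,10) (2,10) (3,10) (4,20) (5,30) (6,40), the candidate pivot x = 4 splits the data
// into a flat left part {(1,10) (2,10) (3,10)} with zero vertical variance and a right
// part {(4,20) (5,30) (6,40)} that a single line fits exactly, so the total error is
// zero and 4 is returned; the points with x >= 4 then form the rising right line.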

struct SegmentedEstimator
{
    float pivotX; //!< Value returned by findSlopePivotX, or -infinity if only a single line.
    gls::LineParameters left;
    gls::LineParameters right;
    SegmentedEstimator(const gls::LineParameters &l, const gls::LineParameters &r, float pivotX_)
        : pivotX(pivotX_)
        , left(l)
        , right(r)
    {
    }
};

/*--------------------------------------------------------------------*//*!
 * \brief Compute line estimators for (potentially) two-segment data.
 *
 * Splits the given data into left and right parts (using findSlopePivotX)
 * and returns the line estimates for them.
 *
 * Sometimes, however (especially in fragment shader cases), the data is
 * in fact not segmented, but a straight line. This function attempts to
 * detect if this is the case, and if so, sets left.offset = right.offset
 * and left.coefficient = 0, meaning essentially that the initial "flat"
 * part of the data has zero width.
 *//*--------------------------------------------------------------------*/
static SegmentedEstimator computeSegmentedEstimator(const vector<Vec2> &data)
{
    const float pivotX = findSlopePivotX(data);
    vector<Vec2> leftData;
    vector<Vec2> rightData;

    for (int i = 0; i < (int)data.size(); i++)
    {
        if (data[i].x() < pivotX)
            leftData.push_back(data[i]);
        else
            rightData.push_back(data[i]);
    }

    {
        const gls::LineParameters leftLine = gls::theilSenLinearRegression(leftData);
        const gls::LineParameters rightLine = gls::theilSenLinearRegression(rightData);

        if (numDistinctX(leftData) < 2 || leftLine.coefficient > rightLine.coefficient * 0.5f)
        {
            // Left data doesn't seem credible; assume the data is just a single line.
            const gls::LineParameters entireLine = gls::theilSenLinearRegression(data);
            return SegmentedEstimator(gls::LineParameters(entireLine.offset, 0.0f), entireLine,
                                      -std::numeric_limits<float>::infinity());
        }
        else
            return SegmentedEstimator(leftLine, rightLine, pivotX);
    }
}
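
// Interpretation sketch (hypothetical values): if this returns
//   left  = { offset = 4000, coefficient = 0   },
//   right = { offset = 3500, coefficient = 120 }, pivotX = 5,
// the frame time is modelled as a constant ~4000 us for workloads below 5 and as
// 3500 + 120 * workloadSize us from there on. right.coefficient (120 us per workload
// unit) is what is later used as the per-program workload cost, while the left line
// feeds the estimate of the number of "free" operations.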

OperatorPerformanceCase::OperatorPerformanceCase(tcu::TestContext &testCtx, glu::RenderContext &renderCtx,
                                                 const char *name, const char *description, CaseType caseType,
                                                 int numWorkloads,
                                                 const InitialCalibrationStorage &initialCalibrationStorage)
    : tcu::TestCase(testCtx, tcu::NODETYPE_PERFORMANCE, name, description)
    , m_renderCtx(renderCtx)
    , m_caseType(caseType)
    , m_numMeasurementsPerWorkload(
          getIterationCountOrDefault(m_testCtx.getCommandLine(), DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD))
    , m_numWorkloads(numWorkloads)
    , m_workloadNdx(-1)
    , m_workloadMeasurementNdx(-1)
    , m_state(STATE_LAST)
    , m_measureProgramNdx(-1)
    , m_initialCalibrationStorage(initialCalibrationStorage)
    , m_viewportWidth(caseType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getWidth())
    , m_viewportHeight(caseType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getHeight())
    , m_gridSizeX(caseType == CASETYPE_FRAGMENT ? 1 : 100)
    , m_gridSizeY(caseType == CASETYPE_FRAGMENT ? 1 : 100)
{
    DE_ASSERT(m_numWorkloads > 0);
}

OperatorPerformanceCase::~OperatorPerformanceCase(void)
{
    if (!m_attribBuffers.empty())
    {
        m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
        m_attribBuffers.clear();
    }
}

static void logRenderTargetInfo(TestLog &log, const tcu::RenderTarget &renderTarget)
{
    log << TestLog::Section("RenderTarget", "Render target") << TestLog::Message << "size: " << renderTarget.getWidth()
        << "x" << renderTarget.getHeight() << TestLog::EndMessage << TestLog::Message << "bits:"
        << " R" << renderTarget.getPixelFormat().redBits << " G" << renderTarget.getPixelFormat().greenBits << " B"
        << renderTarget.getPixelFormat().blueBits << " A" << renderTarget.getPixelFormat().alphaBits << " D"
        << renderTarget.getDepthBits() << " S" << renderTarget.getStencilBits() << TestLog::EndMessage;

    if (renderTarget.getNumSamples() != 0)
        log << TestLog::Message << renderTarget.getNumSamples() << "x MSAA" << TestLog::EndMessage;
    else
        log << TestLog::Message << "No MSAA" << TestLog::EndMessage;

    log << TestLog::EndSection;
}

vector<Vec2> OperatorPerformanceCase::getWorkloadMedianDataPoints(int progNdx) const
{
    const vector<WorkloadRecord> &records = m_workloadRecords[progNdx];
    vector<Vec2> result;

    for (int i = 0; i < (int)records.size(); i++)
        result.push_back(Vec2((float)records[i].workloadSize, records[i].getMedianTime()));

    return result;
}

void OperatorPerformanceCase::prepareProgram(int progNdx)
{
    DE_ASSERT(progNdx < (int)m_programs.size());
    DE_ASSERT(m_programData.size() == m_programs.size());

    const glw::Functions &gl = m_renderCtx.getFunctions();
    const ShaderProgram &program = *m_programs[progNdx];

    vector<AttribSpec> attributes = m_programData[progNdx].attributes;

    attributes.push_back(AttribSpec("a_position", Vec4(-1.0f, -1.0f, 0.0f, 1.0f), Vec4(1.0f, -1.0f, 0.0f, 1.0f),
                                    Vec4(-1.0f, 1.0f, 0.0f, 1.0f), Vec4(1.0f, 1.0f, 0.0f, 1.0f)));

    DE_ASSERT(program.isOk());

    // Generate vertices.
    if (!m_attribBuffers.empty())
        gl.deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
    m_attribBuffers.resize(attributes.size(), 0);
    gl.genBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
    GLU_EXPECT_NO_ERROR(gl.getError(), "glGenBuffers()");

    for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++)
    {
        std::vector<float> vertices;
        generateVertices(vertices, m_gridSizeX, m_gridSizeY, attributes[attribNdx]);

        gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
        gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertices.size() * sizeof(float)), &vertices[0],
                      GL_STATIC_DRAW);
        GLU_EXPECT_NO_ERROR(gl.getError(), "Upload buffer data");
    }

    // Setup attribute bindings.
    for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++)
    {
        int location = gl.getAttribLocation(program.getProgram(), attributes[attribNdx].name.c_str());

        if (location >= 0)
        {
            gl.enableVertexAttribArray(location);
            gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
            gl.vertexAttribPointer(location, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
        }
    }
    GLU_EXPECT_NO_ERROR(gl.getError(), "Setup vertex input state");

    gl.useProgram(program.getProgram());
    setGeneralUniforms(program.getProgram());
    gl.viewport(0, 0, m_viewportWidth, m_viewportHeight);
}

void OperatorPerformanceCase::prepareWorkload(int progNdx, int workload)
{
    setWorkloadSizeUniform(m_programs[progNdx]->getProgram(), workload);
    render(m_calibrator.getCallCount());
}

void OperatorPerformanceCase::prepareNextRound(void)
{
    DE_ASSERT(m_state == STATE_CALIBRATING || m_state == STATE_FIND_HIGH_WORKLOAD || m_state == STATE_MEASURING);

    TestLog &log = m_testCtx.getLog();

    if (m_state == STATE_CALIBRATING && m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED)
    {
        m_measureProgramNdx = 0;
        m_state = STATE_FIND_HIGH_WORKLOAD;
    }

    if (m_state == STATE_CALIBRATING)
        prepareWorkload(0, 1);
    else if (m_state == STATE_FIND_HIGH_WORKLOAD)
    {
        vector<WorkloadRecord> &records = m_workloadRecordsFindHigh[m_measureProgramNdx];

        if (records.empty() || records.back().getMedianTime() < 2.0f * records[0].getMedianTime())
        {
            int workloadSize;

            if (records.empty())
                workloadSize = 1;
            else
            {
                workloadSize = records.back().workloadSize * 2;

                if (workloadSize > MAX_WORKLOAD_SIZE)
                {
                    log << TestLog::Message << "Even workload size " << records.back().workloadSize
                        << " doesn't give high enough frame time for program " << m_measureProgramNdx
                        << ". Can't get sensible result." << TestLog::EndMessage;
                    MEASUREMENT_FAIL();
                }
            }

            records.push_back(WorkloadRecord(workloadSize));
            prepareWorkload(0, workloadSize);
            m_workloadMeasurementNdx = 0;
        }
        else
        {
            m_highWorkloadSizes[m_measureProgramNdx] = records.back().workloadSize;
            m_measureProgramNdx++;

            if (m_measureProgramNdx >= (int)m_programs.size())
            {
                m_state = STATE_MEASURING;
                m_workloadNdx = -1;
                m_measureProgramNdx = 0;
            }

            prepareProgram(m_measureProgramNdx);
            prepareNextRound();
        }
    }
    else
    {
        m_workloadNdx++;

        if (m_workloadNdx < m_numWorkloads)
        {
            DE_ASSERT(m_numWorkloads > 1);
            const int highWorkload = m_highWorkloadSizes[m_measureProgramNdx];
            const int workload = highWorkload > m_numWorkloads ?
                                     1 + m_workloadNdx * (highWorkload - 1) / (m_numWorkloads - 1) :
                                     1 + m_workloadNdx;

            prepareWorkload(m_measureProgramNdx, workload);

            m_workloadMeasurementNdx = 0;

            m_workloadRecords[m_measureProgramNdx].push_back(WorkloadRecord(workload));
        }
        else
        {
            m_measureProgramNdx++;

            if (m_measureProgramNdx < (int)m_programs.size())
            {
                m_workloadNdx = -1;
                m_workloadMeasurementNdx = 0;
                prepareProgram(m_measureProgramNdx);
                prepareNextRound();
            }
            else
                m_state = STATE_REPORTING;
        }
    }
}

void OperatorPerformanceCase::init(void)
{
    TestLog &log = m_testCtx.getLog();
    const glw::Functions &gl = m_renderCtx.getFunctions();

    // Validate that we have sane grid and viewport setup.
    DE_ASSERT(de::inBounds(m_gridSizeX, 1, 256) && de::inBounds(m_gridSizeY, 1, 256));
    TCU_CHECK(de::inRange(m_viewportWidth, 1, m_renderCtx.getRenderTarget().getWidth()) &&
              de::inRange(m_viewportHeight, 1, m_renderCtx.getRenderTarget().getHeight()));

    logRenderTargetInfo(log, m_renderCtx.getRenderTarget());

    log << TestLog::Message << "Using additive blending." << TestLog::EndMessage;
    gl.enable(GL_BLEND);
    gl.blendEquation(GL_FUNC_ADD);
    gl.blendFunc(GL_ONE, GL_ONE);

    // Generate programs.
    DE_ASSERT(m_programs.empty());
    m_programData = generateProgramData();
    DE_ASSERT(!m_programData.empty());

    for (int progNdx = 0; progNdx < (int)m_programData.size(); progNdx++)
    {
        const string &vert = m_programData[progNdx].vertShaderSource;
        const string &frag = m_programData[progNdx].fragShaderSource;

        m_programs.push_back(
            SharedPtr<ShaderProgram>(new ShaderProgram(m_renderCtx, glu::makeVtxFragSources(vert, frag))));

        if (!m_programs.back()->isOk())
        {
            log << *m_programs.back();
            TCU_FAIL("Compile failed");
        }
    }

    // Log all programs.
    for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
        log << TestLog::Section("Program" + de::toString(progNdx), "Program " + de::toString(progNdx))
            << TestLog::Message << m_programData[progNdx].description << TestLog::EndMessage << *m_programs[progNdx]
            << TestLog::EndSection;

    m_highWorkloadSizes.resize(m_programData.size());
    m_workloadRecordsFindHigh.resize(m_programData.size());
    m_workloadRecords.resize(m_programData.size());

    m_calibrator.clear(
        CalibratorParameters(m_initialCalibrationStorage->initialNumCalls, 10 /* calibrate iteration frames */,
                             2000.0f /* calibrate iteration shortcut threshold (ms) */,
                             16 /* max calibrate iterations */, 1000.0f / 30.0f /* frame time (ms) */,
                             1000.0f / 60.0f /* frame time cap (ms) */, 1000.0f /* target measure duration (ms) */));
    m_state = STATE_CALIBRATING;

    prepareProgram(0);
    prepareNextRound();
}

void OperatorPerformanceCase::deinit(void)
{
    if (!m_attribBuffers.empty())
    {
        m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
        m_attribBuffers.clear();
    }

    m_programs.clear();
}

void OperatorPerformanceCase::render(int numDrawCalls)
{
    const glw::Functions &gl = m_renderCtx.getFunctions();
    const int numVertices = getNumVertices(m_gridSizeX, m_gridSizeY);

    for (int callNdx = 0; callNdx < numDrawCalls; callNdx++)
        gl.drawArrays(GL_TRIANGLES, 0, numVertices);

    glu::readPixels(m_renderCtx, 0, 0,
                    tcu::Surface(1, 1).getAccess()); // \note Serves as a more reliable replacement for glFinish().
}

uint64_t OperatorPerformanceCase::renderAndMeasure(int numDrawCalls)
{
    const uint64_t startTime = deGetMicroseconds();
    render(numDrawCalls);
    return deGetMicroseconds() - startTime;
}

void OperatorPerformanceCase::adjustAndLogGridAndViewport(void)
{
    TestLog &log = m_testCtx.getLog();

    // If call count is just 1, and the target frame time still wasn't reached, reduce grid or viewport size.
    if (m_calibrator.getCallCount() == 1)
    {
        const gls::MeasureState &calibratorMeasure = m_calibrator.getMeasureState();
        const float drawCallTime = (float)calibratorMeasure.getTotalTime() / (float)calibratorMeasure.frameTimes.size();
        const float targetDrawCallTime = m_calibrator.getParameters().targetFrameTimeUs;
        const float targetRatio = targetDrawCallTime / drawCallTime;

        if (targetRatio < 0.95f)
        {
            // Reduce grid or viewport size assuming draw call time scales proportionally.
            if (m_caseType == CASETYPE_VERTEX)
            {
                const float targetRatioSqrt = deFloatSqrt(targetRatio);
                m_gridSizeX = (int)(targetRatioSqrt * (float)m_gridSizeX);
                m_gridSizeY = (int)(targetRatioSqrt * (float)m_gridSizeY);
                TCU_CHECK_MSG(m_gridSizeX >= 1 && m_gridSizeY >= 1,
                              "Can't decrease grid size enough to achieve low-enough draw times");
                log << TestLog::Message
                    << "Note: triangle grid size reduced from original; it's now smaller than during calibration."
                    << TestLog::EndMessage;
            }
            else
            {
                const float targetRatioSqrt = deFloatSqrt(targetRatio);
                m_viewportWidth = (int)(targetRatioSqrt * (float)m_viewportWidth);
                m_viewportHeight = (int)(targetRatioSqrt * (float)m_viewportHeight);
                TCU_CHECK_MSG(m_viewportWidth >= 1 && m_viewportHeight >= 1,
                              "Can't decrease viewport size enough to achieve low-enough draw times");
                log << TestLog::Message
                    << "Note: viewport size reduced from original; it's now smaller than during calibration."
                    << TestLog::EndMessage;
            }
        }
    }

    prepareProgram(0);

    // Log grid and viewport sizes.
    log << TestLog::Message << "Grid size: " << m_gridSizeX << "x" << m_gridSizeY << TestLog::EndMessage;
    log << TestLog::Message << "Viewport: " << m_viewportWidth << "x" << m_viewportHeight << TestLog::EndMessage;
}
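
// Example of the reduction above (hypothetical numbers): if a single draw call took 40 ms
// during calibration while the target frame time is 30 ms, targetRatio is 0.75 and
// sqrt(0.75) ~= 0.87, so both dimensions of the grid (or viewport) are scaled to about
// 87 % of their previous size, which should bring the draw call time close to the target.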

OperatorPerformanceCase::IterateResult OperatorPerformanceCase::iterate(void)
{
    const TheilSenCalibrator::State calibratorState = m_calibrator.getState();

    if (calibratorState != TheilSenCalibrator::STATE_FINISHED)
    {
        if (calibratorState == TheilSenCalibrator::STATE_RECOMPUTE_PARAMS)
            m_calibrator.recomputeParameters();
        else if (calibratorState == TheilSenCalibrator::STATE_MEASURE)
            m_calibrator.recordIteration(renderAndMeasure(m_calibrator.getCallCount()));
        else
            DE_ASSERT(false);

        if (m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED)
        {
            logCalibrationInfo(m_testCtx.getLog(), m_calibrator);
            adjustAndLogGridAndViewport();
            prepareNextRound();
            m_initialCalibrationStorage->initialNumCalls = m_calibrator.getCallCount();
        }
    }
    else if (m_state == STATE_FIND_HIGH_WORKLOAD || m_state == STATE_MEASURING)
    {
        if (m_workloadMeasurementNdx < m_numMeasurementsPerWorkload)
        {
            vector<WorkloadRecord> &records = m_state == STATE_FIND_HIGH_WORKLOAD ?
                                                  m_workloadRecordsFindHigh[m_measureProgramNdx] :
                                                  m_workloadRecords[m_measureProgramNdx];
            records.back().addFrameTime((float)renderAndMeasure(m_calibrator.getCallCount()));
            m_workloadMeasurementNdx++;
        }
        else
            prepareNextRound();
    }
    else
    {
        DE_ASSERT(m_state == STATE_REPORTING);

        TestLog &log = m_testCtx.getLog();
        const int drawCallCount = m_calibrator.getCallCount();

        {
            // Compute per-program estimators for measurements.
            vector<SegmentedEstimator> estimators;
            for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
                estimators.push_back(computeSegmentedEstimator(getWorkloadMedianDataPoints(progNdx)));

            // Log measurements and their estimators for all programs.
            for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
            {
                const SegmentedEstimator &estimator = estimators[progNdx];
                const string progNdxStr = de::toString(progNdx);
                vector<WorkloadRecord> records = m_workloadRecords[progNdx];
                std::sort(records.begin(), records.end());

                {
                    const tcu::ScopedLogSection section(log, "Program" + progNdxStr + "Measurements",
                                                        "Measurements for program " + progNdxStr);

                    // Sample list of individual frame times.

                    log << TestLog::SampleList("Program" + progNdxStr + "IndividualFrameTimes",
                                               "Individual frame times")
                        << TestLog::SampleInfo
                        << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR)
                        << TestLog::ValueInfo("FrameTime", "Frame time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
                        << TestLog::EndSampleInfo;

                    for (int i = 0; i < (int)records.size(); i++)
                        for (int j = 0; j < (int)records[i].frameTimes.size(); j++)
                            log << TestLog::Sample << records[i].workloadSize << records[i].frameTimes[j]
                                << TestLog::EndSample;

                    log << TestLog::EndSampleList;

                    // Sample list of median frame times.

                    log << TestLog::SampleList("Program" + progNdxStr + "MedianFrameTimes", "Median frame times")
                        << TestLog::SampleInfo
                        << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR)
                        << TestLog::ValueInfo("MedianFrameTime", "Median frame time", "us",
                                              QP_SAMPLE_VALUE_TAG_RESPONSE)
                        << TestLog::EndSampleInfo;

                    for (int i = 0; i < (int)records.size(); i++)
                        log << TestLog::Sample << records[i].workloadSize << records[i].getMedianTime()
                            << TestLog::EndSample;

                    log << TestLog::EndSampleList;

                    log << TestLog::Float("Program" + progNdxStr + "WorkloadCostEstimate", "Workload cost estimate",
                                          "us / workload", QP_KEY_TAG_TIME, estimator.right.coefficient);

                    if (estimator.pivotX > -std::numeric_limits<float>::infinity())
                        log << TestLog::Message << "Note: the data points with x coordinate greater than or equal to "
                            << estimator.pivotX
                            << " seem to form a rising line, and the rest of data points seem to form a "
                               "near-horizontal line"
                            << TestLog::EndMessage << TestLog::Message << "Note: the left line is estimated to be "
                            << lineParamsString(estimator.left) << " and the right line "
                            << lineParamsString(estimator.right) << TestLog::EndMessage;
                    else
                        log << TestLog::Message
                            << "Note: the data seem to form a single line: " << lineParamsString(estimator.right)
                            << TestLog::EndMessage;
                }
            }

            for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
            {
                if (estimators[progNdx].right.coefficient <= 0.0f)
                {
                    log << TestLog::Message << "Slope of measurements for program " << progNdx
                        << " isn't positive. Can't get sensible result." << TestLog::EndMessage;
                    MEASUREMENT_FAIL();
                }
            }

            // \note For each estimator, .right.coefficient is the increase in draw time (in microseconds) when
            //       incrementing shader workload size by 1, when D draw calls are done, with a vertex/fragment count
            //       of R.
            //
            //       The measurements of any single program can't tell us the final result (time of single operation),
            //       so we use computeSingleOperationTime to compute it from multiple programs' measurements in a
            //       subclass-defined manner.
            //
            //       After that, microseconds per operation can be calculated as singleOperationTime / (D * R).
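            //
            //       For example (hypothetical numbers): with usecsPerFramePerOp = 500 us, D = 10 draw calls
            //       and R = 4096 fragments, a single operation costs 500 / (10 * 4096) ~= 0.012 us, which
            //       corresponds to roughly 82 million operations per second.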

            {
                vector<float> perProgramSlopes;
                for (int i = 0; i < (int)m_programs.size(); i++)
                    perProgramSlopes.push_back(estimators[i].right.coefficient);

                logSingleOperationCalculationInfo();

                const float maxSlope = *std::max_element(perProgramSlopes.begin(), perProgramSlopes.end());
                const float usecsPerFramePerOp = computeSingleOperationTime(perProgramSlopes);
                const int vertexOrFragmentCount = m_caseType == CASETYPE_VERTEX ?
                                                      getNumVertices(m_gridSizeX, m_gridSizeY) :
                                                      m_viewportWidth * m_viewportHeight;
                const double usecsPerDrawCallPerOp = usecsPerFramePerOp / (double)drawCallCount;
                const double usecsPerSingleOp = usecsPerDrawCallPerOp / (double)vertexOrFragmentCount;
                const double megaOpsPerSecond = (double)(drawCallCount * vertexOrFragmentCount) / usecsPerFramePerOp;
                const int numFreeOps = de::max(
                    0, (int)deFloatFloor(intersectionX(
                           estimators[0].left, LineParameters(estimators[0].right.offset, usecsPerFramePerOp))));

                log << TestLog::Integer("VertexOrFragmentCount",
                                        "R = " + string(m_caseType == CASETYPE_VERTEX ? "Vertex" : "Fragment") +
                                            " count",
                                        "", QP_KEY_TAG_NONE, vertexOrFragmentCount)

                    << TestLog::Integer("DrawCallsPerFrame", "D = Draw calls per frame", "", QP_KEY_TAG_NONE,
                                        drawCallCount)

                    << TestLog::Integer("VerticesOrFragmentsPerFrame",
                                        "R*D = " + string(m_caseType == CASETYPE_VERTEX ? "Vertices" : "Fragments") +
                                            " per frame",
                                        "", QP_KEY_TAG_NONE, vertexOrFragmentCount * drawCallCount)

                    << TestLog::Float("TimePerFramePerOp",
                                      "Estimated cost of R*D " +
                                          string(m_caseType == CASETYPE_VERTEX ? "vertices" : "fragments") +
                                          " (i.e. one frame) with one shader operation",
                                      "us", QP_KEY_TAG_TIME, (float)usecsPerFramePerOp)

                    << TestLog::Float("TimePerDrawcallPerOp",
                                      "Estimated cost of one draw call with one shader operation", "us",
                                      QP_KEY_TAG_TIME, (float)usecsPerDrawCallPerOp)

                    << TestLog::Float("TimePerSingleOp", "Estimated cost of a single shader operation", "us",
                                      QP_KEY_TAG_TIME, (float)usecsPerSingleOp);

                // \note Sometimes, when the operation is free or very cheap, it can happen that the shader with the operation runs,
                //       for some reason, a bit faster than the shader without the operation, and thus we get a negative result. The
                //       following threshold values for accepting a negative or almost-zero result are rather quick and dirty.
                if (usecsPerFramePerOp <= -0.1f * maxSlope)
                {
                    log << TestLog::Message << "Got strongly negative result." << TestLog::EndMessage;
                    MEASUREMENT_FAIL();
                }
                else if (usecsPerFramePerOp <= 0.001 * maxSlope)
                {
                    log << TestLog::Message << "Cost of operation seems to be approximately zero."
                        << TestLog::EndMessage;
                    m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
                }
                else
                {
                    log << TestLog::Float("OpsPerSecond", "Operations per second", "Million/s", QP_KEY_TAG_PERFORMANCE,
                                          (float)megaOpsPerSecond)

                        << TestLog::Integer("NumFreeOps", "Estimated number of \"free\" operations", "",
                                            QP_KEY_TAG_PERFORMANCE, numFreeOps);

                    m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString((float)megaOpsPerSecond, 2).c_str());
                }

                m_state = STATE_FINISHED;
            }
        }

        return STOP;
    }

    return CONTINUE;
}

// Binary operator case.
class BinaryOpCase : public OperatorPerformanceCase
{
public:
    BinaryOpCase(Context &context, const char *name, const char *description, const char *op, glu::DataType type,
                 glu::Precision precision, bool useSwizzle, bool isVertex,
                 const InitialCalibrationStorage &initialCalibration);

protected:
    vector<ProgramContext> generateProgramData(void) const;
    void setGeneralUniforms(uint32_t program) const;
    void setWorkloadSizeUniform(uint32_t program, int numOperations) const;
    float computeSingleOperationTime(const vector<float> &perProgramOperationCosts) const;
    void logSingleOperationCalculationInfo(void) const;

private:
    enum ProgramID
    {
        // \note 0-based sequential numbering is relevant, because these are also used as vector indices.
        // \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow.
        PROGRAM_WITH_BIGGER_LOOP = 0,
        PROGRAM_WITH_SMALLER_LOOP,

        PROGRAM_LAST
    };

    ProgramContext generateSingleProgramData(ProgramID) const;

    const string m_op;
    const glu::DataType m_type;
    const glu::Precision m_precision;
    const bool m_useSwizzle;
};

BinaryOpCase::BinaryOpCase(Context &context, const char *name, const char *description, const char *op,
                           glu::DataType type, glu::Precision precision, bool useSwizzle, bool isVertex,
                           const InitialCalibrationStorage &initialCalibration)
    : OperatorPerformanceCase(context.getTestContext(), context.getRenderContext(), name, description,
                              isVertex ? CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration)
    , m_op(op)
    , m_type(type)
    , m_precision(precision)
    , m_useSwizzle(useSwizzle)
{
}

BinaryOpCase::ProgramContext BinaryOpCase::generateSingleProgramData(ProgramID programID) const
{
    DE_ASSERT(glu::isDataTypeFloatOrVec(m_type) || glu::isDataTypeIntOrIVec(m_type));

    const bool isVertexCase = m_caseType == CASETYPE_VERTEX;
    const char *const precision = glu::getPrecisionName(m_precision);
    const char *const inputPrecision =
        glu::isDataTypeIntOrIVec(m_type) && m_precision == glu::PRECISION_LOWP ? "mediump" : precision;
    const char *const typeName = getDataTypeName(m_type);

    std::ostringstream vtx;
    std::ostringstream frag;
    std::ostringstream &op = isVertexCase ? vtx : frag;

    vtx << "#version 300 es\n";
    frag << "#version 300 es\n"
         << "layout (location = 0) out mediump vec4 o_color;\n";

    // Attributes.
    vtx << "in highp vec4 a_position;\n";
    for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS + 1; i++)
        vtx << "in " << inputPrecision << " vec4 a_in" << i << ";\n";

    if (isVertexCase)
    {
        vtx << "out mediump vec4 v_color;\n";
        frag << "in mediump vec4 v_color;\n";
    }
    else
    {
        for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS + 1; i++)
        {
            vtx << "out " << inputPrecision << " vec4 v_in" << i << ";\n";
            frag << "in " << inputPrecision << " vec4 v_in" << i << ";\n";
        }
    }

    op << "uniform mediump int u_numLoopIterations;\n";
    if (isVertexCase)
        op << "uniform mediump float u_zero;\n";

    vtx << "\n";
    vtx << "void main()\n";
    vtx << "{\n";

    if (!isVertexCase)
        vtx << "\tgl_Position = a_position;\n";

    frag << "\n";
    frag << "void main()\n";
    frag << "{\n";

    // Expression inputs.
    const char *const prefix = isVertexCase ? "a_" : "v_";
    for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS + 1; i++)
    {
        const int inSize = getDataTypeScalarSize(m_type);
        const bool isInt = de::inRange<int>(m_type, TYPE_INT, TYPE_INT_VEC4);
        const bool cast = isInt || (!m_useSwizzle && m_type != TYPE_FLOAT_VEC4);

        op << "\t" << precision << " " << typeName << " in" << i << " = ";

        if (cast)
            op << typeName << "(";

        op << prefix << "in" << i;

        if (m_useSwizzle)
            op << "." << s_swizzles[i % DE_LENGTH_OF_ARRAY(s_swizzles)][inSize - 1];

        if (cast)
            op << ")";

        op << ";\n";
    }

    // Operation accumulation variables.
    for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
    {
        op << "\t" << precision << " " << typeName << " acc" << i << "a"
           << " = in" << i + 0 << ";\n";
        op << "\t" << precision << " " << typeName << " acc" << i << "b"
           << " = in" << i + 1 << ";\n";
    }

    // Loop, with expressions in it.
    op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n";
    op << "\t{\n";
    {
        const int unrollAmount = programID == PROGRAM_WITH_SMALLER_LOOP ?
                                     BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT :
                                     BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
        for (int unrollNdx = 0; unrollNdx < unrollAmount; unrollNdx++)
        {
            for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
            {
                if (i > 0 || unrollNdx > 0)
                    op << "\n";
                op << "\t\tacc" << i << "a = acc" << i << "b " << m_op << " acc" << i << "a"
                   << ";\n";
                op << "\t\tacc" << i << "b = acc" << i << "a " << m_op << " acc" << i << "b"
                   << ";\n";
            }
        }
    }
    op << "\t}\n";
    op << "\n";

    // Result variable (sum of accumulation variables).
    op << "\t" << precision << " " << typeName << " res =";
    for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
        op << (i > 0 ? " " + m_op : "") << " acc" << i << "b";
    op << ";\n";

    // Convert to color.
    op << "\tmediump vec4 color = ";
    if (m_type == TYPE_FLOAT_VEC4)
        op << "res";
    else
    {
        int size = getDataTypeScalarSize(m_type);
        op << "vec4(res";

        for (int i = size; i < 4; i++)
            op << ", " << (i == 3 ? "1.0" : "0.0");

        op << ")";
    }
    op << ";\n";
    op << "\t" << (isVertexCase ? "v_color" : "o_color") << " = color;\n";

    if (isVertexCase)
    {
        vtx << "\tgl_Position = a_position + u_zero*color;\n";
        frag << "\to_color = v_color;\n";
    }
    else
    {
        for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS + 1; i++)
            vtx << "\tv_in" << i << " = a_in" << i << ";\n";
    }

    vtx << "}\n";
    frag << "}\n";

    {
        vector<AttribSpec> attributes;
        for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS + 1; i++)
            attributes.push_back(
                AttribSpec(("a_in" + de::toString(i)).c_str(),
                           Vec4(2.0f, 2.0f, 2.0f, 1.0f).swizzle((i + 0) % 4, (i + 1) % 4, (i + 2) % 4, (i + 3) % 4),
                           Vec4(1.0f, 2.0f, 1.0f, 2.0f).swizzle((i + 0) % 4, (i + 1) % 4, (i + 2) % 4, (i + 3) % 4),
                           Vec4(2.0f, 1.0f, 2.0f, 2.0f).swizzle((i + 0) % 4, (i + 1) % 4, (i + 2) % 4, (i + 3) % 4),
                           Vec4(1.0f, 1.0f, 2.0f, 1.0f).swizzle((i + 0) % 4, (i + 1) % 4, (i + 2) % 4, (i + 3) % 4)));

        {
            string description = "This is the program with the ";

            description += programID == PROGRAM_WITH_SMALLER_LOOP ? "smaller" :
                           programID == PROGRAM_WITH_BIGGER_LOOP  ? "bigger" :
                                                                    DE_NULL;

            description += " loop.\n"
                           "Note: workload size for this program means the number of loop iterations.";

            return ProgramContext(vtx.str(), frag.str(), attributes, description);
        }
    }
}
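
// For illustration, with m_op == "+" and the constants above, one unrolled chunk of the
// generated loop body looks roughly like (acc0..acc3 are declared before the loop):
//
//     acc0a = acc0b + acc0a;
//     acc0b = acc0a + acc0b;
//     ...
//     acc3a = acc3b + acc3a;
//     acc3b = acc3a + acc3b;
//
// and is repeated BINARY_OPERATOR_CASE_*_PROGRAM_UNROLL_AMOUNT times per loop iteration,
// so the two programs differ only in how many of these chunks each iteration contains.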

vector<BinaryOpCase::ProgramContext> BinaryOpCase::generateProgramData(void) const
{
    vector<ProgramContext> progData;
    for (int i = 0; i < PROGRAM_LAST; i++)
        progData.push_back(generateSingleProgramData((ProgramID)i));
    return progData;
}

void BinaryOpCase::setGeneralUniforms(uint32_t program) const
{
    const glw::Functions &gl = m_renderCtx.getFunctions();
    gl.uniform1f(gl.getUniformLocation(program, "u_zero"), 0.0f);
}

void BinaryOpCase::setWorkloadSizeUniform(uint32_t program, int numLoopIterations) const
{
    const glw::Functions &gl = m_renderCtx.getFunctions();
    gl.uniform1i(gl.getUniformLocation(program, "u_numLoopIterations"), numLoopIterations);
}

float BinaryOpCase::computeSingleOperationTime(const vector<float> &perProgramOperationCosts) const
{
    DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST);

    const int baseNumOpsInsideLoop = 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS;
    const int numOpsInsideLoopInSmallProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT;
    const int numOpsInsideLoopInBigProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
    DE_STATIC_ASSERT(numOpsInsideLoopInBigProgram > numOpsInsideLoopInSmallProgram);
    const int opDiff = numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram;
    const float programOperationCostDiff =
        perProgramOperationCosts[PROGRAM_WITH_BIGGER_LOOP] - perProgramOperationCosts[PROGRAM_WITH_SMALLER_LOOP];

    return programOperationCostDiff / (float)opDiff;
}
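
// Example with the constants above: the loop body contains 2*4 = 8 operations per unroll,
// i.e. 16 operations per iteration in the small program and 32 in the big one, so
// opDiff = 16. If, hypothetically, the bigger program's measured workload cost is 20 us
// per loop iteration higher than the smaller program's, the per-frame cost attributed to
// a single operation is 20 / 16 = 1.25 us (later divided by the draw call and
// vertex/fragment counts to obtain the final per-operation time).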
1422
logSingleOperationCalculationInfo(void) const1423 void BinaryOpCase::logSingleOperationCalculationInfo(void) const
1424 {
1425 const int baseNumOpsInsideLoop = 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS;
1426 const int numOpsInsideLoopInSmallProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT;
1427 const int numOpsInsideLoopInBigProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
1428 const int opDiff = numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram;
1429 const char *const opName = m_op == "+" ? "addition" :
1430 m_op == "-" ? "subtraction" :
1431 m_op == "*" ? "multiplication" :
1432 m_op == "/" ? "division" :
1433 DE_NULL;
1434 DE_ASSERT(opName != DE_NULL);
1435
1436 m_testCtx.getLog()
1437 << TestLog::Message << "Note: the bigger program contains " << opDiff << " more " << opName
1438 << " operations in one loop iteration than the small program; "
1439 << "cost of one operation is calculated as (cost_of_bigger_workload - cost_of_smaller_workload) / " << opDiff
1440 << TestLog::EndMessage;
1441 }
1442
1443 // Built-in function case.
1444 class FunctionCase : public OperatorPerformanceCase
1445 {
1446 public:
1447 enum
1448 {
1449 MAX_PARAMS = 3
1450 };
1451
1452 FunctionCase(
1453 Context &context, const char *name, const char *description, const char *func, glu::DataType returnType,
1454 const glu::DataType paramTypes[MAX_PARAMS], const Vec4 &attribute,
1455 int modifyParamNdx, //!< Add a compile-time constant (2.0) to the parameter at this index. This is ignored if negative.
1456 bool useNearlyConstantINputs, //!< Function inputs shouldn't be much bigger than 'attribute'.
1457 glu::Precision precision, bool isVertex, const InitialCalibrationStorage &initialCalibration);
1458
1459 protected:
1460 vector<ProgramContext> generateProgramData(void) const;
1461 void setGeneralUniforms(uint32_t program) const;
1462 void setWorkloadSizeUniform(uint32_t program, int numOperations) const;
1463 float computeSingleOperationTime(const vector<float> &perProgramOperationCosts) const;
1464 void logSingleOperationCalculationInfo(void) const;
1465
1466 private:
1467 enum ProgramID
1468 {
1469 // \note 0-based sequential numbering is relevant, because these are also used as vector indices.
1470 // \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow.
1471 PROGRAM_WITH_FUNCTION_CALLS = 0,
1472 PROGRAM_WITHOUT_FUNCTION_CALLS,
1473
1474 PROGRAM_LAST
1475 };
1476
1477 //! Forms a "sum" expression from aExpr and bExpr; for booleans, this is "equal(a,b)", otherwise actual sum.
1478 static string sumExpr(const string &aExpr, const string &bExpr, glu::DataType type);
1479 //! Forms an expression used to increment an input value in the shader. If type is boolean, this is just
1480 //! baseExpr; otherwise, baseExpr is modified by multiplication or division by a loop index,
1481 //! to prevent simple compiler optimizations. See m_useNearlyConstantInputs for more explanation.
1482 static string incrementExpr(const string &baseExpr, glu::DataType type, bool divide);
1483
1484 ProgramContext generateSingleProgramData(ProgramID) const;
1485
1486 const string m_func;
1487 const glu::DataType m_returnType;
1488 glu::DataType m_paramTypes[MAX_PARAMS];
1489 // \note m_modifyParamNdx, if not negative, specifies the index of the parameter to which a
1490 // compile-time constant (2.0) is added. This is a quick and dirty way to deal with
1491 // functions like clamp or smoothstep that require that a certain parameter is
1492 // greater than a certain other parameter.
1493 const int m_modifyParamNdx;
1494 // \note m_useNearlyConstantInputs determines whether the inputs given to the function
1495 // should increase (w.r.t m_attribute) only by very small amounts. This is relevant
1496 // for functions like asin, which requires its inputs to be in a specific range.
1497 // In practice, this affects whether expressions used to increment the input
1498 // variables use division instead of multiplication; normally, multiplication is used,
1499 // but it's hard to keep the increments very small that way, and division shouldn't
1500 // be the default, since for many functions (probably not asin, luckily), division
1501 // is too heavy and dominates time-wise.
1502 const bool m_useNearlyConstantInputs;
1503 const Vec4 m_attribute;
1504 const glu::Precision m_precision;
1505 };
1506
1507 FunctionCase::FunctionCase(Context &context, const char *name, const char *description, const char *func,
1508 glu::DataType returnType, const glu::DataType paramTypes[MAX_PARAMS], const Vec4 &attribute,
1509 int modifyParamNdx, bool useNearlyConstantInputs, glu::Precision precision, bool isVertex,
1510 const InitialCalibrationStorage &initialCalibration)
1511 : OperatorPerformanceCase(context.getTestContext(), context.getRenderContext(), name, description,
1512 isVertex ? CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration)
1513 , m_func(func)
1514 , m_returnType(returnType)
1515 , m_modifyParamNdx(modifyParamNdx)
1516 , m_useNearlyConstantInputs(useNearlyConstantInputs)
1517 , m_attribute(attribute)
1518 , m_precision(precision)
1519 {
1520 for (int i = 0; i < MAX_PARAMS; i++)
1521 m_paramTypes[i] = paramTypes[i];
1522 }
1523
1524 string FunctionCase::sumExpr(const string &aExpr, const string &bExpr, glu::DataType type)
1525 {
1526 if (glu::isDataTypeBoolOrBVec(type))
1527 {
1528 if (type == glu::TYPE_BOOL)
1529 return "(" + aExpr + " == " + bExpr + ")";
1530 else
1531 return "equal(" + aExpr + ", " + bExpr + ")";
1532 }
1533 else
1534 return "(" + aExpr + " + " + bExpr + ")";
1535 }
1536
1537 string FunctionCase::incrementExpr(const string &baseExpr, glu::DataType type, bool divide)
1538 {
1539 const string mulOrDiv = divide ? "/" : "*";
1540
1541 return glu::isDataTypeBoolOrBVec(type) ? baseExpr :
1542 glu::isDataTypeIntOrIVec(type) ? "(" + baseExpr + mulOrDiv + "(i+1))" :
1543 "(" + baseExpr + mulOrDiv + "float(i+1))";
1544 }
1545
1546 FunctionCase::ProgramContext FunctionCase::generateSingleProgramData(ProgramID programID) const
1547 {
1548 const bool isVertexCase = m_caseType == CASETYPE_VERTEX;
1549 const char *const precision = glu::getPrecisionName(m_precision);
1550 const char *const returnTypeName = getDataTypeName(m_returnType);
1551 const string returnPrecisionMaybe = glu::isDataTypeBoolOrBVec(m_returnType) ? "" : string() + precision + " ";
1552 const char *inputPrecision = DE_NULL;
1553 const bool isMatrixReturn = isDataTypeMatrix(m_returnType);
1554 int numParams = 0;
1555 const char *paramTypeNames[MAX_PARAMS];
1556 string paramPrecisionsMaybe[MAX_PARAMS];
1557
1558 for (int i = 0; i < MAX_PARAMS; i++)
1559 {
1560 paramTypeNames[i] = getDataTypeName(m_paramTypes[i]);
1561 paramPrecisionsMaybe[i] = glu::isDataTypeBoolOrBVec(m_paramTypes[i]) ? "" : string() + precision + " ";
1562
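// When the case precision is lowp and a parameter is an int type, declare the vec4 inputs as
// mediump instead of lowp (presumably to give the integer conversions in the shader enough range).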
1563 if (inputPrecision == DE_NULL && isDataTypeIntOrIVec(m_paramTypes[i]) && m_precision == glu::PRECISION_LOWP)
1564 inputPrecision = "mediump";
1565
1566 if (m_paramTypes[i] != TYPE_INVALID)
1567 numParams = i + 1;
1568 }
1569
1570 DE_ASSERT(numParams > 0);
1571
1572 if (inputPrecision == DE_NULL)
1573 inputPrecision = precision;
1574
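// Consecutive calculations share all but one of their inputs: calculation c reads attributes
// c .. c+numParams-1, so this many distinct attributes are needed in total.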
1575 int numAttributes = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS + numParams - 1;
1576 std::ostringstream vtx;
1577 std::ostringstream frag;
1578 std::ostringstream &op = isVertexCase ? vtx : frag;
1579
1580 vtx << "#version 300 es\n";
1581 frag << "#version 300 es\n"
1582 << "layout (location = 0) out mediump vec4 o_color;\n";
1583
1584 // Attributes.
1585 vtx << "in highp vec4 a_position;\n";
1586 for (int i = 0; i < numAttributes; i++)
1587 vtx << "in " << inputPrecision << " vec4 a_in" << i << ";\n";
1588
1589 if (isVertexCase)
1590 {
1591 vtx << "out mediump vec4 v_color;\n";
1592 frag << "in mediump vec4 v_color;\n";
1593 }
1594 else
1595 {
1596 for (int i = 0; i < numAttributes; i++)
1597 {
1598 vtx << "out " << inputPrecision << " vec4 v_in" << i << ";\n";
1599 frag << "in " << inputPrecision << " vec4 v_in" << i << ";\n";
1600 }
1601 }
1602
1603 op << "uniform mediump int u_numLoopIterations;\n";
1604 if (isVertexCase)
1605 op << "uniform mediump float u_zero;\n";
1606
1607 for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
1608 op << "uniform " << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " u_inc"
1609 << (char)('A' + paramNdx) << ";\n";
1610
1611 vtx << "\n";
1612 vtx << "void main()\n";
1613 vtx << "{\n";
1614
1615 if (!isVertexCase)
1616 vtx << "\tgl_Position = a_position;\n";
1617
1618 frag << "\n";
1619 frag << "void main()\n";
1620 frag << "{\n";
1621
1622 // Function call input and return value accumulation variables.
1623 {
1624 const char *const inPrefix = isVertexCase ? "a_" : "v_";
1625
1626 for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++)
1627 {
1628 for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
1629 {
1630 const glu::DataType paramType = m_paramTypes[paramNdx];
1631 const bool mustCast = paramType != glu::TYPE_FLOAT_VEC4;
1632
1633 op << "\t" << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " in" << calcNdx
1634 << (char)('a' + paramNdx) << " = ";
1635
1636 if (mustCast)
1637 op << paramTypeNames[paramNdx] << "(";
1638
1639 if (glu::isDataTypeMatrix(paramType))
1640 {
1641 static const char *const swizzles[3] = {"x", "xy", "xyz"};
1642 const int numRows = glu::getDataTypeMatrixNumRows(paramType);
1643 const int numCols = glu::getDataTypeMatrixNumColumns(paramType);
1644 const string swizzle = numRows < 4 ? string() + "." + swizzles[numRows - 1] : "";
1645
1646 for (int i = 0; i < numCols; i++)
1647 op << (i > 0 ? ", " : "") << inPrefix << "in" << calcNdx + paramNdx << swizzle;
1648 }
1649 else
1650 {
1651 op << inPrefix << "in" << calcNdx + paramNdx;
1652
1653 if (paramNdx == m_modifyParamNdx)
1654 {
1655 DE_ASSERT(glu::isDataTypeFloatOrVec(paramType));
1656 op << " + 2.0";
1657 }
1658 }
1659
1660 if (mustCast)
1661 op << ")";
1662
1663 op << ";\n";
1664 }
1665
1666 op << "\t" << returnPrecisionMaybe << returnTypeName << " res" << calcNdx << " = " << returnTypeName
1667 << "(0);\n";
1668 }
1669 }
1670
1671 // Loop with expressions in it.
1672 op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n";
1673 op << "\t{\n";
1674 for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++)
1675 {
1676 if (calcNdx > 0)
1677 op << "\n";
1678
1679 op << "\t\t{\n";
1680
1681 for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1682 {
1683 const string inputName = "in" + de::toString(calcNdx) + (char)('a' + inputNdx);
1684 const string incName = string() + "u_inc" + (char)('A' + inputNdx);
1685 const string incExpr = incrementExpr(incName, m_paramTypes[inputNdx], m_useNearlyConstantInputs);
1686
1687 op << "\t\t\t" << inputName << " = " << sumExpr(inputName, incExpr, m_paramTypes[inputNdx]) << ";\n";
1688 }
1689
1690 op << "\t\t\t" << returnPrecisionMaybe << returnTypeName << " eval" << calcNdx << " = ";
1691
1692 if (programID == PROGRAM_WITH_FUNCTION_CALLS)
1693 {
1694 op << m_func << "(";
1695
1696 for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
1697 {
1698 if (paramNdx > 0)
1699 op << ", ";
1700
1701 op << "in" << calcNdx << (char)('a' + paramNdx);
1702 }
1703
1704 op << ")";
1705 }
1706 else
1707 {
1708 DE_ASSERT(programID == PROGRAM_WITHOUT_FUNCTION_CALLS);
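// The reference program keeps all surrounding arithmetic identical and only replaces the
// function call itself with a constant.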
1709 op << returnTypeName << "(1)";
1710 }
1711
1712 op << ";\n";
1713
1714 {
1715 const string resName = "res" + de::toString(calcNdx);
1716 const string evalName = "eval" + de::toString(calcNdx);
1717 const string incExpr = incrementExpr(evalName, m_returnType, m_useNearlyConstantInputs);
1718
1719 op << "\t\t\tres" << calcNdx << " = " << sumExpr(resName, incExpr, m_returnType) << ";\n";
1720 }
1721
1722 op << "\t\t}\n";
1723 }
1724 op << "\t}\n";
1725 op << "\n";
1726
1727 // Result variables.
1728 for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1729 {
1730 op << "\t" << paramPrecisionsMaybe[inputNdx] << paramTypeNames[inputNdx] << " sumIn" << (char)('A' + inputNdx)
1731 << " = ";
1732 {
1733 string expr = string() + "in0" + (char)('a' + inputNdx);
1734 for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1735 expr =
1736 sumExpr(expr, string() + "in" + de::toString(i) + (char)('a' + inputNdx), m_paramTypes[inputNdx]);
1737 op << expr;
1738 }
1739 op << ";\n";
1740 }
1741
1742 op << "\t" << returnPrecisionMaybe << returnTypeName << " sumRes = ";
1743 {
1744 string expr = "res0";
1745 for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1746 expr = sumExpr(expr, "res" + de::toString(i), m_returnType);
1747 op << expr;
1748 }
1749 op << ";\n";
1750
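// Combine the summed inputs and results into one final value and convert it to the output color,
// so that every computation in the loop contributes to the shader output.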
1751 {
1752 glu::DataType finalResultDataType = glu::TYPE_LAST;
1753
1754 if (glu::isDataTypeMatrix(m_returnType))
1755 {
1756 finalResultDataType = m_returnType;
1757
1758 op << "\t" << precision << " " << returnTypeName << " finalRes = ";
1759
1760 for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1761 {
1762 DE_ASSERT(m_paramTypes[inputNdx] == m_returnType);
1763 op << "sumIn" << (char)('A' + inputNdx) << " + ";
1764 }
1765 op << "sumRes;\n";
1766 }
1767 else
1768 {
1769 int numFinalResComponents = glu::getDataTypeScalarSize(m_returnType);
1770 for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1771 numFinalResComponents =
1772 de::max(numFinalResComponents, glu::getDataTypeScalarSize(m_paramTypes[inputNdx]));
1773
1774 finalResultDataType = getDataTypeFloatOrVec(numFinalResComponents);
1775
1776 {
1777 const string finalResType = glu::getDataTypeName(finalResultDataType);
1778 op << "\t" << precision << " " << finalResType << " finalRes = ";
1779 for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1780 op << finalResType << "(sumIn" << (char)('A' + inputNdx) << ") + ";
1781 op << finalResType << "(sumRes);\n";
1782 }
1783 }
1784
1785 // Convert to color.
1786 op << "\tmediump vec4 color = ";
1787 if (finalResultDataType == TYPE_FLOAT_VEC4)
1788 op << "finalRes";
1789 else
1790 {
1791 int size = isMatrixReturn ? getDataTypeMatrixNumRows(finalResultDataType) :
1792 getDataTypeScalarSize(finalResultDataType);
1793
1794 op << "vec4(";
1795
1796 if (isMatrixReturn)
1797 {
1798 for (int i = 0; i < getDataTypeMatrixNumColumns(finalResultDataType); i++)
1799 {
1800 if (i > 0)
1801 op << " + ";
1802 op << "finalRes[" << i << "]";
1803 }
1804 }
1805 else
1806 op << "finalRes";
1807
1808 for (int i = size; i < 4; i++)
1809 op << ", " << (i == 3 ? "1.0" : "0.0");
1810
1811 op << ")";
1812 }
1813 op << ";\n";
1814 op << "\t" << (isVertexCase ? "v_color" : "o_color") << " = color;\n";
1815
1816 if (isVertexCase)
1817 {
1818 vtx << " gl_Position = a_position + u_zero*color;\n";
1819 frag << " o_color = v_color;\n";
1820 }
1821 else
1822 {
1823 for (int i = 0; i < numAttributes; i++)
1824 vtx << " v_in" << i << " = a_in" << i << ";\n";
1825 }
1826
1827 vtx << "}\n";
1828 frag << "}\n";
1829 }
1830
1831 {
1832 vector<AttribSpec> attributes;
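// Each attribute is fed four differently rotated swizzles of m_attribute (presumably one value
// per quad corner), so different vertices/fragments see slightly different inputs.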
1833 for (int i = 0; i < numAttributes; i++)
1834 attributes.push_back(AttribSpec(("a_in" + de::toString(i)).c_str(),
1835 m_attribute.swizzle((i + 0) % 4, (i + 1) % 4, (i + 2) % 4, (i + 3) % 4),
1836 m_attribute.swizzle((i + 1) % 4, (i + 2) % 4, (i + 3) % 4, (i + 0) % 4),
1837 m_attribute.swizzle((i + 2) % 4, (i + 3) % 4, (i + 0) % 4, (i + 1) % 4),
1838 m_attribute.swizzle((i + 3) % 4, (i + 0) % 4, (i + 1) % 4, (i + 2) % 4)));
1839
1840 {
1841 string description = "This is the program ";
1842
1843 description += programID == PROGRAM_WITHOUT_FUNCTION_CALLS ? "without" :
1844 programID == PROGRAM_WITH_FUNCTION_CALLS ? "with" :
1845 DE_NULL;
1846
1847 description += " '" + m_func +
1848 "' function calls.\n"
1849 "Note: workload size for this program means the number of loop iterations.";
1850
1851 return ProgramContext(vtx.str(), frag.str(), attributes, description);
1852 }
1853 }
1854 }
1855
1856 vector<FunctionCase::ProgramContext> FunctionCase::generateProgramData(void) const
1857 {
1858 vector<ProgramContext> progData;
1859 for (int i = 0; i < PROGRAM_LAST; i++)
1860 progData.push_back(generateSingleProgramData((ProgramID)i));
1861 return progData;
1862 }
1863
1864 void FunctionCase::setGeneralUniforms(uint32_t program) const
1865 {
1866 const glw::Functions &gl = m_renderCtx.getFunctions();
1867
1868 gl.uniform1f(gl.getUniformLocation(program, "u_zero"), 0.0f);
1869
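// Upload the u_inc* uniforms that the generated shader combines with the function inputs on each
// loop iteration (see incrementExpr()); the exact values are arbitrary.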
1870 for (int paramNdx = 0; paramNdx < MAX_PARAMS; paramNdx++)
1871 {
1872 if (m_paramTypes[paramNdx] != glu::TYPE_INVALID)
1873 {
1874 const glu::DataType paramType = m_paramTypes[paramNdx];
1875 const int scalarSize = glu::getDataTypeScalarSize(paramType);
1876 const int location = gl.getUniformLocation(program, (string() + "u_inc" + (char)('A' + paramNdx)).c_str());
1877
1878 if (glu::isDataTypeFloatOrVec(paramType))
1879 {
1880 float values[4];
1881 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1882 values[i] = (float)paramNdx * 0.01f + (float)i * 0.001f; // Arbitrary small values.
1883 uniformNfv(gl, scalarSize, location, 1, &values[0]);
1884 }
1885 else if (glu::isDataTypeIntOrIVec(paramType))
1886 {
1887 int values[4];
1888 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1889 values[i] = paramNdx * 100 + i; // Arbitrary values.
1890 uniformNiv(gl, scalarSize, location, 1, &values[0]);
1891 }
1892 else if (glu::isDataTypeBoolOrBVec(paramType))
1893 {
1894 int values[4];
1895 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1896 values[i] = (paramNdx >> i) & 1; // Arbitrary values.
1897 uniformNiv(gl, scalarSize, location, 1, &values[0]);
1898 }
1899 else if (glu::isDataTypeMatrix(paramType))
1900 {
1901 const int size = glu::getDataTypeMatrixNumRows(paramType);
1902 DE_ASSERT(size == glu::getDataTypeMatrixNumColumns(paramType));
1903 float values[4 * 4];
1904 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1905 values[i] = (float)paramNdx * 0.01f + (float)i * 0.001f; // Arbitrary values.
1906 uniformMatrixNfv(gl, size, location, 1, &values[0]);
1907 }
1908 else
1909 DE_ASSERT(false);
1910 }
1911 }
1912 }
1913
1914 void FunctionCase::setWorkloadSizeUniform(uint32_t program, int numLoopIterations) const
1915 {
1916 const glw::Functions &gl = m_renderCtx.getFunctions();
1917 const int loc = gl.getUniformLocation(program, "u_numLoopIterations");
1918
1919 gl.uniform1i(loc, numLoopIterations);
1920 }
1921
1922 float FunctionCase::computeSingleOperationTime(const vector<float> &perProgramOperationCosts) const
1923 {
1924 DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST);
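// Per loop iteration the two programs differ only by FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS
// calls to the tested function, so that is the divisor for the cost difference.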
1925 const int numFunctionCalls = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS;
1926 const float programOperationCostDiff = perProgramOperationCosts[PROGRAM_WITH_FUNCTION_CALLS] -
1927 perProgramOperationCosts[PROGRAM_WITHOUT_FUNCTION_CALLS];
1928
1929 return programOperationCostDiff / (float)numFunctionCalls;
1930 }
1931
1932 void FunctionCase::logSingleOperationCalculationInfo(void) const
1933 {
1934 const int numFunctionCalls = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS;
1935
1936 m_testCtx.getLog() << TestLog::Message << "Note: program " << (int)PROGRAM_WITH_FUNCTION_CALLS << " contains "
1937 << numFunctionCalls << " calls to '" << m_func << "' in one loop iteration; "
1938 << "cost of one operation is calculated as "
1939 << "(cost_of_workload_with_calls - cost_of_workload_without_calls) / " << numFunctionCalls
1940 << TestLog::EndMessage;
1941 }
1942
1943 } // namespace
1944
1945 ShaderOperatorTests::ShaderOperatorTests(Context &context)
1946 : TestCaseGroup(context, "operator", "Operator Performance Tests")
1947 {
1948 }
1949
1950 ShaderOperatorTests::~ShaderOperatorTests(void)
1951 {
1952 }
1953
1954 void ShaderOperatorTests::init(void)
1955 {
1956 // Binary operator cases
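// Case hierarchy: binary_operator.<op>.<vertex|fragment>.<precision>_<type>. All cases under one
// vertex/fragment group share a single InitialCalibrationStorage instance, presumably so each case
// can start from the calibration reached by the previous one.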
1957
1958 static const DataType binaryOpTypes[] = {
1959 TYPE_FLOAT, TYPE_FLOAT_VEC2, TYPE_FLOAT_VEC3, TYPE_FLOAT_VEC4,
1960 TYPE_INT, TYPE_INT_VEC2, TYPE_INT_VEC3, TYPE_INT_VEC4,
1961 };
1962 static const Precision precisions[] = {PRECISION_LOWP, PRECISION_MEDIUMP, PRECISION_HIGHP};
1963 static const struct
1964 {
1965 const char *name;
1966 const char *op;
1967 bool swizzle;
1968 } binaryOps[] = {{"add", "+", false}, {"sub", "-", true}, {"mul", "*", false}, {"div", "/", true}};
1969
1970 tcu::TestCaseGroup *const binaryOpsGroup =
1971 new tcu::TestCaseGroup(m_testCtx, "binary_operator", "Binary Operator Performance Tests");
1972 addChild(binaryOpsGroup);
1973
1974 for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(binaryOps); opNdx++)
1975 {
1976 tcu::TestCaseGroup *const opGroup = new tcu::TestCaseGroup(m_testCtx, binaryOps[opNdx].name, "");
1977 binaryOpsGroup->addChild(opGroup);
1978
1979 for (int isFrag = 0; isFrag <= 1; isFrag++)
1980 {
1981 const BinaryOpCase::InitialCalibrationStorage shaderGroupCalibrationStorage(
1982 new BinaryOpCase::InitialCalibration);
1983 const bool isVertex = isFrag == 0;
1984 tcu::TestCaseGroup *const shaderGroup =
1985 new tcu::TestCaseGroup(m_testCtx, isVertex ? "vertex" : "fragment", "");
1986 opGroup->addChild(shaderGroup);
1987
1988 for (int typeNdx = 0; typeNdx < DE_LENGTH_OF_ARRAY(binaryOpTypes); typeNdx++)
1989 {
1990 for (int precNdx = 0; precNdx < DE_LENGTH_OF_ARRAY(precisions); precNdx++)
1991 {
1992 const DataType type = binaryOpTypes[typeNdx];
1993 const Precision precision = precisions[precNdx];
1994 const char *const op = binaryOps[opNdx].op;
1995 const bool useSwizzle = binaryOps[opNdx].swizzle;
1996 std::ostringstream name;
1997
1998 name << getPrecisionName(precision) << "_" << getDataTypeName(type);
1999
2000 shaderGroup->addChild(new BinaryOpCase(m_context, name.str().c_str(), "", op, type, precision,
2001 useSwizzle, isVertex, shaderGroupCalibrationStorage));
2002 }
2003 }
2004 }
2005 }
2006
2007 // Built-in function cases.
2008
2009 // Non-specific (i.e. includes gentypes) parameter types for the functions.
2010 enum ValueType
2011 {
2012 VALUE_NONE = 0,
2013 VALUE_FLOAT = (1 << 0), // float scalar
2014 VALUE_FLOAT_VEC = (1 << 1), // float vector
2015 VALUE_FLOAT_VEC34 = (1 << 2), // float vector of size 3 or 4
2016 VALUE_FLOAT_GENTYPE = (1 << 3), // float scalar/vector
2017 VALUE_VEC3 = (1 << 4), // vec3 only
2018 VALUE_VEC4 = (1 << 5), // vec4 only
2019 VALUE_MATRIX = (1 << 6), // matrix
2020 VALUE_BOOL = (1 << 7), // boolean scalar
2021 VALUE_BOOL_VEC = (1 << 8), // boolean vector
2022 VALUE_BOOL_VEC4 = (1 << 9), // bvec4 only
2023 VALUE_BOOL_GENTYPE = (1 << 10), // boolean scalar/vector
2024 VALUE_INT = (1 << 11), // int scalar
2025 VALUE_INT_VEC = (1 << 12), // int vector
2026 VALUE_INT_VEC4 = (1 << 13), // ivec4 only
2027 VALUE_INT_GENTYPE = (1 << 14), // int scalar/vector
2028
2029 // Shorthands.
2030 N = VALUE_NONE,
2031 F = VALUE_FLOAT,
2032 FV = VALUE_FLOAT_VEC,
2033 VL = VALUE_FLOAT_VEC34, // L for "large"
2034 GT = VALUE_FLOAT_GENTYPE,
2035 V3 = VALUE_VEC3,
2036 V4 = VALUE_VEC4,
2037 M = VALUE_MATRIX,
2038 B = VALUE_BOOL,
2039 BV = VALUE_BOOL_VEC,
2040 B4 = VALUE_BOOL_VEC4,
2041 BGT = VALUE_BOOL_GENTYPE,
2042 I = VALUE_INT,
2043 IV = VALUE_INT_VEC,
2044 I4 = VALUE_INT_VEC4,
2045 IGT = VALUE_INT_GENTYPE,
2046
2047 VALUE_ANY_FLOAT =
2048 VALUE_FLOAT | VALUE_FLOAT_VEC | VALUE_FLOAT_GENTYPE | VALUE_VEC3 | VALUE_VEC4 | VALUE_FLOAT_VEC34,
2049 VALUE_ANY_INT = VALUE_INT | VALUE_INT_VEC | VALUE_INT_GENTYPE | VALUE_INT_VEC4,
2050 VALUE_ANY_BOOL = VALUE_BOOL | VALUE_BOOL_VEC | VALUE_BOOL_GENTYPE | VALUE_BOOL_VEC4,
2051
2052 VALUE_ANY_GENTYPE = VALUE_FLOAT_VEC | VALUE_FLOAT_GENTYPE | VALUE_FLOAT_VEC34 | VALUE_BOOL_VEC |
2053 VALUE_BOOL_GENTYPE | VALUE_INT_VEC | VALUE_INT_GENTYPE | VALUE_MATRIX
2054 };
2055 enum PrecisionMask
2056 {
2057 PRECMASK_NA = 0, //!< Precision not applicable (booleans)
2058 PRECMASK_LOWP = (1 << PRECISION_LOWP),
2059 PRECMASK_MEDIUMP = (1 << PRECISION_MEDIUMP),
2060 PRECMASK_HIGHP = (1 << PRECISION_HIGHP),
2061
2062 PRECMASK_MEDIUMP_HIGHP = (1 << PRECISION_MEDIUMP) | (1 << PRECISION_HIGHP),
2063 PRECMASK_ALL = (1 << PRECISION_LOWP) | (1 << PRECISION_MEDIUMP) | (1 << PRECISION_HIGHP)
2064 };
2065
2066 static const DataType floatTypes[] = {TYPE_FLOAT, TYPE_FLOAT_VEC2, TYPE_FLOAT_VEC3, TYPE_FLOAT_VEC4};
2067 static const DataType intTypes[] = {TYPE_INT, TYPE_INT_VEC2, TYPE_INT_VEC3, TYPE_INT_VEC4};
2068 static const DataType boolTypes[] = {TYPE_BOOL, TYPE_BOOL_VEC2, TYPE_BOOL_VEC3, TYPE_BOOL_VEC4};
2069 static const DataType matrixTypes[] = {TYPE_FLOAT_MAT2, TYPE_FLOAT_MAT3, TYPE_FLOAT_MAT4};
2070
2071 tcu::TestCaseGroup *const angleAndTrigonometryGroup = new tcu::TestCaseGroup(
2072 m_testCtx, "angle_and_trigonometry", "Built-In Angle and Trigonometry Function Performance Tests");
2073 tcu::TestCaseGroup *const exponentialGroup =
2074 new tcu::TestCaseGroup(m_testCtx, "exponential", "Built-In Exponential Function Performance Tests");
2075 tcu::TestCaseGroup *const commonFunctionsGroup =
2076 new tcu::TestCaseGroup(m_testCtx, "common_functions", "Built-In Common Function Performance Tests");
2077 tcu::TestCaseGroup *const geometricFunctionsGroup =
2078 new tcu::TestCaseGroup(m_testCtx, "geometric", "Built-In Geometric Function Performance Tests");
2079 tcu::TestCaseGroup *const matrixFunctionsGroup =
2080 new tcu::TestCaseGroup(m_testCtx, "matrix", "Built-In Matrix Function Performance Tests");
2081 tcu::TestCaseGroup *const floatCompareGroup = new tcu::TestCaseGroup(
2082 m_testCtx, "float_compare", "Built-In Floating Point Comparison Function Performance Tests");
2083 tcu::TestCaseGroup *const intCompareGroup =
2084 new tcu::TestCaseGroup(m_testCtx, "int_compare", "Built-In Integer Comparison Function Performance Tests");
2085 tcu::TestCaseGroup *const boolCompareGroup =
2086 new tcu::TestCaseGroup(m_testCtx, "bool_compare", "Built-In Boolean Comparison Function Performance Tests");
2087
2088 addChild(angleAndTrigonometryGroup);
2089 addChild(exponentialGroup);
2090 addChild(commonFunctionsGroup);
2091 addChild(geometricFunctionsGroup);
2092 addChild(matrixFunctionsGroup);
2093 addChild(floatCompareGroup);
2094 addChild(intCompareGroup);
2095 addChild(boolCompareGroup);
2096
2097 // Some attributes to be used as parameters for the functions.
2098 const Vec4 attrPos = Vec4(2.3f, 1.9f, 0.8f, 0.7f);
2099 const Vec4 attrNegPos = Vec4(-1.3f, 2.5f, -3.5f, 4.3f);
2100 const Vec4 attrSmall = Vec4(-0.9f, 0.8f, -0.4f, 0.2f);
2101 const Vec4 attrBig = Vec4(1.3f, 2.4f, 3.0f, 4.0f);
2102
2103 // \todo The following functions and variants are missing, and should be added in the future:
2104 // - modf (has an output parameter, not currently handled by test code)
2105 // - functions with uint/uvec* return or parameter types
2106 // - non-matrix <-> matrix functions (outerProduct etc.)
2107 // \note Remember to update test spec when these are added.
2108
2109 // Function name, return type and parameter type information; also, what attribute should be used in the test.
2110 // \note Different versions of the same function (i.e. with the same group name) can be defined by putting them successively in this array.
2111 // \note In order to reduce case count and thus total execution time, we don't test all input type combinations for every function.
2112 static const struct
2113 {
2114 tcu::TestCaseGroup *parentGroup;
2115 const char *groupName;
2116 const char *func;
2117 const ValueType types[FunctionCase::MAX_PARAMS + 1]; // Return type and parameter types, in that order.
2118 const Vec4 &attribute;
2119 int modifyParamNdx;
2120 bool useNearlyConstantInputs;
2121 bool booleanCase;
2122 PrecisionMask precMask;
2123 } functionCaseGroups[] = {
2124 {angleAndTrigonometryGroup, "radians", "radians", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2125 {angleAndTrigonometryGroup, "degrees", "degrees", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2126 {angleAndTrigonometryGroup, "sin", "sin", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2127 {angleAndTrigonometryGroup, "cos", "cos", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2128 {angleAndTrigonometryGroup, "tan", "tan", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2129 {angleAndTrigonometryGroup, "asin", "asin", {F, F, N, N}, attrSmall, -1, true, false, PRECMASK_ALL},
2130 {angleAndTrigonometryGroup, "acos", "acos", {F, F, N, N}, attrSmall, -1, true, false, PRECMASK_ALL},
2131 {angleAndTrigonometryGroup, "atan2", "atan", {F, F, F, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2132 {angleAndTrigonometryGroup, "atan", "atan", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2133 {angleAndTrigonometryGroup, "sinh", "sinh", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2134 {angleAndTrigonometryGroup, "cosh", "cosh", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2135 {angleAndTrigonometryGroup, "tanh", "tanh", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2136 {angleAndTrigonometryGroup, "asinh", "asinh", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2137 {angleAndTrigonometryGroup, "acosh", "acosh", {F, F, N, N}, attrBig, -1, false, false, PRECMASK_ALL},
2138 {angleAndTrigonometryGroup, "atanh", "atanh", {F, F, N, N}, attrSmall, -1, true, false, PRECMASK_ALL},
2139
2140 {exponentialGroup, "pow", "pow", {F, F, F, N}, attrPos, -1, false, false, PRECMASK_ALL},
2141 {exponentialGroup, "exp", "exp", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2142 {exponentialGroup, "log", "log", {F, F, N, N}, attrPos, -1, false, false, PRECMASK_ALL},
2143 {exponentialGroup, "exp2", "exp2", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2144 {exponentialGroup, "log2", "log2", {F, F, N, N}, attrPos, -1, false, false, PRECMASK_ALL},
2145 {exponentialGroup, "sqrt", "sqrt", {F, F, N, N}, attrPos, -1, false, false, PRECMASK_ALL},
2146 {exponentialGroup, "inversesqrt", "inversesqrt", {F, F, N, N}, attrPos, -1, false, false, PRECMASK_ALL},
2147
2148 {commonFunctionsGroup, "abs", "abs", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2149 {commonFunctionsGroup, "abs", "abs", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2150 {commonFunctionsGroup, "sign", "sign", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2151 {commonFunctionsGroup, "sign", "sign", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2152 {commonFunctionsGroup, "floor", "floor", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2153 {commonFunctionsGroup, "floor", "floor", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2154 {commonFunctionsGroup, "trunc", "trunc", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2155 {commonFunctionsGroup, "trunc", "trunc", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2156 {commonFunctionsGroup, "round", "round", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2157 {commonFunctionsGroup, "round", "round", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2158 {commonFunctionsGroup,
2159 "roundEven",
2160 "roundEven",
2161 {F, F, N, N},
2162 attrNegPos,
2163 -1,
2164 false,
2165 false,
2166 PRECMASK_MEDIUMP_HIGHP},
2167 {commonFunctionsGroup, "roundEven", "roundEven", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2168 {commonFunctionsGroup, "ceil", "ceil", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2169 {commonFunctionsGroup, "ceil", "ceil", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2170 {commonFunctionsGroup, "fract", "fract", {F, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2171 {commonFunctionsGroup, "fract", "fract", {V4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2172 {commonFunctionsGroup, "mod", "mod", {GT, GT, GT, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2173 {commonFunctionsGroup, "min", "min", {F, F, F, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2174 {commonFunctionsGroup, "min", "min", {V4, V4, V4, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2175 {commonFunctionsGroup, "max", "max", {F, F, F, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2176 {commonFunctionsGroup, "max", "max", {V4, V4, V4, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2177 {commonFunctionsGroup, "clamp", "clamp", {F, F, F, F}, attrSmall, 2, false, false, PRECMASK_MEDIUMP_HIGHP},
2178 {commonFunctionsGroup, "clamp", "clamp", {V4, V4, V4, V4}, attrSmall, 2, false, false, PRECMASK_ALL},
2179 {commonFunctionsGroup, "mix", "mix", {F, F, F, F}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2180 {commonFunctionsGroup, "mix", "mix", {V4, V4, V4, V4}, attrNegPos, -1, false, false, PRECMASK_ALL},
2181 {commonFunctionsGroup, "mix", "mix", {F, F, F, B}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2182 {commonFunctionsGroup, "mix", "mix", {V4, V4, V4, B4}, attrNegPos, -1, false, false, PRECMASK_ALL},
2183 {commonFunctionsGroup, "step", "step", {F, F, F, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2184 {commonFunctionsGroup, "step", "step", {V4, V4, V4, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2185 {commonFunctionsGroup,
2186 "smoothstep",
2187 "smoothstep",
2188 {F, F, F, F},
2189 attrSmall,
2190 1,
2191 false,
2192 false,
2193 PRECMASK_MEDIUMP_HIGHP},
2194 {commonFunctionsGroup, "smoothstep", "smoothstep", {V4, V4, V4, V4}, attrSmall, 1, false, false, PRECMASK_ALL},
2195 {commonFunctionsGroup, "isnan", "isnan", {B, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2196 {commonFunctionsGroup, "isnan", "isnan", {B4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2197 {commonFunctionsGroup, "isinf", "isinf", {B, F, N, N}, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP},
2198 {commonFunctionsGroup, "isinf", "isinf", {B4, V4, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2199 {commonFunctionsGroup,
2200 "floatBitsToInt",
2201 "floatBitsToInt",
2202 {I, F, N, N},
2203 attrNegPos,
2204 -1,
2205 false,
2206 false,
2207 PRECMASK_MEDIUMP_HIGHP},
2208 {commonFunctionsGroup,
2209 "floatBitsToInt",
2210 "floatBitsToInt",
2211 {I4, V4, N, N},
2212 attrNegPos,
2213 -1,
2214 false,
2215 false,
2216 PRECMASK_ALL},
2217 {commonFunctionsGroup,
2218 "intBitsToFloat",
2219 "intBitsToFloat",
2220 {F, I, N, N},
2221 attrNegPos,
2222 -1,
2223 false,
2224 false,
2225 PRECMASK_MEDIUMP_HIGHP},
2226 {commonFunctionsGroup,
2227 "intBitsToFloat",
2228 "intBitsToFloat",
2229 {V4, I4, N, N},
2230 attrNegPos,
2231 -1,
2232 false,
2233 false,
2234 PRECMASK_ALL},
2235
2236 {geometricFunctionsGroup, "length", "length", {F, VL, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2237 {geometricFunctionsGroup, "distance", "distance", {F, VL, VL, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2238 {geometricFunctionsGroup, "dot", "dot", {F, VL, VL, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2239 {geometricFunctionsGroup, "cross", "cross", {V3, V3, V3, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2240 {geometricFunctionsGroup, "normalize", "normalize", {VL, VL, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2241 {geometricFunctionsGroup,
2242 "faceforward",
2243 "faceforward",
2244 {VL, VL, VL, VL},
2245 attrNegPos,
2246 -1,
2247 false,
2248 false,
2249 PRECMASK_ALL},
2250 {geometricFunctionsGroup, "reflect", "reflect", {VL, VL, VL, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2251 {geometricFunctionsGroup, "refract", "refract", {VL, VL, VL, F}, attrNegPos, -1, false, false, PRECMASK_ALL},
2252
2253 {matrixFunctionsGroup,
2254 "matrixCompMult",
2255 "matrixCompMult",
2256 {M, M, M, N},
2257 attrNegPos,
2258 -1,
2259 false,
2260 false,
2261 PRECMASK_ALL},
2262 {matrixFunctionsGroup, "transpose", "transpose", {M, M, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2263 {matrixFunctionsGroup, "inverse", "inverse", {M, M, N, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2264
2265 {floatCompareGroup, "lessThan", "lessThan", {BV, FV, FV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2266 {floatCompareGroup,
2267 "lessThanEqual",
2268 "lessThanEqual",
2269 {BV, FV, FV, N},
2270 attrNegPos,
2271 -1,
2272 false,
2273 false,
2274 PRECMASK_ALL},
2275 {floatCompareGroup, "greaterThan", "greaterThan", {BV, FV, FV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2276 {floatCompareGroup,
2277 "greaterThanEqual",
2278 "greaterThanEqual",
2279 {BV, FV, FV, N},
2280 attrNegPos,
2281 -1,
2282 false,
2283 false,
2284 PRECMASK_ALL},
2285 {floatCompareGroup, "equal", "equal", {BV, FV, FV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2286 {floatCompareGroup, "notEqual", "notEqual", {BV, FV, FV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2287
2288 {intCompareGroup, "lessThan", "lessThan", {BV, IV, IV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2289 {intCompareGroup,
2290 "lessThanEqual",
2291 "lessThanEqual",
2292 {BV, IV, IV, N},
2293 attrNegPos,
2294 -1,
2295 false,
2296 false,
2297 PRECMASK_ALL},
2298 {intCompareGroup, "greaterThan", "greaterThan", {BV, IV, IV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2299 {intCompareGroup,
2300 "greaterThanEqual",
2301 "greaterThanEqual",
2302 {BV, IV, IV, N},
2303 attrNegPos,
2304 -1,
2305 false,
2306 false,
2307 PRECMASK_ALL},
2308 {intCompareGroup, "equal", "equal", {BV, IV, IV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2309 {intCompareGroup, "notEqual", "notEqual", {BV, IV, IV, N}, attrNegPos, -1, false, false, PRECMASK_ALL},
2310
2311 {boolCompareGroup, "equal", "equal", {BV, BV, BV, N}, attrNegPos, -1, false, true, PRECMASK_MEDIUMP},
2312 {boolCompareGroup, "notEqual", "notEqual", {BV, BV, BV, N}, attrNegPos, -1, false, true, PRECMASK_MEDIUMP},
2313 {boolCompareGroup, "any", "any", {B, BV, N, N}, attrNegPos, -1, false, true, PRECMASK_MEDIUMP},
2314 {boolCompareGroup, "all", "all", {B, BV, N, N}, attrNegPos, -1, false, true, PRECMASK_MEDIUMP},
2315 {boolCompareGroup, "not", "not", {BV, BV, N, N}, attrNegPos, -1, false, true, PRECMASK_MEDIUMP}};
2316
2317 // vertexSubGroup and fragmentSubGroup are the groups where the various vertex/fragment cases of a single function are added.
2318 // \note These are defined here so that different versions (different entries in the functionCaseGroups array) of the same function can be put in the same group.
2319 tcu::TestCaseGroup *vertexSubGroup = DE_NULL;
2320 tcu::TestCaseGroup *fragmentSubGroup = DE_NULL;
2321 FunctionCase::InitialCalibrationStorage vertexSubGroupCalibrationStorage;
2322 FunctionCase::InitialCalibrationStorage fragmentSubGroupCalibrationStorage;
2323 for (int funcNdx = 0; funcNdx < DE_LENGTH_OF_ARRAY(functionCaseGroups); funcNdx++)
2324 {
2325 tcu::TestCaseGroup *const parentGroup = functionCaseGroups[funcNdx].parentGroup;
2326 const char *const groupName = functionCaseGroups[funcNdx].groupName;
2327 const char *const groupFunc = functionCaseGroups[funcNdx].func;
2328 const ValueType *const funcTypes = functionCaseGroups[funcNdx].types;
2329 const Vec4 &groupAttribute = functionCaseGroups[funcNdx].attribute;
2330 const int modifyParamNdx = functionCaseGroups[funcNdx].modifyParamNdx;
2331 const bool useNearlyConstantInputs = functionCaseGroups[funcNdx].useNearlyConstantInputs;
2332 const bool booleanCase = functionCaseGroups[funcNdx].booleanCase;
2333 const PrecisionMask precMask = functionCaseGroups[funcNdx].precMask;
2334
2335 // If this is a new function and not just a different version of the previously defined function, create a new group.
2336 if (funcNdx == 0 || parentGroup != functionCaseGroups[funcNdx - 1].parentGroup ||
2337 string(groupName) != functionCaseGroups[funcNdx - 1].groupName)
2338 {
2339 tcu::TestCaseGroup *const funcGroup = new tcu::TestCaseGroup(m_testCtx, groupName, "");
2340 functionCaseGroups[funcNdx].parentGroup->addChild(funcGroup);
2341
2342 vertexSubGroup = new tcu::TestCaseGroup(m_testCtx, "vertex", "");
2343 fragmentSubGroup = new tcu::TestCaseGroup(m_testCtx, "fragment", "");
2344
2345 funcGroup->addChild(vertexSubGroup);
2346 funcGroup->addChild(fragmentSubGroup);
2347
2348 vertexSubGroupCalibrationStorage =
2349 FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration);
2350 fragmentSubGroupCalibrationStorage =
2351 FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration);
2352 }
2353
2354 DE_ASSERT(vertexSubGroup != DE_NULL);
2355 DE_ASSERT(fragmentSubGroup != DE_NULL);
2356
2357 // Find the type size range of parameters (e.g. from 2 to 4 in case of vectors).
2358 int genTypeFirstSize = 1;
2359 int genTypeLastSize = 1;
2360
2361 // Find the first return value or parameter with a gentype (if any) and set sizes accordingly.
2362 // \note Assumes only matching-size gentypes are to be found, e.g. no "genType func (vec param)"
2363 for (int i = 0; i < FunctionCase::MAX_PARAMS + 1 && genTypeLastSize == 1; i++)
2364 {
2365 switch (funcTypes[i])
2366 {
2367 case VALUE_FLOAT_VEC:
2368 case VALUE_BOOL_VEC:
2369 case VALUE_INT_VEC: // \note Fall-through.
2370 genTypeFirstSize = 2;
2371 genTypeLastSize = 4;
2372 break;
2373 case VALUE_FLOAT_VEC34:
2374 genTypeFirstSize = 3;
2375 genTypeLastSize = 4;
2376 break;
2377 case VALUE_FLOAT_GENTYPE:
2378 case VALUE_BOOL_GENTYPE:
2379 case VALUE_INT_GENTYPE: // \note Fall-through.
2380 genTypeFirstSize = 1;
2381 genTypeLastSize = 4;
2382 break;
2383 case VALUE_MATRIX:
2384 genTypeFirstSize = 2;
2385 genTypeLastSize = 4;
2386 break;
2387 // If none of the above, keep looping.
2388 default:
2389 break;
2390 }
2391 }
2392
2393 // Create a case for each possible size of the gentype.
2394 for (int curSize = genTypeFirstSize; curSize <= genTypeLastSize; curSize++)
2395 {
2396 // Determine specific types for return value and the parameters, according to curSize. Non-gentypes not affected by curSize.
2397 DataType types[FunctionCase::MAX_PARAMS + 1];
2398 for (int i = 0; i < FunctionCase::MAX_PARAMS + 1; i++)
2399 {
2400 if (funcTypes[i] == VALUE_NONE)
2401 types[i] = TYPE_INVALID;
2402 else
2403 {
2404 int isFloat = funcTypes[i] & VALUE_ANY_FLOAT;
2405 int isBool = funcTypes[i] & VALUE_ANY_BOOL;
2406 int isInt = funcTypes[i] & VALUE_ANY_INT;
2407 int isMat = funcTypes[i] == VALUE_MATRIX;
2408 int inSize = (funcTypes[i] & VALUE_ANY_GENTYPE) ? curSize :
2409 funcTypes[i] == VALUE_VEC3 ? 3 :
2410 funcTypes[i] == VALUE_VEC4 ? 4 :
2411 funcTypes[i] == VALUE_BOOL_VEC4 ? 4 :
2412 funcTypes[i] == VALUE_INT_VEC4 ? 4 :
2413 1;
2414 int typeArrayNdx = isMat ? inSize - 2 : inSize - 1; // \note No matrices of size 1.
2415
2416 types[i] = isFloat ? floatTypes[typeArrayNdx] :
2417 isBool ? boolTypes[typeArrayNdx] :
2418 isInt ? intTypes[typeArrayNdx] :
2419 isMat ? matrixTypes[typeArrayNdx] :
2420 TYPE_LAST;
2421 }
2422
2423 DE_ASSERT(types[i] != TYPE_LAST);
2424 }
2425
2426 // Array for just the parameter types.
2427 DataType paramTypes[FunctionCase::MAX_PARAMS];
2428 for (int i = 0; i < FunctionCase::MAX_PARAMS; i++)
2429 paramTypes[i] = types[i + 1];
2430
2431 for (int prec = (int)PRECISION_LOWP; prec < (int)PRECISION_LAST; prec++)
2432 {
2433 if ((precMask & (1 << prec)) == 0)
2434 continue;
2435
2436 const string precisionPrefix = booleanCase ? "" : (string(getPrecisionName((Precision)prec)) + "_");
2437 std::ostringstream caseName;
2438
2439 caseName << precisionPrefix;
2440
2441 // Write the name of each distinct parameter data type into the test case name.
2442 for (int i = 1; i < FunctionCase::MAX_PARAMS + 1 && types[i] != TYPE_INVALID; i++)
2443 {
2444 if (i == 1 || types[i] != types[i - 1])
2445 {
2446 if (i > 1)
2447 caseName << "_";
2448
2449 caseName << getDataTypeName(types[i]);
2450 }
2451 }
2452
2453 for (int fragI = 0; fragI <= 1; fragI++)
2454 {
2455 const bool vert = fragI == 0;
2456 tcu::TestCaseGroup *const group = vert ? vertexSubGroup : fragmentSubGroup;
2457 group->addChild(
2458 new FunctionCase(m_context, caseName.str().c_str(), "", groupFunc, types[0], paramTypes,
2459 groupAttribute, modifyParamNdx, useNearlyConstantInputs, (Precision)prec, vert,
2460 vert ? vertexSubGroupCalibrationStorage : fragmentSubGroupCalibrationStorage));
2461 }
2462 }
2463 }
2464 }
2465 }
2466
2467 } // namespace Performance
2468 } // namespace gles3
2469 } // namespace deqp
2470