1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.0 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Optimized vs unoptimized shader performance tests.
22 *//*--------------------------------------------------------------------*/
23
24 #include "es3pShaderOptimizationTests.hpp"
25 #include "glsShaderPerformanceMeasurer.hpp"
26 #include "gluRenderContext.hpp"
27 #include "gluShaderProgram.hpp"
28 #include "tcuTestLog.hpp"
29 #include "tcuVector.hpp"
30 #include "tcuStringTemplate.hpp"
31 #include "deSharedPtr.hpp"
32 #include "deStringUtil.hpp"
33 #include "deMath.h"
34
35 #include "glwFunctions.hpp"
36
37 #include <vector>
38 #include <string>
39 #include <map>
40
41 using de::SharedPtr;
42 using de::toString;
43 using glu::ShaderProgram;
44 using tcu::TestLog;
45 using tcu::Vec4;
46
47 using std::string;
48 using std::vector;
49
50 namespace deqp
51 {
52
53 using gls::ShaderPerformanceMeasurer;
54
55 namespace gles3
56 {
57 namespace Performance
58 {
59
//! Returns a map that contains exactly one entry mapping \p key to \p value.
static inline std::map<string, string> singleMap(const string &key, const string &value)
{
    std::map<string, string> single;
    single.insert(std::map<string, string>::value_type(key, value));
    return single;
}
66
//! Concatenates \p numRepeats copies of \p str, inserting \p delim between consecutive copies.
//!
//! \param str        String to repeat.
//! \param numRepeats Number of copies; zero or a negative count yields an empty string.
//! \param delim      Separator placed between copies (never before the first or after the last).
//! \return The joined string.
static inline string repeat(const string &str, int numRepeats, const string &delim = "")
{
    string result;

    // Nothing to emit; previously a single copy of str was returned even for a zero count.
    if (numRepeats <= 0)
        return result;

    // Final length is known up front, so allocate once.
    result.reserve(str.size() * (size_t)numRepeats + delim.size() * (size_t)(numRepeats - 1));

    result = str;
    for (int i = 1; i < numRepeats; i++)
    {
        // Append in two steps to avoid building a temporary delim+str string on every iteration.
        result += delim;
        result += str;
    }

    return result;
}
74
repeatIndexedTemplate(const string & strTempl,int numRepeats,const string & delim="",int ndxStart=0)75 static inline string repeatIndexedTemplate(const string &strTempl, int numRepeats, const string &delim = "",
76 int ndxStart = 0)
77 {
78 const tcu::StringTemplate templ(strTempl);
79 string result;
80 std::map<string, string> params;
81
82 for (int i = 0; i < numRepeats; i++)
83 {
84 params["PREV_NDX"] = toString(i + ndxStart - 1);
85 params["NDX"] = toString(i + ndxStart);
86
87 result += (i > 0 ? delim : "") + templ.specialize(params);
88 }
89
90 return result;
91 }
92
93 namespace
94 {
95
//! Shader stage that carries the case-specific workload.
enum CaseShaderType
{
    CASESHADERTYPE_VERTEX   = 0, //!< Workload is placed in the vertex shader.
    CASESHADERTYPE_FRAGMENT = 1, //!< Workload is placed in the fragment shader.

    CASESHADERTYPE_LAST //!< Number of valid shader types; not a valid type itself.
};
103
getShaderPrecision(CaseShaderType shaderType)104 static inline string getShaderPrecision(CaseShaderType shaderType)
105 {
106 switch (shaderType)
107 {
108 case CASESHADERTYPE_VERTEX:
109 return "highp";
110 case CASESHADERTYPE_FRAGMENT:
111 return "highp";
112 default:
113 DE_ASSERT(false);
114 return "";
115 }
116 }
117
118 struct ProgramData
119 {
120 glu::ProgramSources sources;
121 vector<gls::AttribSpec>
122 attributes; //!< \note Shouldn't contain a_position; that one is set by gls::ShaderPerformanceMeasurer.
123
ProgramDatadeqp::gles3::Performance::__anon24528f330111::ProgramData124 ProgramData(void)
125 {
126 }
ProgramDatadeqp::gles3::Performance::__anon24528f330111::ProgramData127 ProgramData(const glu::ProgramSources &sources_,
128 const vector<gls::AttribSpec> &attributes_ = vector<gls::AttribSpec>())
129 : sources(sources_)
130 , attributes(attributes_)
131 {
132 }
ProgramDatadeqp::gles3::Performance::__anon24528f330111::ProgramData133 ProgramData(const glu::ProgramSources &sources_, const gls::AttribSpec &attribute)
134 : sources(sources_)
135 , attributes(1, attribute)
136 {
137 }
138 };
139
140 //! Shader boilerplate helper; most cases have similar basic shader structure.
defaultProgramData(CaseShaderType shaderType,const string & funcDefs,const string & mainStatements)141 static inline ProgramData defaultProgramData(CaseShaderType shaderType, const string &funcDefs,
142 const string &mainStatements)
143 {
144 const bool isVertexCase = shaderType == CASESHADERTYPE_VERTEX;
145 const bool isFragmentCase = shaderType == CASESHADERTYPE_FRAGMENT;
146 const string vtxPrec = getShaderPrecision(CASESHADERTYPE_VERTEX);
147 const string fragPrec = getShaderPrecision(CASESHADERTYPE_FRAGMENT);
148
149 return ProgramData(glu::ProgramSources()
150 << glu::VertexSource("#version 300 es\n"
151 "in " +
152 vtxPrec +
153 " vec4 a_position;\n"
154 "in " +
155 vtxPrec +
156 " vec4 a_value;\n"
157 "out " +
158 fragPrec + " vec4 v_value;\n" + (isVertexCase ? funcDefs : "") +
159 "void main (void)\n"
160 "{\n"
161 " gl_Position = a_position;\n"
162 " " +
163 vtxPrec + " vec4 value = a_value;\n" +
164 (isVertexCase ? mainStatements : "") +
165 " v_value = value;\n"
166 "}\n")
167
168 << glu::FragmentSource("#version 300 es\n"
169 "layout (location = 0) out " +
170 fragPrec +
171 " vec4 o_color;\n"
172 "in " +
173 fragPrec + " vec4 v_value;\n" + (isFragmentCase ? funcDefs : "") +
174 "void main (void)\n"
175 "{\n"
176 " " +
177 fragPrec + " vec4 value = v_value;\n" +
178 (isFragmentCase ? mainStatements : "") +
179 " o_color = value;\n"
180 "}\n"),
181 gls::AttribSpec("a_value", Vec4(1.0f, 0.0f, 0.0f, 0.0f), Vec4(0.0f, 1.0f, 0.0f, 0.0f),
182 Vec4(0.0f, 0.0f, 1.0f, 0.0f), Vec4(0.0f, 0.0f, 0.0f, 1.0f)));
183 }
184
defaultProgramData(CaseShaderType shaderType,const string & mainStatements)185 static inline ProgramData defaultProgramData(CaseShaderType shaderType, const string &mainStatements)
186 {
187 return defaultProgramData(shaderType, "", mainStatements);
188 }
189
190 class ShaderOptimizationCase : public TestCase
191 {
192 public:
ShaderOptimizationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType)193 ShaderOptimizationCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType)
194 : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, description)
195 , m_caseShaderType(caseShaderType)
196 , m_state(STATE_LAST)
197 , m_measurer(context.getRenderContext(), caseShaderType == CASESHADERTYPE_VERTEX ? gls::CASETYPE_VERTEX :
198 caseShaderType == CASESHADERTYPE_FRAGMENT ? gls::CASETYPE_FRAGMENT :
199 gls::CASETYPE_LAST)
200 , m_unoptimizedResult(-1.0f, -1.0f)
201 , m_optimizedResult(-1.0f, -1.0f)
202 {
203 }
204
~ShaderOptimizationCase(void)205 virtual ~ShaderOptimizationCase(void)
206 {
207 }
208
209 void init(void);
210 IterateResult iterate(void);
211
212 protected:
213 virtual ProgramData generateProgramData(bool optimized) const = 0;
214
215 const CaseShaderType m_caseShaderType;
216
217 private:
218 enum State
219 {
220 STATE_INIT_UNOPTIMIZED = 0,
221 STATE_MEASURE_UNOPTIMIZED,
222 STATE_INIT_OPTIMIZED,
223 STATE_MEASURE_OPTIMIZED,
224 STATE_FINISHED,
225
226 STATE_LAST
227 };
228
programData(bool optimized)229 ProgramData &programData(bool optimized)
230 {
231 return optimized ? m_optimizedData : m_unoptimizedData;
232 }
program(bool optimized)233 SharedPtr<const ShaderProgram> &program(bool optimized)
234 {
235 return optimized ? m_optimizedProgram : m_unoptimizedProgram;
236 }
result(bool optimized)237 ShaderPerformanceMeasurer::Result &result(bool optimized)
238 {
239 return optimized ? m_optimizedResult : m_unoptimizedResult;
240 }
241
242 State m_state;
243 ShaderPerformanceMeasurer m_measurer;
244
245 ProgramData m_unoptimizedData;
246 ProgramData m_optimizedData;
247 SharedPtr<const ShaderProgram> m_unoptimizedProgram;
248 SharedPtr<const ShaderProgram> m_optimizedProgram;
249 ShaderPerformanceMeasurer::Result m_unoptimizedResult;
250 ShaderPerformanceMeasurer::Result m_optimizedResult;
251 };
252
init(void)253 void ShaderOptimizationCase::init(void)
254 {
255 const glu::RenderContext &renderCtx = m_context.getRenderContext();
256 TestLog &log = m_testCtx.getLog();
257
258 m_measurer.logParameters(log);
259
260 for (int ndx = 0; ndx < 2; ndx++)
261 {
262 const bool optimized = ndx == 1;
263
264 programData(optimized) = generateProgramData(optimized);
265
266 for (int i = 0; i < (int)programData(optimized).attributes.size(); i++)
267 DE_ASSERT(programData(optimized).attributes[i].name !=
268 "a_position"); // \note Position attribute is set by m_measurer.
269
270 program(optimized) =
271 SharedPtr<const ShaderProgram>(new ShaderProgram(renderCtx, programData(optimized).sources));
272
273 {
274 const tcu::ScopedLogSection section(log, optimized ? "OptimizedProgram" : "UnoptimizedProgram",
275 optimized ? "Hand-optimized program" : "Unoptimized program");
276 log << *program(optimized);
277 }
278
279 if (!program(optimized)->isOk())
280 TCU_FAIL("Shader compilation failed");
281 }
282
283 m_state = STATE_INIT_UNOPTIMIZED;
284 }
285
iterate(void)286 ShaderOptimizationCase::IterateResult ShaderOptimizationCase::iterate(void)
287 {
288 TestLog &log = m_testCtx.getLog();
289
290 if (m_state == STATE_INIT_UNOPTIMIZED || m_state == STATE_INIT_OPTIMIZED)
291 {
292 const bool optimized = m_state == STATE_INIT_OPTIMIZED;
293 m_measurer.init(program(optimized)->getProgram(), programData(optimized).attributes, 1);
294 m_state = optimized ? STATE_MEASURE_OPTIMIZED : STATE_MEASURE_UNOPTIMIZED;
295
296 return CONTINUE;
297 }
298 else if (m_state == STATE_MEASURE_UNOPTIMIZED || m_state == STATE_MEASURE_OPTIMIZED)
299 {
300 m_measurer.iterate();
301
302 if (m_measurer.isFinished())
303 {
304 const bool optimized = m_state == STATE_MEASURE_OPTIMIZED;
305 const tcu::ScopedLogSection section(log, optimized ? "OptimizedResult" : "UnoptimizedResult",
306 optimized ? "Measurement results for hand-optimized program" :
307 "Measurement result for unoptimized program");
308 m_measurer.logMeasurementInfo(log);
309 result(optimized) = m_measurer.getResult();
310 m_measurer.deinit();
311 m_state = optimized ? STATE_FINISHED : STATE_INIT_OPTIMIZED;
312 }
313
314 return CONTINUE;
315 }
316 else
317 {
318 DE_ASSERT(m_state == STATE_FINISHED);
319
320 const float unoptimizedRelevantResult = m_caseShaderType == CASESHADERTYPE_VERTEX ?
321 m_unoptimizedResult.megaVertPerSec :
322 m_unoptimizedResult.megaFragPerSec;
323 const float optimizedRelevantResult = m_caseShaderType == CASESHADERTYPE_VERTEX ?
324 m_optimizedResult.megaVertPerSec :
325 m_optimizedResult.megaFragPerSec;
326 const char *const relevantResultName = m_caseShaderType == CASESHADERTYPE_VERTEX ? "vertex" : "fragment";
327 const float ratio = unoptimizedRelevantResult / optimizedRelevantResult;
328 const int handOptimizationGain = (int)deFloatRound(100.0f / ratio) - 100;
329
330 log << TestLog::Message << "Unoptimized / optimized " << relevantResultName << " performance ratio: " << ratio
331 << TestLog::EndMessage;
332
333 if (handOptimizationGain >= 0)
334 log << TestLog::Message << "Note: " << handOptimizationGain
335 << "% performance gain was achieved with hand-optimized version" << TestLog::EndMessage;
336 else
337 log << TestLog::Message << "Note: hand-optimization degraded performance by " << -handOptimizationGain
338 << "%" << TestLog::EndMessage;
339
340 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(ratio, 2).c_str());
341
342 return STOP;
343 }
344 }
345
346 class LoopUnrollCase : public ShaderOptimizationCase
347 {
348 public:
349 enum CaseType
350 {
351 CASETYPE_INDEPENDENT = 0,
352 CASETYPE_DEPENDENT,
353
354 CASETYPE_LAST
355 };
356
LoopUnrollCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType,int numRepetitions)357 LoopUnrollCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
358 CaseType caseType, int numRepetitions)
359 : ShaderOptimizationCase(context, name, description, caseShaderType)
360 , m_numRepetitions(numRepetitions)
361 , m_caseType(caseType)
362 {
363 }
364
365 protected:
generateProgramData(bool optimized) const366 ProgramData generateProgramData(bool optimized) const
367 {
368 const string repetition =
369 optimized ? repeatIndexedTemplate("\t" + expressionTemplate(m_caseType) + ";\n", m_numRepetitions) :
370 loop(m_numRepetitions, expressionTemplate(m_caseType));
371
372 return defaultProgramData(m_caseShaderType, "\t" + getShaderPrecision(m_caseShaderType) +
373 " vec4 valueOrig = value;\n" + repetition);
374 }
375
376 private:
377 const int m_numRepetitions;
378 const CaseType m_caseType;
379
expressionTemplate(CaseType caseType)380 static inline string expressionTemplate(CaseType caseType)
381 {
382 switch (caseType)
383 {
384 case CASETYPE_INDEPENDENT:
385 return "value += sin(float(${NDX}+1)*valueOrig)";
386 case CASETYPE_DEPENDENT:
387 return "value = sin(value)";
388 default:
389 DE_ASSERT(false);
390 return "";
391 }
392 }
393
loop(int iterations,const string & innerExpr)394 static inline string loop(int iterations, const string &innerExpr)
395 {
396 return "\tfor (int i = 0; i < " + toString(iterations) + "; i++)\n\t\t" +
397 tcu::StringTemplate(innerExpr).specialize(singleMap("NDX", "i")) + ";\n";
398 }
399 };
400
401 class LoopInvariantCodeMotionCase : public ShaderOptimizationCase
402 {
403 public:
LoopInvariantCodeMotionCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,int numLoopIterations)404 LoopInvariantCodeMotionCase(Context &context, const char *name, const char *description,
405 CaseShaderType caseShaderType, int numLoopIterations)
406 : ShaderOptimizationCase(context, name, description, caseShaderType)
407 , m_numLoopIterations(numLoopIterations)
408 {
409 }
410
411 protected:
generateProgramData(bool optimized) const412 ProgramData generateProgramData(bool optimized) const
413 {
414 float scale = 0.0f;
415 for (int i = 0; i < m_numLoopIterations; i++)
416 scale += 3.2f * (float)i + 4.6f;
417 scale = 1.0f / scale;
418
419 const string precision = getShaderPrecision(m_caseShaderType);
420 const string statements = optimized ? " " + precision +
421 " vec4 valueOrig = value;\n"
422 " " +
423 precision +
424 " vec4 y = sin(cos(sin(valueOrig)));\n"
425 " for (int i = 0; i < " +
426 toString(m_numLoopIterations) +
427 "; i++)\n"
428 " {\n"
429 " " +
430 precision +
431 " float x = 3.2*float(i) + 4.6;\n"
432 " value += x*y;\n"
433 " }\n"
434 " value *= " +
435 toString(scale) + ";\n"
436
437 :
438 " " + precision +
439 " vec4 valueOrig = value;\n"
440 " for (int i = 0; i < " +
441 toString(m_numLoopIterations) +
442 "; i++)\n"
443 " {\n"
444 " " +
445 precision +
446 " float x = 3.2*float(i) + 4.6;\n"
447 " " +
448 precision +
449 " vec4 y = sin(cos(sin(valueOrig)));\n"
450 " value += x*y;\n"
451 " }\n"
452 " value *= " +
453 toString(scale) + ";\n";
454
455 return defaultProgramData(m_caseShaderType, statements);
456 }
457
458 private:
459 const int m_numLoopIterations;
460 };
461
462 class FunctionInliningCase : public ShaderOptimizationCase
463 {
464 public:
FunctionInliningCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,int callNestingDepth)465 FunctionInliningCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
466 int callNestingDepth)
467 : ShaderOptimizationCase(context, name, description, caseShaderType)
468 , m_callNestingDepth(callNestingDepth)
469 {
470 }
471
472 protected:
generateProgramData(bool optimized) const473 ProgramData generateProgramData(bool optimized) const
474 {
475 const string precision = getShaderPrecision(m_caseShaderType);
476 const string expression = "value*vec4(0.8, 0.7, 0.6, 0.9)";
477 const string maybeFuncDefs = optimized ? "" : funcDefinitions(m_callNestingDepth, precision, expression);
478 const string mainValueStatement =
479 (optimized ? "\tvalue = " + expression : "\tvalue = func" + toString(m_callNestingDepth - 1) + "(value)") +
480 ";\n";
481
482 return defaultProgramData(m_caseShaderType, maybeFuncDefs, mainValueStatement);
483 }
484
485 private:
486 const int m_callNestingDepth;
487
funcDefinitions(int callNestingDepth,const string & precision,const string & expression)488 static inline string funcDefinitions(int callNestingDepth, const string &precision, const string &expression)
489 {
490 string result = precision + " vec4 func0 (" + precision + " vec4 value) { return " + expression + "; }\n";
491
492 for (int i = 1; i < callNestingDepth; i++)
493 result += precision + " vec4 func" + toString(i) + " (" + precision + " vec4 v) { return func" +
494 toString(i - 1) + "(v); }\n";
495
496 return result;
497 }
498 };
499
500 class ConstantPropagationCase : public ShaderOptimizationCase
501 {
502 public:
503 enum CaseType
504 {
505 CASETYPE_BUILT_IN_FUNCTIONS = 0,
506 CASETYPE_ARRAY,
507 CASETYPE_STRUCT,
508
509 CASETYPE_LAST
510 };
511
ConstantPropagationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType,bool useConstantExpressionsOnly)512 ConstantPropagationCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
513 CaseType caseType, bool useConstantExpressionsOnly)
514 : ShaderOptimizationCase(context, name, description, caseShaderType)
515 , m_caseType(caseType)
516 , m_useConstantExpressionsOnly(useConstantExpressionsOnly)
517 {
518 }
519
520 protected:
generateProgramData(bool optimized) const521 ProgramData generateProgramData(bool optimized) const
522 {
523 const bool isVertexCase = m_caseShaderType == CASESHADERTYPE_VERTEX;
524 const string precision = getShaderPrecision(m_caseShaderType);
525 const string statements =
526 m_caseType == CASETYPE_BUILT_IN_FUNCTIONS ?
527 builtinFunctionsCaseStatements(optimized, m_useConstantExpressionsOnly, precision, isVertexCase) :
528 m_caseType == CASETYPE_ARRAY ?
529 arrayCaseStatements(optimized, m_useConstantExpressionsOnly, precision, isVertexCase) :
530 m_caseType == CASETYPE_STRUCT ?
531 structCaseStatements(optimized, m_useConstantExpressionsOnly, precision, isVertexCase) :
532 deFatalStr("Invalid CaseType");
533
534 return defaultProgramData(m_caseShaderType, statements);
535 }
536
537 private:
538 const CaseType m_caseType;
539 const bool m_useConstantExpressionsOnly;
540
builtinFunctionsCaseStatements(bool optimized,bool constantExpressionsOnly,const string & precision,bool useHeavierWorkload)541 static inline string builtinFunctionsCaseStatements(bool optimized, bool constantExpressionsOnly,
542 const string &precision, bool useHeavierWorkload)
543 {
544 const string constMaybe = constantExpressionsOnly ? "const " : "";
545 const int numSinRows = useHeavierWorkload ? 12 : 1;
546
547 return optimized ? " value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match "
548 "the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
549
550 :
551 " " + constMaybe + precision +
552 " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
553 " " +
554 constMaybe + precision +
555 " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
556 " " +
557 constMaybe +
558 "bvec4 c = bvec4(true, false, true, true);\n"
559 " " +
560 constMaybe + precision +
561 " vec4 d = exp(b + vec4(c));\n"
562 " " +
563 constMaybe + precision + " vec4 e0 = inversesqrt(mix(d+a, d+b, a));\n" +
564 repeatIndexedTemplate(" " + constMaybe + precision +
565 " vec4 e${NDX} = sin(sin(sin(sin(e${PREV_NDX}))));\n",
566 numSinRows, "", 1) +
567 " " + constMaybe + precision + " vec4 f = abs(e" + toString(numSinRows) + ");\n" +
568 " value = f*value;\n";
569 }
570
arrayCaseStatements(bool optimized,bool constantExpressionsOnly,const string & precision,bool useHeavierWorkload)571 static inline string arrayCaseStatements(bool optimized, bool constantExpressionsOnly, const string &precision,
572 bool useHeavierWorkload)
573 {
574 const string constMaybe = constantExpressionsOnly ? "const " : "";
575 const int numSinRows = useHeavierWorkload ? 12 : 1;
576
577 return optimized ? " value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match "
578 "the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
579
580 :
581 " const int arrLen = 4;\n" +
582 (constantExpressionsOnly ? " const " + precision +
583 " vec4 arr[arrLen] =\n"
584 " vec4[](vec4(0.1, 0.5, 0.9, 1.3),\n"
585 " vec4(0.2, 0.6, 1.0, 1.4),\n"
586 " vec4(0.3, 0.7, 1.1, 1.5),\n"
587 " vec4(0.4, 0.8, 1.2, 1.6));\n"
588
589 :
590 " " + precision +
591 " vec4 arr[arrLen];\n"
592 " arr[0] = vec4(0.1, 0.5, 0.9, 1.3);\n"
593 " arr[1] = vec4(0.2, 0.6, 1.0, 1.4);\n"
594 " arr[2] = vec4(0.3, 0.7, 1.1, 1.5);\n"
595 " arr[3] = vec4(0.4, 0.8, 1.2, 1.6);\n") +
596 " " + constMaybe + precision +
597 " vec4 a = (arr[0] + arr[1] + arr[2] + arr[3]) * (1.0 / float(arr.length()));\n"
598 " " +
599 constMaybe + precision + " vec4 b0 = cos(sin(a));\n" +
600 repeatIndexedTemplate(" " + constMaybe + precision +
601 " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n",
602 numSinRows, "", 1) +
603 " " + constMaybe + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
604 " value = c*value;\n";
605 }
606
structCaseStatements(bool optimized,bool constantExpressionsOnly,const string & precision,bool useHeavierWorkload)607 static inline string structCaseStatements(bool optimized, bool constantExpressionsOnly, const string &precision,
608 bool useHeavierWorkload)
609 {
610 const string constMaybe = constantExpressionsOnly ? "const " : "";
611 const int numSinRows = useHeavierWorkload ? 12 : 1;
612
613 return optimized ? " value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match "
614 "the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
615
616 :
617 " struct S\n"
618 " {\n"
619 " " +
620 precision +
621 " vec4 a;\n"
622 " " +
623 precision +
624 " vec4 b;\n"
625 " " +
626 precision +
627 " vec4 c;\n"
628 " " +
629 precision +
630 " vec4 d;\n"
631 " };\n"
632 "\n"
633 " " +
634 constMaybe +
635 "S s =\n"
636 " S(vec4(0.1, 0.5, 0.9, 1.3),\n"
637 " vec4(0.2, 0.6, 1.0, 1.4),\n"
638 " vec4(0.3, 0.7, 1.1, 1.5),\n"
639 " vec4(0.4, 0.8, 1.2, 1.6));\n"
640 " " +
641 constMaybe + precision +
642 " vec4 a = (s.a + s.b + s.c + s.d) * 0.25;\n"
643 " " +
644 constMaybe + precision + " vec4 b0 = cos(sin(a));\n" +
645 repeatIndexedTemplate(" " + constMaybe + precision +
646 " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n",
647 numSinRows, "", 1) +
648 " " + constMaybe + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
649 " value = c*value;\n";
650 }
651 };
652
653 class CommonSubexpressionCase : public ShaderOptimizationCase
654 {
655 public:
656 enum CaseType
657 {
658 CASETYPE_SINGLE_STATEMENT = 0,
659 CASETYPE_MULTIPLE_STATEMENTS,
660 CASETYPE_STATIC_BRANCH,
661 CASETYPE_LOOP,
662
663 CASETYPE_LAST
664 };
665
CommonSubexpressionCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType)666 CommonSubexpressionCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
667 CaseType caseType)
668 : ShaderOptimizationCase(context, name, description, caseShaderType)
669 , m_caseType(caseType)
670 {
671 }
672
673 protected:
generateProgramData(bool optimized) const674 ProgramData generateProgramData(bool optimized) const
675 {
676 const bool isVertexCase = m_caseShaderType == CASESHADERTYPE_VERTEX;
677 const string precision = getShaderPrecision(m_caseShaderType);
678 const string statements = m_caseType == CASETYPE_SINGLE_STATEMENT ?
679 singleStatementCaseStatements(optimized, precision, isVertexCase) :
680 m_caseType == CASETYPE_MULTIPLE_STATEMENTS ?
681 multipleStatementsCaseStatements(optimized, precision, isVertexCase) :
682 m_caseType == CASETYPE_STATIC_BRANCH ?
683 staticBranchCaseStatements(optimized, precision, isVertexCase) :
684 m_caseType == CASETYPE_LOOP ? loopCaseStatements(optimized, precision, isVertexCase) :
685 deFatalStr("Invalid CaseType");
686
687 return defaultProgramData(m_caseShaderType, statements);
688 }
689
690 private:
691 const CaseType m_caseType;
692
singleStatementCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)693 static inline string singleStatementCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
694 {
695 const int numTopLevelRepeats = useHeavierWorkload ? 4 : 1;
696
697 return optimized ? " " + precision +
698 " vec4 s = sin(value);\n"
699 " " +
700 precision +
701 " vec4 cs = cos(s);\n"
702 " " +
703 precision +
704 " vec4 d = fract(s + cs) + sqrt(s + exp(cs));\n"
705 " value = " +
706 repeat("d", numTopLevelRepeats, "+") + ";\n"
707
708 :
709 " value = " +
710 repeat("fract(sin(value) + cos(sin(value))) + sqrt(sin(value) + exp(cos(sin(value))))",
711 numTopLevelRepeats, "\n\t + ") +
712 ";\n";
713 }
714
multipleStatementsCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)715 static inline string multipleStatementsCaseStatements(bool optimized, const string &precision,
716 bool useHeavierWorkload)
717 {
718 const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
719 DE_ASSERT(numTopLevelRepeats >= 2);
720
721 return optimized ? " " + precision +
722 " vec4 a = sin(value) + cos(exp(value));\n"
723 " " +
724 precision +
725 " vec4 b = cos(cos(a));\n"
726 " a = fract(exp(sqrt(b)));\n"
727 "\n" +
728 repeat("\tvalue += a*b;\n", numTopLevelRepeats)
729
730 :
731 repeatIndexedTemplate(" " + precision +
732 " vec4 a${NDX} = sin(value) + cos(exp(value));\n"
733 " " +
734 precision +
735 " vec4 b${NDX} = cos(cos(a${NDX}));\n"
736 " a${NDX} = fract(exp(sqrt(b${NDX})));\n"
737 "\n",
738 numTopLevelRepeats) +
739
740 repeatIndexedTemplate(" value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
741 }
742
staticBranchCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)743 static inline string staticBranchCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
744 {
745 const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
746 DE_ASSERT(numTopLevelRepeats >= 2);
747
748 if (optimized)
749 {
750 return " " + precision +
751 " vec4 a = sin(value) + cos(exp(value));\n"
752 " " +
753 precision +
754 " vec4 b = cos(a);\n"
755 " b = cos(b);\n"
756 " a = fract(exp(sqrt(b)));\n"
757 "\n" +
758 repeat(" value += a*b;\n", numTopLevelRepeats);
759 }
760 else
761 {
762 string result;
763
764 for (int i = 0; i < numTopLevelRepeats; i++)
765 {
766 result += " " + precision + " vec4 a" + toString(i) +
767 " = sin(value) + cos(exp(value));\n"
768 " " +
769 precision + " vec4 b" + toString(i) + " = cos(a" + toString(i) + ");\n";
770
771 if (i % 3 == 0)
772 result += " if (1 < 2)\n"
773 " b" +
774 toString(i) + " = cos(b" + toString(i) + ");\n";
775 else if (i % 3 == 1)
776 result += " b" + toString(i) + " = cos(b" + toString(i) + ");\n";
777 else if (i % 3 == 2)
778 result += " if (2 < 1);\n"
779 " else\n"
780 " b" +
781 toString(i) + " = cos(b" + toString(i) + ");\n";
782 else
783 DE_ASSERT(false);
784
785 result += " a" + toString(i) + " = fract(exp(sqrt(b" + toString(i) + ")));\n\n";
786 }
787
788 result += repeatIndexedTemplate(" value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
789
790 return result;
791 }
792 }
793
loopCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)794 static inline string loopCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
795 {
796 const int numLoopIterations = useHeavierWorkload ? 32 : 4;
797
798 return optimized ? " " + precision +
799 " vec4 acc = value;\n"
800 " for (int i = 0; i < " +
801 toString(numLoopIterations) +
802 "; i++)\n"
803 " acc = sin(acc);\n"
804 "\n"
805 " value += acc;\n"
806 " value += acc;\n"
807
808 :
809 " " + precision +
810 " vec4 acc0 = value;\n"
811 " for (int i = 0; i < " +
812 toString(numLoopIterations) +
813 "; i++)\n"
814 " acc0 = sin(acc0);\n"
815 "\n"
816 " " +
817 precision +
818 " vec4 acc1 = value;\n"
819 " for (int i = 0; i < " +
820 toString(numLoopIterations) +
821 "; i++)\n"
822 " acc1 = sin(acc1);\n"
823 "\n"
824 " value += acc0;\n"
825 " value += acc1;\n";
826 }
827 };
828
829 class DeadCodeEliminationCase : public ShaderOptimizationCase
830 {
831 public:
832 enum CaseType
833 {
834 CASETYPE_DEAD_BRANCH_SIMPLE = 0,
835 CASETYPE_DEAD_BRANCH_COMPLEX,
836 CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST,
837 CASETYPE_DEAD_BRANCH_FUNC_CALL,
838 CASETYPE_UNUSED_VALUE_BASIC,
839 CASETYPE_UNUSED_VALUE_LOOP,
840 CASETYPE_UNUSED_VALUE_DEAD_BRANCH,
841 CASETYPE_UNUSED_VALUE_AFTER_RETURN,
842 CASETYPE_UNUSED_VALUE_MUL_ZERO,
843
844 CASETYPE_LAST
845 };
846
DeadCodeEliminationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType)847 DeadCodeEliminationCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
848 CaseType caseType)
849 : ShaderOptimizationCase(context, name, description, caseShaderType)
850 , m_caseType(caseType)
851 {
852 }
853
854 protected:
generateProgramData(bool optimized) const855 ProgramData generateProgramData(bool optimized) const
856 {
857 const bool isVertexCase = m_caseShaderType == CASESHADERTYPE_VERTEX;
858 const string precision = getShaderPrecision(m_caseShaderType);
859 const string funcDefs = m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL ?
860 deadBranchFuncCallCaseFuncDefs(optimized, precision) :
861 m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN ?
862 unusedValueAfterReturnCaseFuncDefs(optimized, precision, isVertexCase) :
863 "";
864
865 const string statements = m_caseType == CASETYPE_DEAD_BRANCH_SIMPLE ?
866 deadBranchSimpleCaseStatements(optimized, isVertexCase) :
867 m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX ?
868 deadBranchComplexCaseStatements(optimized, precision, true, isVertexCase) :
869 m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST ?
870 deadBranchComplexCaseStatements(optimized, precision, false, isVertexCase) :
871 m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL ?
872 deadBranchFuncCallCaseStatements(optimized, isVertexCase) :
873 m_caseType == CASETYPE_UNUSED_VALUE_BASIC ?
874 unusedValueBasicCaseStatements(optimized, precision, isVertexCase) :
875 m_caseType == CASETYPE_UNUSED_VALUE_LOOP ?
876 unusedValueLoopCaseStatements(optimized, precision, isVertexCase) :
877 m_caseType == CASETYPE_UNUSED_VALUE_DEAD_BRANCH ?
878 unusedValueDeadBranchCaseStatements(optimized, precision, isVertexCase) :
879 m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN ?
880 unusedValueAfterReturnCaseStatements() :
881 m_caseType == CASETYPE_UNUSED_VALUE_MUL_ZERO ?
882 unusedValueMulZeroCaseStatements(optimized, precision, isVertexCase) :
883 deFatalStr("Invalid CaseType");
884
885 return defaultProgramData(m_caseShaderType, funcDefs, statements);
886 }
887
888 private:
889 const CaseType m_caseType;
890
deadBranchSimpleCaseStatements(bool optimized,bool useHeavierWorkload)891 static inline string deadBranchSimpleCaseStatements(bool optimized, bool useHeavierWorkload)
892 {
893 const int numLoopIterations = useHeavierWorkload ? 16 : 4;
894
895 return optimized ? " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
896
897 :
898 " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
899 " if (2 < 1)\n"
900 " {\n"
901 " value = cos(exp(sin(value))*log(sqrt(value)));\n"
902 " for (int i = 0; i < " +
903 toString(numLoopIterations) +
904 "; i++)\n"
905 " value = sin(value);\n"
906 " }\n";
907 }
908
deadBranchComplexCaseStatements(bool optimized,const string & precision,bool useConst,bool useHeavierWorkload)909 static inline string deadBranchComplexCaseStatements(bool optimized, const string &precision, bool useConst,
910 bool useHeavierWorkload)
911 {
912 const string constMaybe = useConst ? "const " : "";
913 const int numLoopIterations = useHeavierWorkload ? 16 : 4;
914
915 return optimized ? " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
916
917 :
918 " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
919 " " +
920 constMaybe + precision +
921 " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
922 " " +
923 constMaybe + precision +
924 " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
925 " " +
926 constMaybe +
927 "bvec4 c = bvec4(true, false, true, true);\n"
928 " " +
929 constMaybe + precision +
930 " vec4 d = exp(b + vec4(c));\n"
931 " " +
932 constMaybe + precision +
933 " vec4 e = 1.8*abs(sin(sin(inversesqrt(mix(d+a, d+b, a)))));\n"
934 " if (e.x > 1.0)\n"
935 " {\n"
936 " value = cos(exp(sin(value))*log(sqrt(value)));\n"
937 " for (int i = 0; i < " +
938 toString(numLoopIterations) +
939 "; i++)\n"
940 " value = sin(value);\n"
941 " }\n";
942 }
943
// Returns the helper function definition needed by the "dead branch, function
// call condition" case: a one-line `func` that doubles its float argument.
// The optimized variant needs no helper, so it gets an empty string.
static inline string deadBranchFuncCallCaseFuncDefs(bool optimized, const string &precision)
{
    if (optimized)
        return string();

    string funcDef = precision;
    funcDef += " float func (";
    funcDef += precision;
    funcDef += " float x) { return 2.0*x; }\n";
    return funcDef;
}
948
deadBranchFuncCallCaseStatements(bool optimized,bool useHeavierWorkload)949 static inline string deadBranchFuncCallCaseStatements(bool optimized, bool useHeavierWorkload)
950 {
951 const int numLoopIterations = useHeavierWorkload ? 16 : 4;
952
953 return optimized ? " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
954
955 :
956 " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
957 " if (func(0.3) > 1.0)\n"
958 " {\n"
959 " value = cos(exp(sin(value))*log(sqrt(value)));\n"
960 " for (int i = 0; i < " +
961 toString(numLoopIterations) +
962 "; i++)\n"
963 " value = sin(value);\n"
964 " }\n";
965 }
966
unusedValueBasicCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)967 static inline string unusedValueBasicCaseStatements(bool optimized, const string &precision,
968 bool useHeavierWorkload)
969 {
970 const int numSinRows = useHeavierWorkload ? 12 : 1;
971
972 return optimized ? " " + precision +
973 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
974 " value = used;\n"
975
976 :
977 " " + precision +
978 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
979 " " +
980 precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value))) + used;\n" +
981 repeat(" unused = sin(sin(sin(sin(unused))));\n", numSinRows) + " value = used;\n";
982 }
983
unusedValueLoopCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)984 static inline string unusedValueLoopCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
985 {
986 const int numLoopIterations = useHeavierWorkload ? 16 : 4;
987
988 return optimized ? " " + precision +
989 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
990 " value = used;\n"
991
992 :
993 " " + precision +
994 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
995 " " +
996 precision +
997 " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
998 " for (int i = 0; i < " +
999 toString(numLoopIterations) +
1000 "; i++)\n"
1001 " unused = sin(unused + used);\n"
1002 " value = used;\n";
1003 }
1004
unusedValueAfterReturnCaseFuncDefs(bool optimized,const string & precision,bool useHeavierWorkload)1005 static inline string unusedValueAfterReturnCaseFuncDefs(bool optimized, const string &precision,
1006 bool useHeavierWorkload)
1007 {
1008 const int numSinRows = useHeavierWorkload ? 12 : 1;
1009
1010 return optimized ? precision + " vec4 func (" + precision +
1011 " vec4 v)\n"
1012 "{\n"
1013 " " +
1014 precision +
1015 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
1016 " return used;\n"
1017 "}\n"
1018
1019 :
1020 precision + " vec4 func (" + precision +
1021 " vec4 v)\n"
1022 "{\n"
1023 " " +
1024 precision +
1025 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
1026 " " +
1027 precision + " vec4 unused = cos(exp(sin(v))*log(sqrt(v)));\n" +
1028 repeat(" unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
1029 " return used;\n"
1030 " used = used*unused;"
1031 " return used;\n"
1032 "}\n";
1033 }
1034
// Main-body statement for the "unused value after return" case: the whole
// computation is delegated to the helper `func`, for both shader variants.
static inline string unusedValueAfterReturnCaseStatements(void)
{
    const string callStatement(" value = func(value);\n");
    return callStatement;
}
1039
unusedValueDeadBranchCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)1040 static inline string unusedValueDeadBranchCaseStatements(bool optimized, const string &precision,
1041 bool useHeavierWorkload)
1042 {
1043 const int numSinRows = useHeavierWorkload ? 12 : 1;
1044
1045 return optimized ? " " + precision +
1046 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1047 " value = used;\n"
1048
1049 :
1050 " " + precision +
1051 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1052 " " +
1053 precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n" +
1054 repeat(" unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
1055 " if (2 < 1)\n"
1056 " used = used*unused;\n"
1057 " value = used;\n";
1058 }
1059
unusedValueMulZeroCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)1060 static inline string unusedValueMulZeroCaseStatements(bool optimized, const string &precision,
1061 bool useHeavierWorkload)
1062 {
1063 const int numSinRows = useHeavierWorkload ? 12 : 1;
1064
1065 return optimized ? " " + precision +
1066 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1067 " value = used;\n"
1068
1069 :
1070 " " + precision +
1071 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1072 " " +
1073 precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n" +
1074 repeat(" unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
1075 " value = used + unused*float(1-1);\n";
1076 }
1077 };
1078
1079 } // namespace
1080
ShaderOptimizationTests(Context & context)1081 ShaderOptimizationTests::ShaderOptimizationTests(Context &context)
1082 : TestCaseGroup(context, "optimization", "Shader Optimization Performance Tests")
1083 {
1084 }
1085
~ShaderOptimizationTests(void)1086 ShaderOptimizationTests::~ShaderOptimizationTests(void)
1087 {
1088 }
1089
init(void)1090 void ShaderOptimizationTests::init(void)
1091 {
1092 TestCaseGroup *const unrollGroup = new TestCaseGroup(m_context, "loop_unrolling", "Loop Unrolling Cases");
1093 TestCaseGroup *const loopInvariantCodeMotionGroup =
1094 new TestCaseGroup(m_context, "loop_invariant_code_motion", "Loop-Invariant Code Motion Cases");
1095 TestCaseGroup *const inlineGroup = new TestCaseGroup(m_context, "function_inlining", "Function Inlining Cases");
1096 TestCaseGroup *const constantPropagationGroup =
1097 new TestCaseGroup(m_context, "constant_propagation", "Constant Propagation Cases");
1098 TestCaseGroup *const commonSubexpressionGroup =
1099 new TestCaseGroup(m_context, "common_subexpression_elimination", "Common Subexpression Elimination Cases");
1100 TestCaseGroup *const deadCodeEliminationGroup =
1101 new TestCaseGroup(m_context, "dead_code_elimination", "Dead Code Elimination Cases");
1102 addChild(unrollGroup);
1103 addChild(loopInvariantCodeMotionGroup);
1104 addChild(inlineGroup);
1105 addChild(constantPropagationGroup);
1106 addChild(commonSubexpressionGroup);
1107 addChild(deadCodeEliminationGroup);
1108
1109 for (int caseShaderTypeI = 0; caseShaderTypeI < CASESHADERTYPE_LAST; caseShaderTypeI++)
1110 {
1111 const CaseShaderType caseShaderType = (CaseShaderType)caseShaderTypeI;
1112 const char *const caseShaderTypeSuffix = caseShaderType == CASESHADERTYPE_VERTEX ? "_vertex" :
1113 caseShaderType == CASESHADERTYPE_FRAGMENT ? "_fragment" :
1114 DE_NULL;
1115
1116 // Loop unrolling cases.
1117
1118 {
1119 static const int loopIterationCounts[] = {4, 8, 32};
1120
1121 for (int caseTypeI = 0; caseTypeI < LoopUnrollCase::CASETYPE_LAST; caseTypeI++)
1122 {
1123 const LoopUnrollCase::CaseType caseType = (LoopUnrollCase::CaseType)caseTypeI;
1124 const string caseTypeName = caseType == LoopUnrollCase::CASETYPE_INDEPENDENT ?
1125 "independent_iterations" :
1126 caseType == LoopUnrollCase::CASETYPE_DEPENDENT ? "dependent_iterations" :
1127 DE_NULL;
1128 const string caseTypeDesc =
1129 caseType == LoopUnrollCase::CASETYPE_INDEPENDENT ? "loop iterations don't depend on each other" :
1130 caseType == LoopUnrollCase::CASETYPE_DEPENDENT ? "loop iterations depend on each other" :
1131 DE_NULL;
1132
1133 for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
1134 {
1135 const int loopIterations = loopIterationCounts[loopIterNdx];
1136 const string name = caseTypeName + "_" + toString(loopIterations) + caseShaderTypeSuffix;
1137 const string description = toString(loopIterations) + " iterations; " + caseTypeDesc;
1138
1139 unrollGroup->addChild(new LoopUnrollCase(m_context, name.c_str(), description.c_str(),
1140 caseShaderType, caseType, loopIterations));
1141 }
1142 }
1143 }
1144
1145 // Loop-invariant code motion cases.
1146
1147 {
1148 static const int loopIterationCounts[] = {4, 8, 32};
1149
1150 for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
1151 {
1152 const int loopIterations = loopIterationCounts[loopIterNdx];
1153 const string name = toString(loopIterations) + "_iterations" + caseShaderTypeSuffix;
1154
1155 loopInvariantCodeMotionGroup->addChild(
1156 new LoopInvariantCodeMotionCase(m_context, name.c_str(), "", caseShaderType, loopIterations));
1157 }
1158 }
1159
1160 // Function inlining cases.
1161
1162 {
1163 static const int callNestingDepths[] = {4, 8, 32};
1164
1165 for (int nestDepthNdx = 0; nestDepthNdx < DE_LENGTH_OF_ARRAY(callNestingDepths); nestDepthNdx++)
1166 {
1167 const int nestingDepth = callNestingDepths[nestDepthNdx];
1168 const string name = toString(nestingDepth) + "_nested" + caseShaderTypeSuffix;
1169
1170 inlineGroup->addChild(
1171 new FunctionInliningCase(m_context, name.c_str(), "", caseShaderType, nestingDepth));
1172 }
1173 }
1174
1175 // Constant propagation cases.
1176
1177 for (int caseTypeI = 0; caseTypeI < ConstantPropagationCase::CASETYPE_LAST; caseTypeI++)
1178 {
1179 const ConstantPropagationCase::CaseType caseType = (ConstantPropagationCase::CaseType)caseTypeI;
1180 const string caseTypeName = caseType == ConstantPropagationCase::CASETYPE_BUILT_IN_FUNCTIONS ?
1181 "built_in_functions" :
1182 caseType == ConstantPropagationCase::CASETYPE_ARRAY ? "array" :
1183 caseType == ConstantPropagationCase::CASETYPE_STRUCT ? "struct" :
1184 DE_NULL;
1185
1186 for (int constantExpressionsOnlyI = 0; constantExpressionsOnlyI <= 1; constantExpressionsOnlyI++)
1187 {
1188 const bool constantExpressionsOnly = constantExpressionsOnlyI != 0;
1189 const string name = caseTypeName + (constantExpressionsOnly ? "" : "_no_const") + caseShaderTypeSuffix;
1190
1191 constantPropagationGroup->addChild(new ConstantPropagationCase(
1192 m_context, name.c_str(), "", caseShaderType, caseType, constantExpressionsOnly));
1193 }
1194 }
1195
1196 // Common subexpression cases.
1197
1198 for (int caseTypeI = 0; caseTypeI < CommonSubexpressionCase::CASETYPE_LAST; caseTypeI++)
1199 {
1200 const CommonSubexpressionCase::CaseType caseType = (CommonSubexpressionCase::CaseType)caseTypeI;
1201
1202 const string caseTypeName =
1203 caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT ? "single_statement" :
1204 caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS ? "multiple_statements" :
1205 caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH ? "static_branch" :
1206 caseType == CommonSubexpressionCase::CASETYPE_LOOP ? "loop" :
1207 DE_NULL;
1208
1209 const string description = caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT ?
1210 "A single statement containing multiple uses of same subexpression" :
1211 caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS ?
1212 "Multiple statements performing same computations" :
1213 caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH ?
1214 "Multiple statements including a static conditional" :
1215 caseType == CommonSubexpressionCase::CASETYPE_LOOP ?
1216 "Multiple loops performing the same computations" :
1217 DE_NULL;
1218
1219 commonSubexpressionGroup->addChild(
1220 new CommonSubexpressionCase(m_context, (caseTypeName + caseShaderTypeSuffix).c_str(),
1221 description.c_str(), caseShaderType, caseType));
1222 }
1223
1224 // Dead code elimination cases.
1225
1226 for (int caseTypeI = 0; caseTypeI < DeadCodeEliminationCase::CASETYPE_LAST; caseTypeI++)
1227 {
1228 const DeadCodeEliminationCase::CaseType caseType = (DeadCodeEliminationCase::CaseType)caseTypeI;
1229 const char *const caseTypeName =
1230 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE ? "dead_branch_simple" :
1231 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX ? "dead_branch_complex" :
1232 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST ?
1233 "dead_branch_complex_no_const" :
1234 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL ? "dead_branch_func_call" :
1235 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC ? "unused_value_basic" :
1236 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP ? "unused_value_loop" :
1237 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH ? "unused_value_dead_branch" :
1238 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN ? "unused_value_after_return" :
1239 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO ? "unused_value_mul_zero" :
1240 DE_NULL;
1241
1242 const char *const caseTypeDescription =
1243 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE ?
1244 "Do computation inside a branch that is never taken (condition is simple false constant "
1245 "expression)" :
1246 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX ?
1247 "Do computation inside a branch that is never taken (condition is complex false constant "
1248 "expression)" :
1249 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST ?
1250 "Do computation inside a branch that is never taken (condition is complex false expression, not "
1251 "constant expression but still compile-time computable)" :
1252 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL ?
1253 "Do computation inside a branch that is never taken (condition is compile-time computable false "
1254 "expression containing function call to a simple inlineable function)" :
1255 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC ?
1256 "Compute a value that is never used even statically" :
1257 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP ?
1258 "Compute a value, using a loop, that is never used even statically" :
1259 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH ?
1260 "Compute a value that is used only inside a statically dead branch" :
1261 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN ?
1262 "Compute a value that is used only after a return statement" :
1263 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO ?
1264 "Compute a value that is used but multiplied by a zero constant expression" :
1265 DE_NULL;
1266
1267 deadCodeEliminationGroup->addChild(
1268 new DeadCodeEliminationCase(m_context, (string() + caseTypeName + caseShaderTypeSuffix).c_str(),
1269 caseTypeDescription, caseShaderType, caseType));
1270 }
1271 }
1272 }
1273
1274 } // namespace Performance
1275 } // namespace gles3
1276 } // namespace deqp
1277