1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 2.0 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Optimized vs unoptimized shader performance tests.
22 *//*--------------------------------------------------------------------*/
23
24 #include "es2pShaderOptimizationTests.hpp"
25 #include "glsShaderPerformanceMeasurer.hpp"
26 #include "gluRenderContext.hpp"
27 #include "gluShaderProgram.hpp"
28 #include "tcuTestLog.hpp"
29 #include "tcuVector.hpp"
30 #include "tcuStringTemplate.hpp"
31 #include "deSharedPtr.hpp"
32 #include "deStringUtil.hpp"
33 #include "deMath.h"
34
35 #include "glwFunctions.hpp"
36
37 #include <vector>
38 #include <string>
39 #include <map>
40
41 using de::SharedPtr;
42 using de::toString;
43 using glu::ShaderProgram;
44 using tcu::TestLog;
45 using tcu::Vec4;
46
47 using std::string;
48 using std::vector;
49
50 namespace deqp
51 {
52
53 using gls::ShaderPerformanceMeasurer;
54
55 namespace gles2
56 {
57 namespace Performance
58 {
59
//! Build a map that contains exactly one entry: \p key mapped to \p value.
static inline std::map<string, string> singleMap(const string &key, const string &value)
{
    std::map<string, string> result;
    result.insert(std::map<string, string>::value_type(key, value));
    return result;
}
66
//! Concatenate \p numRepeats copies of \p str, separated by \p delim.
//!
//! \param str        String to repeat.
//! \param numRepeats Number of copies to emit.
//! \param delim      Separator placed between consecutive copies (not before the first or after the last).
//! \return The joined string; empty when \p numRepeats is zero or negative.
static inline string repeat(const string &str, int numRepeats, const string &delim = "")
{
    string result;

    // \note Starting from an empty string makes numRepeats <= 0 yield "" instead of a stray copy of str.
    for (int i = 0; i < numRepeats; i++)
    {
        if (i > 0)
            result += delim;
        result += str;
    }

    return result;
}
74
repeatIndexedTemplate(const string & strTempl,int numRepeats,const string & delim="",int ndxStart=0)75 static inline string repeatIndexedTemplate(const string &strTempl, int numRepeats, const string &delim = "",
76 int ndxStart = 0)
77 {
78 const tcu::StringTemplate templ(strTempl);
79 string result;
80 std::map<string, string> params;
81
82 for (int i = 0; i < numRepeats; i++)
83 {
84 params["PREV_NDX"] = toString(i + ndxStart - 1);
85 params["NDX"] = toString(i + ndxStart);
86
87 result += (i > 0 ? delim : "") + templ.specialize(params);
88 }
89
90 return result;
91 }
92
93 namespace
94 {
95
//! Shader stage that receives the case-specific code.
enum CaseShaderType
{
    CASESHADERTYPE_VERTEX = 0,   //!< Case code is placed in the vertex shader.
    CASESHADERTYPE_FRAGMENT = 1, //!< Case code is placed in the fragment shader.

    CASESHADERTYPE_LAST //!< Number of shader types; not a valid type itself.
};
103
getShaderPrecision(CaseShaderType shaderType)104 static inline string getShaderPrecision(CaseShaderType shaderType)
105 {
106 switch (shaderType)
107 {
108 case CASESHADERTYPE_VERTEX:
109 return "highp";
110 case CASESHADERTYPE_FRAGMENT:
111 return "mediump";
112 default:
113 DE_ASSERT(false);
114 return "";
115 }
116 }
117
118 struct ProgramData
119 {
120 glu::ProgramSources sources;
121 vector<gls::AttribSpec>
122 attributes; //!< \note Shouldn't contain a_position; that one is set by gls::ShaderPerformanceMeasurer.
123
ProgramDatadeqp::gles2::Performance::__anon9fd9d6310111::ProgramData124 ProgramData(void)
125 {
126 }
ProgramDatadeqp::gles2::Performance::__anon9fd9d6310111::ProgramData127 ProgramData(const glu::ProgramSources &sources_,
128 const vector<gls::AttribSpec> &attributes_ = vector<gls::AttribSpec>())
129 : sources(sources_)
130 , attributes(attributes_)
131 {
132 }
ProgramDatadeqp::gles2::Performance::__anon9fd9d6310111::ProgramData133 ProgramData(const glu::ProgramSources &sources_, const gls::AttribSpec &attribute)
134 : sources(sources_)
135 , attributes(1, attribute)
136 {
137 }
138 };
139
140 //! Shader boilerplate helper; most cases have similar basic shader structure.
defaultProgramData(CaseShaderType shaderType,const string & funcDefs,const string & mainStatements)141 static inline ProgramData defaultProgramData(CaseShaderType shaderType, const string &funcDefs,
142 const string &mainStatements)
143 {
144 const bool isVertexCase = shaderType == CASESHADERTYPE_VERTEX;
145 const bool isFragmentCase = shaderType == CASESHADERTYPE_FRAGMENT;
146 const string vtxPrec = getShaderPrecision(CASESHADERTYPE_VERTEX);
147 const string fragPrec = getShaderPrecision(CASESHADERTYPE_FRAGMENT);
148
149 return ProgramData(glu::ProgramSources()
150 << glu::VertexSource("attribute " + vtxPrec +
151 " vec4 a_position;\n"
152 "attribute " +
153 vtxPrec +
154 " vec4 a_value;\n"
155 "varying " +
156 fragPrec + " vec4 v_value;\n" + (isVertexCase ? funcDefs : "") +
157 "void main (void)\n"
158 "{\n"
159 " gl_Position = a_position;\n"
160 " " +
161 vtxPrec + " vec4 value = a_value;\n" +
162 (isVertexCase ? mainStatements : "") +
163 " v_value = value;\n"
164 "}\n")
165
166 << glu::FragmentSource(
167 "varying " + fragPrec + " vec4 v_value;\n" + (isFragmentCase ? funcDefs : "") +
168 "void main (void)\n"
169 "{\n"
170 " " +
171 fragPrec + " vec4 value = v_value;\n" + (isFragmentCase ? mainStatements : "") +
172 " gl_FragColor = value;\n"
173 "}\n"),
174 gls::AttribSpec("a_value", Vec4(1.0f, 0.0f, 0.0f, 0.0f), Vec4(0.0f, 1.0f, 0.0f, 0.0f),
175 Vec4(0.0f, 0.0f, 1.0f, 0.0f), Vec4(0.0f, 0.0f, 0.0f, 1.0f)));
176 }
177
defaultProgramData(CaseShaderType shaderType,const string & mainStatements)178 static inline ProgramData defaultProgramData(CaseShaderType shaderType, const string &mainStatements)
179 {
180 return defaultProgramData(shaderType, "", mainStatements);
181 }
182
183 class ShaderOptimizationCase : public TestCase
184 {
185 public:
ShaderOptimizationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType)186 ShaderOptimizationCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType)
187 : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, description)
188 , m_caseShaderType(caseShaderType)
189 , m_state(STATE_LAST)
190 , m_measurer(context.getRenderContext(), caseShaderType == CASESHADERTYPE_VERTEX ? gls::CASETYPE_VERTEX :
191 caseShaderType == CASESHADERTYPE_FRAGMENT ? gls::CASETYPE_FRAGMENT :
192 gls::CASETYPE_LAST)
193 , m_unoptimizedResult(-1.0f, -1.0f)
194 , m_optimizedResult(-1.0f, -1.0f)
195 {
196 }
197
~ShaderOptimizationCase(void)198 virtual ~ShaderOptimizationCase(void)
199 {
200 }
201
202 void init(void);
203 IterateResult iterate(void);
204
205 protected:
206 virtual ProgramData generateProgramData(bool optimized) const = 0;
207
208 const CaseShaderType m_caseShaderType;
209
210 private:
211 enum State
212 {
213 STATE_INIT_UNOPTIMIZED = 0,
214 STATE_MEASURE_UNOPTIMIZED,
215 STATE_INIT_OPTIMIZED,
216 STATE_MEASURE_OPTIMIZED,
217 STATE_FINISHED,
218
219 STATE_LAST
220 };
221
programData(bool optimized)222 ProgramData &programData(bool optimized)
223 {
224 return optimized ? m_optimizedData : m_unoptimizedData;
225 }
program(bool optimized)226 SharedPtr<const ShaderProgram> &program(bool optimized)
227 {
228 return optimized ? m_optimizedProgram : m_unoptimizedProgram;
229 }
result(bool optimized)230 ShaderPerformanceMeasurer::Result &result(bool optimized)
231 {
232 return optimized ? m_optimizedResult : m_unoptimizedResult;
233 }
234
235 State m_state;
236 ShaderPerformanceMeasurer m_measurer;
237
238 ProgramData m_unoptimizedData;
239 ProgramData m_optimizedData;
240 SharedPtr<const ShaderProgram> m_unoptimizedProgram;
241 SharedPtr<const ShaderProgram> m_optimizedProgram;
242 ShaderPerformanceMeasurer::Result m_unoptimizedResult;
243 ShaderPerformanceMeasurer::Result m_optimizedResult;
244 };
245
init(void)246 void ShaderOptimizationCase::init(void)
247 {
248 const glu::RenderContext &renderCtx = m_context.getRenderContext();
249 TestLog &log = m_testCtx.getLog();
250
251 m_measurer.logParameters(log);
252
253 for (int ndx = 0; ndx < 2; ndx++)
254 {
255 const bool optimized = ndx == 1;
256
257 programData(optimized) = generateProgramData(optimized);
258
259 for (int i = 0; i < (int)programData(optimized).attributes.size(); i++)
260 DE_ASSERT(programData(optimized).attributes[i].name !=
261 "a_position"); // \note Position attribute is set by m_measurer.
262
263 program(optimized) =
264 SharedPtr<const ShaderProgram>(new ShaderProgram(renderCtx, programData(optimized).sources));
265
266 {
267 const tcu::ScopedLogSection section(log, optimized ? "OptimizedProgram" : "UnoptimizedProgram",
268 optimized ? "Hand-optimized program" : "Unoptimized program");
269 log << *program(optimized);
270 }
271
272 if (!program(optimized)->isOk())
273 TCU_FAIL("Shader compilation failed");
274 }
275
276 m_state = STATE_INIT_UNOPTIMIZED;
277 }
278
iterate(void)279 ShaderOptimizationCase::IterateResult ShaderOptimizationCase::iterate(void)
280 {
281 TestLog &log = m_testCtx.getLog();
282
283 if (m_state == STATE_INIT_UNOPTIMIZED || m_state == STATE_INIT_OPTIMIZED)
284 {
285 const bool optimized = m_state == STATE_INIT_OPTIMIZED;
286 m_measurer.init(program(optimized)->getProgram(), programData(optimized).attributes, 1);
287 m_state = optimized ? STATE_MEASURE_OPTIMIZED : STATE_MEASURE_UNOPTIMIZED;
288
289 return CONTINUE;
290 }
291 else if (m_state == STATE_MEASURE_UNOPTIMIZED || m_state == STATE_MEASURE_OPTIMIZED)
292 {
293 m_measurer.iterate();
294
295 if (m_measurer.isFinished())
296 {
297 const bool optimized = m_state == STATE_MEASURE_OPTIMIZED;
298 const tcu::ScopedLogSection section(log, optimized ? "OptimizedResult" : "UnoptimizedResult",
299 optimized ? "Measurement results for hand-optimized program" :
300 "Measurement result for unoptimized program");
301 m_measurer.logMeasurementInfo(log);
302 result(optimized) = m_measurer.getResult();
303 m_measurer.deinit();
304 m_state = optimized ? STATE_FINISHED : STATE_INIT_OPTIMIZED;
305 }
306
307 return CONTINUE;
308 }
309 else
310 {
311 DE_ASSERT(m_state == STATE_FINISHED);
312
313 const float unoptimizedRelevantResult = m_caseShaderType == CASESHADERTYPE_VERTEX ?
314 m_unoptimizedResult.megaVertPerSec :
315 m_unoptimizedResult.megaFragPerSec;
316 const float optimizedRelevantResult = m_caseShaderType == CASESHADERTYPE_VERTEX ?
317 m_optimizedResult.megaVertPerSec :
318 m_optimizedResult.megaFragPerSec;
319 const char *const relevantResultName = m_caseShaderType == CASESHADERTYPE_VERTEX ? "vertex" : "fragment";
320 const float ratio = unoptimizedRelevantResult / optimizedRelevantResult;
321 const int handOptimizationGain = (int)deFloatRound(100.0f / ratio) - 100;
322
323 log << TestLog::Message << "Unoptimized / optimized " << relevantResultName << " performance ratio: " << ratio
324 << TestLog::EndMessage;
325
326 if (handOptimizationGain >= 0)
327 log << TestLog::Message << "Note: " << handOptimizationGain
328 << "% performance gain was achieved with hand-optimized version" << TestLog::EndMessage;
329 else
330 log << TestLog::Message << "Note: hand-optimization degraded performance by " << -handOptimizationGain
331 << "%" << TestLog::EndMessage;
332
333 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(ratio, 2).c_str());
334
335 return STOP;
336 }
337 }
338
339 class LoopUnrollCase : public ShaderOptimizationCase
340 {
341 public:
342 enum CaseType
343 {
344 CASETYPE_INDEPENDENT = 0,
345 CASETYPE_DEPENDENT,
346
347 CASETYPE_LAST
348 };
349
LoopUnrollCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType,int numRepetitions)350 LoopUnrollCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
351 CaseType caseType, int numRepetitions)
352 : ShaderOptimizationCase(context, name, description, caseShaderType)
353 , m_numRepetitions(numRepetitions)
354 , m_caseType(caseType)
355 {
356 }
357
358 protected:
generateProgramData(bool optimized) const359 ProgramData generateProgramData(bool optimized) const
360 {
361 const string repetition =
362 optimized ? repeatIndexedTemplate("\t" + expressionTemplate(m_caseType) + ";\n", m_numRepetitions) :
363 loop(m_numRepetitions, expressionTemplate(m_caseType));
364
365 return defaultProgramData(m_caseShaderType, "\t" + getShaderPrecision(m_caseShaderType) +
366 " vec4 valueOrig = value;\n" + repetition);
367 }
368
369 private:
370 const int m_numRepetitions;
371 const CaseType m_caseType;
372
expressionTemplate(CaseType caseType)373 static inline string expressionTemplate(CaseType caseType)
374 {
375 switch (caseType)
376 {
377 case CASETYPE_INDEPENDENT:
378 return "value += sin(float(${NDX}+1)*valueOrig)";
379 case CASETYPE_DEPENDENT:
380 return "value = sin(value)";
381 default:
382 DE_ASSERT(false);
383 return "";
384 }
385 }
386
loop(int iterations,const string & innerExpr)387 static inline string loop(int iterations, const string &innerExpr)
388 {
389 return "\tfor (int i = 0; i < " + toString(iterations) + "; i++)\n\t\t" +
390 tcu::StringTemplate(innerExpr).specialize(singleMap("NDX", "i")) + ";\n";
391 }
392 };
393
394 class LoopInvariantCodeMotionCase : public ShaderOptimizationCase
395 {
396 public:
LoopInvariantCodeMotionCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,int numLoopIterations)397 LoopInvariantCodeMotionCase(Context &context, const char *name, const char *description,
398 CaseShaderType caseShaderType, int numLoopIterations)
399 : ShaderOptimizationCase(context, name, description, caseShaderType)
400 , m_numLoopIterations(numLoopIterations)
401 {
402 }
403
404 protected:
generateProgramData(bool optimized) const405 ProgramData generateProgramData(bool optimized) const
406 {
407 float scale = 0.0f;
408 for (int i = 0; i < m_numLoopIterations; i++)
409 scale += 3.2f * (float)i + 4.6f;
410 scale = 1.0f / scale;
411
412 const string precision = getShaderPrecision(m_caseShaderType);
413 const string statements = optimized ? " " + precision +
414 " vec4 valueOrig = value;\n"
415 " " +
416 precision +
417 " vec4 y = sin(cos(sin(valueOrig)));\n"
418 " for (int i = 0; i < " +
419 toString(m_numLoopIterations) +
420 "; i++)\n"
421 " {\n"
422 " " +
423 precision +
424 " float x = 3.2*float(i) + 4.6;\n"
425 " value += x*y;\n"
426 " }\n"
427 " value *= " +
428 toString(scale) + ";\n"
429
430 :
431 " " + precision +
432 " vec4 valueOrig = value;\n"
433 " for (int i = 0; i < " +
434 toString(m_numLoopIterations) +
435 "; i++)\n"
436 " {\n"
437 " " +
438 precision +
439 " float x = 3.2*float(i) + 4.6;\n"
440 " " +
441 precision +
442 " vec4 y = sin(cos(sin(valueOrig)));\n"
443 " value += x*y;\n"
444 " }\n"
445 " value *= " +
446 toString(scale) + ";\n";
447
448 return defaultProgramData(m_caseShaderType, statements);
449 }
450
451 private:
452 const int m_numLoopIterations;
453 };
454
455 class FunctionInliningCase : public ShaderOptimizationCase
456 {
457 public:
FunctionInliningCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,int callNestingDepth)458 FunctionInliningCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
459 int callNestingDepth)
460 : ShaderOptimizationCase(context, name, description, caseShaderType)
461 , m_callNestingDepth(callNestingDepth)
462 {
463 }
464
465 protected:
generateProgramData(bool optimized) const466 ProgramData generateProgramData(bool optimized) const
467 {
468 const string precision = getShaderPrecision(m_caseShaderType);
469 const string expression = "value*vec4(0.8, 0.7, 0.6, 0.9)";
470 const string maybeFuncDefs = optimized ? "" : funcDefinitions(m_callNestingDepth, precision, expression);
471 const string mainValueStatement =
472 (optimized ? "\tvalue = " + expression : "\tvalue = func" + toString(m_callNestingDepth - 1) + "(value)") +
473 ";\n";
474
475 return defaultProgramData(m_caseShaderType, maybeFuncDefs, mainValueStatement);
476 }
477
478 private:
479 const int m_callNestingDepth;
480
funcDefinitions(int callNestingDepth,const string & precision,const string & expression)481 static inline string funcDefinitions(int callNestingDepth, const string &precision, const string &expression)
482 {
483 string result = precision + " vec4 func0 (" + precision + " vec4 value) { return " + expression + "; }\n";
484
485 for (int i = 1; i < callNestingDepth; i++)
486 result += precision + " vec4 func" + toString(i) + " (" + precision + " vec4 v) { return func" +
487 toString(i - 1) + "(v); }\n";
488
489 return result;
490 }
491 };
492
493 class ConstantPropagationCase : public ShaderOptimizationCase
494 {
495 public:
496 enum CaseType
497 {
498 CASETYPE_BUILT_IN_FUNCTIONS = 0,
499 CASETYPE_ARRAY,
500 CASETYPE_STRUCT,
501
502 CASETYPE_LAST
503 };
504
ConstantPropagationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType,bool useConstantExpressionsOnly)505 ConstantPropagationCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
506 CaseType caseType, bool useConstantExpressionsOnly)
507 : ShaderOptimizationCase(context, name, description, caseShaderType)
508 , m_caseType(caseType)
509 , m_useConstantExpressionsOnly(useConstantExpressionsOnly)
510 {
511 DE_ASSERT(
512 !(m_caseType == CASETYPE_ARRAY &&
513 m_useConstantExpressionsOnly)); // \note Would need array constructors, which GLSL ES 1 doesn't have.
514 }
515
516 protected:
generateProgramData(bool optimized) const517 ProgramData generateProgramData(bool optimized) const
518 {
519 const bool isVertexCase = m_caseShaderType == CASESHADERTYPE_VERTEX;
520 const string precision = getShaderPrecision(m_caseShaderType);
521 const string statements =
522 m_caseType == CASETYPE_BUILT_IN_FUNCTIONS ?
523 builtinFunctionsCaseStatements(optimized, m_useConstantExpressionsOnly, precision, isVertexCase) :
524 m_caseType == CASETYPE_ARRAY ?
525 arrayCaseStatements(optimized, precision, isVertexCase) :
526 m_caseType == CASETYPE_STRUCT ?
527 structCaseStatements(optimized, m_useConstantExpressionsOnly, precision, isVertexCase) :
528 deFatalStr("Invalid CaseType");
529
530 return defaultProgramData(m_caseShaderType, statements);
531 }
532
533 private:
534 const CaseType m_caseType;
535 const bool m_useConstantExpressionsOnly;
536
builtinFunctionsCaseStatements(bool optimized,bool constantExpressionsOnly,const string & precision,bool useHeavierWorkload)537 static inline string builtinFunctionsCaseStatements(bool optimized, bool constantExpressionsOnly,
538 const string &precision, bool useHeavierWorkload)
539 {
540 const string constMaybe = constantExpressionsOnly ? "const " : "";
541 const int numSinRows = useHeavierWorkload ? 12 : 1;
542
543 return optimized ? " value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match "
544 "the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
545
546 :
547 " " + constMaybe + precision +
548 " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
549 " " +
550 constMaybe + precision +
551 " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
552 " " +
553 constMaybe +
554 "bvec4 c = bvec4(true, false, true, true);\n"
555 " " +
556 constMaybe + precision +
557 " vec4 d = exp(b + vec4(c));\n"
558 " " +
559 constMaybe + precision + " vec4 e0 = inversesqrt(mix(d+a, d+b, a));\n" +
560 repeatIndexedTemplate(" " + constMaybe + precision +
561 " vec4 e${NDX} = sin(sin(sin(sin(e${PREV_NDX}))));\n",
562 numSinRows, "", 1) +
563 " " + constMaybe + precision + " vec4 f = abs(e" + toString(numSinRows) + ");\n" +
564 " value = f*value;\n";
565 }
566
arrayCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)567 static inline string arrayCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
568 {
569 const int numSinRows = useHeavierWorkload ? 12 : 1;
570
571 return optimized ?
572 " value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match the one in "
573 "unoptimized shader, but shouldn't make a difference performance-wise\n"
574
575 :
576 " const int arrLen = 4;\n"
577 " " +
578 precision +
579 " vec4 arr[arrLen];\n"
580 " arr[0] = vec4(0.1, 0.5, 0.9, 1.3);\n"
581 " arr[1] = vec4(0.2, 0.6, 1.0, 1.4);\n"
582 " arr[2] = vec4(0.3, 0.7, 1.1, 1.5);\n"
583 " arr[3] = vec4(0.4, 0.8, 1.2, 1.6);\n"
584 " " +
585 precision +
586 " vec4 a = (arr[0] + arr[1] + arr[2] + arr[3]) * 0.25;\n"
587 " " +
588 precision + " vec4 b0 = cos(sin(a));\n" +
589 repeatIndexedTemplate(" " + precision + " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n",
590 numSinRows, "", 1) +
591 " " + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
592 " value = c*value;\n";
593 }
594
structCaseStatements(bool optimized,bool constantExpressionsOnly,const string & precision,bool useHeavierWorkload)595 static inline string structCaseStatements(bool optimized, bool constantExpressionsOnly, const string &precision,
596 bool useHeavierWorkload)
597 {
598 const string constMaybe = constantExpressionsOnly ? "const " : "";
599 const int numSinRows = useHeavierWorkload ? 12 : 1;
600
601 return optimized ? " value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match "
602 "the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
603
604 :
605 " struct S\n"
606 " {\n"
607 " " +
608 precision +
609 " vec4 a;\n"
610 " " +
611 precision +
612 " vec4 b;\n"
613 " " +
614 precision +
615 " vec4 c;\n"
616 " " +
617 precision +
618 " vec4 d;\n"
619 " };\n"
620 "\n"
621 " " +
622 constMaybe +
623 "S s =\n"
624 " S(vec4(0.1, 0.5, 0.9, 1.3),\n"
625 " vec4(0.2, 0.6, 1.0, 1.4),\n"
626 " vec4(0.3, 0.7, 1.1, 1.5),\n"
627 " vec4(0.4, 0.8, 1.2, 1.6));\n"
628 " " +
629 constMaybe + precision +
630 " vec4 a = (s.a + s.b + s.c + s.d) * 0.25;\n"
631 " " +
632 constMaybe + precision + " vec4 b0 = cos(sin(a));\n" +
633 repeatIndexedTemplate(" " + constMaybe + precision +
634 " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n",
635 numSinRows, "", 1) +
636 " " + constMaybe + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
637 " value = c*value;\n";
638 }
639 };
640
641 class CommonSubexpressionCase : public ShaderOptimizationCase
642 {
643 public:
644 enum CaseType
645 {
646 CASETYPE_SINGLE_STATEMENT = 0,
647 CASETYPE_MULTIPLE_STATEMENTS,
648 CASETYPE_STATIC_BRANCH,
649 CASETYPE_LOOP,
650
651 CASETYPE_LAST
652 };
653
CommonSubexpressionCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType)654 CommonSubexpressionCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
655 CaseType caseType)
656 : ShaderOptimizationCase(context, name, description, caseShaderType)
657 , m_caseType(caseType)
658 {
659 }
660
661 protected:
generateProgramData(bool optimized) const662 ProgramData generateProgramData(bool optimized) const
663 {
664 const bool isVertexCase = m_caseShaderType == CASESHADERTYPE_VERTEX;
665 const string precision = getShaderPrecision(m_caseShaderType);
666 const string statements = m_caseType == CASETYPE_SINGLE_STATEMENT ?
667 singleStatementCaseStatements(optimized, precision, isVertexCase) :
668 m_caseType == CASETYPE_MULTIPLE_STATEMENTS ?
669 multipleStatementsCaseStatements(optimized, precision, isVertexCase) :
670 m_caseType == CASETYPE_STATIC_BRANCH ?
671 staticBranchCaseStatements(optimized, precision, isVertexCase) :
672 m_caseType == CASETYPE_LOOP ? loopCaseStatements(optimized, precision, isVertexCase) :
673 deFatalStr("Invalid CaseType");
674
675 return defaultProgramData(m_caseShaderType, statements);
676 }
677
678 private:
679 const CaseType m_caseType;
680
singleStatementCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)681 static inline string singleStatementCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
682 {
683 const int numTopLevelRepeats = useHeavierWorkload ? 4 : 1;
684
685 return optimized ? " " + precision +
686 " vec4 s = sin(value);\n"
687 " " +
688 precision +
689 " vec4 cs = cos(s);\n"
690 " " +
691 precision +
692 " vec4 d = fract(s + cs) + sqrt(s + exp(cs));\n"
693 " value = " +
694 repeat("d", numTopLevelRepeats, "+") + ";\n"
695
696 :
697 " value = " +
698 repeat("fract(sin(value) + cos(sin(value))) + sqrt(sin(value) + exp(cos(sin(value))))",
699 numTopLevelRepeats, "\n\t + ") +
700 ";\n";
701 }
702
multipleStatementsCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)703 static inline string multipleStatementsCaseStatements(bool optimized, const string &precision,
704 bool useHeavierWorkload)
705 {
706 const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
707 DE_ASSERT(numTopLevelRepeats >= 2);
708
709 return optimized ? " " + precision +
710 " vec4 a = sin(value) + cos(exp(value));\n"
711 " " +
712 precision +
713 " vec4 b = cos(cos(a));\n"
714 " a = fract(exp(sqrt(b)));\n"
715 "\n" +
716 repeat("\tvalue += a*b;\n", numTopLevelRepeats)
717
718 :
719 repeatIndexedTemplate(" " + precision +
720 " vec4 a${NDX} = sin(value) + cos(exp(value));\n"
721 " " +
722 precision +
723 " vec4 b${NDX} = cos(cos(a${NDX}));\n"
724 " a${NDX} = fract(exp(sqrt(b${NDX})));\n"
725 "\n",
726 numTopLevelRepeats) +
727
728 repeatIndexedTemplate(" value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
729 }
730
staticBranchCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)731 static inline string staticBranchCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
732 {
733 const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
734 DE_ASSERT(numTopLevelRepeats >= 2);
735
736 if (optimized)
737 {
738 return " " + precision +
739 " vec4 a = sin(value) + cos(exp(value));\n"
740 " " +
741 precision +
742 " vec4 b = cos(a);\n"
743 " b = cos(b);\n"
744 " a = fract(exp(sqrt(b)));\n"
745 "\n" +
746 repeat(" value += a*b;\n", numTopLevelRepeats);
747 }
748 else
749 {
750 string result;
751
752 for (int i = 0; i < numTopLevelRepeats; i++)
753 {
754 result += " " + precision + " vec4 a" + toString(i) +
755 " = sin(value) + cos(exp(value));\n"
756 " " +
757 precision + " vec4 b" + toString(i) + " = cos(a" + toString(i) + ");\n";
758
759 if (i % 3 == 0)
760 result += " if (1 < 2)\n"
761 " b" +
762 toString(i) + " = cos(b" + toString(i) + ");\n";
763 else if (i % 3 == 1)
764 result += " b" + toString(i) + " = cos(b" + toString(i) + ");\n";
765 else if (i % 3 == 2)
766 result += " if (2 < 1);\n"
767 " else\n"
768 " b" +
769 toString(i) + " = cos(b" + toString(i) + ");\n";
770 else
771 DE_ASSERT(false);
772
773 result += " a" + toString(i) + " = fract(exp(sqrt(b" + toString(i) + ")));\n\n";
774 }
775
776 result += repeatIndexedTemplate(" value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
777
778 return result;
779 }
780 }
781
loopCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)782 static inline string loopCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
783 {
784 const int numLoopIterations = useHeavierWorkload ? 32 : 4;
785
786 return optimized ? " " + precision +
787 " vec4 acc = value;\n"
788 " for (int i = 0; i < " +
789 toString(numLoopIterations) +
790 "; i++)\n"
791 " acc = sin(acc);\n"
792 "\n"
793 " value += acc;\n"
794 " value += acc;\n"
795
796 :
797 " " + precision +
798 " vec4 acc0 = value;\n"
799 " for (int i = 0; i < " +
800 toString(numLoopIterations) +
801 "; i++)\n"
802 " acc0 = sin(acc0);\n"
803 "\n"
804 " " +
805 precision +
806 " vec4 acc1 = value;\n"
807 " for (int i = 0; i < " +
808 toString(numLoopIterations) +
809 "; i++)\n"
810 " acc1 = sin(acc1);\n"
811 "\n"
812 " value += acc0;\n"
813 " value += acc1;\n";
814 }
815 };
816
817 class DeadCodeEliminationCase : public ShaderOptimizationCase
818 {
819 public:
//! Kind of dead code contained in the unoptimized shader.
enum CaseType
{
    CASETYPE_DEAD_BRANCH_SIMPLE = 0,           //!< Statements from deadBranchSimpleCaseStatements().
    CASETYPE_DEAD_BRANCH_COMPLEX = 1,          //!< deadBranchComplexCaseStatements() with const locals.
    CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST = 2, //!< deadBranchComplexCaseStatements() without const.
    CASETYPE_DEAD_BRANCH_FUNC_CALL = 3,        //!< deadBranchFuncCallCaseStatements() plus its function definition.
    CASETYPE_UNUSED_VALUE_BASIC = 4,           //!< Statements from unusedValueBasicCaseStatements().
    CASETYPE_UNUSED_VALUE_LOOP = 5,            //!< Statements from unusedValueLoopCaseStatements().
    CASETYPE_UNUSED_VALUE_DEAD_BRANCH = 6,     //!< Statements from unusedValueDeadBranchCaseStatements().
    CASETYPE_UNUSED_VALUE_AFTER_RETURN = 7,    //!< unusedValueAfterReturnCaseStatements() plus its function definitions.
    CASETYPE_UNUSED_VALUE_MUL_ZERO = 8,        //!< Statements from unusedValueMulZeroCaseStatements().

    CASETYPE_LAST //!< Number of case types; not a valid type itself.
};
834
//! Create a dead code elimination case.
//! \param caseShaderType Forwarded to ShaderOptimizationCase together with context, name and description.
//! \param caseType       Stored in m_caseType; selects the statements produced by generateProgramData().
DeadCodeEliminationCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
                        CaseType caseType)
    : ShaderOptimizationCase(context, name, description, caseShaderType)
    , m_caseType(caseType)
{
}
841
842 protected:
generateProgramData(bool optimized) const843 ProgramData generateProgramData(bool optimized) const
844 {
845 const bool isVertexCase = m_caseShaderType == CASESHADERTYPE_VERTEX;
846 const string precision = getShaderPrecision(m_caseShaderType);
847 const string funcDefs = m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL ?
848 deadBranchFuncCallCaseFuncDefs(optimized, precision) :
849 m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN ?
850 unusedValueAfterReturnCaseFuncDefs(optimized, precision, isVertexCase) :
851 "";
852
853 const string statements = m_caseType == CASETYPE_DEAD_BRANCH_SIMPLE ?
854 deadBranchSimpleCaseStatements(optimized, isVertexCase) :
855 m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX ?
856 deadBranchComplexCaseStatements(optimized, precision, true, isVertexCase) :
857 m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST ?
858 deadBranchComplexCaseStatements(optimized, precision, false, isVertexCase) :
859 m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL ?
860 deadBranchFuncCallCaseStatements(optimized, isVertexCase) :
861 m_caseType == CASETYPE_UNUSED_VALUE_BASIC ?
862 unusedValueBasicCaseStatements(optimized, precision, isVertexCase) :
863 m_caseType == CASETYPE_UNUSED_VALUE_LOOP ?
864 unusedValueLoopCaseStatements(optimized, precision, isVertexCase) :
865 m_caseType == CASETYPE_UNUSED_VALUE_DEAD_BRANCH ?
866 unusedValueDeadBranchCaseStatements(optimized, precision, isVertexCase) :
867 m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN ?
868 unusedValueAfterReturnCaseStatements() :
869 m_caseType == CASETYPE_UNUSED_VALUE_MUL_ZERO ?
870 unusedValueMulZeroCaseStatements(optimized, precision, isVertexCase) :
871 deFatalStr("Invalid CaseType");
872
873 return defaultProgramData(m_caseShaderType, funcDefs, statements);
874 }
875
876 private:
877 const CaseType m_caseType;
878
deadBranchSimpleCaseStatements(bool optimized,bool useHeavierWorkload)879 static inline string deadBranchSimpleCaseStatements(bool optimized, bool useHeavierWorkload)
880 {
881 const int numLoopIterations = useHeavierWorkload ? 16 : 4;
882
883 return optimized ? " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
884
885 :
886 " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
887 " if (2 < 1)\n"
888 " {\n"
889 " value = cos(exp(sin(value))*log(sqrt(value)));\n"
890 " for (int i = 0; i < " +
891 toString(numLoopIterations) +
892 "; i++)\n"
893 " value = sin(value);\n"
894 " }\n";
895 }
896
deadBranchComplexCaseStatements(bool optimized,const string & precision,bool useConst,bool useHeavierWorkload)897 static inline string deadBranchComplexCaseStatements(bool optimized, const string &precision, bool useConst,
898 bool useHeavierWorkload)
899 {
900 const string constMaybe = useConst ? "const " : "";
901 const int numLoopIterations = useHeavierWorkload ? 16 : 4;
902
903 return optimized ? " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
904
905 :
906 " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
907 " " +
908 constMaybe + precision +
909 " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
910 " " +
911 constMaybe + precision +
912 " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
913 " " +
914 constMaybe +
915 "bvec4 c = bvec4(true, false, true, true);\n"
916 " " +
917 constMaybe + precision +
918 " vec4 d = exp(b + vec4(c));\n"
919 " " +
920 constMaybe + precision +
921 " vec4 e = 1.8*abs(sin(sin(inversesqrt(mix(d+a, d+b, a)))));\n"
922 " if (e.x > 1.0)\n"
923 " {\n"
924 " value = cos(exp(sin(value))*log(sqrt(value)));\n"
925 " for (int i = 0; i < " +
926 toString(numLoopIterations) +
927 "; i++)\n"
928 " value = sin(value);\n"
929 " }\n";
930 }
931
// Builds the function definitions for the dead-branch-via-function-call case.
//
// optimized - when true, no helper function is needed and an empty string is returned.
// precision - precision qualifier text used for both the return type and the parameter of 'func'.
//
// In the unoptimized variant a one-line shader function 'func' returning 2.0*x is emitted.
static inline string deadBranchFuncCallCaseFuncDefs(bool optimized, const string &precision)
{
    if (optimized)
        return string();

    string definition = precision;
    definition += " float func (";
    definition += precision;
    definition += " float x) { return 2.0*x; }\n";
    return definition;
}
936
deadBranchFuncCallCaseStatements(bool optimized,bool useHeavierWorkload)937 static inline string deadBranchFuncCallCaseStatements(bool optimized, bool useHeavierWorkload)
938 {
939 const int numLoopIterations = useHeavierWorkload ? 16 : 4;
940
941 return optimized ? " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
942
943 :
944 " value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
945 " if (func(0.3) > 1.0)\n"
946 " {\n"
947 " value = cos(exp(sin(value))*log(sqrt(value)));\n"
948 " for (int i = 0; i < " +
949 toString(numLoopIterations) +
950 "; i++)\n"
951 " value = sin(value);\n"
952 " }\n";
953 }
954
unusedValueBasicCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)955 static inline string unusedValueBasicCaseStatements(bool optimized, const string &precision,
956 bool useHeavierWorkload)
957 {
958 const int numSinRows = useHeavierWorkload ? 12 : 1;
959
960 return optimized ? " " + precision +
961 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
962 " value = used;\n"
963
964 :
965 " " + precision +
966 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
967 " " +
968 precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value))) + used;\n" +
969 repeat(" unused = sin(sin(sin(sin(unused))));\n", numSinRows) + " value = used;\n";
970 }
971
unusedValueLoopCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)972 static inline string unusedValueLoopCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
973 {
974 const int numLoopIterations = useHeavierWorkload ? 16 : 4;
975
976 return optimized ? " " + precision +
977 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
978 " value = used;\n"
979
980 :
981 " " + precision +
982 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
983 " " +
984 precision +
985 " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
986 " for (int i = 0; i < " +
987 toString(numLoopIterations) +
988 "; i++)\n"
989 " unused = sin(unused + used);\n"
990 " value = used;\n";
991 }
992
unusedValueAfterReturnCaseFuncDefs(bool optimized,const string & precision,bool useHeavierWorkload)993 static inline string unusedValueAfterReturnCaseFuncDefs(bool optimized, const string &precision,
994 bool useHeavierWorkload)
995 {
996 const int numSinRows = useHeavierWorkload ? 12 : 1;
997
998 return optimized ? precision + " vec4 func (" + precision +
999 " vec4 v)\n"
1000 "{\n"
1001 " " +
1002 precision +
1003 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
1004 " return used;\n"
1005 "}\n"
1006
1007 :
1008 precision + " vec4 func (" + precision +
1009 " vec4 v)\n"
1010 "{\n"
1011 " " +
1012 precision +
1013 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
1014 " " +
1015 precision + " vec4 unused = cos(exp(sin(v))*log(sqrt(v)));\n" +
1016 repeat(" unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
1017 " return used;\n"
1018 " used = used*unused;"
1019 " return used;\n"
1020 "}\n";
1021 }
1022
// Builds the shader statements for the unused-value-after-return case: a single statement that passes
// 'value' through the shader function 'func'. The result does not depend on any parameters.
static inline string unusedValueAfterReturnCaseStatements(void)
{
    const string callStatement(" value = func(value);\n");
    return callStatement;
}
1027
unusedValueDeadBranchCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)1028 static inline string unusedValueDeadBranchCaseStatements(bool optimized, const string &precision,
1029 bool useHeavierWorkload)
1030 {
1031 const int numSinRows = useHeavierWorkload ? 12 : 1;
1032
1033 return optimized ? " " + precision +
1034 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1035 " value = used;\n"
1036
1037 :
1038 " " + precision +
1039 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1040 " " +
1041 precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n" +
1042 repeat(" unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
1043 " if (2 < 1)\n"
1044 " used = used*unused;\n"
1045 " value = used;\n";
1046 }
1047
unusedValueMulZeroCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)1048 static inline string unusedValueMulZeroCaseStatements(bool optimized, const string &precision,
1049 bool useHeavierWorkload)
1050 {
1051 const int numSinRows = useHeavierWorkload ? 12 : 1;
1052
1053 return optimized ? " " + precision +
1054 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1055 " value = used;\n"
1056
1057 :
1058 " " + precision +
1059 " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1060 " " +
1061 precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n" +
1062 repeat(" unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
1063 " value = used + unused*float(1-1);\n";
1064 }
1065 };
1066
1067 } // namespace
1068
ShaderOptimizationTests(Context & context)1069 ShaderOptimizationTests::ShaderOptimizationTests(Context &context)
1070 : TestCaseGroup(context, "optimization", "Shader Optimization Performance Tests")
1071 {
1072 }
1073
~ShaderOptimizationTests(void)1074 ShaderOptimizationTests::~ShaderOptimizationTests(void)
1075 {
1076 }
1077
init(void)1078 void ShaderOptimizationTests::init(void)
1079 {
1080 TestCaseGroup *const unrollGroup = new TestCaseGroup(m_context, "loop_unrolling", "Loop Unrolling Cases");
1081 TestCaseGroup *const loopInvariantCodeMotionGroup =
1082 new TestCaseGroup(m_context, "loop_invariant_code_motion", "Loop-Invariant Code Motion Cases");
1083 TestCaseGroup *const inlineGroup = new TestCaseGroup(m_context, "function_inlining", "Function Inlining Cases");
1084 TestCaseGroup *const constantPropagationGroup =
1085 new TestCaseGroup(m_context, "constant_propagation", "Constant Propagation Cases");
1086 TestCaseGroup *const commonSubexpressionGroup =
1087 new TestCaseGroup(m_context, "common_subexpression_elimination", "Common Subexpression Elimination Cases");
1088 TestCaseGroup *const deadCodeEliminationGroup =
1089 new TestCaseGroup(m_context, "dead_code_elimination", "Dead Code Elimination Cases");
1090 addChild(unrollGroup);
1091 addChild(loopInvariantCodeMotionGroup);
1092 addChild(inlineGroup);
1093 addChild(constantPropagationGroup);
1094 addChild(commonSubexpressionGroup);
1095 addChild(deadCodeEliminationGroup);
1096
1097 for (int caseShaderTypeI = 0; caseShaderTypeI < CASESHADERTYPE_LAST; caseShaderTypeI++)
1098 {
1099 const CaseShaderType caseShaderType = (CaseShaderType)caseShaderTypeI;
1100 const char *const caseShaderTypeSuffix = caseShaderType == CASESHADERTYPE_VERTEX ? "_vertex" :
1101 caseShaderType == CASESHADERTYPE_FRAGMENT ? "_fragment" :
1102 DE_NULL;
1103
1104 // Loop unrolling cases.
1105
1106 {
1107 static const int loopIterationCounts[] = {4, 8, 32};
1108
1109 for (int caseTypeI = 0; caseTypeI < LoopUnrollCase::CASETYPE_LAST; caseTypeI++)
1110 {
1111 const LoopUnrollCase::CaseType caseType = (LoopUnrollCase::CaseType)caseTypeI;
1112 const string caseTypeName = caseType == LoopUnrollCase::CASETYPE_INDEPENDENT ?
1113 "independent_iterations" :
1114 caseType == LoopUnrollCase::CASETYPE_DEPENDENT ? "dependent_iterations" :
1115 DE_NULL;
1116 const string caseTypeDesc =
1117 caseType == LoopUnrollCase::CASETYPE_INDEPENDENT ? "loop iterations don't depend on each other" :
1118 caseType == LoopUnrollCase::CASETYPE_DEPENDENT ? "loop iterations depend on each other" :
1119 DE_NULL;
1120
1121 for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
1122 {
1123 const int loopIterations = loopIterationCounts[loopIterNdx];
1124 const string name = caseTypeName + "_" + toString(loopIterations) + caseShaderTypeSuffix;
1125 const string description = toString(loopIterations) + " iterations; " + caseTypeDesc;
1126
1127 unrollGroup->addChild(new LoopUnrollCase(m_context, name.c_str(), description.c_str(),
1128 caseShaderType, caseType, loopIterations));
1129 }
1130 }
1131 }
1132
1133 // Loop-invariant code motion cases.
1134
1135 {
1136 static const int loopIterationCounts[] = {4, 8, 32};
1137
1138 for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
1139 {
1140 const int loopIterations = loopIterationCounts[loopIterNdx];
1141 const string name = toString(loopIterations) + "_iterations" + caseShaderTypeSuffix;
1142
1143 loopInvariantCodeMotionGroup->addChild(
1144 new LoopInvariantCodeMotionCase(m_context, name.c_str(), "", caseShaderType, loopIterations));
1145 }
1146 }
1147
1148 // Function inlining cases.
1149
1150 {
1151 static const int callNestingDepths[] = {4, 8, 32};
1152
1153 for (int nestDepthNdx = 0; nestDepthNdx < DE_LENGTH_OF_ARRAY(callNestingDepths); nestDepthNdx++)
1154 {
1155 const int nestingDepth = callNestingDepths[nestDepthNdx];
1156 const string name = toString(nestingDepth) + "_nested" + caseShaderTypeSuffix;
1157
1158 inlineGroup->addChild(
1159 new FunctionInliningCase(m_context, name.c_str(), "", caseShaderType, nestingDepth));
1160 }
1161 }
1162
1163 // Constant propagation cases.
1164
1165 for (int caseTypeI = 0; caseTypeI < ConstantPropagationCase::CASETYPE_LAST; caseTypeI++)
1166 {
1167 const ConstantPropagationCase::CaseType caseType = (ConstantPropagationCase::CaseType)caseTypeI;
1168 const string caseTypeName = caseType == ConstantPropagationCase::CASETYPE_BUILT_IN_FUNCTIONS ?
1169 "built_in_functions" :
1170 caseType == ConstantPropagationCase::CASETYPE_ARRAY ? "array" :
1171 caseType == ConstantPropagationCase::CASETYPE_STRUCT ? "struct" :
1172 DE_NULL;
1173
1174 for (int constantExpressionsOnlyI = 0; constantExpressionsOnlyI <= 1; constantExpressionsOnlyI++)
1175 {
1176 const bool constantExpressionsOnly = constantExpressionsOnlyI != 0;
1177 const string name = caseTypeName + (constantExpressionsOnly ? "" : "_no_const") + caseShaderTypeSuffix;
1178
1179 if (caseType == ConstantPropagationCase::CASETYPE_ARRAY &&
1180 constantExpressionsOnly) // \note See ConstantPropagationCase's constructor for explanation.
1181 continue;
1182
1183 constantPropagationGroup->addChild(new ConstantPropagationCase(
1184 m_context, name.c_str(), "", caseShaderType, caseType, constantExpressionsOnly));
1185 }
1186 }
1187
1188 // Common subexpression cases.
1189
1190 for (int caseTypeI = 0; caseTypeI < CommonSubexpressionCase::CASETYPE_LAST; caseTypeI++)
1191 {
1192 const CommonSubexpressionCase::CaseType caseType = (CommonSubexpressionCase::CaseType)caseTypeI;
1193
1194 const string caseTypeName =
1195 caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT ? "single_statement" :
1196 caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS ? "multiple_statements" :
1197 caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH ? "static_branch" :
1198 caseType == CommonSubexpressionCase::CASETYPE_LOOP ? "loop" :
1199 DE_NULL;
1200
1201 const string description = caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT ?
1202 "A single statement containing multiple uses of same subexpression" :
1203 caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS ?
1204 "Multiple statements performing same computations" :
1205 caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH ?
1206 "Multiple statements including a static conditional" :
1207 caseType == CommonSubexpressionCase::CASETYPE_LOOP ?
1208 "Multiple loops performing the same computations" :
1209 DE_NULL;
1210
1211 commonSubexpressionGroup->addChild(
1212 new CommonSubexpressionCase(m_context, (caseTypeName + caseShaderTypeSuffix).c_str(),
1213 description.c_str(), caseShaderType, caseType));
1214 }
1215
1216 // Dead code elimination cases.
1217
1218 for (int caseTypeI = 0; caseTypeI < DeadCodeEliminationCase::CASETYPE_LAST; caseTypeI++)
1219 {
1220 const DeadCodeEliminationCase::CaseType caseType = (DeadCodeEliminationCase::CaseType)caseTypeI;
1221 const char *const caseTypeName =
1222 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE ? "dead_branch_simple" :
1223 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX ? "dead_branch_complex" :
1224 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST ?
1225 "dead_branch_complex_no_const" :
1226 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL ? "dead_branch_func_call" :
1227 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC ? "unused_value_basic" :
1228 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP ? "unused_value_loop" :
1229 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH ? "unused_value_dead_branch" :
1230 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN ? "unused_value_after_return" :
1231 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO ? "unused_value_mul_zero" :
1232 DE_NULL;
1233
1234 const char *const caseTypeDescription =
1235 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE ?
1236 "Do computation inside a branch that is never taken (condition is simple false constant "
1237 "expression)" :
1238 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX ?
1239 "Do computation inside a branch that is never taken (condition is complex false constant "
1240 "expression)" :
1241 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST ?
1242 "Do computation inside a branch that is never taken (condition is complex false expression, not "
1243 "constant expression but still compile-time computable)" :
1244 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL ?
1245 "Do computation inside a branch that is never taken (condition is compile-time computable false "
1246 "expression containing function call to a simple inlineable function)" :
1247 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC ?
1248 "Compute a value that is never used even statically" :
1249 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP ?
1250 "Compute a value, using a loop, that is never used even statically" :
1251 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH ?
1252 "Compute a value that is used only inside a statically dead branch" :
1253 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN ?
1254 "Compute a value that is used only after a return statement" :
1255 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO ?
1256 "Compute a value that is used but multiplied by a zero constant expression" :
1257 DE_NULL;
1258
1259 deadCodeEliminationGroup->addChild(
1260 new DeadCodeEliminationCase(m_context, (string() + caseTypeName + caseShaderTypeSuffix).c_str(),
1261 caseTypeDescription, caseShaderType, caseType));
1262 }
1263 }
1264 }
1265
1266 } // namespace Performance
1267 } // namespace gles2
1268 } // namespace deqp
1269