xref: /aosp_15_r20/external/deqp/modules/gles3/performance/es3pShaderOptimizationTests.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.0 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Optimized vs unoptimized shader performance tests.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es3pShaderOptimizationTests.hpp"
25 #include "glsShaderPerformanceMeasurer.hpp"
26 #include "gluRenderContext.hpp"
27 #include "gluShaderProgram.hpp"
28 #include "tcuTestLog.hpp"
29 #include "tcuVector.hpp"
30 #include "tcuStringTemplate.hpp"
31 #include "deSharedPtr.hpp"
32 #include "deStringUtil.hpp"
33 #include "deMath.h"
34 
35 #include "glwFunctions.hpp"
36 
37 #include <vector>
38 #include <string>
39 #include <map>
40 
41 using de::SharedPtr;
42 using de::toString;
43 using glu::ShaderProgram;
44 using tcu::TestLog;
45 using tcu::Vec4;
46 
47 using std::string;
48 using std::vector;
49 
50 namespace deqp
51 {
52 
53 using gls::ShaderPerformanceMeasurer;
54 
55 namespace gles3
56 {
57 namespace Performance
58 {
59 
//! Creates a map that contains exactly one entry mapping \a key to \a value.
static inline std::map<string, string> singleMap(const string &key, const string &value)
{
    std::map<string, string> single;
    single.insert(std::map<string, string>::value_type(key, value));
    return single;
}
66 
//! Concatenates \a numRepeats copies of \a str, inserting \a delim between consecutive copies.
//!
//! \param str        String to repeat.
//! \param numRepeats Number of copies; zero or a negative count yields an empty string.
//! \param delim      Separator placed between copies (never before the first or after the last).
//! \return The joined string.
static inline string repeat(const string &str, int numRepeats, const string &delim = "")
{
    // Without this guard a non-positive count would still produce one copy of str.
    if (numRepeats <= 0)
        return string();

    const string::size_type count = (string::size_type)numRepeats;

    string result;
    result.reserve(str.size() * count + delim.size() * (count - 1));

    result = str;
    for (int i = 1; i < numRepeats; i++)
    {
        // Append in two steps to avoid building a temporary (delim + str) on every round.
        result += delim;
        result += str;
    }
    return result;
}
74 
repeatIndexedTemplate(const string & strTempl,int numRepeats,const string & delim="",int ndxStart=0)75 static inline string repeatIndexedTemplate(const string &strTempl, int numRepeats, const string &delim = "",
76                                            int ndxStart = 0)
77 {
78     const tcu::StringTemplate templ(strTempl);
79     string result;
80     std::map<string, string> params;
81 
82     for (int i = 0; i < numRepeats; i++)
83     {
84         params["PREV_NDX"] = toString(i + ndxStart - 1);
85         params["NDX"]      = toString(i + ndxStart);
86 
87         result += (i > 0 ? delim : "") + templ.specialize(params);
88     }
89 
90     return result;
91 }
92 
93 namespace
94 {
95 
//! Selects the shader stage (vertex or fragment) a case is about.
enum CaseShaderType
{
    CASESHADERTYPE_VERTEX   = 0,
    CASESHADERTYPE_FRAGMENT = 1,

    CASESHADERTYPE_LAST //!< Number of real shader types; not a valid type itself.
};
103 
getShaderPrecision(CaseShaderType shaderType)104 static inline string getShaderPrecision(CaseShaderType shaderType)
105 {
106     switch (shaderType)
107     {
108     case CASESHADERTYPE_VERTEX:
109         return "highp";
110     case CASESHADERTYPE_FRAGMENT:
111         return "highp";
112     default:
113         DE_ASSERT(false);
114         return "";
115     }
116 }
117 
118 struct ProgramData
119 {
120     glu::ProgramSources sources;
121     vector<gls::AttribSpec>
122         attributes; //!< \note Shouldn't contain a_position; that one is set by gls::ShaderPerformanceMeasurer.
123 
ProgramDatadeqp::gles3::Performance::__anon24528f330111::ProgramData124     ProgramData(void)
125     {
126     }
ProgramDatadeqp::gles3::Performance::__anon24528f330111::ProgramData127     ProgramData(const glu::ProgramSources &sources_,
128                 const vector<gls::AttribSpec> &attributes_ = vector<gls::AttribSpec>())
129         : sources(sources_)
130         , attributes(attributes_)
131     {
132     }
ProgramDatadeqp::gles3::Performance::__anon24528f330111::ProgramData133     ProgramData(const glu::ProgramSources &sources_, const gls::AttribSpec &attribute)
134         : sources(sources_)
135         , attributes(1, attribute)
136     {
137     }
138 };
139 
140 //! Shader boilerplate helper; most cases have similar basic shader structure.
defaultProgramData(CaseShaderType shaderType,const string & funcDefs,const string & mainStatements)141 static inline ProgramData defaultProgramData(CaseShaderType shaderType, const string &funcDefs,
142                                              const string &mainStatements)
143 {
144     const bool isVertexCase   = shaderType == CASESHADERTYPE_VERTEX;
145     const bool isFragmentCase = shaderType == CASESHADERTYPE_FRAGMENT;
146     const string vtxPrec      = getShaderPrecision(CASESHADERTYPE_VERTEX);
147     const string fragPrec     = getShaderPrecision(CASESHADERTYPE_FRAGMENT);
148 
149     return ProgramData(glu::ProgramSources()
150                            << glu::VertexSource("#version 300 es\n"
151                                                 "in " +
152                                                 vtxPrec +
153                                                 " vec4 a_position;\n"
154                                                 "in " +
155                                                 vtxPrec +
156                                                 " vec4 a_value;\n"
157                                                 "out " +
158                                                 fragPrec + " vec4 v_value;\n" + (isVertexCase ? funcDefs : "") +
159                                                 "void main (void)\n"
160                                                 "{\n"
161                                                 "    gl_Position = a_position;\n"
162                                                 "    " +
163                                                 vtxPrec + " vec4 value = a_value;\n" +
164                                                 (isVertexCase ? mainStatements : "") +
165                                                 "    v_value = value;\n"
166                                                 "}\n")
167 
168                            << glu::FragmentSource("#version 300 es\n"
169                                                   "layout (location = 0) out " +
170                                                   fragPrec +
171                                                   " vec4 o_color;\n"
172                                                   "in " +
173                                                   fragPrec + " vec4 v_value;\n" + (isFragmentCase ? funcDefs : "") +
174                                                   "void main (void)\n"
175                                                   "{\n"
176                                                   "    " +
177                                                   fragPrec + " vec4 value = v_value;\n" +
178                                                   (isFragmentCase ? mainStatements : "") +
179                                                   "    o_color = value;\n"
180                                                   "}\n"),
181                        gls::AttribSpec("a_value", Vec4(1.0f, 0.0f, 0.0f, 0.0f), Vec4(0.0f, 1.0f, 0.0f, 0.0f),
182                                        Vec4(0.0f, 0.0f, 1.0f, 0.0f), Vec4(0.0f, 0.0f, 0.0f, 1.0f)));
183 }
184 
defaultProgramData(CaseShaderType shaderType,const string & mainStatements)185 static inline ProgramData defaultProgramData(CaseShaderType shaderType, const string &mainStatements)
186 {
187     return defaultProgramData(shaderType, "", mainStatements);
188 }
189 
190 class ShaderOptimizationCase : public TestCase
191 {
192 public:
ShaderOptimizationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType)193     ShaderOptimizationCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType)
194         : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, description)
195         , m_caseShaderType(caseShaderType)
196         , m_state(STATE_LAST)
197         , m_measurer(context.getRenderContext(), caseShaderType == CASESHADERTYPE_VERTEX   ? gls::CASETYPE_VERTEX :
198                                                  caseShaderType == CASESHADERTYPE_FRAGMENT ? gls::CASETYPE_FRAGMENT :
199                                                                                              gls::CASETYPE_LAST)
200         , m_unoptimizedResult(-1.0f, -1.0f)
201         , m_optimizedResult(-1.0f, -1.0f)
202     {
203     }
204 
~ShaderOptimizationCase(void)205     virtual ~ShaderOptimizationCase(void)
206     {
207     }
208 
209     void init(void);
210     IterateResult iterate(void);
211 
212 protected:
213     virtual ProgramData generateProgramData(bool optimized) const = 0;
214 
215     const CaseShaderType m_caseShaderType;
216 
217 private:
218     enum State
219     {
220         STATE_INIT_UNOPTIMIZED = 0,
221         STATE_MEASURE_UNOPTIMIZED,
222         STATE_INIT_OPTIMIZED,
223         STATE_MEASURE_OPTIMIZED,
224         STATE_FINISHED,
225 
226         STATE_LAST
227     };
228 
programData(bool optimized)229     ProgramData &programData(bool optimized)
230     {
231         return optimized ? m_optimizedData : m_unoptimizedData;
232     }
program(bool optimized)233     SharedPtr<const ShaderProgram> &program(bool optimized)
234     {
235         return optimized ? m_optimizedProgram : m_unoptimizedProgram;
236     }
result(bool optimized)237     ShaderPerformanceMeasurer::Result &result(bool optimized)
238     {
239         return optimized ? m_optimizedResult : m_unoptimizedResult;
240     }
241 
242     State m_state;
243     ShaderPerformanceMeasurer m_measurer;
244 
245     ProgramData m_unoptimizedData;
246     ProgramData m_optimizedData;
247     SharedPtr<const ShaderProgram> m_unoptimizedProgram;
248     SharedPtr<const ShaderProgram> m_optimizedProgram;
249     ShaderPerformanceMeasurer::Result m_unoptimizedResult;
250     ShaderPerformanceMeasurer::Result m_optimizedResult;
251 };
252 
init(void)253 void ShaderOptimizationCase::init(void)
254 {
255     const glu::RenderContext &renderCtx = m_context.getRenderContext();
256     TestLog &log                        = m_testCtx.getLog();
257 
258     m_measurer.logParameters(log);
259 
260     for (int ndx = 0; ndx < 2; ndx++)
261     {
262         const bool optimized = ndx == 1;
263 
264         programData(optimized) = generateProgramData(optimized);
265 
266         for (int i = 0; i < (int)programData(optimized).attributes.size(); i++)
267             DE_ASSERT(programData(optimized).attributes[i].name !=
268                       "a_position"); // \note Position attribute is set by m_measurer.
269 
270         program(optimized) =
271             SharedPtr<const ShaderProgram>(new ShaderProgram(renderCtx, programData(optimized).sources));
272 
273         {
274             const tcu::ScopedLogSection section(log, optimized ? "OptimizedProgram" : "UnoptimizedProgram",
275                                                 optimized ? "Hand-optimized program" : "Unoptimized program");
276             log << *program(optimized);
277         }
278 
279         if (!program(optimized)->isOk())
280             TCU_FAIL("Shader compilation failed");
281     }
282 
283     m_state = STATE_INIT_UNOPTIMIZED;
284 }
285 
iterate(void)286 ShaderOptimizationCase::IterateResult ShaderOptimizationCase::iterate(void)
287 {
288     TestLog &log = m_testCtx.getLog();
289 
290     if (m_state == STATE_INIT_UNOPTIMIZED || m_state == STATE_INIT_OPTIMIZED)
291     {
292         const bool optimized = m_state == STATE_INIT_OPTIMIZED;
293         m_measurer.init(program(optimized)->getProgram(), programData(optimized).attributes, 1);
294         m_state = optimized ? STATE_MEASURE_OPTIMIZED : STATE_MEASURE_UNOPTIMIZED;
295 
296         return CONTINUE;
297     }
298     else if (m_state == STATE_MEASURE_UNOPTIMIZED || m_state == STATE_MEASURE_OPTIMIZED)
299     {
300         m_measurer.iterate();
301 
302         if (m_measurer.isFinished())
303         {
304             const bool optimized = m_state == STATE_MEASURE_OPTIMIZED;
305             const tcu::ScopedLogSection section(log, optimized ? "OptimizedResult" : "UnoptimizedResult",
306                                                 optimized ? "Measurement results for hand-optimized program" :
307                                                             "Measurement result for unoptimized program");
308             m_measurer.logMeasurementInfo(log);
309             result(optimized) = m_measurer.getResult();
310             m_measurer.deinit();
311             m_state = optimized ? STATE_FINISHED : STATE_INIT_OPTIMIZED;
312         }
313 
314         return CONTINUE;
315     }
316     else
317     {
318         DE_ASSERT(m_state == STATE_FINISHED);
319 
320         const float unoptimizedRelevantResult = m_caseShaderType == CASESHADERTYPE_VERTEX ?
321                                                     m_unoptimizedResult.megaVertPerSec :
322                                                     m_unoptimizedResult.megaFragPerSec;
323         const float optimizedRelevantResult   = m_caseShaderType == CASESHADERTYPE_VERTEX ?
324                                                     m_optimizedResult.megaVertPerSec :
325                                                     m_optimizedResult.megaFragPerSec;
326         const char *const relevantResultName  = m_caseShaderType == CASESHADERTYPE_VERTEX ? "vertex" : "fragment";
327         const float ratio                     = unoptimizedRelevantResult / optimizedRelevantResult;
328         const int handOptimizationGain        = (int)deFloatRound(100.0f / ratio) - 100;
329 
330         log << TestLog::Message << "Unoptimized / optimized " << relevantResultName << " performance ratio: " << ratio
331             << TestLog::EndMessage;
332 
333         if (handOptimizationGain >= 0)
334             log << TestLog::Message << "Note: " << handOptimizationGain
335                 << "% performance gain was achieved with hand-optimized version" << TestLog::EndMessage;
336         else
337             log << TestLog::Message << "Note: hand-optimization degraded performance by " << -handOptimizationGain
338                 << "%" << TestLog::EndMessage;
339 
340         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(ratio, 2).c_str());
341 
342         return STOP;
343     }
344 }
345 
346 class LoopUnrollCase : public ShaderOptimizationCase
347 {
348 public:
349     enum CaseType
350     {
351         CASETYPE_INDEPENDENT = 0,
352         CASETYPE_DEPENDENT,
353 
354         CASETYPE_LAST
355     };
356 
LoopUnrollCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType,int numRepetitions)357     LoopUnrollCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
358                    CaseType caseType, int numRepetitions)
359         : ShaderOptimizationCase(context, name, description, caseShaderType)
360         , m_numRepetitions(numRepetitions)
361         , m_caseType(caseType)
362     {
363     }
364 
365 protected:
generateProgramData(bool optimized) const366     ProgramData generateProgramData(bool optimized) const
367     {
368         const string repetition =
369             optimized ? repeatIndexedTemplate("\t" + expressionTemplate(m_caseType) + ";\n", m_numRepetitions) :
370                         loop(m_numRepetitions, expressionTemplate(m_caseType));
371 
372         return defaultProgramData(m_caseShaderType, "\t" + getShaderPrecision(m_caseShaderType) +
373                                                         " vec4 valueOrig = value;\n" + repetition);
374     }
375 
376 private:
377     const int m_numRepetitions;
378     const CaseType m_caseType;
379 
expressionTemplate(CaseType caseType)380     static inline string expressionTemplate(CaseType caseType)
381     {
382         switch (caseType)
383         {
384         case CASETYPE_INDEPENDENT:
385             return "value += sin(float(${NDX}+1)*valueOrig)";
386         case CASETYPE_DEPENDENT:
387             return "value = sin(value)";
388         default:
389             DE_ASSERT(false);
390             return "";
391         }
392     }
393 
loop(int iterations,const string & innerExpr)394     static inline string loop(int iterations, const string &innerExpr)
395     {
396         return "\tfor (int i = 0; i < " + toString(iterations) + "; i++)\n\t\t" +
397                tcu::StringTemplate(innerExpr).specialize(singleMap("NDX", "i")) + ";\n";
398     }
399 };
400 
401 class LoopInvariantCodeMotionCase : public ShaderOptimizationCase
402 {
403 public:
LoopInvariantCodeMotionCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,int numLoopIterations)404     LoopInvariantCodeMotionCase(Context &context, const char *name, const char *description,
405                                 CaseShaderType caseShaderType, int numLoopIterations)
406         : ShaderOptimizationCase(context, name, description, caseShaderType)
407         , m_numLoopIterations(numLoopIterations)
408     {
409     }
410 
411 protected:
generateProgramData(bool optimized) const412     ProgramData generateProgramData(bool optimized) const
413     {
414         float scale = 0.0f;
415         for (int i = 0; i < m_numLoopIterations; i++)
416             scale += 3.2f * (float)i + 4.6f;
417         scale = 1.0f / scale;
418 
419         const string precision  = getShaderPrecision(m_caseShaderType);
420         const string statements = optimized ? "    " + precision +
421                                                   " vec4 valueOrig = value;\n"
422                                                   "    " +
423                                                   precision +
424                                                   " vec4 y = sin(cos(sin(valueOrig)));\n"
425                                                   "    for (int i = 0; i < " +
426                                                   toString(m_numLoopIterations) +
427                                                   "; i++)\n"
428                                                   "    {\n"
429                                                   "        " +
430                                                   precision +
431                                                   " float x = 3.2*float(i) + 4.6;\n"
432                                                   "        value += x*y;\n"
433                                                   "    }\n"
434                                                   "    value *= " +
435                                                   toString(scale) + ";\n"
436 
437                                               :
438                                               "    " + precision +
439                                                   " vec4 valueOrig = value;\n"
440                                                   "    for (int i = 0; i < " +
441                                                   toString(m_numLoopIterations) +
442                                                   "; i++)\n"
443                                                   "    {\n"
444                                                   "        " +
445                                                   precision +
446                                                   " float x = 3.2*float(i) + 4.6;\n"
447                                                   "        " +
448                                                   precision +
449                                                   " vec4 y = sin(cos(sin(valueOrig)));\n"
450                                                   "        value += x*y;\n"
451                                                   "    }\n"
452                                                   "    value *= " +
453                                                   toString(scale) + ";\n";
454 
455         return defaultProgramData(m_caseShaderType, statements);
456     }
457 
458 private:
459     const int m_numLoopIterations;
460 };
461 
462 class FunctionInliningCase : public ShaderOptimizationCase
463 {
464 public:
FunctionInliningCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,int callNestingDepth)465     FunctionInliningCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
466                          int callNestingDepth)
467         : ShaderOptimizationCase(context, name, description, caseShaderType)
468         , m_callNestingDepth(callNestingDepth)
469     {
470     }
471 
472 protected:
generateProgramData(bool optimized) const473     ProgramData generateProgramData(bool optimized) const
474     {
475         const string precision     = getShaderPrecision(m_caseShaderType);
476         const string expression    = "value*vec4(0.8, 0.7, 0.6, 0.9)";
477         const string maybeFuncDefs = optimized ? "" : funcDefinitions(m_callNestingDepth, precision, expression);
478         const string mainValueStatement =
479             (optimized ? "\tvalue = " + expression : "\tvalue = func" + toString(m_callNestingDepth - 1) + "(value)") +
480             ";\n";
481 
482         return defaultProgramData(m_caseShaderType, maybeFuncDefs, mainValueStatement);
483     }
484 
485 private:
486     const int m_callNestingDepth;
487 
funcDefinitions(int callNestingDepth,const string & precision,const string & expression)488     static inline string funcDefinitions(int callNestingDepth, const string &precision, const string &expression)
489     {
490         string result = precision + " vec4 func0 (" + precision + " vec4 value) { return " + expression + "; }\n";
491 
492         for (int i = 1; i < callNestingDepth; i++)
493             result += precision + " vec4 func" + toString(i) + " (" + precision + " vec4 v) { return func" +
494                       toString(i - 1) + "(v); }\n";
495 
496         return result;
497     }
498 };
499 
500 class ConstantPropagationCase : public ShaderOptimizationCase
501 {
502 public:
503     enum CaseType
504     {
505         CASETYPE_BUILT_IN_FUNCTIONS = 0,
506         CASETYPE_ARRAY,
507         CASETYPE_STRUCT,
508 
509         CASETYPE_LAST
510     };
511 
ConstantPropagationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType,bool useConstantExpressionsOnly)512     ConstantPropagationCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
513                             CaseType caseType, bool useConstantExpressionsOnly)
514         : ShaderOptimizationCase(context, name, description, caseShaderType)
515         , m_caseType(caseType)
516         , m_useConstantExpressionsOnly(useConstantExpressionsOnly)
517     {
518     }
519 
520 protected:
generateProgramData(bool optimized) const521     ProgramData generateProgramData(bool optimized) const
522     {
523         const bool isVertexCase = m_caseShaderType == CASESHADERTYPE_VERTEX;
524         const string precision  = getShaderPrecision(m_caseShaderType);
525         const string statements =
526             m_caseType == CASETYPE_BUILT_IN_FUNCTIONS ?
527                 builtinFunctionsCaseStatements(optimized, m_useConstantExpressionsOnly, precision, isVertexCase) :
528             m_caseType == CASETYPE_ARRAY ?
529                 arrayCaseStatements(optimized, m_useConstantExpressionsOnly, precision, isVertexCase) :
530             m_caseType == CASETYPE_STRUCT ?
531                 structCaseStatements(optimized, m_useConstantExpressionsOnly, precision, isVertexCase) :
532                 deFatalStr("Invalid CaseType");
533 
534         return defaultProgramData(m_caseShaderType, statements);
535     }
536 
537 private:
538     const CaseType m_caseType;
539     const bool m_useConstantExpressionsOnly;
540 
builtinFunctionsCaseStatements(bool optimized,bool constantExpressionsOnly,const string & precision,bool useHeavierWorkload)541     static inline string builtinFunctionsCaseStatements(bool optimized, bool constantExpressionsOnly,
542                                                         const string &precision, bool useHeavierWorkload)
543     {
544         const string constMaybe = constantExpressionsOnly ? "const " : "";
545         const int numSinRows    = useHeavierWorkload ? 12 : 1;
546 
547         return optimized ? "    value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match "
548                            "the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
549 
550                            :
551                            "    " + constMaybe + precision +
552                                " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
553                                "    " +
554                                constMaybe + precision +
555                                " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
556                                "    " +
557                                constMaybe +
558                                "bvec4 c = bvec4(true, false, true, true);\n"
559                                "    " +
560                                constMaybe + precision +
561                                " vec4 d = exp(b + vec4(c));\n"
562                                "    " +
563                                constMaybe + precision + " vec4 e0 = inversesqrt(mix(d+a, d+b, a));\n" +
564                                repeatIndexedTemplate("    " + constMaybe + precision +
565                                                          " vec4 e${NDX} = sin(sin(sin(sin(e${PREV_NDX}))));\n",
566                                                      numSinRows, "", 1) +
567                                "    " + constMaybe + precision + " vec4 f = abs(e" + toString(numSinRows) + ");\n" +
568                                "    value = f*value;\n";
569     }
570 
arrayCaseStatements(bool optimized,bool constantExpressionsOnly,const string & precision,bool useHeavierWorkload)571     static inline string arrayCaseStatements(bool optimized, bool constantExpressionsOnly, const string &precision,
572                                              bool useHeavierWorkload)
573     {
574         const string constMaybe = constantExpressionsOnly ? "const " : "";
575         const int numSinRows    = useHeavierWorkload ? 12 : 1;
576 
577         return optimized ? "    value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match "
578                            "the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
579 
580                            :
581                            "    const int arrLen = 4;\n" +
582                                (constantExpressionsOnly ? "    const " + precision +
583                                                               " vec4 arr[arrLen] =\n"
584                                                               "        vec4[](vec4(0.1, 0.5, 0.9, 1.3),\n"
585                                                               "               vec4(0.2, 0.6, 1.0, 1.4),\n"
586                                                               "               vec4(0.3, 0.7, 1.1, 1.5),\n"
587                                                               "               vec4(0.4, 0.8, 1.2, 1.6));\n"
588 
589                                                           :
590                                                           "    " + precision +
591                                                               " vec4 arr[arrLen];\n"
592                                                               "    arr[0] = vec4(0.1, 0.5, 0.9, 1.3);\n"
593                                                               "    arr[1] = vec4(0.2, 0.6, 1.0, 1.4);\n"
594                                                               "    arr[2] = vec4(0.3, 0.7, 1.1, 1.5);\n"
595                                                               "    arr[3] = vec4(0.4, 0.8, 1.2, 1.6);\n") +
596                                "    " + constMaybe + precision +
597                                " vec4 a = (arr[0] + arr[1] + arr[2] + arr[3]) * (1.0 / float(arr.length()));\n"
598                                "    " +
599                                constMaybe + precision + " vec4 b0 = cos(sin(a));\n" +
600                                repeatIndexedTemplate("    " + constMaybe + precision +
601                                                          " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n",
602                                                      numSinRows, "", 1) +
603                                "    " + constMaybe + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
604                                "    value = c*value;\n";
605     }
606 
structCaseStatements(bool optimized,bool constantExpressionsOnly,const string & precision,bool useHeavierWorkload)607     static inline string structCaseStatements(bool optimized, bool constantExpressionsOnly, const string &precision,
608                                               bool useHeavierWorkload)
609     {
610         const string constMaybe = constantExpressionsOnly ? "const " : "";
611         const int numSinRows    = useHeavierWorkload ? 12 : 1;
612 
613         return optimized ? "    value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match "
614                            "the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
615 
616                            :
617                            "    struct S\n"
618                            "    {\n"
619                            "        " +
620                                precision +
621                                " vec4 a;\n"
622                                "        " +
623                                precision +
624                                " vec4 b;\n"
625                                "        " +
626                                precision +
627                                " vec4 c;\n"
628                                "        " +
629                                precision +
630                                " vec4 d;\n"
631                                "    };\n"
632                                "\n"
633                                "    " +
634                                constMaybe +
635                                "S s =\n"
636                                "        S(vec4(0.1, 0.5, 0.9, 1.3),\n"
637                                "          vec4(0.2, 0.6, 1.0, 1.4),\n"
638                                "          vec4(0.3, 0.7, 1.1, 1.5),\n"
639                                "          vec4(0.4, 0.8, 1.2, 1.6));\n"
640                                "    " +
641                                constMaybe + precision +
642                                " vec4 a = (s.a + s.b + s.c + s.d) * 0.25;\n"
643                                "    " +
644                                constMaybe + precision + " vec4 b0 = cos(sin(a));\n" +
645                                repeatIndexedTemplate("    " + constMaybe + precision +
646                                                          " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n",
647                                                      numSinRows, "", 1) +
648                                "    " + constMaybe + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
649                                "    value = c*value;\n";
650     }
651 };
652 
653 class CommonSubexpressionCase : public ShaderOptimizationCase
654 {
655 public:
656     enum CaseType
657     {
658         CASETYPE_SINGLE_STATEMENT = 0,
659         CASETYPE_MULTIPLE_STATEMENTS,
660         CASETYPE_STATIC_BRANCH,
661         CASETYPE_LOOP,
662 
663         CASETYPE_LAST
664     };
665 
CommonSubexpressionCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType)666     CommonSubexpressionCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
667                             CaseType caseType)
668         : ShaderOptimizationCase(context, name, description, caseShaderType)
669         , m_caseType(caseType)
670     {
671     }
672 
673 protected:
generateProgramData(bool optimized) const674     ProgramData generateProgramData(bool optimized) const
675     {
676         const bool isVertexCase = m_caseShaderType == CASESHADERTYPE_VERTEX;
677         const string precision  = getShaderPrecision(m_caseShaderType);
678         const string statements = m_caseType == CASETYPE_SINGLE_STATEMENT ?
679                                       singleStatementCaseStatements(optimized, precision, isVertexCase) :
680                                   m_caseType == CASETYPE_MULTIPLE_STATEMENTS ?
681                                       multipleStatementsCaseStatements(optimized, precision, isVertexCase) :
682                                   m_caseType == CASETYPE_STATIC_BRANCH ?
683                                       staticBranchCaseStatements(optimized, precision, isVertexCase) :
684                                   m_caseType == CASETYPE_LOOP ? loopCaseStatements(optimized, precision, isVertexCase) :
685                                                                 deFatalStr("Invalid CaseType");
686 
687         return defaultProgramData(m_caseShaderType, statements);
688     }
689 
690 private:
691     const CaseType m_caseType;
692 
singleStatementCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)693     static inline string singleStatementCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
694     {
695         const int numTopLevelRepeats = useHeavierWorkload ? 4 : 1;
696 
697         return optimized ? "    " + precision +
698                                " vec4 s = sin(value);\n"
699                                "    " +
700                                precision +
701                                " vec4 cs = cos(s);\n"
702                                "    " +
703                                precision +
704                                " vec4 d = fract(s + cs) + sqrt(s + exp(cs));\n"
705                                "    value = " +
706                                repeat("d", numTopLevelRepeats, "+") + ";\n"
707 
708                            :
709                            "    value = " +
710                                repeat("fract(sin(value) + cos(sin(value))) + sqrt(sin(value) + exp(cos(sin(value))))",
711                                       numTopLevelRepeats, "\n\t      + ") +
712                                ";\n";
713     }
714 
multipleStatementsCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)715     static inline string multipleStatementsCaseStatements(bool optimized, const string &precision,
716                                                           bool useHeavierWorkload)
717     {
718         const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
719         DE_ASSERT(numTopLevelRepeats >= 2);
720 
721         return optimized ? "    " + precision +
722                                " vec4 a = sin(value) + cos(exp(value));\n"
723                                "    " +
724                                precision +
725                                " vec4 b = cos(cos(a));\n"
726                                "    a = fract(exp(sqrt(b)));\n"
727                                "\n" +
728                                repeat("\tvalue += a*b;\n", numTopLevelRepeats)
729 
730                            :
731                            repeatIndexedTemplate("    " + precision +
732                                                      " vec4 a${NDX} = sin(value) + cos(exp(value));\n"
733                                                      "    " +
734                                                      precision +
735                                                      " vec4 b${NDX} = cos(cos(a${NDX}));\n"
736                                                      "    a${NDX} = fract(exp(sqrt(b${NDX})));\n"
737                                                      "\n",
738                                                  numTopLevelRepeats) +
739 
740                                repeatIndexedTemplate("    value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
741     }
742 
staticBranchCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)743     static inline string staticBranchCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
744     {
745         const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
746         DE_ASSERT(numTopLevelRepeats >= 2);
747 
748         if (optimized)
749         {
750             return "    " + precision +
751                    " vec4 a = sin(value) + cos(exp(value));\n"
752                    "    " +
753                    precision +
754                    " vec4 b = cos(a);\n"
755                    "    b = cos(b);\n"
756                    "    a = fract(exp(sqrt(b)));\n"
757                    "\n" +
758                    repeat("    value += a*b;\n", numTopLevelRepeats);
759         }
760         else
761         {
762             string result;
763 
764             for (int i = 0; i < numTopLevelRepeats; i++)
765             {
766                 result += "    " + precision + " vec4 a" + toString(i) +
767                           " = sin(value) + cos(exp(value));\n"
768                           "    " +
769                           precision + " vec4 b" + toString(i) + " = cos(a" + toString(i) + ");\n";
770 
771                 if (i % 3 == 0)
772                     result += "    if (1 < 2)\n"
773                               "        b" +
774                               toString(i) + " = cos(b" + toString(i) + ");\n";
775                 else if (i % 3 == 1)
776                     result += "    b" + toString(i) + " = cos(b" + toString(i) + ");\n";
777                 else if (i % 3 == 2)
778                     result += "    if (2 < 1);\n"
779                               "    else\n"
780                               "        b" +
781                               toString(i) + " = cos(b" + toString(i) + ");\n";
782                 else
783                     DE_ASSERT(false);
784 
785                 result += "    a" + toString(i) + " = fract(exp(sqrt(b" + toString(i) + ")));\n\n";
786             }
787 
788             result += repeatIndexedTemplate("    value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
789 
790             return result;
791         }
792     }
793 
loopCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)794     static inline string loopCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
795     {
796         const int numLoopIterations = useHeavierWorkload ? 32 : 4;
797 
798         return optimized ? "    " + precision +
799                                " vec4 acc = value;\n"
800                                "    for (int i = 0; i < " +
801                                toString(numLoopIterations) +
802                                "; i++)\n"
803                                "        acc = sin(acc);\n"
804                                "\n"
805                                "    value += acc;\n"
806                                "    value += acc;\n"
807 
808                            :
809                            "    " + precision +
810                                " vec4 acc0 = value;\n"
811                                "    for (int i = 0; i < " +
812                                toString(numLoopIterations) +
813                                "; i++)\n"
814                                "        acc0 = sin(acc0);\n"
815                                "\n"
816                                "    " +
817                                precision +
818                                " vec4 acc1 = value;\n"
819                                "    for (int i = 0; i < " +
820                                toString(numLoopIterations) +
821                                "; i++)\n"
822                                "        acc1 = sin(acc1);\n"
823                                "\n"
824                                "    value += acc0;\n"
825                                "    value += acc1;\n";
826     }
827 };
828 
829 class DeadCodeEliminationCase : public ShaderOptimizationCase
830 {
831 public:
832     enum CaseType
833     {
834         CASETYPE_DEAD_BRANCH_SIMPLE = 0,
835         CASETYPE_DEAD_BRANCH_COMPLEX,
836         CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST,
837         CASETYPE_DEAD_BRANCH_FUNC_CALL,
838         CASETYPE_UNUSED_VALUE_BASIC,
839         CASETYPE_UNUSED_VALUE_LOOP,
840         CASETYPE_UNUSED_VALUE_DEAD_BRANCH,
841         CASETYPE_UNUSED_VALUE_AFTER_RETURN,
842         CASETYPE_UNUSED_VALUE_MUL_ZERO,
843 
844         CASETYPE_LAST
845     };
846 
DeadCodeEliminationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType)847     DeadCodeEliminationCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
848                             CaseType caseType)
849         : ShaderOptimizationCase(context, name, description, caseShaderType)
850         , m_caseType(caseType)
851     {
852     }
853 
854 protected:
generateProgramData(bool optimized) const855     ProgramData generateProgramData(bool optimized) const
856     {
857         const bool isVertexCase = m_caseShaderType == CASESHADERTYPE_VERTEX;
858         const string precision  = getShaderPrecision(m_caseShaderType);
859         const string funcDefs   = m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL ?
860                                       deadBranchFuncCallCaseFuncDefs(optimized, precision) :
861                                   m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN ?
862                                       unusedValueAfterReturnCaseFuncDefs(optimized, precision, isVertexCase) :
863                                       "";
864 
865         const string statements = m_caseType == CASETYPE_DEAD_BRANCH_SIMPLE ?
866                                       deadBranchSimpleCaseStatements(optimized, isVertexCase) :
867                                   m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX ?
868                                       deadBranchComplexCaseStatements(optimized, precision, true, isVertexCase) :
869                                   m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST ?
870                                       deadBranchComplexCaseStatements(optimized, precision, false, isVertexCase) :
871                                   m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL ?
872                                       deadBranchFuncCallCaseStatements(optimized, isVertexCase) :
873                                   m_caseType == CASETYPE_UNUSED_VALUE_BASIC ?
874                                       unusedValueBasicCaseStatements(optimized, precision, isVertexCase) :
875                                   m_caseType == CASETYPE_UNUSED_VALUE_LOOP ?
876                                       unusedValueLoopCaseStatements(optimized, precision, isVertexCase) :
877                                   m_caseType == CASETYPE_UNUSED_VALUE_DEAD_BRANCH ?
878                                       unusedValueDeadBranchCaseStatements(optimized, precision, isVertexCase) :
879                                   m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN ?
880                                       unusedValueAfterReturnCaseStatements() :
881                                   m_caseType == CASETYPE_UNUSED_VALUE_MUL_ZERO ?
882                                       unusedValueMulZeroCaseStatements(optimized, precision, isVertexCase) :
883                                       deFatalStr("Invalid CaseType");
884 
885         return defaultProgramData(m_caseShaderType, funcDefs, statements);
886     }
887 
888 private:
889     const CaseType m_caseType;
890 
deadBranchSimpleCaseStatements(bool optimized,bool useHeavierWorkload)891     static inline string deadBranchSimpleCaseStatements(bool optimized, bool useHeavierWorkload)
892     {
893         const int numLoopIterations = useHeavierWorkload ? 16 : 4;
894 
895         return optimized ? "    value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
896 
897                            :
898                            "    value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
899                            "    if (2 < 1)\n"
900                            "    {\n"
901                            "        value = cos(exp(sin(value))*log(sqrt(value)));\n"
902                            "        for (int i = 0; i < " +
903                                toString(numLoopIterations) +
904                                "; i++)\n"
905                                "            value = sin(value);\n"
906                                "    }\n";
907     }
908 
deadBranchComplexCaseStatements(bool optimized,const string & precision,bool useConst,bool useHeavierWorkload)909     static inline string deadBranchComplexCaseStatements(bool optimized, const string &precision, bool useConst,
910                                                          bool useHeavierWorkload)
911     {
912         const string constMaybe     = useConst ? "const " : "";
913         const int numLoopIterations = useHeavierWorkload ? 16 : 4;
914 
915         return optimized ? "    value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
916 
917                            :
918                            "    value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
919                            "    " +
920                                constMaybe + precision +
921                                " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
922                                "    " +
923                                constMaybe + precision +
924                                " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
925                                "    " +
926                                constMaybe +
927                                "bvec4 c = bvec4(true, false, true, true);\n"
928                                "    " +
929                                constMaybe + precision +
930                                " vec4 d = exp(b + vec4(c));\n"
931                                "    " +
932                                constMaybe + precision +
933                                " vec4 e = 1.8*abs(sin(sin(inversesqrt(mix(d+a, d+b, a)))));\n"
934                                "    if (e.x > 1.0)\n"
935                                "    {\n"
936                                "        value = cos(exp(sin(value))*log(sqrt(value)));\n"
937                                "        for (int i = 0; i < " +
938                                toString(numLoopIterations) +
939                                "; i++)\n"
940                                "            value = sin(value);\n"
941                                "    }\n";
942     }
943 
deadBranchFuncCallCaseFuncDefs(bool optimized,const string & precision)944     static inline string deadBranchFuncCallCaseFuncDefs(bool optimized, const string &precision)
945     {
946         return optimized ? "" : precision + " float func (" + precision + " float x) { return 2.0*x; }\n";
947     }
948 
deadBranchFuncCallCaseStatements(bool optimized,bool useHeavierWorkload)949     static inline string deadBranchFuncCallCaseStatements(bool optimized, bool useHeavierWorkload)
950     {
951         const int numLoopIterations = useHeavierWorkload ? 16 : 4;
952 
953         return optimized ? "    value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
954 
955                            :
956                            "    value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
957                            "    if (func(0.3) > 1.0)\n"
958                            "    {\n"
959                            "        value = cos(exp(sin(value))*log(sqrt(value)));\n"
960                            "        for (int i = 0; i < " +
961                                toString(numLoopIterations) +
962                                "; i++)\n"
963                                "            value = sin(value);\n"
964                                "    }\n";
965     }
966 
unusedValueBasicCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)967     static inline string unusedValueBasicCaseStatements(bool optimized, const string &precision,
968                                                         bool useHeavierWorkload)
969     {
970         const int numSinRows = useHeavierWorkload ? 12 : 1;
971 
972         return optimized ? "    " + precision +
973                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
974                                "    value = used;\n"
975 
976                            :
977                            "    " + precision +
978                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
979                                "    " +
980                                precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value))) + used;\n" +
981                                repeat("    unused = sin(sin(sin(sin(unused))));\n", numSinRows) + "    value = used;\n";
982     }
983 
unusedValueLoopCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)984     static inline string unusedValueLoopCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
985     {
986         const int numLoopIterations = useHeavierWorkload ? 16 : 4;
987 
988         return optimized ? "    " + precision +
989                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
990                                "    value = used;\n"
991 
992                            :
993                            "    " + precision +
994                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
995                                "    " +
996                                precision +
997                                " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
998                                "    for (int i = 0; i < " +
999                                toString(numLoopIterations) +
1000                                "; i++)\n"
1001                                "        unused = sin(unused + used);\n"
1002                                "    value = used;\n";
1003     }
1004 
unusedValueAfterReturnCaseFuncDefs(bool optimized,const string & precision,bool useHeavierWorkload)1005     static inline string unusedValueAfterReturnCaseFuncDefs(bool optimized, const string &precision,
1006                                                             bool useHeavierWorkload)
1007     {
1008         const int numSinRows = useHeavierWorkload ? 12 : 1;
1009 
1010         return optimized ? precision + " vec4 func (" + precision +
1011                                " vec4 v)\n"
1012                                "{\n"
1013                                "    " +
1014                                precision +
1015                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
1016                                "    return used;\n"
1017                                "}\n"
1018 
1019                            :
1020                            precision + " vec4 func (" + precision +
1021                                " vec4 v)\n"
1022                                "{\n"
1023                                "    " +
1024                                precision +
1025                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
1026                                "    " +
1027                                precision + " vec4 unused = cos(exp(sin(v))*log(sqrt(v)));\n" +
1028                                repeat("    unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
1029                                "    return used;\n"
1030                                "    used = used*unused;"
1031                                "    return used;\n"
1032                                "}\n";
1033     }
1034 
unusedValueAfterReturnCaseStatements(void)1035     static inline string unusedValueAfterReturnCaseStatements(void)
1036     {
1037         return "    value = func(value);\n";
1038     }
1039 
unusedValueDeadBranchCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)1040     static inline string unusedValueDeadBranchCaseStatements(bool optimized, const string &precision,
1041                                                              bool useHeavierWorkload)
1042     {
1043         const int numSinRows = useHeavierWorkload ? 12 : 1;
1044 
1045         return optimized ? "    " + precision +
1046                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1047                                "    value = used;\n"
1048 
1049                            :
1050                            "    " + precision +
1051                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1052                                "    " +
1053                                precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n" +
1054                                repeat("    unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
1055                                "    if (2 < 1)\n"
1056                                "        used = used*unused;\n"
1057                                "    value = used;\n";
1058     }
1059 
unusedValueMulZeroCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)1060     static inline string unusedValueMulZeroCaseStatements(bool optimized, const string &precision,
1061                                                           bool useHeavierWorkload)
1062     {
1063         const int numSinRows = useHeavierWorkload ? 12 : 1;
1064 
1065         return optimized ? "    " + precision +
1066                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1067                                "    value = used;\n"
1068 
1069                            :
1070                            "    " + precision +
1071                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1072                                "    " +
1073                                precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n" +
1074                                repeat("    unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
1075                                "    value = used + unused*float(1-1);\n";
1076     }
1077 };
1078 
1079 } // namespace
1080 
ShaderOptimizationTests(Context & context)1081 ShaderOptimizationTests::ShaderOptimizationTests(Context &context)
1082     : TestCaseGroup(context, "optimization", "Shader Optimization Performance Tests")
1083 {
1084 }
1085 
~ShaderOptimizationTests(void)1086 ShaderOptimizationTests::~ShaderOptimizationTests(void)
1087 {
1088 }
1089 
init(void)1090 void ShaderOptimizationTests::init(void)
1091 {
1092     TestCaseGroup *const unrollGroup = new TestCaseGroup(m_context, "loop_unrolling", "Loop Unrolling Cases");
1093     TestCaseGroup *const loopInvariantCodeMotionGroup =
1094         new TestCaseGroup(m_context, "loop_invariant_code_motion", "Loop-Invariant Code Motion Cases");
1095     TestCaseGroup *const inlineGroup = new TestCaseGroup(m_context, "function_inlining", "Function Inlining Cases");
1096     TestCaseGroup *const constantPropagationGroup =
1097         new TestCaseGroup(m_context, "constant_propagation", "Constant Propagation Cases");
1098     TestCaseGroup *const commonSubexpressionGroup =
1099         new TestCaseGroup(m_context, "common_subexpression_elimination", "Common Subexpression Elimination Cases");
1100     TestCaseGroup *const deadCodeEliminationGroup =
1101         new TestCaseGroup(m_context, "dead_code_elimination", "Dead Code Elimination Cases");
1102     addChild(unrollGroup);
1103     addChild(loopInvariantCodeMotionGroup);
1104     addChild(inlineGroup);
1105     addChild(constantPropagationGroup);
1106     addChild(commonSubexpressionGroup);
1107     addChild(deadCodeEliminationGroup);
1108 
1109     for (int caseShaderTypeI = 0; caseShaderTypeI < CASESHADERTYPE_LAST; caseShaderTypeI++)
1110     {
1111         const CaseShaderType caseShaderType    = (CaseShaderType)caseShaderTypeI;
1112         const char *const caseShaderTypeSuffix = caseShaderType == CASESHADERTYPE_VERTEX   ? "_vertex" :
1113                                                  caseShaderType == CASESHADERTYPE_FRAGMENT ? "_fragment" :
1114                                                                                              DE_NULL;
1115 
1116         // Loop unrolling cases.
1117 
1118         {
1119             static const int loopIterationCounts[] = {4, 8, 32};
1120 
1121             for (int caseTypeI = 0; caseTypeI < LoopUnrollCase::CASETYPE_LAST; caseTypeI++)
1122             {
1123                 const LoopUnrollCase::CaseType caseType = (LoopUnrollCase::CaseType)caseTypeI;
1124                 const string caseTypeName               = caseType == LoopUnrollCase::CASETYPE_INDEPENDENT ?
1125                                                               "independent_iterations" :
1126                                                           caseType == LoopUnrollCase::CASETYPE_DEPENDENT ? "dependent_iterations" :
1127                                                                                                            DE_NULL;
1128                 const string caseTypeDesc =
1129                     caseType == LoopUnrollCase::CASETYPE_INDEPENDENT ? "loop iterations don't depend on each other" :
1130                     caseType == LoopUnrollCase::CASETYPE_DEPENDENT   ? "loop iterations depend on each other" :
1131                                                                        DE_NULL;
1132 
1133                 for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
1134                 {
1135                     const int loopIterations = loopIterationCounts[loopIterNdx];
1136                     const string name        = caseTypeName + "_" + toString(loopIterations) + caseShaderTypeSuffix;
1137                     const string description = toString(loopIterations) + " iterations; " + caseTypeDesc;
1138 
1139                     unrollGroup->addChild(new LoopUnrollCase(m_context, name.c_str(), description.c_str(),
1140                                                              caseShaderType, caseType, loopIterations));
1141                 }
1142             }
1143         }
1144 
1145         // Loop-invariant code motion cases.
1146 
1147         {
1148             static const int loopIterationCounts[] = {4, 8, 32};
1149 
1150             for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
1151             {
1152                 const int loopIterations = loopIterationCounts[loopIterNdx];
1153                 const string name        = toString(loopIterations) + "_iterations" + caseShaderTypeSuffix;
1154 
1155                 loopInvariantCodeMotionGroup->addChild(
1156                     new LoopInvariantCodeMotionCase(m_context, name.c_str(), "", caseShaderType, loopIterations));
1157             }
1158         }
1159 
1160         // Function inlining cases.
1161 
1162         {
1163             static const int callNestingDepths[] = {4, 8, 32};
1164 
1165             for (int nestDepthNdx = 0; nestDepthNdx < DE_LENGTH_OF_ARRAY(callNestingDepths); nestDepthNdx++)
1166             {
1167                 const int nestingDepth = callNestingDepths[nestDepthNdx];
1168                 const string name      = toString(nestingDepth) + "_nested" + caseShaderTypeSuffix;
1169 
1170                 inlineGroup->addChild(
1171                     new FunctionInliningCase(m_context, name.c_str(), "", caseShaderType, nestingDepth));
1172             }
1173         }
1174 
1175         // Constant propagation cases.
1176 
1177         for (int caseTypeI = 0; caseTypeI < ConstantPropagationCase::CASETYPE_LAST; caseTypeI++)
1178         {
1179             const ConstantPropagationCase::CaseType caseType = (ConstantPropagationCase::CaseType)caseTypeI;
1180             const string caseTypeName = caseType == ConstantPropagationCase::CASETYPE_BUILT_IN_FUNCTIONS ?
1181                                             "built_in_functions" :
1182                                         caseType == ConstantPropagationCase::CASETYPE_ARRAY  ? "array" :
1183                                         caseType == ConstantPropagationCase::CASETYPE_STRUCT ? "struct" :
1184                                                                                                DE_NULL;
1185 
1186             for (int constantExpressionsOnlyI = 0; constantExpressionsOnlyI <= 1; constantExpressionsOnlyI++)
1187             {
1188                 const bool constantExpressionsOnly = constantExpressionsOnlyI != 0;
1189                 const string name = caseTypeName + (constantExpressionsOnly ? "" : "_no_const") + caseShaderTypeSuffix;
1190 
1191                 constantPropagationGroup->addChild(new ConstantPropagationCase(
1192                     m_context, name.c_str(), "", caseShaderType, caseType, constantExpressionsOnly));
1193             }
1194         }
1195 
1196         // Common subexpression cases.
1197 
1198         for (int caseTypeI = 0; caseTypeI < CommonSubexpressionCase::CASETYPE_LAST; caseTypeI++)
1199         {
1200             const CommonSubexpressionCase::CaseType caseType = (CommonSubexpressionCase::CaseType)caseTypeI;
1201 
1202             const string caseTypeName =
1203                 caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT    ? "single_statement" :
1204                 caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS ? "multiple_statements" :
1205                 caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH       ? "static_branch" :
1206                 caseType == CommonSubexpressionCase::CASETYPE_LOOP                ? "loop" :
1207                                                                                     DE_NULL;
1208 
1209             const string description = caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT ?
1210                                            "A single statement containing multiple uses of same subexpression" :
1211                                        caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS ?
1212                                            "Multiple statements performing same computations" :
1213                                        caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH ?
1214                                            "Multiple statements including a static conditional" :
1215                                        caseType == CommonSubexpressionCase::CASETYPE_LOOP ?
1216                                            "Multiple loops performing the same computations" :
1217                                            DE_NULL;
1218 
1219             commonSubexpressionGroup->addChild(
1220                 new CommonSubexpressionCase(m_context, (caseTypeName + caseShaderTypeSuffix).c_str(),
1221                                             description.c_str(), caseShaderType, caseType));
1222         }
1223 
1224         // Dead code elimination cases.
1225 
1226         for (int caseTypeI = 0; caseTypeI < DeadCodeEliminationCase::CASETYPE_LAST; caseTypeI++)
1227         {
1228             const DeadCodeEliminationCase::CaseType caseType = (DeadCodeEliminationCase::CaseType)caseTypeI;
1229             const char *const caseTypeName =
1230                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE  ? "dead_branch_simple" :
1231                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX ? "dead_branch_complex" :
1232                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST ?
1233                                                                                     "dead_branch_complex_no_const" :
1234                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL     ? "dead_branch_func_call" :
1235                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC        ? "unused_value_basic" :
1236                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP         ? "unused_value_loop" :
1237                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH  ? "unused_value_dead_branch" :
1238                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN ? "unused_value_after_return" :
1239                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO     ? "unused_value_mul_zero" :
1240                                                                                           DE_NULL;
1241 
1242             const char *const caseTypeDescription =
1243                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE ?
1244                     "Do computation inside a branch that is never taken (condition is simple false constant "
1245                     "expression)" :
1246                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX ?
1247                     "Do computation inside a branch that is never taken (condition is complex false constant "
1248                     "expression)" :
1249                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST ?
1250                     "Do computation inside a branch that is never taken (condition is complex false expression, not "
1251                     "constant expression but still compile-time computable)" :
1252                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL ?
1253                     "Do computation inside a branch that is never taken (condition is compile-time computable false "
1254                     "expression containing function call to a simple inlineable function)" :
1255                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC ?
1256                     "Compute a value that is never used even statically" :
1257                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP ?
1258                     "Compute a value, using a loop, that is never used even statically" :
1259                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH ?
1260                     "Compute a value that is used only inside a statically dead branch" :
1261                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN ?
1262                     "Compute a value that is used only after a return statement" :
1263                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO ?
1264                     "Compute a value that is used but multiplied by a zero constant expression" :
1265                     DE_NULL;
1266 
1267             deadCodeEliminationGroup->addChild(
1268                 new DeadCodeEliminationCase(m_context, (string() + caseTypeName + caseShaderTypeSuffix).c_str(),
1269                                             caseTypeDescription, caseShaderType, caseType));
1270         }
1271     }
1272 }
1273 
1274 } // namespace Performance
1275 } // namespace gles3
1276 } // namespace deqp
1277