xref: /aosp_15_r20/external/deqp/modules/gles2/performance/es2pShaderOptimizationTests.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 2.0 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Optimized vs unoptimized shader performance tests.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es2pShaderOptimizationTests.hpp"
25 #include "glsShaderPerformanceMeasurer.hpp"
26 #include "gluRenderContext.hpp"
27 #include "gluShaderProgram.hpp"
28 #include "tcuTestLog.hpp"
29 #include "tcuVector.hpp"
30 #include "tcuStringTemplate.hpp"
31 #include "deSharedPtr.hpp"
32 #include "deStringUtil.hpp"
33 #include "deMath.h"
34 
35 #include "glwFunctions.hpp"
36 
37 #include <vector>
38 #include <string>
39 #include <map>
40 
41 using de::SharedPtr;
42 using de::toString;
43 using glu::ShaderProgram;
44 using tcu::TestLog;
45 using tcu::Vec4;
46 
47 using std::string;
48 using std::vector;
49 
50 namespace deqp
51 {
52 
53 using gls::ShaderPerformanceMeasurer;
54 
55 namespace gles2
56 {
57 namespace Performance
58 {
59 
//! Build a map that holds exactly one key/value pair.
//! \param key   Key of the only entry.
//! \param value Value stored under \p key.
//! \return Map containing the single entry (key -> value).
static inline std::map<string, string> singleMap(const string &key, const string &value)
{
    std::map<string, string> single;
    single.insert(std::make_pair(key, value));
    return single;
}
66 
//! Concatenate \p numRepeats copies of \p str, separated by \p delim.
//! \param str        String to repeat.
//! \param numRepeats Number of copies; a non-positive count yields an empty string.
//! \param delim      Separator placed between consecutive copies (never leading or trailing).
//! \return The joined string.
static inline string repeat(const string &str, int numRepeats, const string &delim = "")
{
    string result;

    // Nothing to emit for zero or negative counts; previously one copy of str leaked through.
    if (numRepeats <= 0)
        return result;

    const string::size_type count = (string::size_type)numRepeats;
    result.reserve(count * str.size() + (count - 1) * delim.size());

    result = str;
    for (int i = 1; i < numRepeats; i++)
    {
        // Append in two steps to avoid building a temporary (delim + str) each iteration.
        result += delim;
        result += str;
    }
    return result;
}
74 
repeatIndexedTemplate(const string & strTempl,int numRepeats,const string & delim="",int ndxStart=0)75 static inline string repeatIndexedTemplate(const string &strTempl, int numRepeats, const string &delim = "",
76                                            int ndxStart = 0)
77 {
78     const tcu::StringTemplate templ(strTempl);
79     string result;
80     std::map<string, string> params;
81 
82     for (int i = 0; i < numRepeats; i++)
83     {
84         params["PREV_NDX"] = toString(i + ndxStart - 1);
85         params["NDX"]      = toString(i + ndxStart);
86 
87         result += (i > 0 ? delim : "") + templ.specialize(params);
88     }
89 
90     return result;
91 }
92 
93 namespace
94 {
95 
//! Shader stage whose performance a case measures.
enum CaseShaderType
{
    CASESHADERTYPE_VERTEX = 0, //!< The workload is placed in the vertex shader.
    CASESHADERTYPE_FRAGMENT,   //!< The workload is placed in the fragment shader.

    CASESHADERTYPE_LAST //!< End marker; not a valid shader type.
};
103 
getShaderPrecision(CaseShaderType shaderType)104 static inline string getShaderPrecision(CaseShaderType shaderType)
105 {
106     switch (shaderType)
107     {
108     case CASESHADERTYPE_VERTEX:
109         return "highp";
110     case CASESHADERTYPE_FRAGMENT:
111         return "mediump";
112     default:
113         DE_ASSERT(false);
114         return "";
115     }
116 }
117 
118 struct ProgramData
119 {
120     glu::ProgramSources sources;
121     vector<gls::AttribSpec>
122         attributes; //!< \note Shouldn't contain a_position; that one is set by gls::ShaderPerformanceMeasurer.
123 
ProgramDatadeqp::gles2::Performance::__anon9fd9d6310111::ProgramData124     ProgramData(void)
125     {
126     }
ProgramDatadeqp::gles2::Performance::__anon9fd9d6310111::ProgramData127     ProgramData(const glu::ProgramSources &sources_,
128                 const vector<gls::AttribSpec> &attributes_ = vector<gls::AttribSpec>())
129         : sources(sources_)
130         , attributes(attributes_)
131     {
132     }
ProgramDatadeqp::gles2::Performance::__anon9fd9d6310111::ProgramData133     ProgramData(const glu::ProgramSources &sources_, const gls::AttribSpec &attribute)
134         : sources(sources_)
135         , attributes(1, attribute)
136     {
137     }
138 };
139 
140 //! Shader boilerplate helper; most cases have similar basic shader structure.
defaultProgramData(CaseShaderType shaderType,const string & funcDefs,const string & mainStatements)141 static inline ProgramData defaultProgramData(CaseShaderType shaderType, const string &funcDefs,
142                                              const string &mainStatements)
143 {
144     const bool isVertexCase   = shaderType == CASESHADERTYPE_VERTEX;
145     const bool isFragmentCase = shaderType == CASESHADERTYPE_FRAGMENT;
146     const string vtxPrec      = getShaderPrecision(CASESHADERTYPE_VERTEX);
147     const string fragPrec     = getShaderPrecision(CASESHADERTYPE_FRAGMENT);
148 
149     return ProgramData(glu::ProgramSources()
150                            << glu::VertexSource("attribute " + vtxPrec +
151                                                 " vec4 a_position;\n"
152                                                 "attribute " +
153                                                 vtxPrec +
154                                                 " vec4 a_value;\n"
155                                                 "varying " +
156                                                 fragPrec + " vec4 v_value;\n" + (isVertexCase ? funcDefs : "") +
157                                                 "void main (void)\n"
158                                                 "{\n"
159                                                 "    gl_Position = a_position;\n"
160                                                 "    " +
161                                                 vtxPrec + " vec4 value = a_value;\n" +
162                                                 (isVertexCase ? mainStatements : "") +
163                                                 "    v_value = value;\n"
164                                                 "}\n")
165 
166                            << glu::FragmentSource(
167                                   "varying " + fragPrec + " vec4 v_value;\n" + (isFragmentCase ? funcDefs : "") +
168                                   "void main (void)\n"
169                                   "{\n"
170                                   "    " +
171                                   fragPrec + " vec4 value = v_value;\n" + (isFragmentCase ? mainStatements : "") +
172                                   "    gl_FragColor = value;\n"
173                                   "}\n"),
174                        gls::AttribSpec("a_value", Vec4(1.0f, 0.0f, 0.0f, 0.0f), Vec4(0.0f, 1.0f, 0.0f, 0.0f),
175                                        Vec4(0.0f, 0.0f, 1.0f, 0.0f), Vec4(0.0f, 0.0f, 0.0f, 1.0f)));
176 }
177 
defaultProgramData(CaseShaderType shaderType,const string & mainStatements)178 static inline ProgramData defaultProgramData(CaseShaderType shaderType, const string &mainStatements)
179 {
180     return defaultProgramData(shaderType, "", mainStatements);
181 }
182 
183 class ShaderOptimizationCase : public TestCase
184 {
185 public:
ShaderOptimizationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType)186     ShaderOptimizationCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType)
187         : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, description)
188         , m_caseShaderType(caseShaderType)
189         , m_state(STATE_LAST)
190         , m_measurer(context.getRenderContext(), caseShaderType == CASESHADERTYPE_VERTEX   ? gls::CASETYPE_VERTEX :
191                                                  caseShaderType == CASESHADERTYPE_FRAGMENT ? gls::CASETYPE_FRAGMENT :
192                                                                                              gls::CASETYPE_LAST)
193         , m_unoptimizedResult(-1.0f, -1.0f)
194         , m_optimizedResult(-1.0f, -1.0f)
195     {
196     }
197 
~ShaderOptimizationCase(void)198     virtual ~ShaderOptimizationCase(void)
199     {
200     }
201 
202     void init(void);
203     IterateResult iterate(void);
204 
205 protected:
206     virtual ProgramData generateProgramData(bool optimized) const = 0;
207 
208     const CaseShaderType m_caseShaderType;
209 
210 private:
211     enum State
212     {
213         STATE_INIT_UNOPTIMIZED = 0,
214         STATE_MEASURE_UNOPTIMIZED,
215         STATE_INIT_OPTIMIZED,
216         STATE_MEASURE_OPTIMIZED,
217         STATE_FINISHED,
218 
219         STATE_LAST
220     };
221 
programData(bool optimized)222     ProgramData &programData(bool optimized)
223     {
224         return optimized ? m_optimizedData : m_unoptimizedData;
225     }
program(bool optimized)226     SharedPtr<const ShaderProgram> &program(bool optimized)
227     {
228         return optimized ? m_optimizedProgram : m_unoptimizedProgram;
229     }
result(bool optimized)230     ShaderPerformanceMeasurer::Result &result(bool optimized)
231     {
232         return optimized ? m_optimizedResult : m_unoptimizedResult;
233     }
234 
235     State m_state;
236     ShaderPerformanceMeasurer m_measurer;
237 
238     ProgramData m_unoptimizedData;
239     ProgramData m_optimizedData;
240     SharedPtr<const ShaderProgram> m_unoptimizedProgram;
241     SharedPtr<const ShaderProgram> m_optimizedProgram;
242     ShaderPerformanceMeasurer::Result m_unoptimizedResult;
243     ShaderPerformanceMeasurer::Result m_optimizedResult;
244 };
245 
init(void)246 void ShaderOptimizationCase::init(void)
247 {
248     const glu::RenderContext &renderCtx = m_context.getRenderContext();
249     TestLog &log                        = m_testCtx.getLog();
250 
251     m_measurer.logParameters(log);
252 
253     for (int ndx = 0; ndx < 2; ndx++)
254     {
255         const bool optimized = ndx == 1;
256 
257         programData(optimized) = generateProgramData(optimized);
258 
259         for (int i = 0; i < (int)programData(optimized).attributes.size(); i++)
260             DE_ASSERT(programData(optimized).attributes[i].name !=
261                       "a_position"); // \note Position attribute is set by m_measurer.
262 
263         program(optimized) =
264             SharedPtr<const ShaderProgram>(new ShaderProgram(renderCtx, programData(optimized).sources));
265 
266         {
267             const tcu::ScopedLogSection section(log, optimized ? "OptimizedProgram" : "UnoptimizedProgram",
268                                                 optimized ? "Hand-optimized program" : "Unoptimized program");
269             log << *program(optimized);
270         }
271 
272         if (!program(optimized)->isOk())
273             TCU_FAIL("Shader compilation failed");
274     }
275 
276     m_state = STATE_INIT_UNOPTIMIZED;
277 }
278 
iterate(void)279 ShaderOptimizationCase::IterateResult ShaderOptimizationCase::iterate(void)
280 {
281     TestLog &log = m_testCtx.getLog();
282 
283     if (m_state == STATE_INIT_UNOPTIMIZED || m_state == STATE_INIT_OPTIMIZED)
284     {
285         const bool optimized = m_state == STATE_INIT_OPTIMIZED;
286         m_measurer.init(program(optimized)->getProgram(), programData(optimized).attributes, 1);
287         m_state = optimized ? STATE_MEASURE_OPTIMIZED : STATE_MEASURE_UNOPTIMIZED;
288 
289         return CONTINUE;
290     }
291     else if (m_state == STATE_MEASURE_UNOPTIMIZED || m_state == STATE_MEASURE_OPTIMIZED)
292     {
293         m_measurer.iterate();
294 
295         if (m_measurer.isFinished())
296         {
297             const bool optimized = m_state == STATE_MEASURE_OPTIMIZED;
298             const tcu::ScopedLogSection section(log, optimized ? "OptimizedResult" : "UnoptimizedResult",
299                                                 optimized ? "Measurement results for hand-optimized program" :
300                                                             "Measurement result for unoptimized program");
301             m_measurer.logMeasurementInfo(log);
302             result(optimized) = m_measurer.getResult();
303             m_measurer.deinit();
304             m_state = optimized ? STATE_FINISHED : STATE_INIT_OPTIMIZED;
305         }
306 
307         return CONTINUE;
308     }
309     else
310     {
311         DE_ASSERT(m_state == STATE_FINISHED);
312 
313         const float unoptimizedRelevantResult = m_caseShaderType == CASESHADERTYPE_VERTEX ?
314                                                     m_unoptimizedResult.megaVertPerSec :
315                                                     m_unoptimizedResult.megaFragPerSec;
316         const float optimizedRelevantResult   = m_caseShaderType == CASESHADERTYPE_VERTEX ?
317                                                     m_optimizedResult.megaVertPerSec :
318                                                     m_optimizedResult.megaFragPerSec;
319         const char *const relevantResultName  = m_caseShaderType == CASESHADERTYPE_VERTEX ? "vertex" : "fragment";
320         const float ratio                     = unoptimizedRelevantResult / optimizedRelevantResult;
321         const int handOptimizationGain        = (int)deFloatRound(100.0f / ratio) - 100;
322 
323         log << TestLog::Message << "Unoptimized / optimized " << relevantResultName << " performance ratio: " << ratio
324             << TestLog::EndMessage;
325 
326         if (handOptimizationGain >= 0)
327             log << TestLog::Message << "Note: " << handOptimizationGain
328                 << "% performance gain was achieved with hand-optimized version" << TestLog::EndMessage;
329         else
330             log << TestLog::Message << "Note: hand-optimization degraded performance by " << -handOptimizationGain
331                 << "%" << TestLog::EndMessage;
332 
333         m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(ratio, 2).c_str());
334 
335         return STOP;
336     }
337 }
338 
339 class LoopUnrollCase : public ShaderOptimizationCase
340 {
341 public:
342     enum CaseType
343     {
344         CASETYPE_INDEPENDENT = 0,
345         CASETYPE_DEPENDENT,
346 
347         CASETYPE_LAST
348     };
349 
LoopUnrollCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType,int numRepetitions)350     LoopUnrollCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
351                    CaseType caseType, int numRepetitions)
352         : ShaderOptimizationCase(context, name, description, caseShaderType)
353         , m_numRepetitions(numRepetitions)
354         , m_caseType(caseType)
355     {
356     }
357 
358 protected:
generateProgramData(bool optimized) const359     ProgramData generateProgramData(bool optimized) const
360     {
361         const string repetition =
362             optimized ? repeatIndexedTemplate("\t" + expressionTemplate(m_caseType) + ";\n", m_numRepetitions) :
363                         loop(m_numRepetitions, expressionTemplate(m_caseType));
364 
365         return defaultProgramData(m_caseShaderType, "\t" + getShaderPrecision(m_caseShaderType) +
366                                                         " vec4 valueOrig = value;\n" + repetition);
367     }
368 
369 private:
370     const int m_numRepetitions;
371     const CaseType m_caseType;
372 
expressionTemplate(CaseType caseType)373     static inline string expressionTemplate(CaseType caseType)
374     {
375         switch (caseType)
376         {
377         case CASETYPE_INDEPENDENT:
378             return "value += sin(float(${NDX}+1)*valueOrig)";
379         case CASETYPE_DEPENDENT:
380             return "value = sin(value)";
381         default:
382             DE_ASSERT(false);
383             return "";
384         }
385     }
386 
loop(int iterations,const string & innerExpr)387     static inline string loop(int iterations, const string &innerExpr)
388     {
389         return "\tfor (int i = 0; i < " + toString(iterations) + "; i++)\n\t\t" +
390                tcu::StringTemplate(innerExpr).specialize(singleMap("NDX", "i")) + ";\n";
391     }
392 };
393 
394 class LoopInvariantCodeMotionCase : public ShaderOptimizationCase
395 {
396 public:
LoopInvariantCodeMotionCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,int numLoopIterations)397     LoopInvariantCodeMotionCase(Context &context, const char *name, const char *description,
398                                 CaseShaderType caseShaderType, int numLoopIterations)
399         : ShaderOptimizationCase(context, name, description, caseShaderType)
400         , m_numLoopIterations(numLoopIterations)
401     {
402     }
403 
404 protected:
generateProgramData(bool optimized) const405     ProgramData generateProgramData(bool optimized) const
406     {
407         float scale = 0.0f;
408         for (int i = 0; i < m_numLoopIterations; i++)
409             scale += 3.2f * (float)i + 4.6f;
410         scale = 1.0f / scale;
411 
412         const string precision  = getShaderPrecision(m_caseShaderType);
413         const string statements = optimized ? "    " + precision +
414                                                   " vec4 valueOrig = value;\n"
415                                                   "    " +
416                                                   precision +
417                                                   " vec4 y = sin(cos(sin(valueOrig)));\n"
418                                                   "    for (int i = 0; i < " +
419                                                   toString(m_numLoopIterations) +
420                                                   "; i++)\n"
421                                                   "    {\n"
422                                                   "        " +
423                                                   precision +
424                                                   " float x = 3.2*float(i) + 4.6;\n"
425                                                   "        value += x*y;\n"
426                                                   "    }\n"
427                                                   "    value *= " +
428                                                   toString(scale) + ";\n"
429 
430                                               :
431                                               "    " + precision +
432                                                   " vec4 valueOrig = value;\n"
433                                                   "    for (int i = 0; i < " +
434                                                   toString(m_numLoopIterations) +
435                                                   "; i++)\n"
436                                                   "    {\n"
437                                                   "        " +
438                                                   precision +
439                                                   " float x = 3.2*float(i) + 4.6;\n"
440                                                   "        " +
441                                                   precision +
442                                                   " vec4 y = sin(cos(sin(valueOrig)));\n"
443                                                   "        value += x*y;\n"
444                                                   "    }\n"
445                                                   "    value *= " +
446                                                   toString(scale) + ";\n";
447 
448         return defaultProgramData(m_caseShaderType, statements);
449     }
450 
451 private:
452     const int m_numLoopIterations;
453 };
454 
455 class FunctionInliningCase : public ShaderOptimizationCase
456 {
457 public:
FunctionInliningCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,int callNestingDepth)458     FunctionInliningCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
459                          int callNestingDepth)
460         : ShaderOptimizationCase(context, name, description, caseShaderType)
461         , m_callNestingDepth(callNestingDepth)
462     {
463     }
464 
465 protected:
generateProgramData(bool optimized) const466     ProgramData generateProgramData(bool optimized) const
467     {
468         const string precision     = getShaderPrecision(m_caseShaderType);
469         const string expression    = "value*vec4(0.8, 0.7, 0.6, 0.9)";
470         const string maybeFuncDefs = optimized ? "" : funcDefinitions(m_callNestingDepth, precision, expression);
471         const string mainValueStatement =
472             (optimized ? "\tvalue = " + expression : "\tvalue = func" + toString(m_callNestingDepth - 1) + "(value)") +
473             ";\n";
474 
475         return defaultProgramData(m_caseShaderType, maybeFuncDefs, mainValueStatement);
476     }
477 
478 private:
479     const int m_callNestingDepth;
480 
funcDefinitions(int callNestingDepth,const string & precision,const string & expression)481     static inline string funcDefinitions(int callNestingDepth, const string &precision, const string &expression)
482     {
483         string result = precision + " vec4 func0 (" + precision + " vec4 value) { return " + expression + "; }\n";
484 
485         for (int i = 1; i < callNestingDepth; i++)
486             result += precision + " vec4 func" + toString(i) + " (" + precision + " vec4 v) { return func" +
487                       toString(i - 1) + "(v); }\n";
488 
489         return result;
490     }
491 };
492 
493 class ConstantPropagationCase : public ShaderOptimizationCase
494 {
495 public:
496     enum CaseType
497     {
498         CASETYPE_BUILT_IN_FUNCTIONS = 0,
499         CASETYPE_ARRAY,
500         CASETYPE_STRUCT,
501 
502         CASETYPE_LAST
503     };
504 
ConstantPropagationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType,bool useConstantExpressionsOnly)505     ConstantPropagationCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
506                             CaseType caseType, bool useConstantExpressionsOnly)
507         : ShaderOptimizationCase(context, name, description, caseShaderType)
508         , m_caseType(caseType)
509         , m_useConstantExpressionsOnly(useConstantExpressionsOnly)
510     {
511         DE_ASSERT(
512             !(m_caseType == CASETYPE_ARRAY &&
513               m_useConstantExpressionsOnly)); // \note Would need array constructors, which GLSL ES 1 doesn't have.
514     }
515 
516 protected:
generateProgramData(bool optimized) const517     ProgramData generateProgramData(bool optimized) const
518     {
519         const bool isVertexCase = m_caseShaderType == CASESHADERTYPE_VERTEX;
520         const string precision  = getShaderPrecision(m_caseShaderType);
521         const string statements =
522             m_caseType == CASETYPE_BUILT_IN_FUNCTIONS ?
523                 builtinFunctionsCaseStatements(optimized, m_useConstantExpressionsOnly, precision, isVertexCase) :
524             m_caseType == CASETYPE_ARRAY ?
525                 arrayCaseStatements(optimized, precision, isVertexCase) :
526             m_caseType == CASETYPE_STRUCT ?
527                 structCaseStatements(optimized, m_useConstantExpressionsOnly, precision, isVertexCase) :
528                 deFatalStr("Invalid CaseType");
529 
530         return defaultProgramData(m_caseShaderType, statements);
531     }
532 
533 private:
534     const CaseType m_caseType;
535     const bool m_useConstantExpressionsOnly;
536 
builtinFunctionsCaseStatements(bool optimized,bool constantExpressionsOnly,const string & precision,bool useHeavierWorkload)537     static inline string builtinFunctionsCaseStatements(bool optimized, bool constantExpressionsOnly,
538                                                         const string &precision, bool useHeavierWorkload)
539     {
540         const string constMaybe = constantExpressionsOnly ? "const " : "";
541         const int numSinRows    = useHeavierWorkload ? 12 : 1;
542 
543         return optimized ? "    value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match "
544                            "the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
545 
546                            :
547                            "    " + constMaybe + precision +
548                                " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
549                                "    " +
550                                constMaybe + precision +
551                                " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
552                                "    " +
553                                constMaybe +
554                                "bvec4 c = bvec4(true, false, true, true);\n"
555                                "    " +
556                                constMaybe + precision +
557                                " vec4 d = exp(b + vec4(c));\n"
558                                "    " +
559                                constMaybe + precision + " vec4 e0 = inversesqrt(mix(d+a, d+b, a));\n" +
560                                repeatIndexedTemplate("    " + constMaybe + precision +
561                                                          " vec4 e${NDX} = sin(sin(sin(sin(e${PREV_NDX}))));\n",
562                                                      numSinRows, "", 1) +
563                                "    " + constMaybe + precision + " vec4 f = abs(e" + toString(numSinRows) + ");\n" +
564                                "    value = f*value;\n";
565     }
566 
arrayCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)567     static inline string arrayCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
568     {
569         const int numSinRows = useHeavierWorkload ? 12 : 1;
570 
571         return optimized ?
572                    "    value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match the one in "
573                    "unoptimized shader, but shouldn't make a difference performance-wise\n"
574 
575                    :
576                    "    const int arrLen = 4;\n"
577                    "    " +
578                        precision +
579                        " vec4 arr[arrLen];\n"
580                        "    arr[0] = vec4(0.1, 0.5, 0.9, 1.3);\n"
581                        "    arr[1] = vec4(0.2, 0.6, 1.0, 1.4);\n"
582                        "    arr[2] = vec4(0.3, 0.7, 1.1, 1.5);\n"
583                        "    arr[3] = vec4(0.4, 0.8, 1.2, 1.6);\n"
584                        "    " +
585                        precision +
586                        " vec4 a = (arr[0] + arr[1] + arr[2] + arr[3]) * 0.25;\n"
587                        "    " +
588                        precision + " vec4 b0 = cos(sin(a));\n" +
589                        repeatIndexedTemplate("    " + precision + " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n",
590                                              numSinRows, "", 1) +
591                        "    " + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
592                        "    value = c*value;\n";
593     }
594 
structCaseStatements(bool optimized,bool constantExpressionsOnly,const string & precision,bool useHeavierWorkload)595     static inline string structCaseStatements(bool optimized, bool constantExpressionsOnly, const string &precision,
596                                               bool useHeavierWorkload)
597     {
598         const string constMaybe = constantExpressionsOnly ? "const " : "";
599         const int numSinRows    = useHeavierWorkload ? 12 : 1;
600 
601         return optimized ? "    value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match "
602                            "the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
603 
604                            :
605                            "    struct S\n"
606                            "    {\n"
607                            "        " +
608                                precision +
609                                " vec4 a;\n"
610                                "        " +
611                                precision +
612                                " vec4 b;\n"
613                                "        " +
614                                precision +
615                                " vec4 c;\n"
616                                "        " +
617                                precision +
618                                " vec4 d;\n"
619                                "    };\n"
620                                "\n"
621                                "    " +
622                                constMaybe +
623                                "S s =\n"
624                                "        S(vec4(0.1, 0.5, 0.9, 1.3),\n"
625                                "          vec4(0.2, 0.6, 1.0, 1.4),\n"
626                                "          vec4(0.3, 0.7, 1.1, 1.5),\n"
627                                "          vec4(0.4, 0.8, 1.2, 1.6));\n"
628                                "    " +
629                                constMaybe + precision +
630                                " vec4 a = (s.a + s.b + s.c + s.d) * 0.25;\n"
631                                "    " +
632                                constMaybe + precision + " vec4 b0 = cos(sin(a));\n" +
633                                repeatIndexedTemplate("    " + constMaybe + precision +
634                                                          " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n",
635                                                      numSinRows, "", 1) +
636                                "    " + constMaybe + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
637                                "    value = c*value;\n";
638     }
639 };
640 
641 class CommonSubexpressionCase : public ShaderOptimizationCase
642 {
643 public:
644     enum CaseType
645     {
646         CASETYPE_SINGLE_STATEMENT = 0,
647         CASETYPE_MULTIPLE_STATEMENTS,
648         CASETYPE_STATIC_BRANCH,
649         CASETYPE_LOOP,
650 
651         CASETYPE_LAST
652     };
653 
CommonSubexpressionCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType)654     CommonSubexpressionCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
655                             CaseType caseType)
656         : ShaderOptimizationCase(context, name, description, caseShaderType)
657         , m_caseType(caseType)
658     {
659     }
660 
661 protected:
generateProgramData(bool optimized) const662     ProgramData generateProgramData(bool optimized) const
663     {
664         const bool isVertexCase = m_caseShaderType == CASESHADERTYPE_VERTEX;
665         const string precision  = getShaderPrecision(m_caseShaderType);
666         const string statements = m_caseType == CASETYPE_SINGLE_STATEMENT ?
667                                       singleStatementCaseStatements(optimized, precision, isVertexCase) :
668                                   m_caseType == CASETYPE_MULTIPLE_STATEMENTS ?
669                                       multipleStatementsCaseStatements(optimized, precision, isVertexCase) :
670                                   m_caseType == CASETYPE_STATIC_BRANCH ?
671                                       staticBranchCaseStatements(optimized, precision, isVertexCase) :
672                                   m_caseType == CASETYPE_LOOP ? loopCaseStatements(optimized, precision, isVertexCase) :
673                                                                 deFatalStr("Invalid CaseType");
674 
675         return defaultProgramData(m_caseShaderType, statements);
676     }
677 
678 private:
679     const CaseType m_caseType;
680 
singleStatementCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)681     static inline string singleStatementCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
682     {
683         const int numTopLevelRepeats = useHeavierWorkload ? 4 : 1;
684 
685         return optimized ? "    " + precision +
686                                " vec4 s = sin(value);\n"
687                                "    " +
688                                precision +
689                                " vec4 cs = cos(s);\n"
690                                "    " +
691                                precision +
692                                " vec4 d = fract(s + cs) + sqrt(s + exp(cs));\n"
693                                "    value = " +
694                                repeat("d", numTopLevelRepeats, "+") + ";\n"
695 
696                            :
697                            "    value = " +
698                                repeat("fract(sin(value) + cos(sin(value))) + sqrt(sin(value) + exp(cos(sin(value))))",
699                                       numTopLevelRepeats, "\n\t      + ") +
700                                ";\n";
701     }
702 
multipleStatementsCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)703     static inline string multipleStatementsCaseStatements(bool optimized, const string &precision,
704                                                           bool useHeavierWorkload)
705     {
706         const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
707         DE_ASSERT(numTopLevelRepeats >= 2);
708 
709         return optimized ? "    " + precision +
710                                " vec4 a = sin(value) + cos(exp(value));\n"
711                                "    " +
712                                precision +
713                                " vec4 b = cos(cos(a));\n"
714                                "    a = fract(exp(sqrt(b)));\n"
715                                "\n" +
716                                repeat("\tvalue += a*b;\n", numTopLevelRepeats)
717 
718                            :
719                            repeatIndexedTemplate("    " + precision +
720                                                      " vec4 a${NDX} = sin(value) + cos(exp(value));\n"
721                                                      "    " +
722                                                      precision +
723                                                      " vec4 b${NDX} = cos(cos(a${NDX}));\n"
724                                                      "    a${NDX} = fract(exp(sqrt(b${NDX})));\n"
725                                                      "\n",
726                                                  numTopLevelRepeats) +
727 
728                                repeatIndexedTemplate("    value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
729     }
730 
staticBranchCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)731     static inline string staticBranchCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
732     {
733         const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
734         DE_ASSERT(numTopLevelRepeats >= 2);
735 
736         if (optimized)
737         {
738             return "    " + precision +
739                    " vec4 a = sin(value) + cos(exp(value));\n"
740                    "    " +
741                    precision +
742                    " vec4 b = cos(a);\n"
743                    "    b = cos(b);\n"
744                    "    a = fract(exp(sqrt(b)));\n"
745                    "\n" +
746                    repeat("    value += a*b;\n", numTopLevelRepeats);
747         }
748         else
749         {
750             string result;
751 
752             for (int i = 0; i < numTopLevelRepeats; i++)
753             {
754                 result += "    " + precision + " vec4 a" + toString(i) +
755                           " = sin(value) + cos(exp(value));\n"
756                           "    " +
757                           precision + " vec4 b" + toString(i) + " = cos(a" + toString(i) + ");\n";
758 
759                 if (i % 3 == 0)
760                     result += "    if (1 < 2)\n"
761                               "        b" +
762                               toString(i) + " = cos(b" + toString(i) + ");\n";
763                 else if (i % 3 == 1)
764                     result += "    b" + toString(i) + " = cos(b" + toString(i) + ");\n";
765                 else if (i % 3 == 2)
766                     result += "    if (2 < 1);\n"
767                               "    else\n"
768                               "        b" +
769                               toString(i) + " = cos(b" + toString(i) + ");\n";
770                 else
771                     DE_ASSERT(false);
772 
773                 result += "    a" + toString(i) + " = fract(exp(sqrt(b" + toString(i) + ")));\n\n";
774             }
775 
776             result += repeatIndexedTemplate("    value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
777 
778             return result;
779         }
780     }
781 
loopCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)782     static inline string loopCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
783     {
784         const int numLoopIterations = useHeavierWorkload ? 32 : 4;
785 
786         return optimized ? "    " + precision +
787                                " vec4 acc = value;\n"
788                                "    for (int i = 0; i < " +
789                                toString(numLoopIterations) +
790                                "; i++)\n"
791                                "        acc = sin(acc);\n"
792                                "\n"
793                                "    value += acc;\n"
794                                "    value += acc;\n"
795 
796                            :
797                            "    " + precision +
798                                " vec4 acc0 = value;\n"
799                                "    for (int i = 0; i < " +
800                                toString(numLoopIterations) +
801                                "; i++)\n"
802                                "        acc0 = sin(acc0);\n"
803                                "\n"
804                                "    " +
805                                precision +
806                                " vec4 acc1 = value;\n"
807                                "    for (int i = 0; i < " +
808                                toString(numLoopIterations) +
809                                "; i++)\n"
810                                "        acc1 = sin(acc1);\n"
811                                "\n"
812                                "    value += acc0;\n"
813                                "    value += acc1;\n";
814     }
815 };
816 
817 class DeadCodeEliminationCase : public ShaderOptimizationCase
818 {
819 public:
820     enum CaseType
821     {
822         CASETYPE_DEAD_BRANCH_SIMPLE = 0,
823         CASETYPE_DEAD_BRANCH_COMPLEX,
824         CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST,
825         CASETYPE_DEAD_BRANCH_FUNC_CALL,
826         CASETYPE_UNUSED_VALUE_BASIC,
827         CASETYPE_UNUSED_VALUE_LOOP,
828         CASETYPE_UNUSED_VALUE_DEAD_BRANCH,
829         CASETYPE_UNUSED_VALUE_AFTER_RETURN,
830         CASETYPE_UNUSED_VALUE_MUL_ZERO,
831 
832         CASETYPE_LAST
833     };
834 
DeadCodeEliminationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType)835     DeadCodeEliminationCase(Context &context, const char *name, const char *description, CaseShaderType caseShaderType,
836                             CaseType caseType)
837         : ShaderOptimizationCase(context, name, description, caseShaderType)
838         , m_caseType(caseType)
839     {
840     }
841 
842 protected:
generateProgramData(bool optimized) const843     ProgramData generateProgramData(bool optimized) const
844     {
845         const bool isVertexCase = m_caseShaderType == CASESHADERTYPE_VERTEX;
846         const string precision  = getShaderPrecision(m_caseShaderType);
847         const string funcDefs   = m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL ?
848                                       deadBranchFuncCallCaseFuncDefs(optimized, precision) :
849                                   m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN ?
850                                       unusedValueAfterReturnCaseFuncDefs(optimized, precision, isVertexCase) :
851                                       "";
852 
853         const string statements = m_caseType == CASETYPE_DEAD_BRANCH_SIMPLE ?
854                                       deadBranchSimpleCaseStatements(optimized, isVertexCase) :
855                                   m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX ?
856                                       deadBranchComplexCaseStatements(optimized, precision, true, isVertexCase) :
857                                   m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST ?
858                                       deadBranchComplexCaseStatements(optimized, precision, false, isVertexCase) :
859                                   m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL ?
860                                       deadBranchFuncCallCaseStatements(optimized, isVertexCase) :
861                                   m_caseType == CASETYPE_UNUSED_VALUE_BASIC ?
862                                       unusedValueBasicCaseStatements(optimized, precision, isVertexCase) :
863                                   m_caseType == CASETYPE_UNUSED_VALUE_LOOP ?
864                                       unusedValueLoopCaseStatements(optimized, precision, isVertexCase) :
865                                   m_caseType == CASETYPE_UNUSED_VALUE_DEAD_BRANCH ?
866                                       unusedValueDeadBranchCaseStatements(optimized, precision, isVertexCase) :
867                                   m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN ?
868                                       unusedValueAfterReturnCaseStatements() :
869                                   m_caseType == CASETYPE_UNUSED_VALUE_MUL_ZERO ?
870                                       unusedValueMulZeroCaseStatements(optimized, precision, isVertexCase) :
871                                       deFatalStr("Invalid CaseType");
872 
873         return defaultProgramData(m_caseShaderType, funcDefs, statements);
874     }
875 
876 private:
877     const CaseType m_caseType;
878 
deadBranchSimpleCaseStatements(bool optimized,bool useHeavierWorkload)879     static inline string deadBranchSimpleCaseStatements(bool optimized, bool useHeavierWorkload)
880     {
881         const int numLoopIterations = useHeavierWorkload ? 16 : 4;
882 
883         return optimized ? "    value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
884 
885                            :
886                            "    value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
887                            "    if (2 < 1)\n"
888                            "    {\n"
889                            "        value = cos(exp(sin(value))*log(sqrt(value)));\n"
890                            "        for (int i = 0; i < " +
891                                toString(numLoopIterations) +
892                                "; i++)\n"
893                                "            value = sin(value);\n"
894                                "    }\n";
895     }
896 
deadBranchComplexCaseStatements(bool optimized,const string & precision,bool useConst,bool useHeavierWorkload)897     static inline string deadBranchComplexCaseStatements(bool optimized, const string &precision, bool useConst,
898                                                          bool useHeavierWorkload)
899     {
900         const string constMaybe     = useConst ? "const " : "";
901         const int numLoopIterations = useHeavierWorkload ? 16 : 4;
902 
903         return optimized ? "    value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
904 
905                            :
906                            "    value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
907                            "    " +
908                                constMaybe + precision +
909                                " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
910                                "    " +
911                                constMaybe + precision +
912                                " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
913                                "    " +
914                                constMaybe +
915                                "bvec4 c = bvec4(true, false, true, true);\n"
916                                "    " +
917                                constMaybe + precision +
918                                " vec4 d = exp(b + vec4(c));\n"
919                                "    " +
920                                constMaybe + precision +
921                                " vec4 e = 1.8*abs(sin(sin(inversesqrt(mix(d+a, d+b, a)))));\n"
922                                "    if (e.x > 1.0)\n"
923                                "    {\n"
924                                "        value = cos(exp(sin(value))*log(sqrt(value)));\n"
925                                "        for (int i = 0; i < " +
926                                toString(numLoopIterations) +
927                                "; i++)\n"
928                                "            value = sin(value);\n"
929                                "    }\n";
930     }
931 
deadBranchFuncCallCaseFuncDefs(bool optimized,const string & precision)932     static inline string deadBranchFuncCallCaseFuncDefs(bool optimized, const string &precision)
933     {
934         return optimized ? "" : precision + " float func (" + precision + " float x) { return 2.0*x; }\n";
935     }
936 
deadBranchFuncCallCaseStatements(bool optimized,bool useHeavierWorkload)937     static inline string deadBranchFuncCallCaseStatements(bool optimized, bool useHeavierWorkload)
938     {
939         const int numLoopIterations = useHeavierWorkload ? 16 : 4;
940 
941         return optimized ? "    value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
942 
943                            :
944                            "    value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
945                            "    if (func(0.3) > 1.0)\n"
946                            "    {\n"
947                            "        value = cos(exp(sin(value))*log(sqrt(value)));\n"
948                            "        for (int i = 0; i < " +
949                                toString(numLoopIterations) +
950                                "; i++)\n"
951                                "            value = sin(value);\n"
952                                "    }\n";
953     }
954 
unusedValueBasicCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)955     static inline string unusedValueBasicCaseStatements(bool optimized, const string &precision,
956                                                         bool useHeavierWorkload)
957     {
958         const int numSinRows = useHeavierWorkload ? 12 : 1;
959 
960         return optimized ? "    " + precision +
961                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
962                                "    value = used;\n"
963 
964                            :
965                            "    " + precision +
966                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
967                                "    " +
968                                precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value))) + used;\n" +
969                                repeat("    unused = sin(sin(sin(sin(unused))));\n", numSinRows) + "    value = used;\n";
970     }
971 
unusedValueLoopCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)972     static inline string unusedValueLoopCaseStatements(bool optimized, const string &precision, bool useHeavierWorkload)
973     {
974         const int numLoopIterations = useHeavierWorkload ? 16 : 4;
975 
976         return optimized ? "    " + precision +
977                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
978                                "    value = used;\n"
979 
980                            :
981                            "    " + precision +
982                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
983                                "    " +
984                                precision +
985                                " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
986                                "    for (int i = 0; i < " +
987                                toString(numLoopIterations) +
988                                "; i++)\n"
989                                "        unused = sin(unused + used);\n"
990                                "    value = used;\n";
991     }
992 
unusedValueAfterReturnCaseFuncDefs(bool optimized,const string & precision,bool useHeavierWorkload)993     static inline string unusedValueAfterReturnCaseFuncDefs(bool optimized, const string &precision,
994                                                             bool useHeavierWorkload)
995     {
996         const int numSinRows = useHeavierWorkload ? 12 : 1;
997 
998         return optimized ? precision + " vec4 func (" + precision +
999                                " vec4 v)\n"
1000                                "{\n"
1001                                "    " +
1002                                precision +
1003                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
1004                                "    return used;\n"
1005                                "}\n"
1006 
1007                            :
1008                            precision + " vec4 func (" + precision +
1009                                " vec4 v)\n"
1010                                "{\n"
1011                                "    " +
1012                                precision +
1013                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
1014                                "    " +
1015                                precision + " vec4 unused = cos(exp(sin(v))*log(sqrt(v)));\n" +
1016                                repeat("    unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
1017                                "    return used;\n"
1018                                "    used = used*unused;"
1019                                "    return used;\n"
1020                                "}\n";
1021     }
1022 
unusedValueAfterReturnCaseStatements(void)1023     static inline string unusedValueAfterReturnCaseStatements(void)
1024     {
1025         return "    value = func(value);\n";
1026     }
1027 
unusedValueDeadBranchCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)1028     static inline string unusedValueDeadBranchCaseStatements(bool optimized, const string &precision,
1029                                                              bool useHeavierWorkload)
1030     {
1031         const int numSinRows = useHeavierWorkload ? 12 : 1;
1032 
1033         return optimized ? "    " + precision +
1034                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1035                                "    value = used;\n"
1036 
1037                            :
1038                            "    " + precision +
1039                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1040                                "    " +
1041                                precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n" +
1042                                repeat("    unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
1043                                "    if (2 < 1)\n"
1044                                "        used = used*unused;\n"
1045                                "    value = used;\n";
1046     }
1047 
unusedValueMulZeroCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)1048     static inline string unusedValueMulZeroCaseStatements(bool optimized, const string &precision,
1049                                                           bool useHeavierWorkload)
1050     {
1051         const int numSinRows = useHeavierWorkload ? 12 : 1;
1052 
1053         return optimized ? "    " + precision +
1054                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1055                                "    value = used;\n"
1056 
1057                            :
1058                            "    " + precision +
1059                                " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
1060                                "    " +
1061                                precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n" +
1062                                repeat("    unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
1063                                "    value = used + unused*float(1-1);\n";
1064     }
1065 };
1066 
1067 } // namespace
1068 
ShaderOptimizationTests(Context & context)1069 ShaderOptimizationTests::ShaderOptimizationTests(Context &context)
1070     : TestCaseGroup(context, "optimization", "Shader Optimization Performance Tests")
1071 {
1072 }
1073 
~ShaderOptimizationTests(void)1074 ShaderOptimizationTests::~ShaderOptimizationTests(void)
1075 {
1076 }
1077 
init(void)1078 void ShaderOptimizationTests::init(void)
1079 {
1080     TestCaseGroup *const unrollGroup = new TestCaseGroup(m_context, "loop_unrolling", "Loop Unrolling Cases");
1081     TestCaseGroup *const loopInvariantCodeMotionGroup =
1082         new TestCaseGroup(m_context, "loop_invariant_code_motion", "Loop-Invariant Code Motion Cases");
1083     TestCaseGroup *const inlineGroup = new TestCaseGroup(m_context, "function_inlining", "Function Inlining Cases");
1084     TestCaseGroup *const constantPropagationGroup =
1085         new TestCaseGroup(m_context, "constant_propagation", "Constant Propagation Cases");
1086     TestCaseGroup *const commonSubexpressionGroup =
1087         new TestCaseGroup(m_context, "common_subexpression_elimination", "Common Subexpression Elimination Cases");
1088     TestCaseGroup *const deadCodeEliminationGroup =
1089         new TestCaseGroup(m_context, "dead_code_elimination", "Dead Code Elimination Cases");
1090     addChild(unrollGroup);
1091     addChild(loopInvariantCodeMotionGroup);
1092     addChild(inlineGroup);
1093     addChild(constantPropagationGroup);
1094     addChild(commonSubexpressionGroup);
1095     addChild(deadCodeEliminationGroup);
1096 
1097     for (int caseShaderTypeI = 0; caseShaderTypeI < CASESHADERTYPE_LAST; caseShaderTypeI++)
1098     {
1099         const CaseShaderType caseShaderType    = (CaseShaderType)caseShaderTypeI;
1100         const char *const caseShaderTypeSuffix = caseShaderType == CASESHADERTYPE_VERTEX   ? "_vertex" :
1101                                                  caseShaderType == CASESHADERTYPE_FRAGMENT ? "_fragment" :
1102                                                                                              DE_NULL;
1103 
1104         // Loop unrolling cases.
1105 
1106         {
1107             static const int loopIterationCounts[] = {4, 8, 32};
1108 
1109             for (int caseTypeI = 0; caseTypeI < LoopUnrollCase::CASETYPE_LAST; caseTypeI++)
1110             {
1111                 const LoopUnrollCase::CaseType caseType = (LoopUnrollCase::CaseType)caseTypeI;
1112                 const string caseTypeName               = caseType == LoopUnrollCase::CASETYPE_INDEPENDENT ?
1113                                                               "independent_iterations" :
1114                                                           caseType == LoopUnrollCase::CASETYPE_DEPENDENT ? "dependent_iterations" :
1115                                                                                                            DE_NULL;
1116                 const string caseTypeDesc =
1117                     caseType == LoopUnrollCase::CASETYPE_INDEPENDENT ? "loop iterations don't depend on each other" :
1118                     caseType == LoopUnrollCase::CASETYPE_DEPENDENT   ? "loop iterations depend on each other" :
1119                                                                        DE_NULL;
1120 
1121                 for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
1122                 {
1123                     const int loopIterations = loopIterationCounts[loopIterNdx];
1124                     const string name        = caseTypeName + "_" + toString(loopIterations) + caseShaderTypeSuffix;
1125                     const string description = toString(loopIterations) + " iterations; " + caseTypeDesc;
1126 
1127                     unrollGroup->addChild(new LoopUnrollCase(m_context, name.c_str(), description.c_str(),
1128                                                              caseShaderType, caseType, loopIterations));
1129                 }
1130             }
1131         }
1132 
1133         // Loop-invariant code motion cases.
1134 
1135         {
1136             static const int loopIterationCounts[] = {4, 8, 32};
1137 
1138             for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
1139             {
1140                 const int loopIterations = loopIterationCounts[loopIterNdx];
1141                 const string name        = toString(loopIterations) + "_iterations" + caseShaderTypeSuffix;
1142 
1143                 loopInvariantCodeMotionGroup->addChild(
1144                     new LoopInvariantCodeMotionCase(m_context, name.c_str(), "", caseShaderType, loopIterations));
1145             }
1146         }
1147 
1148         // Function inlining cases.
1149 
1150         {
1151             static const int callNestingDepths[] = {4, 8, 32};
1152 
1153             for (int nestDepthNdx = 0; nestDepthNdx < DE_LENGTH_OF_ARRAY(callNestingDepths); nestDepthNdx++)
1154             {
1155                 const int nestingDepth = callNestingDepths[nestDepthNdx];
1156                 const string name      = toString(nestingDepth) + "_nested" + caseShaderTypeSuffix;
1157 
1158                 inlineGroup->addChild(
1159                     new FunctionInliningCase(m_context, name.c_str(), "", caseShaderType, nestingDepth));
1160             }
1161         }
1162 
1163         // Constant propagation cases.
1164 
1165         for (int caseTypeI = 0; caseTypeI < ConstantPropagationCase::CASETYPE_LAST; caseTypeI++)
1166         {
1167             const ConstantPropagationCase::CaseType caseType = (ConstantPropagationCase::CaseType)caseTypeI;
1168             const string caseTypeName = caseType == ConstantPropagationCase::CASETYPE_BUILT_IN_FUNCTIONS ?
1169                                             "built_in_functions" :
1170                                         caseType == ConstantPropagationCase::CASETYPE_ARRAY  ? "array" :
1171                                         caseType == ConstantPropagationCase::CASETYPE_STRUCT ? "struct" :
1172                                                                                                DE_NULL;
1173 
1174             for (int constantExpressionsOnlyI = 0; constantExpressionsOnlyI <= 1; constantExpressionsOnlyI++)
1175             {
1176                 const bool constantExpressionsOnly = constantExpressionsOnlyI != 0;
1177                 const string name = caseTypeName + (constantExpressionsOnly ? "" : "_no_const") + caseShaderTypeSuffix;
1178 
1179                 if (caseType == ConstantPropagationCase::CASETYPE_ARRAY &&
1180                     constantExpressionsOnly) // \note See ConstantPropagationCase's constructor for explanation.
1181                     continue;
1182 
1183                 constantPropagationGroup->addChild(new ConstantPropagationCase(
1184                     m_context, name.c_str(), "", caseShaderType, caseType, constantExpressionsOnly));
1185             }
1186         }
1187 
1188         // Common subexpression cases.
1189 
1190         for (int caseTypeI = 0; caseTypeI < CommonSubexpressionCase::CASETYPE_LAST; caseTypeI++)
1191         {
1192             const CommonSubexpressionCase::CaseType caseType = (CommonSubexpressionCase::CaseType)caseTypeI;
1193 
1194             const string caseTypeName =
1195                 caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT    ? "single_statement" :
1196                 caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS ? "multiple_statements" :
1197                 caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH       ? "static_branch" :
1198                 caseType == CommonSubexpressionCase::CASETYPE_LOOP                ? "loop" :
1199                                                                                     DE_NULL;
1200 
1201             const string description = caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT ?
1202                                            "A single statement containing multiple uses of same subexpression" :
1203                                        caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS ?
1204                                            "Multiple statements performing same computations" :
1205                                        caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH ?
1206                                            "Multiple statements including a static conditional" :
1207                                        caseType == CommonSubexpressionCase::CASETYPE_LOOP ?
1208                                            "Multiple loops performing the same computations" :
1209                                            DE_NULL;
1210 
1211             commonSubexpressionGroup->addChild(
1212                 new CommonSubexpressionCase(m_context, (caseTypeName + caseShaderTypeSuffix).c_str(),
1213                                             description.c_str(), caseShaderType, caseType));
1214         }
1215 
1216         // Dead code elimination cases.
1217 
1218         for (int caseTypeI = 0; caseTypeI < DeadCodeEliminationCase::CASETYPE_LAST; caseTypeI++)
1219         {
1220             const DeadCodeEliminationCase::CaseType caseType = (DeadCodeEliminationCase::CaseType)caseTypeI;
1221             const char *const caseTypeName =
1222                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE  ? "dead_branch_simple" :
1223                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX ? "dead_branch_complex" :
1224                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST ?
1225                                                                                     "dead_branch_complex_no_const" :
1226                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL     ? "dead_branch_func_call" :
1227                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC        ? "unused_value_basic" :
1228                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP         ? "unused_value_loop" :
1229                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH  ? "unused_value_dead_branch" :
1230                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN ? "unused_value_after_return" :
1231                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO     ? "unused_value_mul_zero" :
1232                                                                                           DE_NULL;
1233 
1234             const char *const caseTypeDescription =
1235                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE ?
1236                     "Do computation inside a branch that is never taken (condition is simple false constant "
1237                     "expression)" :
1238                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX ?
1239                     "Do computation inside a branch that is never taken (condition is complex false constant "
1240                     "expression)" :
1241                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST ?
1242                     "Do computation inside a branch that is never taken (condition is complex false expression, not "
1243                     "constant expression but still compile-time computable)" :
1244                 caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL ?
1245                     "Do computation inside a branch that is never taken (condition is compile-time computable false "
1246                     "expression containing function call to a simple inlineable function)" :
1247                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC ?
1248                     "Compute a value that is never used even statically" :
1249                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP ?
1250                     "Compute a value, using a loop, that is never used even statically" :
1251                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH ?
1252                     "Compute a value that is used only inside a statically dead branch" :
1253                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN ?
1254                     "Compute a value that is used only after a return statement" :
1255                 caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO ?
1256                     "Compute a value that is used but multiplied by a zero constant expression" :
1257                     DE_NULL;
1258 
1259             deadCodeEliminationGroup->addChild(
1260                 new DeadCodeEliminationCase(m_context, (string() + caseTypeName + caseShaderTypeSuffix).c_str(),
1261                                             caseTypeDescription, caseShaderType, caseType));
1262         }
1263     }
1264 }
1265 
1266 } // namespace Performance
1267 } // namespace gles2
1268 } // namespace deqp
1269