xref: /aosp_15_r20/external/deqp/modules/glshared/glsShaderPerformanceMeasurer.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL (ES) Module
3  * -----------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Shader performance measurer; handles calibration and measurement
22  *//*--------------------------------------------------------------------*/
23 
24 #include "glsShaderPerformanceMeasurer.hpp"
25 #include "gluDefs.hpp"
26 #include "tcuTestLog.hpp"
27 #include "tcuRenderTarget.hpp"
28 #include "deStringUtil.hpp"
29 #include "deMath.h"
30 #include "deClock.h"
31 
32 #include "glwFunctions.hpp"
33 #include "glwEnums.hpp"
34 
35 #include <algorithm>
36 
37 using std::string;
38 using std::vector;
39 using tcu::TestLog;
40 using tcu::Vec4;
41 using namespace glw; // GL types
42 
43 namespace deqp
44 {
45 namespace gls
46 {
47 
// Linearly interpolates a scalar across a triangle.
//
// v0 is the value at the origin corner (x = 0, y = 0); v2 is reached by
// moving one unit along x and v1 by moving one unit along y.
// \note Kept as a single expression so that floating-point evaluation order is unchanged.
static inline float triangleInterpolate(float v0, float v1, float v2, float x, float y)
{
    return v0 + (v2 - v0) * x + (v1 - v0) * y;
}
52 
triQuadInterpolate(float x,float y,const tcu::Vec4 & quad)53 static inline float triQuadInterpolate(float x, float y, const tcu::Vec4 &quad)
54 {
55     // \note Top left fill rule.
56     if (x + y < 1.0f)
57         return triangleInterpolate(quad.x(), quad.y(), quad.z(), x, y);
58     else
59         return triangleInterpolate(quad.w(), quad.z(), quad.y(), 1.0f - x, 1.0f - y);
60 }
61 
// Returns the number of vertices in a grid of gridSizeX x gridSizeY quads
// (one more vertex than quads along each axis).
static inline int getNumVertices(int gridSizeX, int gridSizeY)
{
    const int verticesPerRow = gridSizeX + 1;
    const int numRows        = gridSizeY + 1;

    return verticesPerRow * numRows;
}
66 
// Returns the number of indices needed to draw a gridSizeX x gridSizeY quad
// grid as triangles: two triangles, i.e. six indices, per quad.
static inline int getNumIndices(int gridSizeX, int gridSizeY)
{
    const int indicesPerQuad = 6;

    return gridSizeX * gridSizeY * indicesPerQuad;
}
71 
// Maps grid vertex coordinates (x, y) to a row-major vertex index.
// \note The result is truncated to 16 bits; callers must keep the grid small enough.
static inline uint16_t getVtxIndex(int x, int y, int gridSizeX)
{
    const int rowStride = gridSizeX + 1; // Vertices per grid row.

    return static_cast<uint16_t>(y * rowStride + x);
}
76 
generateVertices(std::vector<float> & dst,int gridSizeX,int gridSizeY,const AttribSpec & spec)77 static void generateVertices(std::vector<float> &dst, int gridSizeX, int gridSizeY, const AttribSpec &spec)
78 {
79     const int numComponents = 4;
80 
81     DE_ASSERT((gridSizeX + 1) * (gridSizeY + 1) <= (1 << 16)); // Must fit into 16-bit indices.
82     DE_ASSERT(gridSizeX >= 1 && gridSizeY >= 1);
83     dst.resize((gridSizeX + 1) * (gridSizeY + 1) * 4);
84 
85     for (int y = 0; y <= gridSizeY; y++)
86     {
87         for (int x = 0; x <= gridSizeX; x++)
88         {
89             float xf = (float)x / (float)gridSizeX;
90             float yf = (float)y / (float)gridSizeY;
91 
92             for (int compNdx = 0; compNdx < numComponents; compNdx++)
93                 dst[getVtxIndex(x, y, gridSizeX) * numComponents + compNdx] = triQuadInterpolate(
94                     xf, yf, tcu::Vec4(spec.p00[compNdx], spec.p01[compNdx], spec.p10[compNdx], spec.p11[compNdx]));
95         }
96     }
97 }
98 
generateIndices(std::vector<uint16_t> & dst,int gridSizeX,int gridSizeY)99 static void generateIndices(std::vector<uint16_t> &dst, int gridSizeX, int gridSizeY)
100 {
101     const int numIndicesPerQuad = 6;
102     int numIndices              = gridSizeX * gridSizeY * numIndicesPerQuad;
103     dst.resize(numIndices);
104 
105     for (int y = 0; y < gridSizeY; y++)
106     {
107         for (int x = 0; x < gridSizeX; x++)
108         {
109             int quadNdx = y * gridSizeX + x;
110 
111             dst[quadNdx * numIndicesPerQuad + 0] = getVtxIndex(x + 0, y + 0, gridSizeX);
112             dst[quadNdx * numIndicesPerQuad + 1] = getVtxIndex(x + 1, y + 0, gridSizeX);
113             dst[quadNdx * numIndicesPerQuad + 2] = getVtxIndex(x + 0, y + 1, gridSizeX);
114 
115             dst[quadNdx * numIndicesPerQuad + 3] = getVtxIndex(x + 0, y + 1, gridSizeX);
116             dst[quadNdx * numIndicesPerQuad + 4] = getVtxIndex(x + 1, y + 0, gridSizeX);
117             dst[quadNdx * numIndicesPerQuad + 5] = getVtxIndex(x + 1, y + 1, gridSizeX);
118         }
119     }
120 }
121 
ShaderPerformanceMeasurer(const glu::RenderContext & renderCtx,PerfCaseType measureType)122 ShaderPerformanceMeasurer::ShaderPerformanceMeasurer(const glu::RenderContext &renderCtx, PerfCaseType measureType)
123     : m_renderCtx(renderCtx)
124     , m_gridSizeX(measureType == CASETYPE_FRAGMENT ? 1 : 255)
125     , m_gridSizeY(measureType == CASETYPE_FRAGMENT ? 1 : 255)
126     , m_viewportWidth(measureType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getWidth())
127     , m_viewportHeight(measureType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getHeight())
128     , m_state(STATE_UNINITIALIZED)
129     , m_isFirstIteration(false)
130     , m_prevRenderStartTime(0)
131     , m_result(-1.0f, -1.0f)
132     , m_indexBuffer(0)
133     , m_vao(0)
134 {
135 }
136 
logParameters(TestLog & log) const137 void ShaderPerformanceMeasurer::logParameters(TestLog &log) const
138 {
139     log << TestLog::Message << "Grid size: " << m_gridSizeX << "x" << m_gridSizeY << TestLog::EndMessage
140         << TestLog::Message << "Viewport: " << m_viewportWidth << "x" << m_viewportHeight << TestLog::EndMessage;
141 }
142 
init(uint32_t program,const vector<AttribSpec> & attributes,int calibratorInitialNumCalls)143 void ShaderPerformanceMeasurer::init(uint32_t program, const vector<AttribSpec> &attributes,
144                                      int calibratorInitialNumCalls)
145 {
146     DE_ASSERT(m_state == STATE_UNINITIALIZED);
147 
148     const glw::Functions &gl = m_renderCtx.getFunctions();
149     const bool useVAO        = glu::isContextTypeGLCore(m_renderCtx.getType());
150 
151     if (useVAO)
152     {
153         DE_ASSERT(!m_vao);
154         gl.genVertexArrays(1, &m_vao);
155         gl.bindVertexArray(m_vao);
156         GLU_EXPECT_NO_ERROR(gl.getError(), "Create VAO");
157     }
158 
159     // Validate that we have sane grid and viewport setup.
160 
161     DE_ASSERT(de::inBounds(m_gridSizeX, 1, 256) && de::inBounds(m_gridSizeY, 1, 256));
162 
163     {
164         bool widthTooSmall  = m_renderCtx.getRenderTarget().getWidth() < m_viewportWidth;
165         bool heightTooSmall = m_renderCtx.getRenderTarget().getHeight() < m_viewportHeight;
166 
167         if (widthTooSmall || heightTooSmall)
168             throw tcu::NotSupportedError(
169                 "Render target too small (" +
170                 (widthTooSmall ? "width must be at least " + de::toString(m_viewportWidth) : "") +
171                 (heightTooSmall ?
172                      string(widthTooSmall ? ", " : "") + "height must be at least " + de::toString(m_viewportHeight) :
173                      "") +
174                 ")");
175     }
176 
177     TCU_CHECK_INTERNAL(de::inRange(m_viewportWidth, 1, m_renderCtx.getRenderTarget().getWidth()) &&
178                        de::inRange(m_viewportHeight, 1, m_renderCtx.getRenderTarget().getHeight()));
179 
180     // Insert a_position to attributes.
181     m_attributes = attributes;
182     m_attributes.push_back(AttribSpec("a_position", Vec4(-1.0f, -1.0f, 0.0f, 1.0f), Vec4(1.0f, -1.0f, 0.0f, 1.0f),
183                                       Vec4(-1.0f, 1.0f, 0.0f, 1.0f), Vec4(1.0f, 1.0f, 0.0f, 1.0f)));
184 
185     // Generate indices.
186     {
187         std::vector<uint16_t> indices;
188         generateIndices(indices, m_gridSizeX, m_gridSizeY);
189 
190         gl.genBuffers(1, &m_indexBuffer);
191         gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
192         gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (GLsizeiptr)(indices.size() * sizeof(uint16_t)), &indices[0],
193                       GL_STATIC_DRAW);
194 
195         GLU_EXPECT_NO_ERROR(gl.getError(), "Upload index data");
196     }
197 
198     // Generate vertices.
199     m_attribBuffers.resize(m_attributes.size(), 0);
200     gl.genBuffers((GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
201 
202     for (int attribNdx = 0; attribNdx < (int)m_attributes.size(); attribNdx++)
203     {
204         std::vector<float> vertices;
205         generateVertices(vertices, m_gridSizeX, m_gridSizeY, m_attributes[attribNdx]);
206 
207         gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
208         gl.bufferData(GL_ARRAY_BUFFER, (GLsizeiptr)(vertices.size() * sizeof(float)), &vertices[0], GL_STATIC_DRAW);
209     }
210 
211     GLU_EXPECT_NO_ERROR(gl.getError(), "Upload vertex data");
212 
213     // Setup attribute bindings.
214     for (int attribNdx = 0; attribNdx < (int)m_attributes.size(); attribNdx++)
215     {
216         int location = gl.getAttribLocation(program, m_attributes[attribNdx].name.c_str());
217 
218         if (location >= 0)
219         {
220             gl.enableVertexAttribArray(location);
221             gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
222             gl.vertexAttribPointer(location, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
223         }
224 
225         GLU_EXPECT_NO_ERROR(gl.getError(), "Setup vertex attribute state");
226     }
227 
228     gl.useProgram(program);
229     GLU_EXPECT_NO_ERROR(gl.getError(), "glUseProgram()");
230 
231     m_state            = STATE_MEASURING;
232     m_isFirstIteration = true;
233 
234     m_calibrator.clear(CalibratorParameters(calibratorInitialNumCalls, 10 /* calibrate iteration frames */,
235                                             2000.0f /* calibrate iteration shortcut threshold (ms) */,
236                                             16 /* max calibrate iterations */, 1000.0f / 30.0f /* frame time (ms) */,
237                                             1000.0f / 60.0f /* frame time cap (ms) */,
238                                             1000.0f /* target measure duration (ms) */));
239 }
240 
deinit(void)241 void ShaderPerformanceMeasurer::deinit(void)
242 {
243     const glw::Functions &gl = m_renderCtx.getFunctions();
244 
245     if (m_indexBuffer)
246     {
247         gl.deleteBuffers(1, &m_indexBuffer);
248         m_indexBuffer = 0;
249     }
250 
251     if (m_vao)
252     {
253         gl.deleteVertexArrays(1, &m_vao);
254         m_vao = 0;
255     }
256 
257     if (!m_attribBuffers.empty())
258     {
259         gl.deleteBuffers((GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
260         m_attribBuffers.clear();
261     }
262 
263     m_state = STATE_UNINITIALIZED;
264 }
265 
render(int numDrawCalls)266 void ShaderPerformanceMeasurer::render(int numDrawCalls)
267 {
268     const glw::Functions &gl = m_renderCtx.getFunctions();
269     GLsizei numIndices       = (GLsizei)getNumIndices(m_gridSizeX, m_gridSizeY);
270 
271     gl.viewport(0, 0, m_viewportWidth, m_viewportHeight);
272 
273     for (int callNdx = 0; callNdx < numDrawCalls; callNdx++)
274         gl.drawElements(GL_TRIANGLES, numIndices, GL_UNSIGNED_SHORT, DE_NULL);
275 }
276 
iterate(void)277 void ShaderPerformanceMeasurer::iterate(void)
278 {
279     DE_ASSERT(m_state == STATE_MEASURING);
280 
281     uint64_t renderStartTime = deGetMicroseconds();
282     render(m_calibrator.getCallCount()); // Always render. This gives more stable performance behavior.
283 
284     TheilSenCalibrator::State calibratorState = m_calibrator.getState();
285 
286     if (calibratorState == TheilSenCalibrator::STATE_RECOMPUTE_PARAMS)
287     {
288         m_calibrator.recomputeParameters();
289 
290         m_isFirstIteration    = true;
291         m_prevRenderStartTime = renderStartTime;
292     }
293     else if (calibratorState == TheilSenCalibrator::STATE_MEASURE)
294     {
295         if (!m_isFirstIteration)
296             m_calibrator.recordIteration(renderStartTime - m_prevRenderStartTime);
297 
298         m_isFirstIteration    = false;
299         m_prevRenderStartTime = renderStartTime;
300     }
301     else
302     {
303         DE_ASSERT(calibratorState == TheilSenCalibrator::STATE_FINISHED);
304 
305         GLU_EXPECT_NO_ERROR(m_renderCtx.getFunctions().getError(), "End of rendering");
306 
307         const MeasureState &measureState = m_calibrator.getMeasureState();
308 
309         // Compute result.
310         uint64_t totalTime    = measureState.getTotalTime();
311         int numFrames         = (int)measureState.frameTimes.size();
312         int64_t numQuadGrids  = measureState.numDrawCalls * numFrames;
313         int64_t numPixels     = (int64_t)m_viewportWidth * (int64_t)m_viewportHeight * numQuadGrids;
314         int64_t numVertices   = (int64_t)getNumVertices(m_gridSizeX, m_gridSizeY) * numQuadGrids;
315         double mfragPerSecond = (double)numPixels / (double)totalTime;
316         double mvertPerSecond = (double)numVertices / (double)totalTime;
317 
318         m_result = Result((float)mvertPerSecond, (float)mfragPerSecond);
319         m_state  = STATE_FINISHED;
320     }
321 }
322 
logMeasurementInfo(TestLog & log) const323 void ShaderPerformanceMeasurer::logMeasurementInfo(TestLog &log) const
324 {
325     DE_ASSERT(m_state == STATE_FINISHED);
326 
327     const MeasureState &measureState(m_calibrator.getMeasureState());
328 
329     // Compute totals.
330     uint64_t totalTime     = measureState.getTotalTime();
331     int numFrames          = (int)measureState.frameTimes.size();
332     int64_t numQuadGrids   = measureState.numDrawCalls * numFrames;
333     int64_t numPixels      = (int64_t)m_viewportWidth * (int64_t)m_viewportHeight * numQuadGrids;
334     int64_t numVertices    = (int64_t)getNumVertices(m_gridSizeX, m_gridSizeY) * numQuadGrids;
335     double mfragPerSecond  = (double)numPixels / (double)totalTime;
336     double mvertPerSecond  = (double)numVertices / (double)totalTime;
337     double framesPerSecond = (double)numFrames / ((double)totalTime / 1000000.0);
338 
339     logCalibrationInfo(log, m_calibrator);
340 
341     log << TestLog::Float("FramesPerSecond", "Frames per second in measurement", "Frames/s", QP_KEY_TAG_PERFORMANCE,
342                           (float)framesPerSecond)
343         << TestLog::Float("FragmentsPerVertices", "Vertex-fragment ratio", "Fragments/Vertices", QP_KEY_TAG_NONE,
344                           (float)numPixels / (float)numVertices)
345         << TestLog::Float("FragmentPerf", "Fragment performance", "MPix/s", QP_KEY_TAG_PERFORMANCE,
346                           (float)mfragPerSecond)
347         << TestLog::Float("VertexPerf", "Vertex performance", "MVert/s", QP_KEY_TAG_PERFORMANCE, (float)mvertPerSecond);
348 }
349 
setGridSize(int gridW,int gridH)350 void ShaderPerformanceMeasurer::setGridSize(int gridW, int gridH)
351 {
352     DE_ASSERT(m_state == STATE_UNINITIALIZED);
353     DE_ASSERT(de::inBounds(gridW, 1, 256) && de::inBounds(gridH, 1, 256));
354     m_gridSizeX = gridW;
355     m_gridSizeY = gridH;
356 }
357 
setViewportSize(int width,int height)358 void ShaderPerformanceMeasurer::setViewportSize(int width, int height)
359 {
360     DE_ASSERT(m_state == STATE_UNINITIALIZED);
361     DE_ASSERT(de::inRange(width, 1, m_renderCtx.getRenderTarget().getWidth()) &&
362               de::inRange(height, 1, m_renderCtx.getRenderTarget().getHeight()));
363     m_viewportWidth  = width;
364     m_viewportHeight = height;
365 }
366 
367 } // namespace gls
368 } // namespace deqp
369