xref: /aosp_15_r20/external/angle/src/tests/perf_tests/VulkanBarriersPerf.cpp (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1 //
2 // Copyright 2019 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // VulkanBarriersPerf:
7 //   Performance tests for ANGLE's Vulkan backend w.r.t barrier efficiency.
8 //
9 
10 #include <sstream>
11 
12 #include "ANGLEPerfTest.h"
13 #include "test_utils/gl_raii.h"
14 #include "util/shader_utils.h"
15 
16 using namespace angle;
17 
18 namespace
19 {
// Value assigned to RenderTestParams::iterationsPerStep for every variant of this benchmark; the
// draw loop runs that many iterations per measured step.
constexpr unsigned int kIterationsPerStep = 10u;
21 
22 struct VulkanBarriersPerfParams final : public RenderTestParams
23 {
VulkanBarriersPerfParams__anon40f1e62d0111::VulkanBarriersPerfParams24     VulkanBarriersPerfParams(bool bufferCopy, bool largeTransfers, bool slowFS)
25     {
26         iterationsPerStep = kIterationsPerStep;
27 
28         // Common default parameters
29         eglParameters = egl_platform::VULKAN();
30         majorVersion  = 3;
31         minorVersion  = 0;
32         windowWidth   = 256;
33         windowHeight  = 256;
34         trackGpuTime  = true;
35 
36         doBufferCopy          = bufferCopy;
37         doLargeTransfers      = largeTransfers;
38         doSlowFragmentShaders = slowFS;
39     }
40 
41     std::string story() const override;
42 
43     // Static parameters
44     static constexpr int kImageSizes[3] = {256, 512, 4096};
45     static constexpr int kBufferSize    = 4096 * 4096;
46 
47     bool doBufferCopy;
48     bool doLargeTransfers;
49     bool doSlowFragmentShaders;
50 };
51 
52 constexpr int VulkanBarriersPerfParams::kImageSizes[];
53 
operator <<(std::ostream & os,const VulkanBarriersPerfParams & params)54 std::ostream &operator<<(std::ostream &os, const VulkanBarriersPerfParams &params)
55 {
56     os << params.backendAndStory().substr(1);
57     return os;
58 }
59 
60 class VulkanBarriersPerfBenchmark : public ANGLERenderTest,
61                                     public ::testing::WithParamInterface<VulkanBarriersPerfParams>
62 {
63   public:
64     VulkanBarriersPerfBenchmark();
65 
66     void initializeBenchmark() override;
67     void destroyBenchmark() override;
68     void drawBenchmark() override;
69 
70   private:
71     void createTexture(uint32_t textureIndex, uint32_t sizeIndex, bool compressed);
72     void createUniformBuffer();
73     void createFramebuffer(uint32_t fboIndex, uint32_t textureIndex, uint32_t sizeIndex);
74     void createResources();
75 
76     // Handle to the program object
77     GLProgram mProgram;
78 
79     // Attribute locations
80     GLint mPositionLoc;
81     GLint mTexCoordLoc;
82 
83     // Sampler location
84     GLint mSamplerLoc;
85 
86     // Texture handles
87     GLTexture mTextures[4];
88 
89     // Uniform buffer handles
90     GLBuffer mUniformBuffers[2];
91 
92     // Framebuffer handles
93     GLFramebuffer mFbos[2];
94 
95     // Buffer handle
96     GLBuffer mVertexBuffer;
97     GLBuffer mIndexBuffer;
98 
99     static constexpr size_t kSmallFboIndex = 0;
100     static constexpr size_t kLargeFboIndex = 1;
101 
102     static constexpr size_t kUniformBuffer1Index = 0;
103     static constexpr size_t kUniformBuffer2Index = 1;
104 
105     static constexpr size_t kSmallTextureIndex     = 0;
106     static constexpr size_t kLargeTextureIndex     = 1;
107     static constexpr size_t kTransferTexture1Index = 2;
108     static constexpr size_t kTransferTexture2Index = 3;
109 
110     static constexpr size_t kSmallSizeIndex = 0;
111     static constexpr size_t kLargeSizeIndex = 1;
112     static constexpr size_t kHugeSizeIndex  = 2;
113 };
114 
story() const115 std::string VulkanBarriersPerfParams::story() const
116 {
117     std::ostringstream sout;
118 
119     sout << RenderTestParams::story();
120 
121     if (doBufferCopy)
122     {
123         sout << "_buffer_copy";
124     }
125     if (doLargeTransfers)
126     {
127         sout << "_transfer";
128     }
129     if (doSlowFragmentShaders)
130     {
131         sout << "_slowfs";
132     }
133 
134     return sout.str();
135 }
136 
VulkanBarriersPerfBenchmark()137 VulkanBarriersPerfBenchmark::VulkanBarriersPerfBenchmark()
138     : ANGLERenderTest("VulkanBarriersPerf", GetParam()),
139       mPositionLoc(-1),
140       mTexCoordLoc(-1),
141       mSamplerLoc(-1)
142 {
143     if (IsNVIDIA() && IsWindows7())
144     {
145         skipTest(
146             "http://crbug.com/1096510 Fails on Windows7 NVIDIA Vulkan, presumably due to old "
147             "drivers");
148     }
149 }
150 
151 constexpr char kVS[] = R"(attribute vec4 a_position;
152 attribute vec2 a_texCoord;
153 varying vec2 v_texCoord;
154 void main()
155 {
156     gl_Position = a_position;
157     v_texCoord  = a_texCoord;
158 })";
159 
160 constexpr char kShortFS[] = R"(precision mediump float;
161 varying vec2 v_texCoord;
162 uniform sampler2D s_texture;
163 void main()
164 {
165     gl_FragColor = texture2D(s_texture, v_texCoord);
166 })";
167 
168 constexpr char kSlowFS[] = R"(precision mediump float;
169 varying vec2 v_texCoord;
170 uniform sampler2D s_texture;
171 void main()
172 {
173     vec4 outColor = vec4(0);
174     if (v_texCoord.x < 0.2)
175     {
176         for (int i = 0; i < 100; ++i)
177         {
178             outColor += texture2D(s_texture, v_texCoord);
179         }
180     }
181     gl_FragColor = outColor;
182 })";
183 
createTexture(uint32_t textureIndex,uint32_t sizeIndex,bool compressed)184 void VulkanBarriersPerfBenchmark::createTexture(uint32_t textureIndex,
185                                                 uint32_t sizeIndex,
186                                                 bool compressed)
187 {
188     const auto &params = GetParam();
189 
190     // TODO(syoussefi): compressed copy using vkCmdCopyImage not yet implemented in the vulkan
191     // backend. http://anglebug.com/42261682
192 
193     glBindTexture(GL_TEXTURE_2D, mTextures[textureIndex]);
194     glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, params.kImageSizes[sizeIndex],
195                  params.kImageSizes[sizeIndex], 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
196 
197     // Disable mipmapping
198     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
199     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
200 }
201 
createUniformBuffer()202 void VulkanBarriersPerfBenchmark::createUniformBuffer()
203 {
204     const auto &params = GetParam();
205 
206     glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffers[kUniformBuffer1Index]);
207     glBufferData(GL_UNIFORM_BUFFER, params.kBufferSize, nullptr, GL_DYNAMIC_COPY);
208     glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffers[kUniformBuffer2Index]);
209     glBufferData(GL_UNIFORM_BUFFER, params.kBufferSize, nullptr, GL_DYNAMIC_COPY);
210     glBindBuffer(GL_UNIFORM_BUFFER, 0);
211 }
212 
createFramebuffer(uint32_t fboIndex,uint32_t textureIndex,uint32_t sizeIndex)213 void VulkanBarriersPerfBenchmark::createFramebuffer(uint32_t fboIndex,
214                                                     uint32_t textureIndex,
215                                                     uint32_t sizeIndex)
216 {
217     createTexture(textureIndex, sizeIndex, false);
218 
219     glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboIndex]);
220     glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
221                            mTextures[textureIndex], 0);
222 }
223 
createResources()224 void VulkanBarriersPerfBenchmark::createResources()
225 {
226     const auto &params = GetParam();
227 
228     mProgram.makeRaster(kVS, params.doSlowFragmentShaders ? kSlowFS : kShortFS);
229     ASSERT_TRUE(mProgram.valid());
230 
231     // Get the attribute locations
232     mPositionLoc = glGetAttribLocation(mProgram, "a_position");
233     mTexCoordLoc = glGetAttribLocation(mProgram, "a_texCoord");
234 
235     // Get the sampler location
236     mSamplerLoc = glGetUniformLocation(mProgram, "s_texture");
237 
238     // Build the vertex buffer
239     GLfloat vertices[] = {
240         -0.5f, 0.5f,  0.0f,  // Position 0
241         0.0f,  0.0f,         // TexCoord 0
242         -0.5f, -0.5f, 0.0f,  // Position 1
243         0.0f,  1.0f,         // TexCoord 1
244         0.5f,  -0.5f, 0.0f,  // Position 2
245         1.0f,  1.0f,         // TexCoord 2
246         0.5f,  0.5f,  0.0f,  // Position 3
247         1.0f,  0.0f          // TexCoord 3
248     };
249 
250     glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);
251     glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
252 
253     GLushort indices[] = {0, 1, 2, 0, 2, 3};
254     glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);
255     glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
256 
257     // Use tightly packed data
258     glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
259 
260     // Create four textures.  Two of them are going to be framebuffers, and two are used for large
261     // transfers.
262     createFramebuffer(kSmallFboIndex, kSmallTextureIndex, kSmallSizeIndex);
263     createFramebuffer(kLargeFboIndex, kLargeTextureIndex, kLargeSizeIndex);
264     createUniformBuffer();
265 
266     if (params.doLargeTransfers)
267     {
268         createTexture(kTransferTexture1Index, kHugeSizeIndex, true);
269         createTexture(kTransferTexture2Index, kHugeSizeIndex, true);
270     }
271 }
272 
initializeBenchmark()273 void VulkanBarriersPerfBenchmark::initializeBenchmark()
274 {
275     createResources();
276 
277     glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
278 
279     ASSERT_GL_NO_ERROR();
280 }
281 
destroyBenchmark()282 void VulkanBarriersPerfBenchmark::destroyBenchmark() {}
283 
drawBenchmark()284 void VulkanBarriersPerfBenchmark::drawBenchmark()
285 {
286     const auto &params = GetParam();
287 
288     glUseProgram(mProgram);
289 
290     // Bind the buffers
291     glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);
292     glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);
293 
294     // Load the vertex position
295     glVertexAttribPointer(mPositionLoc, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), 0);
296     // Load the texture coordinate
297     glVertexAttribPointer(mTexCoordLoc, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat),
298                           reinterpret_cast<void *>(3 * sizeof(GLfloat)));
299 
300     glEnableVertexAttribArray(mPositionLoc);
301     glEnableVertexAttribArray(mTexCoordLoc);
302 
303     // Set the texture sampler to texture unit to 0
304     glUniform1i(mSamplerLoc, 0);
305 
306     /*
307      * The perf benchmark does the following:
308      *
309      * - Alternately clear and draw from fbo 1 into fbo 2 and back.  This would use the color
310      * attachment and shader read-only layouts in the fragment shader and color attachment stages.
311      *
312      * - Alternately copy data between the 2 uniform buffers. This would use the transfer layouts
313      * in the transfer stage.
314      *
315      * Once compressed texture copies are supported, alternately copy large chunks of data from
316      * texture 1 into texture 2 and back.  This would use the transfer layouts in the transfer
317      * stage.
318      *
319      * Once compute shader support is added, another independent set of operations could be a few
320      * dispatches.  This would use the general and shader read-only layouts in the compute stage.
321      *
322      * The idea is to create independent pipelines of operations that would run in parallel on the
323      * GPU.  Regressions or inefficiencies in the barrier implementation could result in
324      * serialization of these jobs, resulting in a hit in performance.
325      *
326      * The above operations for example should ideally run on the GPU threads in parallel:
327      *
328      * + |---draw---||---draw---||---draw---||---draw---||---draw---|
329      * + |----buffer copy----||----buffer copy----||----buffer copy----|
330      * + |-----------texture copy------------||-----------texture copy------------|
331      * + |-----dispatch------||------dispatch------||------dispatch------|
332      *
333      * If barriers are too restrictive, situations like this could happen (draw is blocking
334      * copy):
335      *
336      * + |---draw---||---draw---||---draw---||---draw---||---draw---|
337      * +             |------------copy------------||-----------copy------------|
338      *
339      * Or like this (copy is blocking draw):
340      *
341      * + |---draw---|                     |---draw---|                     |---draw---|
342      * + |--------------copy-------------||-------------copy--------------|
343      *
344      * Or like this (draw and copy blocking each other):
345      *
346      * + |---draw---|                                 |---draw---|
347      * +             |------------copy---------------|            |------------copy------------|
348      *
349      * The idea of doing slow FS calls is to make the second case above slower (by making the draw
350      * slower than the transfer):
351      *
352      * + |------------------draw------------------|                                 |-...draw...-|
353      * + |--------------copy----------------|       |-------------copy-------------|
354      */
355 
356     startGpuTimer();
357     for (unsigned int iteration = 0; iteration < params.iterationsPerStep; ++iteration)
358     {
359         bool altEven = iteration % 2 == 0;
360 
361         const int fboDestIndex            = altEven ? kLargeFboIndex : kSmallFboIndex;
362         const int fboTexSrcIndex          = altEven ? kSmallTextureIndex : kLargeTextureIndex;
363         const int fboDestSizeIndex        = altEven ? kLargeSizeIndex : kSmallSizeIndex;
364         const int uniformBufferReadIndex  = altEven ? kUniformBuffer1Index : kUniformBuffer2Index;
365         const int uniformBufferWriteIndex = altEven ? kUniformBuffer2Index : kUniformBuffer1Index;
366 
367         if (params.doBufferCopy)
368         {
369             // Transfer data between the 2 Uniform buffers
370             glBindBuffer(GL_COPY_READ_BUFFER, mUniformBuffers[uniformBufferReadIndex]);
371             glBindBuffer(GL_COPY_WRITE_BUFFER, mUniformBuffers[uniformBufferWriteIndex]);
372             glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, 0, 0,
373                                 params.kBufferSize);
374         }
375 
376         // Bind the framebuffer
377         glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboDestIndex]);
378 
379         // Set the viewport
380         glViewport(0, 0, params.kImageSizes[fboDestSizeIndex],
381                    params.kImageSizes[fboDestSizeIndex]);
382 
383         // Clear the color buffer
384         glClear(GL_COLOR_BUFFER_BIT);
385 
386         // Bind the texture
387         glActiveTexture(GL_TEXTURE0);
388         glBindTexture(GL_TEXTURE_2D, mTextures[fboTexSrcIndex]);
389 
390         ASSERT_GL_NO_ERROR();
391 
392         glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0);
393     }
394     stopGpuTimer();
395 
396     ASSERT_GL_NO_ERROR();
397 }
398 
399 }  // namespace
400 
// Runs the benchmark for the current parameter set.
TEST_P(VulkanBarriersPerfBenchmark, Run)
{
    this->run();
}
405 
406 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(VulkanBarriersPerfBenchmark);
407 ANGLE_INSTANTIATE_TEST(VulkanBarriersPerfBenchmark,
408                        VulkanBarriersPerfParams(false, false, false),
409                        VulkanBarriersPerfParams(true, false, false),
410                        VulkanBarriersPerfParams(false, true, false),
411                        VulkanBarriersPerfParams(false, true, true));
412