1 //
2 // Copyright 2019 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // VulkanBarriersPerf:
7 // Performance tests for ANGLE's Vulkan backend w.r.t barrier efficiency.
8 //
9
10 #include <sstream>
11
12 #include "ANGLEPerfTest.h"
13 #include "test_utils/gl_raii.h"
14 #include "util/shader_utils.h"
15
16 using namespace angle;
17
18 namespace
19 {
// Value assigned to RenderTestParams::iterationsPerStep by every parameter set in this file; it is
// the number of loop iterations drawBenchmark() runs per step.
constexpr unsigned int kIterationsPerStep = 10u;
21
22 struct VulkanBarriersPerfParams final : public RenderTestParams
23 {
VulkanBarriersPerfParams__anon40f1e62d0111::VulkanBarriersPerfParams24 VulkanBarriersPerfParams(bool bufferCopy, bool largeTransfers, bool slowFS)
25 {
26 iterationsPerStep = kIterationsPerStep;
27
28 // Common default parameters
29 eglParameters = egl_platform::VULKAN();
30 majorVersion = 3;
31 minorVersion = 0;
32 windowWidth = 256;
33 windowHeight = 256;
34 trackGpuTime = true;
35
36 doBufferCopy = bufferCopy;
37 doLargeTransfers = largeTransfers;
38 doSlowFragmentShaders = slowFS;
39 }
40
41 std::string story() const override;
42
43 // Static parameters
44 static constexpr int kImageSizes[3] = {256, 512, 4096};
45 static constexpr int kBufferSize = 4096 * 4096;
46
47 bool doBufferCopy;
48 bool doLargeTransfers;
49 bool doSlowFragmentShaders;
50 };
51
52 constexpr int VulkanBarriersPerfParams::kImageSizes[];
53
operator <<(std::ostream & os,const VulkanBarriersPerfParams & params)54 std::ostream &operator<<(std::ostream &os, const VulkanBarriersPerfParams ¶ms)
55 {
56 os << params.backendAndStory().substr(1);
57 return os;
58 }
59
60 class VulkanBarriersPerfBenchmark : public ANGLERenderTest,
61 public ::testing::WithParamInterface<VulkanBarriersPerfParams>
62 {
63 public:
64 VulkanBarriersPerfBenchmark();
65
66 void initializeBenchmark() override;
67 void destroyBenchmark() override;
68 void drawBenchmark() override;
69
70 private:
71 void createTexture(uint32_t textureIndex, uint32_t sizeIndex, bool compressed);
72 void createUniformBuffer();
73 void createFramebuffer(uint32_t fboIndex, uint32_t textureIndex, uint32_t sizeIndex);
74 void createResources();
75
76 // Handle to the program object
77 GLProgram mProgram;
78
79 // Attribute locations
80 GLint mPositionLoc;
81 GLint mTexCoordLoc;
82
83 // Sampler location
84 GLint mSamplerLoc;
85
86 // Texture handles
87 GLTexture mTextures[4];
88
89 // Uniform buffer handles
90 GLBuffer mUniformBuffers[2];
91
92 // Framebuffer handles
93 GLFramebuffer mFbos[2];
94
95 // Buffer handle
96 GLBuffer mVertexBuffer;
97 GLBuffer mIndexBuffer;
98
99 static constexpr size_t kSmallFboIndex = 0;
100 static constexpr size_t kLargeFboIndex = 1;
101
102 static constexpr size_t kUniformBuffer1Index = 0;
103 static constexpr size_t kUniformBuffer2Index = 1;
104
105 static constexpr size_t kSmallTextureIndex = 0;
106 static constexpr size_t kLargeTextureIndex = 1;
107 static constexpr size_t kTransferTexture1Index = 2;
108 static constexpr size_t kTransferTexture2Index = 3;
109
110 static constexpr size_t kSmallSizeIndex = 0;
111 static constexpr size_t kLargeSizeIndex = 1;
112 static constexpr size_t kHugeSizeIndex = 2;
113 };
114
story() const115 std::string VulkanBarriersPerfParams::story() const
116 {
117 std::ostringstream sout;
118
119 sout << RenderTestParams::story();
120
121 if (doBufferCopy)
122 {
123 sout << "_buffer_copy";
124 }
125 if (doLargeTransfers)
126 {
127 sout << "_transfer";
128 }
129 if (doSlowFragmentShaders)
130 {
131 sout << "_slowfs";
132 }
133
134 return sout.str();
135 }
136
VulkanBarriersPerfBenchmark()137 VulkanBarriersPerfBenchmark::VulkanBarriersPerfBenchmark()
138 : ANGLERenderTest("VulkanBarriersPerf", GetParam()),
139 mPositionLoc(-1),
140 mTexCoordLoc(-1),
141 mSamplerLoc(-1)
142 {
143 if (IsNVIDIA() && IsWindows7())
144 {
145 skipTest(
146 "http://crbug.com/1096510 Fails on Windows7 NVIDIA Vulkan, presumably due to old "
147 "drivers");
148 }
149 }
150
// Vertex shader: forwards the position and texture coordinate attributes unchanged.
constexpr char kVS[] = R"(attribute vec4 a_position;
attribute vec2 a_texCoord;
varying vec2 v_texCoord;
void main()
{
    gl_Position = a_position;
    v_texCoord = a_texCoord;
})";

// Fast fragment shader: a single texture lookup per fragment.
constexpr char kShortFS[] = R"(precision mediump float;
varying vec2 v_texCoord;
uniform sampler2D s_texture;
void main()
{
    gl_FragColor = texture2D(s_texture, v_texCoord);
})";

// Slow fragment shader: fragments whose texture coordinate x is below 0.2 accumulate 100 texture
// lookups; all other fragments output vec4(0).
constexpr char kSlowFS[] = R"(precision mediump float;
varying vec2 v_texCoord;
uniform sampler2D s_texture;
void main()
{
    vec4 outColor = vec4(0);
    if (v_texCoord.x < 0.2)
    {
        for (int i = 0; i < 100; ++i)
        {
            outColor += texture2D(s_texture, v_texCoord);
        }
    }
    gl_FragColor = outColor;
})";
183
createTexture(uint32_t textureIndex,uint32_t sizeIndex,bool compressed)184 void VulkanBarriersPerfBenchmark::createTexture(uint32_t textureIndex,
185 uint32_t sizeIndex,
186 bool compressed)
187 {
188 const auto ¶ms = GetParam();
189
190 // TODO(syoussefi): compressed copy using vkCmdCopyImage not yet implemented in the vulkan
191 // backend. http://anglebug.com/42261682
192
193 glBindTexture(GL_TEXTURE_2D, mTextures[textureIndex]);
194 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, params.kImageSizes[sizeIndex],
195 params.kImageSizes[sizeIndex], 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
196
197 // Disable mipmapping
198 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
199 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
200 }
201
createUniformBuffer()202 void VulkanBarriersPerfBenchmark::createUniformBuffer()
203 {
204 const auto ¶ms = GetParam();
205
206 glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffers[kUniformBuffer1Index]);
207 glBufferData(GL_UNIFORM_BUFFER, params.kBufferSize, nullptr, GL_DYNAMIC_COPY);
208 glBindBuffer(GL_UNIFORM_BUFFER, mUniformBuffers[kUniformBuffer2Index]);
209 glBufferData(GL_UNIFORM_BUFFER, params.kBufferSize, nullptr, GL_DYNAMIC_COPY);
210 glBindBuffer(GL_UNIFORM_BUFFER, 0);
211 }
212
createFramebuffer(uint32_t fboIndex,uint32_t textureIndex,uint32_t sizeIndex)213 void VulkanBarriersPerfBenchmark::createFramebuffer(uint32_t fboIndex,
214 uint32_t textureIndex,
215 uint32_t sizeIndex)
216 {
217 createTexture(textureIndex, sizeIndex, false);
218
219 glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboIndex]);
220 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
221 mTextures[textureIndex], 0);
222 }
223
createResources()224 void VulkanBarriersPerfBenchmark::createResources()
225 {
226 const auto ¶ms = GetParam();
227
228 mProgram.makeRaster(kVS, params.doSlowFragmentShaders ? kSlowFS : kShortFS);
229 ASSERT_TRUE(mProgram.valid());
230
231 // Get the attribute locations
232 mPositionLoc = glGetAttribLocation(mProgram, "a_position");
233 mTexCoordLoc = glGetAttribLocation(mProgram, "a_texCoord");
234
235 // Get the sampler location
236 mSamplerLoc = glGetUniformLocation(mProgram, "s_texture");
237
238 // Build the vertex buffer
239 GLfloat vertices[] = {
240 -0.5f, 0.5f, 0.0f, // Position 0
241 0.0f, 0.0f, // TexCoord 0
242 -0.5f, -0.5f, 0.0f, // Position 1
243 0.0f, 1.0f, // TexCoord 1
244 0.5f, -0.5f, 0.0f, // Position 2
245 1.0f, 1.0f, // TexCoord 2
246 0.5f, 0.5f, 0.0f, // Position 3
247 1.0f, 0.0f // TexCoord 3
248 };
249
250 glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);
251 glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
252
253 GLushort indices[] = {0, 1, 2, 0, 2, 3};
254 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);
255 glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
256
257 // Use tightly packed data
258 glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
259
260 // Create four textures. Two of them are going to be framebuffers, and two are used for large
261 // transfers.
262 createFramebuffer(kSmallFboIndex, kSmallTextureIndex, kSmallSizeIndex);
263 createFramebuffer(kLargeFboIndex, kLargeTextureIndex, kLargeSizeIndex);
264 createUniformBuffer();
265
266 if (params.doLargeTransfers)
267 {
268 createTexture(kTransferTexture1Index, kHugeSizeIndex, true);
269 createTexture(kTransferTexture2Index, kHugeSizeIndex, true);
270 }
271 }
272
initializeBenchmark()273 void VulkanBarriersPerfBenchmark::initializeBenchmark()
274 {
275 createResources();
276
277 glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
278
279 ASSERT_GL_NO_ERROR();
280 }
281
destroyBenchmark()282 void VulkanBarriersPerfBenchmark::destroyBenchmark() {}
283
drawBenchmark()284 void VulkanBarriersPerfBenchmark::drawBenchmark()
285 {
286 const auto ¶ms = GetParam();
287
288 glUseProgram(mProgram);
289
290 // Bind the buffers
291 glBindBuffer(GL_ARRAY_BUFFER, mVertexBuffer);
292 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer);
293
294 // Load the vertex position
295 glVertexAttribPointer(mPositionLoc, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat), 0);
296 // Load the texture coordinate
297 glVertexAttribPointer(mTexCoordLoc, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(GLfloat),
298 reinterpret_cast<void *>(3 * sizeof(GLfloat)));
299
300 glEnableVertexAttribArray(mPositionLoc);
301 glEnableVertexAttribArray(mTexCoordLoc);
302
303 // Set the texture sampler to texture unit to 0
304 glUniform1i(mSamplerLoc, 0);
305
306 /*
307 * The perf benchmark does the following:
308 *
309 * - Alternately clear and draw from fbo 1 into fbo 2 and back. This would use the color
310 * attachment and shader read-only layouts in the fragment shader and color attachment stages.
311 *
312 * - Alternately copy data between the 2 uniform buffers. This would use the transfer layouts
313 * in the transfer stage.
314 *
315 * Once compressed texture copies are supported, alternately copy large chunks of data from
316 * texture 1 into texture 2 and back. This would use the transfer layouts in the transfer
317 * stage.
318 *
319 * Once compute shader support is added, another independent set of operations could be a few
320 * dispatches. This would use the general and shader read-only layouts in the compute stage.
321 *
322 * The idea is to create independent pipelines of operations that would run in parallel on the
323 * GPU. Regressions or inefficiencies in the barrier implementation could result in
324 * serialization of these jobs, resulting in a hit in performance.
325 *
326 * The above operations for example should ideally run on the GPU threads in parallel:
327 *
328 * + |---draw---||---draw---||---draw---||---draw---||---draw---|
329 * + |----buffer copy----||----buffer copy----||----buffer copy----|
330 * + |-----------texture copy------------||-----------texture copy------------|
331 * + |-----dispatch------||------dispatch------||------dispatch------|
332 *
333 * If barriers are too restrictive, situations like this could happen (draw is blocking
334 * copy):
335 *
336 * + |---draw---||---draw---||---draw---||---draw---||---draw---|
337 * + |------------copy------------||-----------copy------------|
338 *
339 * Or like this (copy is blocking draw):
340 *
341 * + |---draw---| |---draw---| |---draw---|
342 * + |--------------copy-------------||-------------copy--------------|
343 *
344 * Or like this (draw and copy blocking each other):
345 *
346 * + |---draw---| |---draw---|
347 * + |------------copy---------------| |------------copy------------|
348 *
349 * The idea of doing slow FS calls is to make the second case above slower (by making the draw
350 * slower than the transfer):
351 *
352 * + |------------------draw------------------| |-...draw...-|
353 * + |--------------copy----------------| |-------------copy-------------|
354 */
355
356 startGpuTimer();
357 for (unsigned int iteration = 0; iteration < params.iterationsPerStep; ++iteration)
358 {
359 bool altEven = iteration % 2 == 0;
360
361 const int fboDestIndex = altEven ? kLargeFboIndex : kSmallFboIndex;
362 const int fboTexSrcIndex = altEven ? kSmallTextureIndex : kLargeTextureIndex;
363 const int fboDestSizeIndex = altEven ? kLargeSizeIndex : kSmallSizeIndex;
364 const int uniformBufferReadIndex = altEven ? kUniformBuffer1Index : kUniformBuffer2Index;
365 const int uniformBufferWriteIndex = altEven ? kUniformBuffer2Index : kUniformBuffer1Index;
366
367 if (params.doBufferCopy)
368 {
369 // Transfer data between the 2 Uniform buffers
370 glBindBuffer(GL_COPY_READ_BUFFER, mUniformBuffers[uniformBufferReadIndex]);
371 glBindBuffer(GL_COPY_WRITE_BUFFER, mUniformBuffers[uniformBufferWriteIndex]);
372 glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, 0, 0,
373 params.kBufferSize);
374 }
375
376 // Bind the framebuffer
377 glBindFramebuffer(GL_FRAMEBUFFER, mFbos[fboDestIndex]);
378
379 // Set the viewport
380 glViewport(0, 0, params.kImageSizes[fboDestSizeIndex],
381 params.kImageSizes[fboDestSizeIndex]);
382
383 // Clear the color buffer
384 glClear(GL_COLOR_BUFFER_BIT);
385
386 // Bind the texture
387 glActiveTexture(GL_TEXTURE0);
388 glBindTexture(GL_TEXTURE_2D, mTextures[fboTexSrcIndex]);
389
390 ASSERT_GL_NO_ERROR();
391
392 glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0);
393 }
394 stopGpuTimer();
395
396 ASSERT_GL_NO_ERROR();
397 }
398
399 } // namespace
400
// Single test body shared by every instantiated parameter set: delegates to run().
TEST_P(VulkanBarriersPerfBenchmark, Run)
{
    run();
}
405
406 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(VulkanBarriersPerfBenchmark);
407 ANGLE_INSTANTIATE_TEST(VulkanBarriersPerfBenchmark,
408 VulkanBarriersPerfParams(false, false, false),
409 VulkanBarriersPerfParams(true, false, false),
410 VulkanBarriersPerfParams(false, true, false),
411 VulkanBarriersPerfParams(false, true, true));
412