1 //
2 // Copyright 2016 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // BufferVk.cpp:
7 // Implements the class methods for BufferVk.
8 //
9
10 #include "libANGLE/renderer/vulkan/BufferVk.h"
11
12 #include "common/FixedVector.h"
13 #include "common/debug.h"
14 #include "common/mathutil.h"
15 #include "common/utilities.h"
16 #include "libANGLE/Context.h"
17 #include "libANGLE/renderer/vulkan/ContextVk.h"
18 #include "libANGLE/renderer/vulkan/vk_renderer.h"
19
20 namespace rx
21 {
VkBufferUsageFlags GetDefaultBufferUsageFlags(vk::Renderer *renderer)
23 {
24 // We could potentially use multiple backing buffers for different usages.
25 // For now keep a single buffer with all relevant usage flags.
26 VkBufferUsageFlags defaultBufferUsageFlags =
27 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
28 VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
29 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
30 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
31 VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
32 if (renderer->getFeatures().supportsTransformFeedbackExtension.enabled)
33 {
34 defaultBufferUsageFlags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
35 VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT;
36 }
37 return defaultBufferUsageFlags;
38 }
39
40 namespace
41 {
42 constexpr VkMemoryPropertyFlags kDeviceLocalFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
43 constexpr VkMemoryPropertyFlags kDeviceLocalHostCoherentFlags =
44 (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
45 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
46 constexpr VkMemoryPropertyFlags kHostCachedFlags =
47 (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
48 VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
49 constexpr VkMemoryPropertyFlags kHostUncachedFlags =
50 (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
51 constexpr VkMemoryPropertyFlags kHostCachedNonCoherentFlags =
52 (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
53
54 // Vertex attribute buffers are used as storage buffers for conversion in compute, where access to
55 // the buffer is made in 4-byte chunks. Assume the size of the buffer is 4k+n where n is in [0, 3).
56 // On some hardware, reading 4 bytes from address 4k returns 0, making it impossible to read the
57 // last n bytes. By rounding up the buffer sizes to a multiple of 4, the problem is alleviated.
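// For example (illustrative numbers), a 1003-byte buffer (4k+3) is allocated as 1004 bytes so
// that a 4-byte read covering its last bytes stays within the allocation.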
58 constexpr size_t kBufferSizeGranularity = 4;
59 static_assert(gl::isPow2(kBufferSizeGranularity), "use as alignment, must be power of two");
60
61 // Start with a fairly small buffer size. We can increase this dynamically as we convert more data.
62 constexpr size_t kConvertedArrayBufferInitialSize = 1024 * 8;
63
// Buffers that have a static usage pattern will be allocated in device-local memory to speed up
// access to and from the GPU.  Buffers with a dynamic usage pattern or that are frequently mapped
// will request host-cached memory to speed up access from the CPU.
VkMemoryPropertyFlags GetPreferredMemoryType(vk::Renderer *renderer,
                                             gl::BufferBinding target,
                                             gl::BufferUsage usage)
71 {
72 if (target == gl::BufferBinding::PixelUnpack)
73 {
74 return kHostCachedFlags;
75 }
76
77 switch (usage)
78 {
79 case gl::BufferUsage::StaticCopy:
80 case gl::BufferUsage::StaticDraw:
81 case gl::BufferUsage::StaticRead:
            // For static usage, request device-local memory.
83 return renderer->getFeatures().preferDeviceLocalMemoryHostVisible.enabled
84 ? kDeviceLocalHostCoherentFlags
85 : kDeviceLocalFlags;
86 case gl::BufferUsage::DynamicDraw:
87 case gl::BufferUsage::StreamDraw:
            // For non-static usage where the CPU performs write-only access, request
            // host-uncached memory.
90 return renderer->getFeatures().preferHostCachedForNonStaticBufferUsage.enabled
91 ? kHostCachedFlags
92 : kHostUncachedFlags;
93 case gl::BufferUsage::DynamicCopy:
94 case gl::BufferUsage::DynamicRead:
95 case gl::BufferUsage::StreamCopy:
96 case gl::BufferUsage::StreamRead:
            // For all other usage types, request host-cached memory.
98 return renderer->getFeatures()
99 .preferCachedNoncoherentForDynamicStreamBufferUsage.enabled
100 ? kHostCachedNonCoherentFlags
101 : kHostCachedFlags;
102 default:
103 UNREACHABLE();
104 return kHostCachedFlags;
105 }
106 }
107
VkMemoryPropertyFlags GetStorageMemoryType(vk::Renderer *renderer,
                                           GLbitfield storageFlags,
                                           bool externalBuffer)
111 {
112 const bool hasMapAccess =
113 (storageFlags & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT_EXT)) != 0;
114
115 if (renderer->getFeatures().preferDeviceLocalMemoryHostVisible.enabled)
116 {
117 const bool canUpdate = (storageFlags & GL_DYNAMIC_STORAGE_BIT_EXT) != 0;
118 if (canUpdate || hasMapAccess || externalBuffer)
119 {
120 // We currently allocate coherent memory for persistently mapped buffers.
121 // GL_EXT_buffer_storage allows non-coherent memory, but currently the implementation of
122 // |glMemoryBarrier(CLIENT_MAPPED_BUFFER_BARRIER_BIT_EXT)| relies on the mapping being
123 // coherent.
124 //
125 // If persistently mapped buffers ever use non-coherent memory, then said
126 // |glMemoryBarrier| call must result in |vkInvalidateMappedMemoryRanges| for all
127 // persistently mapped buffers.
128 return kDeviceLocalHostCoherentFlags;
129 }
130 return kDeviceLocalFlags;
131 }
132
133 return hasMapAccess ? kHostCachedFlags : kDeviceLocalFlags;
134 }
135
bool ShouldAllocateNewMemoryForUpdate(ContextVk *contextVk, size_t subDataSize, size_t bufferSize)
137 {
138 // A sub-data update with size > 50% of buffer size meets the threshold to acquire a new
139 // BufferHelper from the pool.
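// For example, a 40 KB sub-data update to a 64 KB buffer exceeds the 50% threshold and meets this
// criterion.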
140 size_t halfBufferSize = bufferSize / 2;
141 if (subDataSize > halfBufferSize)
142 {
143 return true;
144 }
145
146 // If the GPU is busy, it is possible to use the CPU for updating sub-data instead, but since it
147 // would need to create a duplicate of the buffer, a large enough buffer copy could result in a
148 // performance regression.
149 if (contextVk->getFeatures().preferCPUForBufferSubData.enabled)
150 {
        // If the buffer is small enough, the cost of the barrier associated with the GPU copy
        // likely exceeds the overhead of the CPU copy.  Duplicating the buffer allows the CPU to
        // write to it immediately, thus avoiding the barrier that prevents parallel operation.
154 constexpr size_t kCpuCopyBufferSizeThreshold = 32 * 1024;
155 if (bufferSize < kCpuCopyBufferSizeThreshold)
156 {
157 return true;
158 }
159
        // To use the CPU for the sub-data update in larger buffers, the update should be sizable
        // enough compared to the whole buffer size.  The threshold is chosen based on perf data
        // collected from Pixel devices.  At 1/8 of the buffer size, the CPU overhead associated
        // with the extra data copy weighs less than the serialization caused by barriers.
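        // For example, with a 256 KB buffer, only sub-data updates larger than 32 KB take the CPU
        // path here.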
164 size_t subDataThreshold = bufferSize / 8;
165 if (subDataSize > subDataThreshold)
166 {
167 return true;
168 }
169 }
170
171 return false;
172 }
173
bool ShouldUseCPUToCopyData(ContextVk *contextVk,
                            const vk::BufferHelper &buffer,
                            size_t copySize,
                            size_t bufferSize)
178 {
179 vk::Renderer *renderer = contextVk->getRenderer();
180
    // If the buffer is not host-visible, or if the GPU is still writing to it, the CPU can't read
    // from it.
183 if (!buffer.isHostVisible() || !renderer->hasResourceUseFinished(buffer.getWriteResourceUse()))
184 {
185 return false;
186 }
187
    // For some GPUs (e.g. ARM) we always prefer using the CPU to do the copy instead of the GPU,
    // to avoid pipeline bubbles.  If the GPU is currently busy and the copy size is below a
    // certain threshold, we use the CPU to do the copy to achieve better parallelism.
191 return renderer->getFeatures().preferCPUForBufferSubData.enabled ||
192 (renderer->isCommandQueueBusy() &&
193 copySize < renderer->getMaxCopyBytesUsingCPUWhenPreservingBufferData());
194 }
195
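// Returns true if the currently open render pass (if any) uses the buffer for read-only access.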
bool RenderPassUsesBufferForReadOnly(ContextVk *contextVk, const vk::BufferHelper &buffer)
197 {
198 if (!contextVk->hasActiveRenderPass())
199 {
200 return false;
201 }
202
203 vk::RenderPassCommandBufferHelper &renderPassCommands =
204 contextVk->getStartedRenderPassCommands();
205 return renderPassCommands.usesBuffer(buffer) && !renderPassCommands.usesBufferForWrite(buffer);
206 }
207
// If a render pass is open which uses the buffer in read-only mode, breaking the render pass can
// be avoided by using acquireAndUpdate.  This can be costly however if the buffer is large, and
// is limited to platforms where a render pass break is itself costly (i.e. tile-based renderers).
bool ShouldAvoidRenderPassBreakOnUpdate(ContextVk *contextVk,
                                        const vk::BufferHelper &buffer,
                                        size_t bufferSize)
214 {
    // Only avoid breaking the render pass if the buffer is not so big that duplicating it would
    // outweigh the cost of breaking the render pass.  A value of 1KB is chosen temporarily as a
    // heuristic, and can be adjusted when such a situation is encountered.
218 constexpr size_t kPreferDuplicateOverRenderPassBreakMaxBufferSize = 1024;
219 if (!contextVk->getFeatures().preferCPUForBufferSubData.enabled ||
220 bufferSize > kPreferDuplicateOverRenderPassBreakMaxBufferSize)
221 {
222 return false;
223 }
224
225 return RenderPassUsesBufferForReadOnly(contextVk, buffer);
226 }
227
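// Only the Dynamic* GL usages are treated as BufferUsageType::Dynamic; Static* and Stream* usages
// are treated as Static.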
BufferUsageType GetBufferUsageType(gl::BufferUsage usage)
229 {
230 return (usage == gl::BufferUsage::DynamicDraw || usage == gl::BufferUsage::DynamicCopy ||
231 usage == gl::BufferUsage::DynamicRead)
232 ? BufferUsageType::Dynamic
233 : BufferUsageType::Static;
234 }
235
angle::Result GetMemoryTypeIndex(ContextVk *contextVk,
                                 VkDeviceSize size,
                                 VkMemoryPropertyFlags memoryPropertyFlags,
                                 uint32_t *memoryTypeIndexOut)
240 {
241 vk::Renderer *renderer = contextVk->getRenderer();
242 const vk::Allocator &allocator = renderer->getAllocator();
243
244 bool persistentlyMapped = renderer->getFeatures().persistentlyMappedBuffers.enabled;
245 VkBufferUsageFlags defaultBufferUsageFlags = GetDefaultBufferUsageFlags(renderer);
246
247 VkBufferCreateInfo createInfo = {};
248 createInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
249 createInfo.flags = 0;
250 createInfo.size = size;
251 createInfo.usage = defaultBufferUsageFlags;
252 createInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
253 createInfo.queueFamilyIndexCount = 0;
254 createInfo.pQueueFamilyIndices = nullptr;
255
    // Host visible is required; all other bits are preferred (i.e., optional).
257 VkMemoryPropertyFlags requiredFlags =
258 (memoryPropertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
259 VkMemoryPropertyFlags preferredFlags =
260 (memoryPropertyFlags & (~VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
261
262 // Check that the allocation is not too large.
263 uint32_t memoryTypeIndex = 0;
264 ANGLE_VK_TRY(contextVk, allocator.findMemoryTypeIndexForBufferInfo(
265 createInfo, requiredFlags, preferredFlags, persistentlyMapped,
266 &memoryTypeIndex));
267 *memoryTypeIndexOut = memoryTypeIndex;
268
269 return angle::Result::Continue;
270 }
271
bool IsSelfCopy(const BufferDataSource &dataSource, const vk::BufferHelper &destination)
273 {
274 return dataSource.data == nullptr &&
275 dataSource.buffer->getBufferSerial() == destination.getBufferSerial();
276 }
277
angle::Result CopyBuffers(ContextVk *contextVk,
                          vk::BufferHelper *srcBuffer,
                          vk::BufferHelper *dstBuffer,
                          uint32_t regionCount,
                          const VkBufferCopy *copyRegions)
283 {
284 ASSERT(srcBuffer->valid() && dstBuffer->valid());
285
286 // Enqueue a copy command on the GPU
287 vk::CommandBufferAccess access;
288 if (srcBuffer->getBufferSerial() == dstBuffer->getBufferSerial())
289 {
290 access.onBufferSelfCopy(srcBuffer);
291 }
292 else
293 {
294 access.onBufferTransferRead(srcBuffer);
295 access.onBufferTransferWrite(dstBuffer);
296 }
297
298 vk::OutsideRenderPassCommandBuffer *commandBuffer;
299 ANGLE_TRY(contextVk->getOutsideRenderPassCommandBuffer(access, &commandBuffer));
300
301 commandBuffer->copyBuffer(srcBuffer->getBuffer(), dstBuffer->getBuffer(), regionCount,
302 copyRegions);
303
304 return angle::Result::Continue;
305 }
306 } // namespace
307
308 // ConversionBuffer implementation.
ConversionBuffer::ConversionBuffer(vk::Renderer *renderer,
                                   VkBufferUsageFlags usageFlags,
                                   size_t initialSize,
                                   size_t alignment,
                                   bool hostVisible)
    : mEntireBufferDirty(true)
315 {
316 mData = std::make_unique<vk::BufferHelper>();
317 mDirtyRanges.reserve(32);
318 }
319
ConversionBuffer::~ConversionBuffer()
321 {
322 ASSERT(!mData || !mData->valid());
323 mDirtyRanges.clear();
324 }
325
326 ConversionBuffer::ConversionBuffer(ConversionBuffer &&other) = default;
327
// dirtyRanges may overlap or be contiguous.  In order to reduce redundant conversions, we try to
// consolidate the dirty ranges.  First we sort them by each range's low end.  Then we walk the
// ranges again, compare each with the previous range, and merge them if possible.  Merging removes
// the overlapped area as well as reduces the number of ranges.
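// For example (illustrative values), the ranges {[16, 32), [0, 8), [4, 12)} sort to
// {[0, 8), [4, 12), [16, 32)} and consolidate to {[0, 12), [16, 32)}; the merged-away entry is
// marked invalid rather than erased.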
void ConversionBuffer::consolidateDirtyRanges()
333 {
334 ASSERT(!mEntireBufferDirty);
335
336 auto comp = [](const RangeDeviceSize &a, const RangeDeviceSize &b) -> bool {
337 return a.low() < b.low();
338 };
339 std::sort(mDirtyRanges.begin(), mDirtyRanges.end(), comp);
340
341 size_t prev = 0;
342 for (size_t i = 1; i < mDirtyRanges.size(); i++)
343 {
344 if (mDirtyRanges[prev].intersectsOrContinuous(mDirtyRanges[i]))
345 {
346 mDirtyRanges[prev].merge(mDirtyRanges[i]);
347 mDirtyRanges[i].invalidate();
348 }
349 else
350 {
351 prev = i;
352 }
353 }
354 }
355
356 // VertexConversionBuffer implementation.
VertexConversionBuffer::VertexConversionBuffer(vk::Renderer *renderer, const CacheKey &cacheKey)
    : ConversionBuffer(renderer,
                       vk::kVertexBufferUsageFlags,
                       kConvertedArrayBufferInitialSize,
                       vk::kVertexBufferAlignment,
                       cacheKey.hostVisible),
      mCacheKey(cacheKey)
{}
365
366 VertexConversionBuffer::VertexConversionBuffer(VertexConversionBuffer &&other) = default;
367
368 VertexConversionBuffer::~VertexConversionBuffer() = default;
369
370 // BufferVk implementation.
BufferVk::BufferVk(const gl::BufferState &state)
    : BufferImpl(state),
      mClientBuffer(nullptr),
      mMemoryTypeIndex(0),
      mMemoryPropertyFlags(0),
      mIsStagingBufferMapped(false),
      mHasValidData(false),
      mIsMappedForWrite(false),
      mUsageType(BufferUsageType::Static)
380 {
381 mMappedRange.invalidate();
382 }
383
BufferVk::~BufferVk() {}
385
void BufferVk::destroy(const gl::Context *context)
387 {
388 ContextVk *contextVk = vk::GetImpl(context);
389
390 (void)release(contextVk);
391 }
392
void BufferVk::releaseConversionBuffers(vk::Renderer *renderer)
394 {
395 for (ConversionBuffer &buffer : mVertexConversionBuffers)
396 {
397 buffer.release(renderer);
398 }
399 mVertexConversionBuffers.clear();
400 }
401
angle::Result BufferVk::release(ContextVk *contextVk)
403 {
404 vk::Renderer *renderer = contextVk->getRenderer();
405 if (mBuffer.valid())
406 {
407 ANGLE_TRY(contextVk->releaseBufferAllocation(&mBuffer));
408 }
409 if (mStagingBuffer.valid())
410 {
411 mStagingBuffer.release(renderer);
412 }
413
414 releaseConversionBuffers(renderer);
415
416 return angle::Result::Continue;
417 }
418
angle::Result BufferVk::setExternalBufferData(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags)
424 {
425 ContextVk *contextVk = vk::GetImpl(context);
426
427 // Release and re-create the memory and buffer.
428 ANGLE_TRY(release(contextVk));
429
430 VkBufferCreateInfo createInfo = {};
431 createInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
432 createInfo.flags = 0;
433 createInfo.size = size;
434 createInfo.usage = GetDefaultBufferUsageFlags(contextVk->getRenderer());
435 createInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
436 createInfo.queueFamilyIndexCount = 0;
437 createInfo.pQueueFamilyIndices = nullptr;
438
439 return mBuffer.initExternal(contextVk, memoryPropertyFlags, createInfo, clientBuffer);
440 }
441
angle::Result BufferVk::setDataWithUsageFlags(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              const void *data,
                                              size_t size,
                                              gl::BufferUsage usage,
                                              GLbitfield flags)
449 {
450 ContextVk *contextVk = vk::GetImpl(context);
451 VkMemoryPropertyFlags memoryPropertyFlags = 0;
452 bool persistentMapRequired = false;
453 const bool isExternalBuffer = clientBuffer != nullptr;
454
455 switch (usage)
456 {
457 case gl::BufferUsage::InvalidEnum:
458 {
459 // glBufferStorage API call
460 memoryPropertyFlags =
461 GetStorageMemoryType(contextVk->getRenderer(), flags, isExternalBuffer);
462 persistentMapRequired = (flags & GL_MAP_PERSISTENT_BIT_EXT) != 0;
463 break;
464 }
465 default:
466 {
467 // glBufferData API call
468 memoryPropertyFlags = GetPreferredMemoryType(contextVk->getRenderer(), target, usage);
469 break;
470 }
471 }
472
473 if (isExternalBuffer)
474 {
475 ANGLE_TRY(setExternalBufferData(context, target, clientBuffer, size, memoryPropertyFlags));
476 if (!mBuffer.isHostVisible())
477 {
478 // If external buffer's memory does not support host visible memory property, we cannot
479 // support a persistent map request.
480 ANGLE_VK_CHECK(contextVk, !persistentMapRequired, VK_ERROR_MEMORY_MAP_FAILED);
481 }
482
483 mClientBuffer = clientBuffer;
484
485 return angle::Result::Continue;
486 }
487 return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags, usage);
488 }
489
angle::Result BufferVk::setData(const gl::Context *context,
                                gl::BufferBinding target,
                                const void *data,
                                size_t size,
                                gl::BufferUsage usage)
495 {
496 ContextVk *contextVk = vk::GetImpl(context);
497 // Assume host visible/coherent memory available.
498 VkMemoryPropertyFlags memoryPropertyFlags =
499 GetPreferredMemoryType(contextVk->getRenderer(), target, usage);
500 return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags, usage);
501 }
502
angle::Result BufferVk::setDataWithMemoryType(const gl::Context *context,
                                              gl::BufferBinding target,
                                              const void *data,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags,
                                              gl::BufferUsage usage)
509 {
510 ContextVk *contextVk = vk::GetImpl(context);
511 vk::Renderer *renderer = contextVk->getRenderer();
512
513 // Since the buffer is being entirely reinitialized, reset the valid-data flag. If the caller
514 // passed in data to fill the buffer, the flag will be updated when the data is copied to the
515 // buffer.
516 mHasValidData = false;
517
518 if (size == 0)
519 {
520 // Nothing to do.
521 return angle::Result::Continue;
522 }
523
524 if (!mVertexConversionBuffers.empty())
525 {
526 for (ConversionBuffer &buffer : mVertexConversionBuffers)
527 {
528 buffer.clearDirty();
529 }
530 }
531
532 const BufferUsageType usageType = GetBufferUsageType(usage);
533 const BufferUpdateType updateType =
534 calculateBufferUpdateTypeOnFullUpdate(renderer, size, memoryPropertyFlags, usageType, data);
535
536 if (updateType == BufferUpdateType::StorageRedefined)
537 {
538 mUsageType = usageType;
539 mMemoryPropertyFlags = memoryPropertyFlags;
540 ANGLE_TRY(GetMemoryTypeIndex(contextVk, size, memoryPropertyFlags, &mMemoryTypeIndex));
541 ANGLE_TRY(acquireBufferHelper(contextVk, size, mUsageType));
542 }
543 else if (size != static_cast<size_t>(mState.getSize()))
544 {
545 if (mBuffer.onBufferUserSizeChange(renderer))
546 {
            // If we have a dedicated VkBuffer created with the user size, we have to recreate
            // that VkBuffer when the user size changes, even if the storage is reused.  When this
            // happens, we must notify other objects that are observing this buffer, such as the
            // vertex array.  The reason the vertex array observes the buffer's storage change is
            // that it uses the VkBuffer directly; now that the VkBuffer has changed, the vertex
            // array needs to re-process it just as if the storage had been reallocated.
553 onStateChange(angle::SubjectMessage::InternalMemoryAllocationChanged);
554 }
555 }
556
557 if (data != nullptr)
558 {
559 BufferDataSource dataSource = {};
560 dataSource.data = data;
561
562 // Handle full-buffer updates similarly to glBufferSubData
563 ANGLE_TRY(setDataImpl(contextVk, size, dataSource, size, 0, updateType));
564 }
565
566 return angle::Result::Continue;
567 }
568
angle::Result BufferVk::setSubData(const gl::Context *context,
                                   gl::BufferBinding target,
                                   const void *data,
                                   size_t size,
                                   size_t offset)
574 {
575 ASSERT(mBuffer.valid());
576
577 BufferDataSource dataSource = {};
578 dataSource.data = data;
579
580 ContextVk *contextVk = vk::GetImpl(context);
581 return setDataImpl(contextVk, static_cast<size_t>(mState.getSize()), dataSource, size, offset,
582 BufferUpdateType::ContentsUpdate);
583 }
584
angle::Result BufferVk::copySubData(const gl::Context *context,
                                    BufferImpl *source,
                                    GLintptr sourceOffset,
                                    GLintptr destOffset,
                                    GLsizeiptr size)
590 {
591 ASSERT(mBuffer.valid());
592
593 ContextVk *contextVk = vk::GetImpl(context);
594 BufferVk *sourceVk = GetAs<BufferVk>(source);
595
596 BufferDataSource dataSource = {};
597 dataSource.buffer = &sourceVk->getBuffer();
598 dataSource.bufferOffset = static_cast<VkDeviceSize>(sourceOffset);
599
600 ASSERT(dataSource.buffer->valid());
601
602 return setDataImpl(contextVk, static_cast<size_t>(mState.getSize()), dataSource, size,
603 destOffset, BufferUpdateType::ContentsUpdate);
604 }
605
angle::Result BufferVk::allocStagingBuffer(ContextVk *contextVk,
                                           vk::MemoryCoherency coherency,
                                           VkDeviceSize size,
                                           uint8_t **mapPtr)
610 {
611 ASSERT(!mIsStagingBufferMapped);
612
613 if (mStagingBuffer.valid())
614 {
615 if (size <= mStagingBuffer.getSize() && IsCached(coherency) == mStagingBuffer.isCached() &&
616 contextVk->getRenderer()->hasResourceUseFinished(mStagingBuffer.getResourceUse()))
617 {
618 // If size is big enough and it is idle, then just reuse the existing staging buffer
619 *mapPtr = mStagingBuffer.getMappedMemory();
620 mIsStagingBufferMapped = true;
621 return angle::Result::Continue;
622 }
623 mStagingBuffer.release(contextVk->getRenderer());
624 }
625
626 ANGLE_TRY(
627 contextVk->initBufferForBufferCopy(&mStagingBuffer, static_cast<size_t>(size), coherency));
628 *mapPtr = mStagingBuffer.getMappedMemory();
629 mIsStagingBufferMapped = true;
630
631 return angle::Result::Continue;
632 }
633
angle::Result BufferVk::flushStagingBuffer(ContextVk *contextVk,
                                           VkDeviceSize offset,
                                           VkDeviceSize size)
637 {
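    // Flush the staging memory if it is non-coherent, then record a GPU copy from the staging
    // buffer into the destination range of mBuffer.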
638 vk::Renderer *renderer = contextVk->getRenderer();
639
640 ASSERT(mIsStagingBufferMapped);
641 ASSERT(mStagingBuffer.valid());
642
643 if (!mStagingBuffer.isCoherent())
644 {
645 ANGLE_TRY(mStagingBuffer.flush(renderer));
646 }
647
648 VkBufferCopy copyRegion = {mStagingBuffer.getOffset(), mBuffer.getOffset() + offset, size};
    ANGLE_TRY(CopyBuffers(contextVk, &mStagingBuffer, &mBuffer, 1, &copyRegion));
650
651 return angle::Result::Continue;
652 }
653
angle::Result BufferVk::handleDeviceLocalBufferMap(ContextVk *contextVk,
                                                   VkDeviceSize offset,
                                                   VkDeviceSize size,
                                                   uint8_t **mapPtr)
658 {
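    // The buffer is not host-visible: copy its contents into a host-visible staging buffer, wait
    // for the copy to finish, and hand the staging buffer's mapping back to the caller.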
659 vk::Renderer *renderer = contextVk->getRenderer();
660 ANGLE_TRY(
661 allocStagingBuffer(contextVk, vk::MemoryCoherency::CachedPreferCoherent, size, mapPtr));
662 ANGLE_TRY(mStagingBuffer.flush(renderer));
663
664 // Copy data from device local buffer to host visible staging buffer.
665 VkBufferCopy copyRegion = {mBuffer.getOffset() + offset, mStagingBuffer.getOffset(), size};
    ANGLE_TRY(CopyBuffers(contextVk, &mBuffer, &mStagingBuffer, 1, &copyRegion));
667 ANGLE_TRY(mStagingBuffer.waitForIdle(contextVk, "GPU stall due to mapping device local buffer",
668 RenderPassClosureReason::DeviceLocalBufferMap));
    // Since coherent memory is only preferred, we may end up with non-coherent memory.  Always
    // call invalidate here (it checks the memory flags before actually calling into the driver).
671 ANGLE_TRY(mStagingBuffer.invalidate(renderer));
672
673 return angle::Result::Continue;
674 }
675
angle::Result BufferVk::mapHostVisibleBuffer(ContextVk *contextVk,
                                             VkDeviceSize offset,
                                             GLbitfield access,
                                             uint8_t **mapPtr)
680 {
681 ANGLE_TRY(mBuffer.mapWithOffset(contextVk, mapPtr, static_cast<size_t>(offset)));
682
683 // Invalidate non-coherent for READ case.
684 if (!mBuffer.isCoherent() && (access & GL_MAP_READ_BIT) != 0)
685 {
686 ANGLE_TRY(mBuffer.invalidate(contextVk->getRenderer()));
687 }
688 return angle::Result::Continue;
689 }
690
angle::Result BufferVk::map(const gl::Context *context, GLenum access, void **mapPtr)
692 {
693 ASSERT(mBuffer.valid());
694 ASSERT(access == GL_WRITE_ONLY_OES);
695
696 return mapImpl(vk::GetImpl(context), GL_MAP_WRITE_BIT, mapPtr);
697 }
698
angle::Result BufferVk::mapRange(const gl::Context *context,
                                 size_t offset,
                                 size_t length,
                                 GLbitfield access,
                                 void **mapPtr)
704 {
705 return mapRangeImpl(vk::GetImpl(context), offset, length, access, mapPtr);
706 }
707
angle::Result BufferVk::mapImpl(ContextVk *contextVk, GLbitfield access, void **mapPtr)
709 {
710 return mapRangeImpl(contextVk, 0, static_cast<VkDeviceSize>(mState.getSize()), access, mapPtr);
711 }
712
angle::Result BufferVk::ghostMappedBuffer(ContextVk *contextVk,
                                          VkDeviceSize offset,
                                          VkDeviceSize length,
                                          GLbitfield access,
                                          void **mapPtr)
718 {
719 // We shouldn't get here if it is external memory
720 ASSERT(!isExternalBuffer());
721
722 ++contextVk->getPerfCounters().buffersGhosted;
723
724 // If we are creating a new buffer because the GPU is using it as read-only, then we
725 // also need to copy the contents of the previous buffer into the new buffer, in
726 // case the caller only updates a portion of the new buffer.
727 vk::BufferHelper src = std::move(mBuffer);
728 ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
729 BufferUsageType::Dynamic));
730
731 // Before returning the new buffer, map the previous buffer and copy its entire
732 // contents into the new buffer.
733 uint8_t *srcMapPtr = nullptr;
734 uint8_t *dstMapPtr = nullptr;
735 ANGLE_TRY(src.map(contextVk, &srcMapPtr));
736 ANGLE_TRY(mBuffer.map(contextVk, &dstMapPtr));
737
738 ASSERT(src.isCoherent());
739 ASSERT(mBuffer.isCoherent());
740
741 // No need to copy over [offset, offset + length), just around it
742 if ((access & GL_MAP_INVALIDATE_RANGE_BIT) != 0)
743 {
744 if (offset != 0)
745 {
746 memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(offset));
747 }
748 size_t totalSize = static_cast<size_t>(mState.getSize());
749 size_t remainingStart = static_cast<size_t>(offset + length);
750 size_t remainingSize = totalSize - remainingStart;
751 if (remainingSize != 0)
752 {
753 memcpy(dstMapPtr + remainingStart, srcMapPtr + remainingStart, remainingSize);
754 }
755 }
756 else
757 {
758 memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(mState.getSize()));
759 }
760
761 ANGLE_TRY(contextVk->releaseBufferAllocation(&src));
762
763 // Return the already mapped pointer with the offset adjustment to avoid the call to unmap().
764 *mapPtr = dstMapPtr + offset;
765
766 return angle::Result::Continue;
767 }
768
angle::Result BufferVk::mapRangeImpl(ContextVk *contextVk,
                                     VkDeviceSize offset,
                                     VkDeviceSize length,
                                     GLbitfield access,
                                     void **mapPtr)
774 {
775 vk::Renderer *renderer = contextVk->getRenderer();
776 ASSERT(mBuffer.valid());
777
    // Record the map call parameters in case this call is from ANGLE internally (the
    // access/offset/length may be inconsistent with mState).
780 mIsMappedForWrite = (access & GL_MAP_WRITE_BIT) != 0;
781 mMappedRange = RangeDeviceSize(offset, offset + length);
782
783 uint8_t **mapPtrBytes = reinterpret_cast<uint8_t **>(mapPtr);
784 bool hostVisible = mBuffer.isHostVisible();
785
786 // MAP_UNSYNCHRONIZED_BIT, so immediately map.
787 if ((access & GL_MAP_UNSYNCHRONIZED_BIT) != 0)
788 {
789 if (hostVisible)
790 {
791 return mapHostVisibleBuffer(contextVk, offset, access, mapPtrBytes);
792 }
793 return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
794 }
795
796 // Read case
797 if ((access & GL_MAP_WRITE_BIT) == 0)
798 {
        // If the app is not going to write, all we need is to ensure GPU writes are finished.
        // Concurrent reads from the CPU and the GPU are allowed.
801 if (!renderer->hasResourceUseFinished(mBuffer.getWriteResourceUse()))
802 {
803 // If there are unflushed write commands for the resource, flush them.
804 if (contextVk->hasUnsubmittedUse(mBuffer.getWriteResourceUse()))
805 {
806 ANGLE_TRY(contextVk->flushAndSubmitCommands(
807 nullptr, nullptr, RenderPassClosureReason::BufferWriteThenMap));
808 }
809 ANGLE_TRY(renderer->finishResourceUse(contextVk, mBuffer.getWriteResourceUse()));
810 }
811 if (hostVisible)
812 {
813 return mapHostVisibleBuffer(contextVk, offset, access, mapPtrBytes);
814 }
815 return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
816 }
817
818 // Write case
819 if (!hostVisible)
820 {
821 return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
822 }
823
824 // Write case, buffer not in use.
825 if (isExternalBuffer() || !isCurrentlyInUse(contextVk->getRenderer()))
826 {
827 return mapHostVisibleBuffer(contextVk, offset, access, mapPtrBytes);
828 }
829
830 // Write case, buffer in use.
831 //
832 // Here, we try to map the buffer, but it's busy. Instead of waiting for the GPU to
833 // finish, we just allocate a new buffer if:
834 // 1.) Caller has told us it doesn't care about previous contents, or
835 // 2.) The GPU won't write to the buffer.
836
837 bool rangeInvalidate = (access & GL_MAP_INVALIDATE_RANGE_BIT) != 0;
838 bool entireBufferInvalidated =
839 ((access & GL_MAP_INVALIDATE_BUFFER_BIT) != 0) ||
840 (rangeInvalidate && offset == 0 && static_cast<VkDeviceSize>(mState.getSize()) == length);
841
842 if (entireBufferInvalidated)
843 {
844 ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
845 BufferUsageType::Dynamic));
846 return mapHostVisibleBuffer(contextVk, offset, access, mapPtrBytes);
847 }
848
849 bool smallMapRange = (length < static_cast<VkDeviceSize>(mState.getSize()) / 2);
850
851 if (smallMapRange && rangeInvalidate)
852 {
853 ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::CachedNonCoherent,
854 static_cast<size_t>(length), mapPtrBytes));
855 return angle::Result::Continue;
856 }
857
858 if (renderer->hasResourceUseFinished(mBuffer.getWriteResourceUse()))
859 {
860 // This will keep the new buffer mapped and update mapPtr, so return immediately.
861 return ghostMappedBuffer(contextVk, offset, length, access, mapPtr);
862 }
863
864 // Write case (worst case, buffer in use for write)
865 ANGLE_TRY(mBuffer.waitForIdle(contextVk, "GPU stall due to mapping buffer in use by the GPU",
866 RenderPassClosureReason::BufferInUseWhenSynchronizedMap));
867 return mapHostVisibleBuffer(contextVk, offset, access, mapPtrBytes);
868 }
869
angle::Result BufferVk::unmap(const gl::Context *context, GLboolean *result)
871 {
872 ANGLE_TRY(unmapImpl(vk::GetImpl(context)));
873
874 // This should be false if the contents have been corrupted through external means. Vulkan
875 // doesn't provide such information.
876 *result = true;
877
878 return angle::Result::Continue;
879 }
880
angle::Result BufferVk::unmapImpl(ContextVk *contextVk)
882 {
883 ASSERT(mBuffer.valid());
884
885 if (mIsStagingBufferMapped)
886 {
887 ASSERT(mStagingBuffer.valid());
        // Either the buffer is device-local, or this is the small-range-map optimization.
889 if (mIsMappedForWrite)
890 {
891 ANGLE_TRY(flushStagingBuffer(contextVk, mMappedRange.low(), mMappedRange.length()));
892 }
893
894 mIsStagingBufferMapped = false;
895 }
896 else
897 {
898 ASSERT(mBuffer.isHostVisible());
899 vk::Renderer *renderer = contextVk->getRenderer();
900 if (!mBuffer.isCoherent())
901 {
902 ANGLE_TRY(mBuffer.flush(renderer));
903 }
904 mBuffer.unmap(renderer);
905 }
906
907 if (mIsMappedForWrite)
908 {
909 if (mMappedRange == RangeDeviceSize(0, static_cast<VkDeviceSize>(getSize())))
910 {
911 dataUpdated();
912 }
913 else
914 {
915 dataRangeUpdated(mMappedRange);
916 }
917 }
918
919 // Reset the mapping parameters
920 mIsMappedForWrite = false;
921 mMappedRange.invalidate();
922
923 return angle::Result::Continue;
924 }
925
angle::Result BufferVk::getSubData(const gl::Context *context,
                                   GLintptr offset,
                                   GLsizeiptr size,
                                   void *outData)
930 {
931 ASSERT(offset + size <= getSize());
932 ASSERT(mBuffer.valid());
933 ContextVk *contextVk = vk::GetImpl(context);
934 void *mapPtr;
935 ANGLE_TRY(mapRangeImpl(contextVk, offset, size, GL_MAP_READ_BIT, &mapPtr));
936 memcpy(outData, mapPtr, size);
937 return unmapImpl(contextVk);
938 }
939
angle::Result BufferVk::getIndexRange(const gl::Context *context,
                                      gl::DrawElementsType type,
                                      size_t offset,
                                      size_t count,
                                      bool primitiveRestartEnabled,
                                      gl::IndexRange *outRange)
946 {
947 ContextVk *contextVk = vk::GetImpl(context);
948 vk::Renderer *renderer = contextVk->getRenderer();
949
950 // This is a workaround for the mock ICD not implementing buffer memory state.
951 // Could be removed if https://github.com/KhronosGroup/Vulkan-Tools/issues/84 is fixed.
952 if (renderer->isMockICDEnabled())
953 {
954 outRange->start = 0;
955 outRange->end = 0;
956 return angle::Result::Continue;
957 }
958
959 ANGLE_TRACE_EVENT0("gpu.angle", "BufferVk::getIndexRange");
960
961 void *mapPtr;
962 ANGLE_TRY(mapRangeImpl(contextVk, offset, getSize(), GL_MAP_READ_BIT, &mapPtr));
963 *outRange = gl::ComputeIndexRange(type, mapPtr, count, primitiveRestartEnabled);
964 ANGLE_TRY(unmapImpl(contextVk));
965
966 return angle::Result::Continue;
967 }
968
angle::Result BufferVk::updateBuffer(ContextVk *contextVk,
                                     size_t bufferSize,
                                     const BufferDataSource &dataSource,
                                     size_t updateSize,
                                     size_t updateOffset)
974 {
    // To copy on the CPU, the destination must be host-visible.  The source should be either a
    // CPU pointer or a host-visible buffer that is not being written to by the GPU.
977 const bool shouldCopyOnCPU =
978 mBuffer.isHostVisible() &&
979 (dataSource.data != nullptr ||
980 ShouldUseCPUToCopyData(contextVk, *dataSource.buffer, updateSize, bufferSize));
981
982 if (shouldCopyOnCPU)
983 {
984 ANGLE_TRY(directUpdate(contextVk, dataSource, updateSize, updateOffset));
985 }
986 else
987 {
988 ANGLE_TRY(stagedUpdate(contextVk, dataSource, updateSize, updateOffset));
989 }
990 return angle::Result::Continue;
991 }
992
angle::Result BufferVk::directUpdate(ContextVk *contextVk,
                                     const BufferDataSource &dataSource,
                                     size_t size,
                                     size_t offset)
997 {
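    // CPU path: copy from either the user pointer or a mapped source buffer directly into the
    // mapped destination buffer.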
998 vk::Renderer *renderer = contextVk->getRenderer();
999 uint8_t *srcPointerMapped = nullptr;
1000 const uint8_t *srcPointer = nullptr;
1001 uint8_t *dstPointer = nullptr;
1002
1003 // Map the destination buffer.
1004 ASSERT(mBuffer.isHostVisible());
1005 ANGLE_TRY(mBuffer.mapWithOffset(contextVk, &dstPointer, offset));
1006 ASSERT(dstPointer);
1007
1008 // If source data is coming from a buffer, map it. If this is a self-copy, avoid double-mapping
1009 // the buffer.
1010 if (dataSource.data != nullptr)
1011 {
1012 srcPointer = static_cast<const uint8_t *>(dataSource.data);
1013 }
1014 else
1015 {
1016 ANGLE_TRY(dataSource.buffer->mapWithOffset(contextVk, &srcPointerMapped,
1017 static_cast<size_t>(dataSource.bufferOffset)));
1018 srcPointer = srcPointerMapped;
1019 }
1020
1021 memcpy(dstPointer, srcPointer, size);
1022
    // External memory may end up being non-coherent.
1024 if (!mBuffer.isCoherent())
1025 {
1026 ANGLE_TRY(mBuffer.flush(renderer, offset, size));
1027 }
1028
1029 // Unmap the destination and source buffers if applicable.
1030 //
    // If the buffer has dynamic usage then the intent is frequent client-side updates to the
    // buffer.  Don't unmap the buffer on the CPU; we will take care of unmapping when releasing
    // the buffer to either the renderer or mBufferFreeList.
1034 if (GetBufferUsageType(mState.getUsage()) == BufferUsageType::Static)
1035 {
1036 mBuffer.unmap(renderer);
1037 }
1038
1039 if (srcPointerMapped != nullptr)
1040 {
1041 dataSource.buffer->unmap(renderer);
1042 }
1043
1044 return angle::Result::Continue;
1045 }
1046
angle::Result BufferVk::stagedUpdate(ContextVk *contextVk,
                                     const BufferDataSource &dataSource,
                                     size_t size,
                                     size_t offset)
1051 {
1052 // If data is coming from a CPU pointer, stage it in a temporary staging buffer.
1053 // Otherwise, do a GPU copy directly from the given buffer.
1054 if (dataSource.data != nullptr)
1055 {
1056 uint8_t *mapPointer = nullptr;
1057 ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::CachedNonCoherent, size,
1058 &mapPointer));
1059 memcpy(mapPointer, dataSource.data, size);
1060 ANGLE_TRY(flushStagingBuffer(contextVk, offset, size));
1061 mIsStagingBufferMapped = false;
1062 }
1063 else
1064 {
1065 // Check for self-dependency.
1066 vk::CommandBufferAccess access;
1067 if (dataSource.buffer->getBufferSerial() == mBuffer.getBufferSerial())
1068 {
1069 access.onBufferSelfCopy(&mBuffer);
1070 }
1071 else
1072 {
1073 access.onBufferTransferRead(dataSource.buffer);
1074 access.onBufferTransferWrite(&mBuffer);
1075 }
1076
1077 vk::OutsideRenderPassCommandBuffer *commandBuffer;
1078 ANGLE_TRY(contextVk->getOutsideRenderPassCommandBuffer(access, &commandBuffer));
1079
1080 // Enqueue a copy command on the GPU.
1081 const VkBufferCopy copyRegion = {dataSource.bufferOffset + dataSource.buffer->getOffset(),
1082 static_cast<VkDeviceSize>(offset) + mBuffer.getOffset(),
1083 static_cast<VkDeviceSize>(size)};
1084
        commandBuffer->copyBuffer(dataSource.buffer->getBuffer(), mBuffer.getBuffer(), 1,
                                  &copyRegion);
1087 }
1088
1089 return angle::Result::Continue;
1090 }
1091
angle::Result BufferVk::acquireAndUpdate(ContextVk *contextVk,
                                         size_t bufferSize,
                                         const BufferDataSource &dataSource,
                                         size_t updateSize,
                                         size_t updateOffset,
                                         BufferUpdateType updateType)
1098 {
1099 // We shouldn't get here if this is external memory
1100 ASSERT(!isExternalBuffer());
1101 // If StorageRedefined, we cannot use mState.getSize() to allocate a new buffer.
1102 ASSERT(updateType != BufferUpdateType::StorageRedefined);
1103 ASSERT(mBuffer.valid());
1104 ASSERT(mBuffer.getSize() >= bufferSize);
1105
1106 // Here we acquire a new BufferHelper and directUpdate() the new buffer.
1107 // If the subData size was less than the buffer's size we additionally enqueue
1108 // a GPU copy of the remaining regions from the old mBuffer to the new one.
1109 vk::BufferHelper prevBuffer;
1110 size_t offsetAfterSubdata = (updateOffset + updateSize);
1111 bool updateRegionBeforeSubData = mHasValidData && (updateOffset > 0);
1112 bool updateRegionAfterSubData = mHasValidData && (offsetAfterSubdata < bufferSize);
1113
1114 uint8_t *prevMapPtrBeforeSubData = nullptr;
1115 uint8_t *prevMapPtrAfterSubData = nullptr;
1116 if (updateRegionBeforeSubData || updateRegionAfterSubData)
1117 {
1118 prevBuffer = std::move(mBuffer);
1119
1120 // The total bytes that we need to copy from old buffer to new buffer
1121 size_t copySize = bufferSize - updateSize;
1122
1123 // If the buffer is host visible and the GPU is not writing to it, we use the CPU to do the
1124 // copy. We need to save the source buffer pointer before we acquire a new buffer.
1125 if (ShouldUseCPUToCopyData(contextVk, prevBuffer, copySize, bufferSize))
1126 {
1127 uint8_t *mapPointer = nullptr;
1128 // prevBuffer buffer will be recycled (or released and unmapped) by acquireBufferHelper
1129 ANGLE_TRY(prevBuffer.map(contextVk, &mapPointer));
1130 ASSERT(mapPointer);
1131 prevMapPtrBeforeSubData = mapPointer;
1132 prevMapPtrAfterSubData = mapPointer + offsetAfterSubdata;
1133 }
1134 }
1135
1136 ANGLE_TRY(acquireBufferHelper(contextVk, bufferSize, BufferUsageType::Dynamic));
1137 ANGLE_TRY(updateBuffer(contextVk, bufferSize, dataSource, updateSize, updateOffset));
1138
1139 constexpr int kMaxCopyRegions = 2;
1140 angle::FixedVector<VkBufferCopy, kMaxCopyRegions> copyRegions;
1141
1142 if (updateRegionBeforeSubData)
1143 {
1144 if (prevMapPtrBeforeSubData)
1145 {
1146 BufferDataSource beforeSrc = {};
1147 beforeSrc.data = prevMapPtrBeforeSubData;
1148
1149 ANGLE_TRY(directUpdate(contextVk, beforeSrc, updateOffset, 0));
1150 }
1151 else
1152 {
1153 copyRegions.push_back({prevBuffer.getOffset(), mBuffer.getOffset(), updateOffset});
1154 }
1155 }
1156
1157 if (updateRegionAfterSubData)
1158 {
1159 size_t copySize = bufferSize - offsetAfterSubdata;
1160 if (prevMapPtrAfterSubData)
1161 {
1162 BufferDataSource afterSrc = {};
1163 afterSrc.data = prevMapPtrAfterSubData;
1164
1165 ANGLE_TRY(directUpdate(contextVk, afterSrc, copySize, offsetAfterSubdata));
1166 }
1167 else
1168 {
1169 copyRegions.push_back({prevBuffer.getOffset() + offsetAfterSubdata,
1170 mBuffer.getOffset() + offsetAfterSubdata, copySize});
1171 }
1172 }
1173
1174 if (!copyRegions.empty())
1175 {
1176 ANGLE_TRY(CopyBuffers(contextVk, &prevBuffer, &mBuffer,
1177 static_cast<uint32_t>(copyRegions.size()), copyRegions.data()));
1178 }
1179
1180 if (prevBuffer.valid())
1181 {
1182 ANGLE_TRY(contextVk->releaseBufferAllocation(&prevBuffer));
1183 }
1184
1185 return angle::Result::Continue;
1186 }
1187
angle::Result BufferVk::setDataImpl(ContextVk *contextVk,
                                    size_t bufferSize,
                                    const BufferDataSource &dataSource,
                                    size_t updateSize,
                                    size_t updateOffset,
                                    BufferUpdateType updateType)
1194 {
1195 // if the buffer is currently in use
1196 // if it isn't an external buffer and not a self-copy and sub data size meets threshold
1197 // acquire a new BufferHelper from the pool
1198 // else stage the update
1199 // else update the buffer directly
1200 if (isCurrentlyInUse(contextVk->getRenderer()))
1201 {
1202 // The acquire-and-update path creates a new buffer, which is sometimes more efficient than
1203 // trying to update the existing one. Firstly, this is not done in the following
1204 // situations:
1205 //
1206 // - For external buffers, the underlying storage cannot be reallocated.
1207 // - If storage has just been redefined, this path is not taken because a new buffer has
1208 // already been created by the caller. Besides, this path uses mState.getSize(), which the
1209 // frontend updates only after this call in situations where the storage may be redefined.
1210 // This could happen if the buffer memory is DEVICE_LOCAL and
1211 // renderer->getFeatures().allocateNonZeroMemory.enabled is true. In this case a
1212 // copyToBuffer is immediately issued after allocation and isCurrentlyInUse will be true.
1213 // - If this is a self copy through glCopyBufferSubData, |dataSource| will contain a
1214 // reference to |mBuffer|, in which case source information is lost after acquiring a new
1215 // buffer.
1216 //
        // Additionally, this path is taken only if any of the following conditions is true:
1218 //
1219 // - If BufferVk does not have any valid data. This means that there is no data to be
1220 // copied from the old buffer to the new one after acquiring it. This could happen when
1221 // the application calls glBufferData with the same size and we reuse the existing buffer
1222 // storage.
1223 // - If the buffer is used read-only in the current render pass. In this case, acquiring a
1224 // new buffer is preferred to avoid breaking the render pass.
1225 // - The update modifies a significant portion of the buffer
1226 // - The preferCPUForBufferSubData feature is enabled.
1227 //
1228 const bool canAcquireAndUpdate = !isExternalBuffer() &&
1229 updateType != BufferUpdateType::StorageRedefined &&
1230 !IsSelfCopy(dataSource, mBuffer);
1231 if (canAcquireAndUpdate &&
1232 (!mHasValidData || ShouldAvoidRenderPassBreakOnUpdate(contextVk, mBuffer, bufferSize) ||
1233 ShouldAllocateNewMemoryForUpdate(contextVk, updateSize, bufferSize)))
1234 {
1235 ANGLE_TRY(acquireAndUpdate(contextVk, bufferSize, dataSource, updateSize, updateOffset,
1236 updateType));
1237 }
1238 else
1239 {
1240 if (canAcquireAndUpdate && RenderPassUsesBufferForReadOnly(contextVk, mBuffer))
1241 {
1242 ANGLE_VK_PERF_WARNING(contextVk, GL_DEBUG_SEVERITY_LOW,
1243 "Breaking the render pass on small upload to large buffer");
1244 }
1245
1246 ANGLE_TRY(stagedUpdate(contextVk, dataSource, updateSize, updateOffset));
1247 }
1248 }
1249 else
1250 {
1251 ANGLE_TRY(updateBuffer(contextVk, bufferSize, dataSource, updateSize, updateOffset));
1252 }
1253
1254 // Update conversions.
1255 if (updateOffset == 0 && updateSize == bufferSize)
1256 {
1257 dataUpdated();
1258 }
1259 else
1260 {
1261 dataRangeUpdated(RangeDeviceSize(updateOffset, updateOffset + updateSize));
1262 }
1263
1264 return angle::Result::Continue;
1265 }
1266
VertexConversionBuffer *BufferVk::getVertexConversionBuffer(
    vk::Renderer *renderer,
    const VertexConversionBuffer::CacheKey &cacheKey)
1270 {
1271 for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
1272 {
1273 if (buffer.match(cacheKey))
1274 {
1275 ASSERT(buffer.valid());
1276 return &buffer;
1277 }
1278 }
1279
1280 mVertexConversionBuffers.emplace_back(renderer, cacheKey);
1281 return &mVertexConversionBuffers.back();
1282 }
1283
void BufferVk::dataRangeUpdated(const RangeDeviceSize &range)
1285 {
1286 for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
1287 {
1288 buffer.addDirtyBufferRange(range);
1289 }
1290 // Now we have valid data
1291 mHasValidData = true;
1292 }
1293
void BufferVk::dataUpdated()
1295 {
1296 for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
1297 {
1298 buffer.setEntireBufferDirty();
1299 }
1300 // Now we have valid data
1301 mHasValidData = true;
1302 }
1303
void BufferVk::onDataChanged()
1305 {
1306 dataUpdated();
1307 }
1308
angle::Result BufferVk::acquireBufferHelper(ContextVk *contextVk,
                                            size_t sizeInBytes,
                                            BufferUsageType usageType)
1312 {
1313 vk::Renderer *renderer = contextVk->getRenderer();
1314 size_t size = roundUpPow2(sizeInBytes, kBufferSizeGranularity);
1315 size_t alignment = renderer->getDefaultBufferAlignment();
1316
1317 if (mBuffer.valid())
1318 {
1319 ANGLE_TRY(contextVk->releaseBufferAllocation(&mBuffer));
1320 }
1321
1322 // Allocate the buffer directly
1323 ANGLE_TRY(
1324 contextVk->initBufferAllocation(&mBuffer, mMemoryTypeIndex, size, alignment, usageType));
1325
1326 // Tell the observers (front end) that a new buffer was created, so the necessary
1327 // dirty bits can be set. This allows the buffer views pointing to the old buffer to
1328 // be recreated and point to the new buffer, along with updating the descriptor sets
1329 // to use the new buffer.
1330 onStateChange(angle::SubjectMessage::InternalMemoryAllocationChanged);
1331
1332 return angle::Result::Continue;
1333 }
1334
bool BufferVk::isCurrentlyInUse(vk::Renderer *renderer) const
1336 {
1337 return !renderer->hasResourceUseFinished(mBuffer.getResourceUse());
1338 }
1339
1340 // When a buffer is being completely changed, calculate whether it's better to allocate a new buffer
1341 // or overwrite the existing one.
BufferUpdateType BufferVk::calculateBufferUpdateTypeOnFullUpdate(
    vk::Renderer *renderer,
    size_t size,
    VkMemoryPropertyFlags memoryPropertyFlags,
    BufferUsageType usageType,
    const void *data) const
1348 {
1349 // 0-sized updates should be no-op'd before this call.
1350 ASSERT(size > 0);
1351
1352 // If there is no existing buffer, this cannot be a content update.
1353 if (!mBuffer.valid())
1354 {
1355 return BufferUpdateType::StorageRedefined;
1356 }
1357
1358 const bool inUseAndRespecifiedWithoutData = data == nullptr && isCurrentlyInUse(renderer);
1359 bool redefineStorage = shouldRedefineStorage(renderer, usageType, memoryPropertyFlags, size);
1360
1361 // Create a new buffer if the buffer is busy and it's being redefined without data.
1362 // Additionally, a new buffer is created if any of the parameters change (memory type, usage,
1363 // size).
1364 return redefineStorage || inUseAndRespecifiedWithoutData ? BufferUpdateType::StorageRedefined
1365 : BufferUpdateType::ContentsUpdate;
1366 }
1367
bool BufferVk::shouldRedefineStorage(vk::Renderer *renderer,
                                     BufferUsageType usageType,
                                     VkMemoryPropertyFlags memoryPropertyFlags,
                                     size_t size) const
1372 {
1373 if (mUsageType != usageType)
1374 {
1375 return true;
1376 }
1377
1378 if (mMemoryPropertyFlags != memoryPropertyFlags)
1379 {
1380 return true;
1381 }
1382
1383 if (size > mBuffer.getSize())
1384 {
1385 return true;
1386 }
1387 else
1388 {
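        // The requested size is not larger than the current buffer, but padding (when
        // padBuffersToMaxVertexAttribStride is enabled) and alignment may still push the required
        // allocation past the existing storage.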
1389 size_t paddedBufferSize =
1390 (renderer->getFeatures().padBuffersToMaxVertexAttribStride.enabled)
1391 ? (size + static_cast<size_t>(renderer->getMaxVertexAttribStride()))
1392 : size;
1393 size_t sizeInBytes = roundUpPow2(paddedBufferSize, kBufferSizeGranularity);
1394 size_t alignedSize = roundUp(sizeInBytes, renderer->getDefaultBufferAlignment());
1395 if (alignedSize > mBuffer.getSize())
1396 {
1397 return true;
1398 }
1399 }
1400
1401 return false;
1402 }
1403 } // namespace rx
1404