//
// Copyright 2016 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// BufferVk.cpp:
//    Implements the class methods for BufferVk.
//

#include "libANGLE/renderer/vulkan/BufferVk.h"

#include "common/FixedVector.h"
#include "common/debug.h"
#include "common/mathutil.h"
#include "common/utilities.h"
#include "libANGLE/Context.h"
#include "libANGLE/renderer/vulkan/ContextVk.h"
#include "libANGLE/renderer/vulkan/vk_renderer.h"

namespace rx
{
VkBufferUsageFlags GetDefaultBufferUsageFlags(vk::Renderer *renderer)
{
    // We could potentially use multiple backing buffers for different usages.
    // For now keep a single buffer with all relevant usage flags.
    VkBufferUsageFlags defaultBufferUsageFlags =
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
        VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
        VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
    if (renderer->getFeatures().supportsTransformFeedbackExtension.enabled)
    {
        defaultBufferUsageFlags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
                                   VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT;
    }
    return defaultBufferUsageFlags;
}

namespace
{
constexpr VkMemoryPropertyFlags kDeviceLocalFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
constexpr VkMemoryPropertyFlags kDeviceLocalHostCoherentFlags =
    (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
constexpr VkMemoryPropertyFlags kHostCachedFlags =
    (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
     VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
constexpr VkMemoryPropertyFlags kHostUncachedFlags =
    (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
constexpr VkMemoryPropertyFlags kHostCachedNonCoherentFlags =
    (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT);

// Vertex attribute buffers are used as storage buffers for conversion in compute, where access to
// the buffer is made in 4-byte chunks.  Assume the size of the buffer is 4k+n where n is in
// [1, 4).  On some hardware, reading 4 bytes from address 4k returns 0, making it impossible to
// read the last n bytes.  By rounding the buffer size up to a multiple of 4, the problem is
// alleviated.
constexpr size_t kBufferSizeGranularity = 4;
static_assert(gl::isPow2(kBufferSizeGranularity), "use as alignment, must be power of two");

// Start with a fairly small buffer size. We can increase this dynamically as we convert more data.
constexpr size_t kConvertedArrayBufferInitialSize = 1024 * 8;

// Buffers that have a static usage pattern will be allocated in device-local memory to speed up
// access to and from the GPU.  Buffers with a dynamic usage pattern, or that are frequently
// mapped, will instead request host-cached memory to speed up access from the CPU.
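// For example, with all related feature flags disabled: StaticDraw maps to kDeviceLocalFlags,
// StreamDraw to kHostUncachedFlags, DynamicRead to kHostCachedFlags, and any PixelUnpack binding
// to kHostCachedFlags regardless of usage.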
VkMemoryPropertyFlags GetPreferredMemoryType(vk::Renderer *renderer,
                                             gl::BufferBinding target,
                                             gl::BufferUsage usage)
{
    if (target == gl::BufferBinding::PixelUnpack)
    {
        return kHostCachedFlags;
    }

    switch (usage)
    {
        case gl::BufferUsage::StaticCopy:
        case gl::BufferUsage::StaticDraw:
        case gl::BufferUsage::StaticRead:
            // For static usage, request device-local memory
            return renderer->getFeatures().preferDeviceLocalMemoryHostVisible.enabled
                       ? kDeviceLocalHostCoherentFlags
                       : kDeviceLocalFlags;
        case gl::BufferUsage::DynamicDraw:
        case gl::BufferUsage::StreamDraw:
            // For non-static usage where the CPU performs a write-only access, request
            // host-uncached memory
            return renderer->getFeatures().preferHostCachedForNonStaticBufferUsage.enabled
                       ? kHostCachedFlags
                       : kHostUncachedFlags;
        case gl::BufferUsage::DynamicCopy:
        case gl::BufferUsage::DynamicRead:
        case gl::BufferUsage::StreamCopy:
        case gl::BufferUsage::StreamRead:
            // For all other types of usage, request host-cached memory
            return renderer->getFeatures()
                           .preferCachedNoncoherentForDynamicStreamBufferUsage.enabled
                       ? kHostCachedNonCoherentFlags
                       : kHostCachedFlags;
        default:
            UNREACHABLE();
            return kHostCachedFlags;
    }
}

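// For example, when preferDeviceLocalMemoryHostVisible is disabled, any map access bit
// (GL_MAP_READ_BIT, GL_MAP_WRITE_BIT, or GL_MAP_PERSISTENT_BIT_EXT) selects kHostCachedFlags.
// Otherwise device-local memory is selected: host-coherent if the buffer is mappable, updatable
// (GL_DYNAMIC_STORAGE_BIT_EXT), or external.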
VkMemoryPropertyFlags GetStorageMemoryType(vk::Renderer *renderer,
                                           GLbitfield storageFlags,
                                           bool externalBuffer)
{
    const bool hasMapAccess =
        (storageFlags & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT_EXT)) != 0;

    if (renderer->getFeatures().preferDeviceLocalMemoryHostVisible.enabled)
    {
        const bool canUpdate = (storageFlags & GL_DYNAMIC_STORAGE_BIT_EXT) != 0;
        if (canUpdate || hasMapAccess || externalBuffer)
        {
            // We currently allocate coherent memory for persistently mapped buffers.
            // GL_EXT_buffer_storage allows non-coherent memory, but currently the implementation of
            // |glMemoryBarrier(CLIENT_MAPPED_BUFFER_BARRIER_BIT_EXT)| relies on the mapping being
            // coherent.
            //
            // If persistently mapped buffers ever use non-coherent memory, then said
            // |glMemoryBarrier| call must result in |vkInvalidateMappedMemoryRanges| for all
            // persistently mapped buffers.
            return kDeviceLocalHostCoherentFlags;
        }
        return kDeviceLocalFlags;
    }

    return hasMapAccess ? kHostCachedFlags : kDeviceLocalFlags;
}

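// For example, with a 100 KB buffer, an update larger than 50 KB always acquires new memory.
// With preferCPUForBufferSubData enabled, any update to a buffer smaller than 32 KB, or an
// update larger than 1/8 of the buffer size (12.5 KB here), also does.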
bool ShouldAllocateNewMemoryForUpdate(ContextVk *contextVk, size_t subDataSize, size_t bufferSize)
{
    // A sub-data update with size > 50% of buffer size meets the threshold to acquire a new
    // BufferHelper from the pool.
    size_t halfBufferSize = bufferSize / 2;
    if (subDataSize > halfBufferSize)
    {
        return true;
    }

    // If the GPU is busy, it is possible to use the CPU for updating sub-data instead, but since
    // that would need to create a duplicate of the buffer, a large enough buffer copy could result
    // in a performance regression.
    if (contextVk->getFeatures().preferCPUForBufferSubData.enabled)
    {
        // If the buffer is small enough, the cost of the barrier associated with the GPU copy
        // likely exceeds the overhead of the CPU copy. Duplicating the buffer allows the CPU to
        // write to the buffer immediately, thus avoiding the barrier that prevents parallel
        // operation.
        constexpr size_t kCpuCopyBufferSizeThreshold = 32 * 1024;
        if (bufferSize < kCpuCopyBufferSizeThreshold)
        {
            return true;
        }

        // To use the CPU for the sub-data update in larger buffers, the update should be sizable
        // enough compared to the whole buffer size. The threshold is chosen based on perf data
        // collected from Pixel devices. At 1/8 of the buffer size, the CPU overhead associated
        // with the extra data copy weighs less than the serialization caused by barriers.
        size_t subDataThreshold = bufferSize / 8;
        if (subDataSize > subDataThreshold)
        {
            return true;
        }
    }

    return false;
}

bool ShouldUseCPUToCopyData(ContextVk *contextVk,
                            const vk::BufferHelper &buffer,
                            size_t copySize,
                            size_t bufferSize)
{
    vk::Renderer *renderer = contextVk->getRenderer();

    // If the buffer is not host-visible, or if it's busy on the GPU, it can't be read from the
    // CPU.
    if (!buffer.isHostVisible() || !renderer->hasResourceUseFinished(buffer.getWriteResourceUse()))
    {
        return false;
    }

    // For some GPUs (e.g. ARM) we always prefer using the CPU for the copy instead of the GPU, to
    // avoid pipeline bubbles. If the GPU is currently busy and the copy size is below a certain
    // threshold, we choose the CPU copy over the GPU copy to achieve better parallelism.
    return renderer->getFeatures().preferCPUForBufferSubData.enabled ||
           (renderer->isCommandQueueBusy() &&
            copySize < renderer->getMaxCopyBytesUsingCPUWhenPreservingBufferData());
}

bool RenderPassUsesBufferForReadOnly(ContextVk *contextVk, const vk::BufferHelper &buffer)
{
    if (!contextVk->hasActiveRenderPass())
    {
        return false;
    }

    vk::RenderPassCommandBufferHelper &renderPassCommands =
        contextVk->getStartedRenderPassCommands();
    return renderPassCommands.usesBuffer(buffer) && !renderPassCommands.usesBufferForWrite(buffer);
}

// If a render pass is open which uses the buffer in read-only mode, a render pass break can be
// avoided by using acquireAndUpdate.  This can be costly however if the update is very small, and
// is limited to platforms where a render pass break is itself costly (i.e. tile-based renderers).
bool ShouldAvoidRenderPassBreakOnUpdate(ContextVk *contextVk,
                                        const vk::BufferHelper &buffer,
                                        size_t bufferSize)
{
    // Only avoid breaking the render pass if the buffer is not so big that duplicating it would
    // outweigh the cost of breaking the render pass.  A value of 1KB is chosen as a temporary
    // heuristic, and can be adjusted when such a situation is encountered.
    constexpr size_t kPreferDuplicateOverRenderPassBreakMaxBufferSize = 1024;
    if (!contextVk->getFeatures().preferCPUForBufferSubData.enabled ||
        bufferSize > kPreferDuplicateOverRenderPassBreakMaxBufferSize)
    {
        return false;
    }

    return RenderPassUsesBufferForReadOnly(contextVk, buffer);
}

BufferUsageType GetBufferUsageType(gl::BufferUsage usage)
{
    return (usage == gl::BufferUsage::DynamicDraw || usage == gl::BufferUsage::DynamicCopy ||
            usage == gl::BufferUsage::DynamicRead)
               ? BufferUsageType::Dynamic
               : BufferUsageType::Static;
}

angle::Result GetMemoryTypeIndex(ContextVk *contextVk,
                                 VkDeviceSize size,
                                 VkMemoryPropertyFlags memoryPropertyFlags,
                                 uint32_t *memoryTypeIndexOut)
{
    vk::Renderer *renderer         = contextVk->getRenderer();
    const vk::Allocator &allocator = renderer->getAllocator();

    bool persistentlyMapped = renderer->getFeatures().persistentlyMappedBuffers.enabled;
    VkBufferUsageFlags defaultBufferUsageFlags = GetDefaultBufferUsageFlags(renderer);

    VkBufferCreateInfo createInfo    = {};
    createInfo.sType                 = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    createInfo.flags                 = 0;
    createInfo.size                  = size;
    createInfo.usage                 = defaultBufferUsageFlags;
    createInfo.sharingMode           = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.queueFamilyIndexCount = 0;
    createInfo.pQueueFamilyIndices   = nullptr;

    // Host-visible is required; all other bits are preferred (i.e., optional).
    VkMemoryPropertyFlags requiredFlags =
        (memoryPropertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
    VkMemoryPropertyFlags preferredFlags =
        (memoryPropertyFlags & (~VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
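    // For example, requesting kHostCachedFlags here splits into required = HOST_VISIBLE and
    // preferred = HOST_COHERENT | HOST_CACHED, so the allocation can still succeed on any
    // host-visible memory type even if no cached/coherent one is available.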

    // Check that the allocation is not too large.
    uint32_t memoryTypeIndex = 0;
    ANGLE_VK_TRY(contextVk, allocator.findMemoryTypeIndexForBufferInfo(
                                createInfo, requiredFlags, preferredFlags, persistentlyMapped,
                                &memoryTypeIndex));
    *memoryTypeIndexOut = memoryTypeIndex;

    return angle::Result::Continue;
}

bool IsSelfCopy(const BufferDataSource &dataSource, const vk::BufferHelper &destination)
{
    return dataSource.data == nullptr &&
           dataSource.buffer->getBufferSerial() == destination.getBufferSerial();
}

angle::Result CopyBuffers(ContextVk *contextVk,
                          vk::BufferHelper *srcBuffer,
                          vk::BufferHelper *dstBuffer,
                          uint32_t regionCount,
                          const VkBufferCopy *copyRegions)
{
    ASSERT(srcBuffer->valid() && dstBuffer->valid());

    // Enqueue a copy command on the GPU.
    vk::CommandBufferAccess access;
    if (srcBuffer->getBufferSerial() == dstBuffer->getBufferSerial())
    {
        access.onBufferSelfCopy(srcBuffer);
    }
    else
    {
        access.onBufferTransferRead(srcBuffer);
        access.onBufferTransferWrite(dstBuffer);
    }

    vk::OutsideRenderPassCommandBuffer *commandBuffer;
    ANGLE_TRY(contextVk->getOutsideRenderPassCommandBuffer(access, &commandBuffer));

    commandBuffer->copyBuffer(srcBuffer->getBuffer(), dstBuffer->getBuffer(), regionCount,
                              copyRegions);

    return angle::Result::Continue;
}
}  // namespace

// ConversionBuffer implementation.
ConversionBuffer::ConversionBuffer(vk::Renderer *renderer,
                                   VkBufferUsageFlags usageFlags,
                                   size_t initialSize,
                                   size_t alignment,
                                   bool hostVisible)
    : mEntireBufferDirty(true)
{
    mData = std::make_unique<vk::BufferHelper>();
    mDirtyRanges.reserve(32);
}

ConversionBuffer::~ConversionBuffer()
{
    ASSERT(!mData || !mData->valid());
    mDirtyRanges.clear();
}

ConversionBuffer::ConversionBuffer(ConversionBuffer &&other) = default;

// The dirty ranges may overlap or be contiguous. To reduce redundant conversion, we try to
// consolidate them: first sort the ranges by their low end, then walk them, checking each range
// against the previous one and merging where possible. Merging removes the overlapped area and
// reduces the number of ranges.
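// For example, {[8, 16), [0, 10), [20, 24)} sorts to {[0, 10), [8, 16), [20, 24)}; [0, 10) then
// merges with [8, 16) into [0, 16) (invalidating the latter), leaving {[0, 16), [20, 24)}.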
void ConversionBuffer::consolidateDirtyRanges()
{
    ASSERT(!mEntireBufferDirty);

    auto comp = [](const RangeDeviceSize &a, const RangeDeviceSize &b) -> bool {
        return a.low() < b.low();
    };
    std::sort(mDirtyRanges.begin(), mDirtyRanges.end(), comp);

    size_t prev = 0;
    for (size_t i = 1; i < mDirtyRanges.size(); i++)
    {
        if (mDirtyRanges[prev].intersectsOrContinuous(mDirtyRanges[i]))
        {
            mDirtyRanges[prev].merge(mDirtyRanges[i]);
            mDirtyRanges[i].invalidate();
        }
        else
        {
            prev = i;
        }
    }
}

// VertexConversionBuffer implementation.
VertexConversionBuffer::VertexConversionBuffer(vk::Renderer *renderer, const CacheKey &cacheKey)
    : ConversionBuffer(renderer,
                       vk::kVertexBufferUsageFlags,
                       kConvertedArrayBufferInitialSize,
                       vk::kVertexBufferAlignment,
                       cacheKey.hostVisible),
      mCacheKey(cacheKey)
{}

VertexConversionBuffer::VertexConversionBuffer(VertexConversionBuffer &&other) = default;

VertexConversionBuffer::~VertexConversionBuffer() = default;

// BufferVk implementation.
BufferVk::BufferVk(const gl::BufferState &state)
    : BufferImpl(state),
      mClientBuffer(nullptr),
      mMemoryTypeIndex(0),
      mMemoryPropertyFlags(0),
      mIsStagingBufferMapped(false),
      mHasValidData(false),
      mIsMappedForWrite(false),
      mUsageType(BufferUsageType::Static)
{
    mMappedRange.invalidate();
}

BufferVk::~BufferVk() {}

void BufferVk::destroy(const gl::Context *context)
{
    ContextVk *contextVk = vk::GetImpl(context);

    (void)release(contextVk);
}

void BufferVk::releaseConversionBuffers(vk::Renderer *renderer)
{
    for (ConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.release(renderer);
    }
    mVertexConversionBuffers.clear();
}

angle::Result BufferVk::release(ContextVk *contextVk)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    if (mBuffer.valid())
    {
        ANGLE_TRY(contextVk->releaseBufferAllocation(&mBuffer));
    }
    if (mStagingBuffer.valid())
    {
        mStagingBuffer.release(renderer);
    }

    releaseConversionBuffers(renderer);

    return angle::Result::Continue;
}

angle::Result BufferVk::setExternalBufferData(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags)
{
    ContextVk *contextVk = vk::GetImpl(context);

    // Release and re-create the memory and buffer.
    ANGLE_TRY(release(contextVk));

    VkBufferCreateInfo createInfo    = {};
    createInfo.sType                 = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    createInfo.flags                 = 0;
    createInfo.size                  = size;
    createInfo.usage                 = GetDefaultBufferUsageFlags(contextVk->getRenderer());
    createInfo.sharingMode           = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.queueFamilyIndexCount = 0;
    createInfo.pQueueFamilyIndices   = nullptr;

    return mBuffer.initExternal(contextVk, memoryPropertyFlags, createInfo, clientBuffer);
}

angle::Result BufferVk::setDataWithUsageFlags(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              const void *data,
                                              size_t size,
                                              gl::BufferUsage usage,
                                              GLbitfield flags)
{
    ContextVk *contextVk                      = vk::GetImpl(context);
    VkMemoryPropertyFlags memoryPropertyFlags = 0;
    bool persistentMapRequired                = false;
    const bool isExternalBuffer               = clientBuffer != nullptr;

    switch (usage)
    {
        case gl::BufferUsage::InvalidEnum:
        {
            // glBufferStorage API call
            memoryPropertyFlags =
                GetStorageMemoryType(contextVk->getRenderer(), flags, isExternalBuffer);
            persistentMapRequired = (flags & GL_MAP_PERSISTENT_BIT_EXT) != 0;
            break;
        }
        default:
        {
            // glBufferData API call
            memoryPropertyFlags = GetPreferredMemoryType(contextVk->getRenderer(), target, usage);
            break;
        }
    }

    if (isExternalBuffer)
    {
        ANGLE_TRY(setExternalBufferData(context, target, clientBuffer, size, memoryPropertyFlags));
        if (!mBuffer.isHostVisible())
        {
            // If the external buffer's memory does not have the host-visible property, we cannot
            // support a persistent map request.
            ANGLE_VK_CHECK(contextVk, !persistentMapRequired, VK_ERROR_MEMORY_MAP_FAILED);
        }

        mClientBuffer = clientBuffer;

        return angle::Result::Continue;
    }
    return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags, usage);
}

angle::Result BufferVk::setData(const gl::Context *context,
                                gl::BufferBinding target,
                                const void *data,
                                size_t size,
                                gl::BufferUsage usage)
{
    ContextVk *contextVk = vk::GetImpl(context);
    // Assume host visible/coherent memory available.
    VkMemoryPropertyFlags memoryPropertyFlags =
        GetPreferredMemoryType(contextVk->getRenderer(), target, usage);
    return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags, usage);
}

angle::Result BufferVk::setDataWithMemoryType(const gl::Context *context,
                                              gl::BufferBinding target,
                                              const void *data,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags,
                                              gl::BufferUsage usage)
{
    ContextVk *contextVk   = vk::GetImpl(context);
    vk::Renderer *renderer = contextVk->getRenderer();

    // Since the buffer is being entirely reinitialized, reset the valid-data flag. If the caller
    // passed in data to fill the buffer, the flag will be updated when the data is copied to the
    // buffer.
    mHasValidData = false;

    if (size == 0)
    {
        // Nothing to do.
        return angle::Result::Continue;
    }

    if (!mVertexConversionBuffers.empty())
    {
        for (ConversionBuffer &buffer : mVertexConversionBuffers)
        {
            buffer.clearDirty();
        }
    }

    const BufferUsageType usageType = GetBufferUsageType(usage);
    const BufferUpdateType updateType =
        calculateBufferUpdateTypeOnFullUpdate(renderer, size, memoryPropertyFlags, usageType, data);

    if (updateType == BufferUpdateType::StorageRedefined)
    {
        mUsageType           = usageType;
        mMemoryPropertyFlags = memoryPropertyFlags;
        ANGLE_TRY(GetMemoryTypeIndex(contextVk, size, memoryPropertyFlags, &mMemoryTypeIndex));
        ANGLE_TRY(acquireBufferHelper(contextVk, size, mUsageType));
    }
    else if (size != static_cast<size_t>(mState.getSize()))
    {
        if (mBuffer.onBufferUserSizeChange(renderer))
        {
            // If we have a dedicated VkBuffer created with the user size, we have to recreate that
            // VkBuffer when the user size changes, even if the storage is reused.  When this
            // happens, we must notify other objects that are observing this buffer, such as the
            // vertex array.  The vertex array observes the buffer's storage changes because it
            // uses the VkBuffer directly; now that the VkBuffer has changed, it needs to
            // re-process the buffer just as if the storage had been reallocated.
            onStateChange(angle::SubjectMessage::InternalMemoryAllocationChanged);
        }
    }

    if (data != nullptr)
    {
        BufferDataSource dataSource = {};
        dataSource.data             = data;

        // Handle full-buffer updates similarly to glBufferSubData
        ANGLE_TRY(setDataImpl(contextVk, size, dataSource, size, 0, updateType));
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::setSubData(const gl::Context *context,
                                   gl::BufferBinding target,
                                   const void *data,
                                   size_t size,
                                   size_t offset)
{
    ASSERT(mBuffer.valid());

    BufferDataSource dataSource = {};
    dataSource.data             = data;

    ContextVk *contextVk = vk::GetImpl(context);
    return setDataImpl(contextVk, static_cast<size_t>(mState.getSize()), dataSource, size, offset,
                       BufferUpdateType::ContentsUpdate);
}

angle::Result BufferVk::copySubData(const gl::Context *context,
                                    BufferImpl *source,
                                    GLintptr sourceOffset,
                                    GLintptr destOffset,
                                    GLsizeiptr size)
{
    ASSERT(mBuffer.valid());

    ContextVk *contextVk = vk::GetImpl(context);
    BufferVk *sourceVk   = GetAs<BufferVk>(source);

    BufferDataSource dataSource = {};
    dataSource.buffer           = &sourceVk->getBuffer();
    dataSource.bufferOffset     = static_cast<VkDeviceSize>(sourceOffset);

    ASSERT(dataSource.buffer->valid());

    return setDataImpl(contextVk, static_cast<size_t>(mState.getSize()), dataSource, size,
                       destOffset, BufferUpdateType::ContentsUpdate);
}

angle::Result BufferVk::allocStagingBuffer(ContextVk *contextVk,
                                           vk::MemoryCoherency coherency,
                                           VkDeviceSize size,
                                           uint8_t **mapPtr)
{
    ASSERT(!mIsStagingBufferMapped);

    if (mStagingBuffer.valid())
    {
        if (size <= mStagingBuffer.getSize() && IsCached(coherency) == mStagingBuffer.isCached() &&
            contextVk->getRenderer()->hasResourceUseFinished(mStagingBuffer.getResourceUse()))
        {
            // If the existing staging buffer is big enough and idle, just reuse it.
            *mapPtr                = mStagingBuffer.getMappedMemory();
            mIsStagingBufferMapped = true;
            return angle::Result::Continue;
        }
        mStagingBuffer.release(contextVk->getRenderer());
    }

    ANGLE_TRY(
        contextVk->initBufferForBufferCopy(&mStagingBuffer, static_cast<size_t>(size), coherency));
    *mapPtr                = mStagingBuffer.getMappedMemory();
    mIsStagingBufferMapped = true;

    return angle::Result::Continue;
}

angle::Result BufferVk::flushStagingBuffer(ContextVk *contextVk,
                                           VkDeviceSize offset,
                                           VkDeviceSize size)
{
    vk::Renderer *renderer = contextVk->getRenderer();

    ASSERT(mIsStagingBufferMapped);
    ASSERT(mStagingBuffer.valid());

    if (!mStagingBuffer.isCoherent())
    {
        ANGLE_TRY(mStagingBuffer.flush(renderer));
    }

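    // VkBufferCopy is {srcOffset, dstOffset, size}: copy from the staging buffer's base offset
    // into the destination buffer at |offset|.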
    VkBufferCopy copyRegion = {mStagingBuffer.getOffset(), mBuffer.getOffset() + offset, size};
    ANGLE_TRY(CopyBuffers(contextVk, &mStagingBuffer, &mBuffer, 1, &copyRegion));

    return angle::Result::Continue;
}

angle::Result BufferVk::handleDeviceLocalBufferMap(ContextVk *contextVk,
                                                   VkDeviceSize offset,
                                                   VkDeviceSize size,
                                                   uint8_t **mapPtr)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    ANGLE_TRY(
        allocStagingBuffer(contextVk, vk::MemoryCoherency::CachedPreferCoherent, size, mapPtr));
    ANGLE_TRY(mStagingBuffer.flush(renderer));

    // Copy data from the device-local buffer to the host-visible staging buffer.
    VkBufferCopy copyRegion = {mBuffer.getOffset() + offset, mStagingBuffer.getOffset(), size};
    ANGLE_TRY(CopyBuffers(contextVk, &mBuffer, &mStagingBuffer, 1, &copyRegion));
    ANGLE_TRY(mStagingBuffer.waitForIdle(contextVk, "GPU stall due to mapping device local buffer",
                                         RenderPassClosureReason::DeviceLocalBufferMap));
    // Since coherent memory is only preferred, we may end up with non-coherent memory. Always call
    // invalidate here (it checks the memory flags before actually calling into the driver).
    ANGLE_TRY(mStagingBuffer.invalidate(renderer));

    return angle::Result::Continue;
}

angle::Result BufferVk::mapHostVisibleBuffer(ContextVk *contextVk,
                                             VkDeviceSize offset,
                                             GLbitfield access,
                                             uint8_t **mapPtr)
{
    ANGLE_TRY(mBuffer.mapWithOffset(contextVk, mapPtr, static_cast<size_t>(offset)));

    // Invalidate non-coherent memory for the READ case.
    if (!mBuffer.isCoherent() && (access & GL_MAP_READ_BIT) != 0)
    {
        ANGLE_TRY(mBuffer.invalidate(contextVk->getRenderer()));
    }
    return angle::Result::Continue;
}

angle::Result BufferVk::map(const gl::Context *context, GLenum access, void **mapPtr)
{
    ASSERT(mBuffer.valid());
    ASSERT(access == GL_WRITE_ONLY_OES);

    return mapImpl(vk::GetImpl(context), GL_MAP_WRITE_BIT, mapPtr);
}

angle::Result BufferVk::mapRange(const gl::Context *context,
                                 size_t offset,
                                 size_t length,
                                 GLbitfield access,
                                 void **mapPtr)
{
    return mapRangeImpl(vk::GetImpl(context), offset, length, access, mapPtr);
}

angle::Result BufferVk::mapImpl(ContextVk *contextVk, GLbitfield access, void **mapPtr)
{
    return mapRangeImpl(contextVk, 0, static_cast<VkDeviceSize>(mState.getSize()), access, mapPtr);
}

angle::Result BufferVk::ghostMappedBuffer(ContextVk *contextVk,
                                          VkDeviceSize offset,
                                          VkDeviceSize length,
                                          GLbitfield access,
                                          void **mapPtr)
{
    // We shouldn't get here if it is external memory
    ASSERT(!isExternalBuffer());

    ++contextVk->getPerfCounters().buffersGhosted;

    // If we are creating a new buffer because the GPU is using it as read-only, then we
    // also need to copy the contents of the previous buffer into the new buffer, in
    // case the caller only updates a portion of the new buffer.
    vk::BufferHelper src = std::move(mBuffer);
    ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
                                  BufferUsageType::Dynamic));

    // Before returning the new buffer, map the previous buffer and copy its entire
    // contents into the new buffer.
    uint8_t *srcMapPtr = nullptr;
    uint8_t *dstMapPtr = nullptr;
    ANGLE_TRY(src.map(contextVk, &srcMapPtr));
    ANGLE_TRY(mBuffer.map(contextVk, &dstMapPtr));

    ASSERT(src.isCoherent());
    ASSERT(mBuffer.isCoherent());

    // No need to copy over [offset, offset + length), just around it.
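    // For example, with a 100-byte buffer, offset = 20, and length = 30, only the [0, 20) and
    // [50, 100) regions are copied from the old buffer.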
    if ((access & GL_MAP_INVALIDATE_RANGE_BIT) != 0)
    {
        if (offset != 0)
        {
            memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(offset));
        }
        size_t totalSize      = static_cast<size_t>(mState.getSize());
        size_t remainingStart = static_cast<size_t>(offset + length);
        size_t remainingSize  = totalSize - remainingStart;
        if (remainingSize != 0)
        {
            memcpy(dstMapPtr + remainingStart, srcMapPtr + remainingStart, remainingSize);
        }
    }
    else
    {
        memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(mState.getSize()));
    }

    ANGLE_TRY(contextVk->releaseBufferAllocation(&src));

    // Return the already mapped pointer with the offset adjustment to avoid the call to unmap().
    *mapPtr = dstMapPtr + offset;

    return angle::Result::Continue;
}

angle::Result BufferVk::mapRangeImpl(ContextVk *contextVk,
                                     VkDeviceSize offset,
                                     VkDeviceSize length,
                                     GLbitfield access,
                                     void **mapPtr)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    ASSERT(mBuffer.valid());

    // Record the map call parameters in case this call comes from inside ANGLE (where the
    // access/offset/length may be inconsistent with mState).
    mIsMappedForWrite = (access & GL_MAP_WRITE_BIT) != 0;
    mMappedRange      = RangeDeviceSize(offset, offset + length);

    uint8_t **mapPtrBytes = reinterpret_cast<uint8_t **>(mapPtr);
    bool hostVisible      = mBuffer.isHostVisible();

    // MAP_UNSYNCHRONIZED_BIT, so immediately map.
    if ((access & GL_MAP_UNSYNCHRONIZED_BIT) != 0)
    {
        if (hostVisible)
        {
            return mapHostVisibleBuffer(contextVk, offset, access, mapPtrBytes);
        }
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Read case
    if ((access & GL_MAP_WRITE_BIT) == 0)
    {
        // If the app is not going to write, all we need is to ensure that GPU writes are finished.
        // Concurrent reads from the CPU and GPU are allowed.
        if (!renderer->hasResourceUseFinished(mBuffer.getWriteResourceUse()))
        {
            // If there are unflushed write commands for the resource, flush them.
            if (contextVk->hasUnsubmittedUse(mBuffer.getWriteResourceUse()))
            {
                ANGLE_TRY(contextVk->flushAndSubmitCommands(
                    nullptr, nullptr, RenderPassClosureReason::BufferWriteThenMap));
            }
            ANGLE_TRY(renderer->finishResourceUse(contextVk, mBuffer.getWriteResourceUse()));
        }
        if (hostVisible)
        {
            return mapHostVisibleBuffer(contextVk, offset, access, mapPtrBytes);
        }
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Write case
    if (!hostVisible)
    {
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Write case, buffer not in use.
    if (isExternalBuffer() || !isCurrentlyInUse(contextVk->getRenderer()))
    {
        return mapHostVisibleBuffer(contextVk, offset, access, mapPtrBytes);
    }

    // Write case, buffer in use.
    //
    // Here, we try to map the buffer, but it's busy. Instead of waiting for the GPU to
    // finish, we just allocate a new buffer if:
    // 1.) The caller has told us it doesn't care about previous contents, or
    // 2.) The GPU won't write to the buffer.

    bool rangeInvalidate = (access & GL_MAP_INVALIDATE_RANGE_BIT) != 0;
    bool entireBufferInvalidated =
        ((access & GL_MAP_INVALIDATE_BUFFER_BIT) != 0) ||
        (rangeInvalidate && offset == 0 && static_cast<VkDeviceSize>(mState.getSize()) == length);

    if (entireBufferInvalidated)
    {
        ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
                                      BufferUsageType::Dynamic));
        return mapHostVisibleBuffer(contextVk, offset, access, mapPtrBytes);
    }

    bool smallMapRange = (length < static_cast<VkDeviceSize>(mState.getSize()) / 2);

    if (smallMapRange && rangeInvalidate)
    {
        ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::CachedNonCoherent,
                                     static_cast<size_t>(length), mapPtrBytes));
        return angle::Result::Continue;
    }

    if (renderer->hasResourceUseFinished(mBuffer.getWriteResourceUse()))
    {
        // This will keep the new buffer mapped and update mapPtr, so return immediately.
        return ghostMappedBuffer(contextVk, offset, length, access, mapPtr);
    }

    // Write case (worst case, buffer in use for write)
    ANGLE_TRY(mBuffer.waitForIdle(contextVk, "GPU stall due to mapping buffer in use by the GPU",
                                  RenderPassClosureReason::BufferInUseWhenSynchronizedMap));
    return mapHostVisibleBuffer(contextVk, offset, access, mapPtrBytes);
}

angle::Result BufferVk::unmap(const gl::Context *context, GLboolean *result)
{
    ANGLE_TRY(unmapImpl(vk::GetImpl(context)));

    // This should be false if the contents have been corrupted through external means.  Vulkan
    // doesn't provide such information.
    *result = true;

    return angle::Result::Continue;
}

angle::Result BufferVk::unmapImpl(ContextVk *contextVk)
{
    ASSERT(mBuffer.valid());

    if (mIsStagingBufferMapped)
    {
        ASSERT(mStagingBuffer.valid());
        // The buffer is device-local, or this is the small-range-map optimization.
        if (mIsMappedForWrite)
        {
            ANGLE_TRY(flushStagingBuffer(contextVk, mMappedRange.low(), mMappedRange.length()));
        }

        mIsStagingBufferMapped = false;
    }
    else
    {
        ASSERT(mBuffer.isHostVisible());
        vk::Renderer *renderer = contextVk->getRenderer();
        if (!mBuffer.isCoherent())
        {
            ANGLE_TRY(mBuffer.flush(renderer));
        }
        mBuffer.unmap(renderer);
    }

    if (mIsMappedForWrite)
    {
        if (mMappedRange == RangeDeviceSize(0, static_cast<VkDeviceSize>(getSize())))
        {
            dataUpdated();
        }
        else
        {
            dataRangeUpdated(mMappedRange);
        }
    }

    // Reset the mapping parameters
    mIsMappedForWrite = false;
    mMappedRange.invalidate();

    return angle::Result::Continue;
}

angle::Result BufferVk::getSubData(const gl::Context *context,
                                   GLintptr offset,
                                   GLsizeiptr size,
                                   void *outData)
{
    ASSERT(offset + size <= getSize());
    ASSERT(mBuffer.valid());
    ContextVk *contextVk = vk::GetImpl(context);
    void *mapPtr;
    ANGLE_TRY(mapRangeImpl(contextVk, offset, size, GL_MAP_READ_BIT, &mapPtr));
    memcpy(outData, mapPtr, size);
    return unmapImpl(contextVk);
}

angle::Result BufferVk::getIndexRange(const gl::Context *context,
                                      gl::DrawElementsType type,
                                      size_t offset,
                                      size_t count,
                                      bool primitiveRestartEnabled,
                                      gl::IndexRange *outRange)
{
    ContextVk *contextVk   = vk::GetImpl(context);
    vk::Renderer *renderer = contextVk->getRenderer();

    // This is a workaround for the mock ICD not implementing buffer memory state.
    // Could be removed if https://github.com/KhronosGroup/Vulkan-Tools/issues/84 is fixed.
    if (renderer->isMockICDEnabled())
    {
        outRange->start = 0;
        outRange->end   = 0;
        return angle::Result::Continue;
    }

    ANGLE_TRACE_EVENT0("gpu.angle", "BufferVk::getIndexRange");

    void *mapPtr;
    ANGLE_TRY(mapRangeImpl(contextVk, offset, getSize(), GL_MAP_READ_BIT, &mapPtr));
    *outRange = gl::ComputeIndexRange(type, mapPtr, count, primitiveRestartEnabled);
    ANGLE_TRY(unmapImpl(contextVk));

    return angle::Result::Continue;
}

angle::Result BufferVk::updateBuffer(ContextVk *contextVk,
                                     size_t bufferSize,
                                     const BufferDataSource &dataSource,
                                     size_t updateSize,
                                     size_t updateOffset)
{
    // To copy on the CPU, the destination must be host-visible.  The source should also be either
    // a CPU pointer, or otherwise a host-visible buffer that is not being written to by the GPU.
    const bool shouldCopyOnCPU =
        mBuffer.isHostVisible() &&
        (dataSource.data != nullptr ||
         ShouldUseCPUToCopyData(contextVk, *dataSource.buffer, updateSize, bufferSize));

    if (shouldCopyOnCPU)
    {
        ANGLE_TRY(directUpdate(contextVk, dataSource, updateSize, updateOffset));
    }
    else
    {
        ANGLE_TRY(stagedUpdate(contextVk, dataSource, updateSize, updateOffset));
    }
    return angle::Result::Continue;
}

angle::Result BufferVk::directUpdate(ContextVk *contextVk,
                                     const BufferDataSource &dataSource,
                                     size_t size,
                                     size_t offset)
{
    vk::Renderer *renderer    = contextVk->getRenderer();
    uint8_t *srcPointerMapped = nullptr;
    const uint8_t *srcPointer = nullptr;
    uint8_t *dstPointer       = nullptr;

    // Map the destination buffer.
    ASSERT(mBuffer.isHostVisible());
    ANGLE_TRY(mBuffer.mapWithOffset(contextVk, &dstPointer, offset));
    ASSERT(dstPointer);

    // If source data is coming from a buffer, map it.  If this is a self-copy, avoid double-mapping
    // the buffer.
    if (dataSource.data != nullptr)
    {
        srcPointer = static_cast<const uint8_t *>(dataSource.data);
    }
    else
    {
        ANGLE_TRY(dataSource.buffer->mapWithOffset(contextVk, &srcPointerMapped,
                                                   static_cast<size_t>(dataSource.bufferOffset)));
        srcPointer = srcPointerMapped;
    }

    memcpy(dstPointer, srcPointer, size);

    // External memory may end up with non-coherent memory.
    if (!mBuffer.isCoherent())
    {
        ANGLE_TRY(mBuffer.flush(renderer, offset, size));
    }

    // Unmap the destination and source buffers if applicable.
    //
    // If the buffer has dynamic usage, then the intent is frequent client-side updates to the
    // buffer. Don't CPU-unmap the buffer; we will take care of unmapping when releasing the buffer
    // to either the renderer or mBufferFreeList.
    if (GetBufferUsageType(mState.getUsage()) == BufferUsageType::Static)
    {
        mBuffer.unmap(renderer);
    }

    if (srcPointerMapped != nullptr)
    {
        dataSource.buffer->unmap(renderer);
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::stagedUpdate(ContextVk *contextVk,
                                     const BufferDataSource &dataSource,
                                     size_t size,
                                     size_t offset)
{
    // If data is coming from a CPU pointer, stage it in a temporary staging buffer.
    // Otherwise, do a GPU copy directly from the given buffer.
    if (dataSource.data != nullptr)
    {
        uint8_t *mapPointer = nullptr;
        ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::CachedNonCoherent, size,
                                     &mapPointer));
        memcpy(mapPointer, dataSource.data, size);
        ANGLE_TRY(flushStagingBuffer(contextVk, offset, size));
        mIsStagingBufferMapped = false;
    }
    else
    {
        // Check for self-dependency.
        vk::CommandBufferAccess access;
        if (dataSource.buffer->getBufferSerial() == mBuffer.getBufferSerial())
        {
            access.onBufferSelfCopy(&mBuffer);
        }
        else
        {
            access.onBufferTransferRead(dataSource.buffer);
            access.onBufferTransferWrite(&mBuffer);
        }

        vk::OutsideRenderPassCommandBuffer *commandBuffer;
        ANGLE_TRY(contextVk->getOutsideRenderPassCommandBuffer(access, &commandBuffer));

        // Enqueue a copy command on the GPU.
        const VkBufferCopy copyRegion = {dataSource.bufferOffset + dataSource.buffer->getOffset(),
                                         static_cast<VkDeviceSize>(offset) + mBuffer.getOffset(),
                                         static_cast<VkDeviceSize>(size)};

        commandBuffer->copyBuffer(dataSource.buffer->getBuffer(), mBuffer.getBuffer(), 1,
                                  &copyRegion);
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::acquireAndUpdate(ContextVk *contextVk,
                                         size_t bufferSize,
                                         const BufferDataSource &dataSource,
                                         size_t updateSize,
                                         size_t updateOffset,
                                         BufferUpdateType updateType)
{
    // We shouldn't get here if this is external memory
    ASSERT(!isExternalBuffer());
    // If StorageRedefined, we cannot use mState.getSize() to allocate a new buffer.
    ASSERT(updateType != BufferUpdateType::StorageRedefined);
    ASSERT(mBuffer.valid());
    ASSERT(mBuffer.getSize() >= bufferSize);

    // Here we acquire a new BufferHelper and directUpdate() the new buffer.
    // If the subData size was less than the buffer's size, we additionally enqueue
    // a GPU copy of the remaining regions from the old mBuffer to the new one.
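    // For example, with bufferSize = 100, updateOffset = 20, and updateSize = 30, the regions
    // [0, 20) and [50, 100) are copied from the old buffer (on the CPU when it is host-visible
    // and GPU-idle, otherwise with a GPU copy).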
    vk::BufferHelper prevBuffer;
    size_t offsetAfterSubdata      = (updateOffset + updateSize);
    bool updateRegionBeforeSubData = mHasValidData && (updateOffset > 0);
    bool updateRegionAfterSubData  = mHasValidData && (offsetAfterSubdata < bufferSize);

    uint8_t *prevMapPtrBeforeSubData = nullptr;
    uint8_t *prevMapPtrAfterSubData  = nullptr;
    if (updateRegionBeforeSubData || updateRegionAfterSubData)
    {
        prevBuffer = std::move(mBuffer);

        // The total bytes that we need to copy from the old buffer to the new buffer.
        size_t copySize = bufferSize - updateSize;

        // If the buffer is host-visible and the GPU is not writing to it, we use the CPU to do the
        // copy. We need to save the source buffer pointer before we acquire a new buffer.
        if (ShouldUseCPUToCopyData(contextVk, prevBuffer, copySize, bufferSize))
        {
            uint8_t *mapPointer = nullptr;
            // prevBuffer will be recycled (or released and unmapped) by acquireBufferHelper.
            ANGLE_TRY(prevBuffer.map(contextVk, &mapPointer));
            ASSERT(mapPointer);
            prevMapPtrBeforeSubData = mapPointer;
            prevMapPtrAfterSubData  = mapPointer + offsetAfterSubdata;
        }
    }

    ANGLE_TRY(acquireBufferHelper(contextVk, bufferSize, BufferUsageType::Dynamic));
    ANGLE_TRY(updateBuffer(contextVk, bufferSize, dataSource, updateSize, updateOffset));

    constexpr int kMaxCopyRegions = 2;
    angle::FixedVector<VkBufferCopy, kMaxCopyRegions> copyRegions;

    if (updateRegionBeforeSubData)
    {
        if (prevMapPtrBeforeSubData)
        {
            BufferDataSource beforeSrc = {};
            beforeSrc.data             = prevMapPtrBeforeSubData;

            ANGLE_TRY(directUpdate(contextVk, beforeSrc, updateOffset, 0));
        }
        else
        {
            copyRegions.push_back({prevBuffer.getOffset(), mBuffer.getOffset(), updateOffset});
        }
    }

    if (updateRegionAfterSubData)
    {
        size_t copySize = bufferSize - offsetAfterSubdata;
        if (prevMapPtrAfterSubData)
        {
            BufferDataSource afterSrc = {};
            afterSrc.data             = prevMapPtrAfterSubData;

            ANGLE_TRY(directUpdate(contextVk, afterSrc, copySize, offsetAfterSubdata));
        }
        else
        {
            copyRegions.push_back({prevBuffer.getOffset() + offsetAfterSubdata,
                                   mBuffer.getOffset() + offsetAfterSubdata, copySize});
        }
    }

    if (!copyRegions.empty())
    {
        ANGLE_TRY(CopyBuffers(contextVk, &prevBuffer, &mBuffer,
                              static_cast<uint32_t>(copyRegions.size()), copyRegions.data()));
    }

    if (prevBuffer.valid())
    {
        ANGLE_TRY(contextVk->releaseBufferAllocation(&prevBuffer));
    }

    return angle::Result::Continue;
}

setDataImpl(ContextVk * contextVk,size_t bufferSize,const BufferDataSource & dataSource,size_t updateSize,size_t updateOffset,BufferUpdateType updateType)1188 angle::Result BufferVk::setDataImpl(ContextVk *contextVk,
1189                                     size_t bufferSize,
1190                                     const BufferDataSource &dataSource,
1191                                     size_t updateSize,
1192                                     size_t updateOffset,
1193                                     BufferUpdateType updateType)
1194 {
1195     // if the buffer is currently in use
1196     //     if it isn't an external buffer and not a self-copy and sub data size meets threshold
1197     //          acquire a new BufferHelper from the pool
1198     //     else stage the update
1199     // else update the buffer directly
    if (isCurrentlyInUse(contextVk->getRenderer()))
    {
        // The acquire-and-update path creates a new buffer, which is sometimes more efficient than
        // trying to update the existing one.  Firstly, this is not done in the following
        // situations:
        //
        // - For external buffers, the underlying storage cannot be reallocated.
        // - If storage has just been redefined, this path is not taken because a new buffer has
        //   already been created by the caller.  Besides, this path uses mState.getSize(), which
        //   the frontend updates only after this call in situations where the storage may be
        //   redefined.  This could happen if the buffer memory is DEVICE_LOCAL and
        //   renderer->getFeatures().allocateNonZeroMemory.enabled is true.  In this case a
        //   copyToBuffer is immediately issued after allocation and isCurrentlyInUse will be true.
        // - If this is a self copy through glCopyBufferSubData, |dataSource| will contain a
        //   reference to |mBuffer|, in which case source information is lost after acquiring a new
        //   buffer.
        //
        // Additionally, this path is taken only if at least one of the following conditions is
        // true:
        //
        // - BufferVk does not have any valid data.  This means that there is no data to be
        //   copied from the old buffer to the new one after acquiring it.  This could happen when
        //   the application calls glBufferData with the same size and we reuse the existing buffer
        //   storage.
        // - The buffer is used read-only in the current render pass.  In this case, acquiring a
        //   new buffer is preferred to avoid breaking the render pass.
        // - The update modifies a significant portion of the buffer.
        // - The preferCPUForBufferSubData feature is enabled.
        //
        const bool canAcquireAndUpdate = !isExternalBuffer() &&
                                         updateType != BufferUpdateType::StorageRedefined &&
                                         !IsSelfCopy(dataSource, mBuffer);
        if (canAcquireAndUpdate &&
            (!mHasValidData || ShouldAvoidRenderPassBreakOnUpdate(contextVk, mBuffer, bufferSize) ||
             ShouldAllocateNewMemoryForUpdate(contextVk, updateSize, bufferSize)))
        {
            ANGLE_TRY(acquireAndUpdate(contextVk, bufferSize, dataSource, updateSize, updateOffset,
                                       updateType));
        }
        else
        {
            if (canAcquireAndUpdate && RenderPassUsesBufferForReadOnly(contextVk, mBuffer))
            {
                ANGLE_VK_PERF_WARNING(contextVk, GL_DEBUG_SEVERITY_LOW,
                                      "Breaking the render pass on small upload to large buffer");
            }

            ANGLE_TRY(stagedUpdate(contextVk, dataSource, updateSize, updateOffset));
        }
    }
    else
    {
        ANGLE_TRY(updateBuffer(contextVk, bufferSize, dataSource, updateSize, updateOffset));
    }

    // Mark the updated data so that stale cached vertex conversions are redone.
    if (updateOffset == 0 && updateSize == bufferSize)
    {
        dataUpdated();
    }
    else
    {
        dataRangeUpdated(RangeDeviceSize(updateOffset, updateOffset + updateSize));
    }

    return angle::Result::Continue;
}

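// Returns the conversion buffer matching |cacheKey|, lazily creating it on first use.  The number
// of distinct conversions per buffer is expected to be small, so a linear search suffices.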
VertexConversionBuffer *BufferVk::getVertexConversionBuffer(
    vk::Renderer *renderer,
    const VertexConversionBuffer::CacheKey &cacheKey)
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        if (buffer.match(cacheKey))
        {
            ASSERT(buffer.valid());
            return &buffer;
        }
    }

    mVertexConversionBuffers.emplace_back(renderer, cacheKey);
    return &mVertexConversionBuffers.back();
}

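// Marks |range| dirty in every cached vertex conversion buffer, so the affected bytes are
// reconverted the next time each conversion is used.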
void BufferVk::dataRangeUpdated(const RangeDeviceSize &range)
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.addDirtyBufferRange(range);
    }
    // Now we have valid data
    mHasValidData = true;
}

void BufferVk::dataUpdated()
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.setEntireBufferDirty();
    }
    // Now we have valid data
    mHasValidData = true;
}

void BufferVk::onDataChanged()
{
    dataUpdated();
}

angle::Result BufferVk::acquireBufferHelper(ContextVk *contextVk,
                                            size_t sizeInBytes,
                                            BufferUsageType usageType)
{
    vk::Renderer *renderer = contextVk->getRenderer();
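    // Round the requested size up to the buffer size granularity (a power of two); for example,
    // with a granularity of 4, a 13-byte request is rounded up to 16 bytes.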
    size_t size            = roundUpPow2(sizeInBytes, kBufferSizeGranularity);
    size_t alignment       = renderer->getDefaultBufferAlignment();

    if (mBuffer.valid())
    {
        ANGLE_TRY(contextVk->releaseBufferAllocation(&mBuffer));
    }

    // Allocate the buffer directly
    ANGLE_TRY(
        contextVk->initBufferAllocation(&mBuffer, mMemoryTypeIndex, size, alignment, usageType));

    // Tell the observers (front end) that a new buffer was created, so the necessary
    // dirty bits can be set. This allows the buffer views pointing to the old buffer to
    // be recreated and point to the new buffer, along with updating the descriptor sets
    // to use the new buffer.
    onStateChange(angle::SubjectMessage::InternalMemoryAllocationChanged);

    return angle::Result::Continue;
}

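// The buffer counts as busy until the renderer has observed that its last recorded GPU use has
// finished.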
bool BufferVk::isCurrentlyInUse(vk::Renderer *renderer) const
{
    return !renderer->hasResourceUseFinished(mBuffer.getResourceUse());
}

// When a buffer is being completely changed, calculate whether it's better to allocate a new buffer
// or overwrite the existing one.
BufferUpdateType BufferVk::calculateBufferUpdateTypeOnFullUpdate(
    vk::Renderer *renderer,
    size_t size,
    VkMemoryPropertyFlags memoryPropertyFlags,
    BufferUsageType usageType,
    const void *data) const
{
    // 0-sized updates should be no-op'd before this call.
    ASSERT(size > 0);

    // If there is no existing buffer, this cannot be a content update.
    if (!mBuffer.valid())
    {
        return BufferUpdateType::StorageRedefined;
    }

    const bool inUseAndRespecifiedWithoutData = data == nullptr && isCurrentlyInUse(renderer);
    bool redefineStorage = shouldRedefineStorage(renderer, usageType, memoryPropertyFlags, size);

    // Create a new buffer if the buffer is busy and it's being redefined without data.
    // Additionally, a new buffer is created if any of the parameters change (memory type, usage,
    // size).
    return redefineStorage || inUseAndRespecifiedWithoutData ? BufferUpdateType::StorageRedefined
                                                             : BufferUpdateType::ContentsUpdate;
}

bool BufferVk::shouldRedefineStorage(vk::Renderer *renderer,
                                     BufferUsageType usageType,
                                     VkMemoryPropertyFlags memoryPropertyFlags,
                                     size_t size) const
{
    if (mUsageType != usageType)
    {
        return true;
    }

    if (mMemoryPropertyFlags != memoryPropertyFlags)
    {
        return true;
    }

    if (size > mBuffer.getSize())
    {
        return true;
    }
    else
    {
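        // A smaller request may still fit the existing allocation once padding and rounding are
        // applied.  For example, with padBuffersToMaxVertexAttribStride enabled and a hypothetical
        // 2048-byte max vertex attrib stride, a 100-byte request pads to 2148 bytes, then rounds
        // up to the size granularity and the default buffer alignment; only if the final size
        // exceeds the current allocation does the storage need to be redefined.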
        size_t paddedBufferSize =
            (renderer->getFeatures().padBuffersToMaxVertexAttribStride.enabled)
                ? (size + static_cast<size_t>(renderer->getMaxVertexAttribStride()))
                : size;
        size_t sizeInBytes = roundUpPow2(paddedBufferSize, kBufferSizeGranularity);
        size_t alignedSize = roundUp(sizeInBytes, renderer->getDefaultBufferAlignment());
        if (alignedSize > mBuffer.getSize())
        {
            return true;
        }
    }

    return false;
}
}  // namespace rx