/*
 * Copyright 2022 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/graphite/vk/VulkanBuffer.h"

#include "include/gpu/vk/VulkanMemoryAllocator.h"
#include "src/gpu/graphite/vk/VulkanCommandBuffer.h"
#include "src/gpu/graphite/vk/VulkanGraphiteUtilsPriv.h"
#include "src/gpu/vk/VulkanMemory.h"

namespace skgpu::graphite {

sk_sp<Buffer> VulkanBuffer::Make(const VulkanSharedContext* sharedContext,
                                 size_t size,
                                 BufferType type,
                                 AccessPattern accessPattern) {
    if (size <= 0) {
        return nullptr;
    }
    VkBuffer buffer;
    skgpu::VulkanAlloc alloc;

    // TODO (b/374749633): We can't use protected buffers in the vertex shader. The checks below
    // make sure we don't use it for vertex or index buffers. But we currently don't have a way to
    // check here if it is a uniform or storage buffer that is used in the vertex shader. If we hit
    // that issue and need those GpuOnly buffers, we'll need to pass in some information to the
    // factory to say what stage the buffer is for. Maybe expand AccessPattern to be
    // GpuOnly_NotVertex or some better name like that.
    bool isProtected = sharedContext->isProtected() == Protected::kYes &&
                       accessPattern == AccessPattern::kGpuOnly &&
                       type != BufferType::kVertex &&
                       type != BufferType::kIndex;

    // Protected memory _never_ uses mappable buffers.
    // Otherwise, the only time we don't require mappable buffers is when we're on a device
    // where gpu only memory has faster reads on the gpu than memory that is also mappable
    // on the cpu.
    bool requiresMappable = !isProtected &&
                            (accessPattern == AccessPattern::kHostVisible ||
                             !sharedContext->vulkanCaps().gpuOnlyBuffersMorePerformant());

    using BufferUsage = skgpu::VulkanMemoryAllocator::BufferUsage;

    BufferUsage allocUsage;
    if (type == BufferType::kXferCpuToGpu) {
        allocUsage = BufferUsage::kTransfersFromCpuToGpu;
    } else if (type == BufferType::kXferGpuToCpu) {
        allocUsage = BufferUsage::kTransfersFromGpuToCpu;
    } else {
        // GPU-only buffers are preferred unless mappability is required.
        allocUsage = requiresMappable ? BufferUsage::kCpuWritesGpuReads : BufferUsage::kGpuOnly;
    }

    // Create the buffer object
    VkBufferCreateInfo bufInfo;
    memset(&bufInfo, 0, sizeof(VkBufferCreateInfo));
    bufInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    bufInfo.flags = isProtected ? VK_BUFFER_CREATE_PROTECTED_BIT : 0;
    bufInfo.size = size;

    // To support SkMesh buffer updates we make Vertex and Index buffers capable of being transfer
    // dsts. To support rtAdjust uniform buffer updates, we make host-visible uniform buffers also
    // capable of being transfer dsts.
    switch (type) {
        case BufferType::kVertex:
            bufInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
            break;
        case BufferType::kIndex:
            bufInfo.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
            break;
        case BufferType::kStorage:
            bufInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            break;
        case BufferType::kQuery:
            SK_ABORT("Query buffers not supported on Vulkan");
            break;
        case BufferType::kIndirect:
            bufInfo.usage =
                    VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            break;
        case BufferType::kVertexStorage:
            bufInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            break;
        case BufferType::kIndexStorage:
            bufInfo.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            break;
        case BufferType::kUniform:
            bufInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
            break;
        case BufferType::kXferCpuToGpu:
            bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
            break;
        case BufferType::kXferGpuToCpu:
            bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
            break;
    }

    // We may not always get a mappable buffer for non-dynamic access buffers. Thus we set the
    // transfer dst usage bit in case we need to do a copy to write data. It doesn't really hurt
    // to set this extra usage flag, but we could narrow the set of buffers we apply it to beyond
    // just the non-dynamic ones.
    if (!requiresMappable || accessPattern == AccessPattern::kGpuOnly) {
        bufInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
    }

    bufInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    bufInfo.queueFamilyIndexCount = 0;
    bufInfo.pQueueFamilyIndices = nullptr;

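    // Creating a usable Vulkan buffer is a three-step process: create the VkBuffer handle,
    // allocate backing device memory, then bind that memory to the buffer. Each failure path
    // below unwinds whatever preceded it.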
    VkResult result;
    VULKAN_CALL_RESULT(sharedContext,
                       result,
                       CreateBuffer(sharedContext->device(),
                                    &bufInfo,
                                    nullptr, /*const VkAllocationCallbacks*/
                                    &buffer));
    if (result != VK_SUCCESS) {
        return nullptr;
    }

    auto allocator = sharedContext->memoryAllocator();
    bool shouldPersistentlyMapCpuToGpu =
            sharedContext->vulkanCaps().shouldPersistentlyMapCpuToGpuBuffers();
    // Allocate backing memory for the buffer.
    auto checkResult = [](VkResult result) {
        return result == VK_SUCCESS;
    };
    if (!skgpu::VulkanMemory::AllocBufferMemory(allocator,
                                                buffer,
                                                skgpu::Protected(isProtected),
                                                allocUsage,
                                                shouldPersistentlyMapCpuToGpu,
                                                checkResult,
                                                &alloc)) {
        VULKAN_CALL(sharedContext->interface(),
                    DestroyBuffer(sharedContext->device(),
                                  buffer,
                                  /*const VkAllocationCallbacks*=*/nullptr));
        return nullptr;
    }

    // Bind buffer
    VULKAN_CALL_RESULT(
            sharedContext,
            result,
            BindBufferMemory(sharedContext->device(), buffer, alloc.fMemory, alloc.fOffset));
    if (result != VK_SUCCESS) {
        skgpu::VulkanMemory::FreeBufferMemory(allocator, alloc);
        VULKAN_CALL(sharedContext->interface(),
                    DestroyBuffer(sharedContext->device(),
                                  buffer,
                                  /*const VkAllocationCallbacks*=*/nullptr));
        return nullptr;
    }

    return sk_sp<Buffer>(new VulkanBuffer(
            sharedContext, size, type, accessPattern, std::move(buffer), alloc, bufInfo.usage,
            Protected(isProtected)));
}

VulkanBuffer::VulkanBuffer(const VulkanSharedContext* sharedContext,
                           size_t size,
                           BufferType type,
                           AccessPattern accessPattern,
                           VkBuffer buffer,
                           const skgpu::VulkanAlloc& alloc,
                           const VkBufferUsageFlags usageFlags,
                           Protected isProtected)
        : Buffer(sharedContext, size, isProtected)
        , fBuffer(std::move(buffer))
        , fAlloc(alloc)
        , fBufferUsageFlags(usageFlags)
        // We assume a buffer is used for CPU reads only in the case of GPU->CPU transfer buffers.
        , fBufferUsedForCPURead(type == BufferType::kXferGpuToCpu) {}

void VulkanBuffer::freeGpuData() {
    if (fMapPtr) {
        this->internalUnmap(0, this->size());
        fMapPtr = nullptr;
    }

    const VulkanSharedContext* sharedContext =
            static_cast<const VulkanSharedContext*>(this->sharedContext());
    SkASSERT(fBuffer);
    SkASSERT(fAlloc.fMemory && fAlloc.fBackendMemory);
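    // Destroy the VkBuffer handle first, then release its backing allocation, clearing the
    // cached handles so this object cannot accidentally reuse them.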
    VULKAN_CALL(sharedContext->interface(),
                DestroyBuffer(sharedContext->device(), fBuffer, nullptr));
    fBuffer = VK_NULL_HANDLE;

    skgpu::VulkanMemory::FreeBufferMemory(sharedContext->memoryAllocator(), fAlloc);
    fAlloc.fMemory = VK_NULL_HANDLE;
    fAlloc.fBackendMemory = 0;
}

void VulkanBuffer::internalMap(size_t readOffset, size_t readSize) {
    SkASSERT(!fMapPtr);
    if (this->isMappable()) {
        // Not every buffer will use command buffer usage refs. Instead, the command buffer just
        // holds normal refs. Systems higher up in Graphite should be making sure not to reuse a
        // buffer that currently has a ref held by something else. However, we do need to make sure
        // there isn't a buffer with just a command buffer usage that is trying to be mapped.
#ifdef SK_DEBUG
        SkASSERT(!this->debugHasCommandBufferRef());
#endif
        SkASSERT(fAlloc.fSize > 0);
        SkASSERT(fAlloc.fSize >= readOffset + readSize);

        const VulkanSharedContext* sharedContext = this->vulkanSharedContext();

        auto allocator = sharedContext->memoryAllocator();
        auto checkResult = [sharedContext](VkResult result) {
            VULKAN_LOG_IF_NOT_SUCCESS(sharedContext, result, "skgpu::VulkanMemory::MapAlloc");
            return sharedContext->checkVkResult(result);
        };
        fMapPtr = skgpu::VulkanMemory::MapAlloc(allocator, fAlloc, checkResult);
        if (fMapPtr && readSize != 0) {
            auto checkResult_invalidate = [sharedContext, readOffset, readSize](VkResult result) {
                VULKAN_LOG_IF_NOT_SUCCESS(sharedContext,
                                          result,
                                          "skgpu::VulkanMemory::InvalidateMappedAlloc "
                                          "(readOffset:%zu, readSize:%zu)",
                                          readOffset,
                                          readSize);
                return sharedContext->checkVkResult(result);
            };
            // "Invalidate" here means make device writes visible to the host. That is, it makes
            // sure any GPU writes are finished in the range we might read from.
            skgpu::VulkanMemory::InvalidateMappedAlloc(allocator,
                                                       fAlloc,
                                                       readOffset,
                                                       readSize,
                                                       checkResult_invalidate);
        }
    }
}

void VulkanBuffer::internalUnmap(size_t flushOffset, size_t flushSize) {
    SkASSERT(fMapPtr && this->isMappable());

    SkASSERT(fAlloc.fSize > 0);
    SkASSERT(fAlloc.fSize >= flushOffset + flushSize);

    const VulkanSharedContext* sharedContext = this->vulkanSharedContext();
    auto checkResult = [sharedContext, flushOffset, flushSize](VkResult result) {
        VULKAN_LOG_IF_NOT_SUCCESS(sharedContext,
                                  result,
                                  "skgpu::VulkanMemory::FlushMappedAlloc "
                                  "(flushOffset:%zu, flushSize:%zu)",
                                  flushOffset,
                                  flushSize);
        return sharedContext->checkVkResult(result);
    };

    auto allocator = sharedContext->memoryAllocator();
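    // "Flush" is the counterpart of "invalidate" in internalMap: it makes any host writes in the
    // mapped range visible to the device before the allocation is unmapped.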
    skgpu::VulkanMemory::FlushMappedAlloc(allocator, fAlloc, flushOffset, flushSize, checkResult);
    skgpu::VulkanMemory::UnmapAlloc(allocator, fAlloc);
}

void VulkanBuffer::onMap() {
    SkASSERT(fBuffer);
    SkASSERT(!this->isMapped());

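    // Only GPU->CPU read-back buffers need their full range invalidated on map so that GPU
    // writes become visible to the CPU; other buffers pass a readSize of 0.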
    this->internalMap(0, fBufferUsedForCPURead ? this->size() : 0);
}

void VulkanBuffer::onUnmap() {
    SkASSERT(fBuffer);
    SkASSERT(this->isMapped());
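    // Read-back buffers have no host writes to flush; every other mappable buffer flushes its
    // full range so CPU writes become visible to the GPU.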
    this->internalUnmap(0, fBufferUsedForCPURead ? 0 : this->size());
}

void VulkanBuffer::setBufferAccess(VulkanCommandBuffer* cmdBuffer,
                                   VkAccessFlags dstAccessMask,
                                   VkPipelineStageFlags dstStageMask) const {
    // TODO: fill out other cases where we need a barrier
    if (dstAccessMask == VK_ACCESS_HOST_READ_BIT ||
        dstAccessMask == VK_ACCESS_TRANSFER_WRITE_BIT ||
        dstAccessMask == VK_ACCESS_UNIFORM_READ_BIT) {
        VkPipelineStageFlags srcStageMask =
                VulkanBuffer::AccessMaskToPipelineSrcStageFlags(fCurrentAccessMask);

        VkBufferMemoryBarrier bufferMemoryBarrier = {
                VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,  // sType
                nullptr,                                  // pNext
                fCurrentAccessMask,                       // srcAccessMask
                dstAccessMask,                            // dstAccessMask
                VK_QUEUE_FAMILY_IGNORED,                  // srcQueueFamilyIndex
                VK_QUEUE_FAMILY_IGNORED,                  // dstQueueFamilyIndex
                fBuffer,                                  // buffer
                0,                                        // offset
                this->size(),                             // size
        };
        cmdBuffer->addBufferMemoryBarrier(srcStageMask, dstStageMask, &bufferMemoryBarrier);
    }

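    // Record the new access mask so it can serve as the srcAccessMask of the next barrier.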
    fCurrentAccessMask = dstAccessMask;
}

VkPipelineStageFlags VulkanBuffer::AccessMaskToPipelineSrcStageFlags(const VkAccessFlags srcMask) {
    if (srcMask == 0) {
        return VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
    }
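    // Accumulate the union of pipeline stages that could have produced each access type present
    // in srcMask, so the returned source stage mask covers all prior work on this buffer.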
    VkPipelineStageFlags flags = 0;

    if (srcMask & VK_ACCESS_TRANSFER_WRITE_BIT || srcMask & VK_ACCESS_TRANSFER_READ_BIT) {
        flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
    }
    if (srcMask & VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT ||
        srcMask & VK_ACCESS_COLOR_ATTACHMENT_READ_BIT) {
        flags |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    }
    if (srcMask & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT ||
        srcMask & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT) {
        flags |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
    }
    if (srcMask & VK_ACCESS_INPUT_ATTACHMENT_READ_BIT) {
        flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
    }
    if (srcMask & VK_ACCESS_SHADER_READ_BIT ||
        srcMask & VK_ACCESS_UNIFORM_READ_BIT) {
        // TODO(b/307577875): It is possible that uniforms could have simply been used in the
        // vertex shader and not the fragment shader, so using the fragment shader pipeline stage
        // bit indiscriminately is a bit overkill. This call should be modified to check & allow
        // for selecting VK_PIPELINE_STAGE_VERTEX_SHADER_BIT when appropriate.
        flags |= (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
    }
    if (srcMask & VK_ACCESS_SHADER_WRITE_BIT) {
        flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    }
    if (srcMask & VK_ACCESS_INDEX_READ_BIT ||
        srcMask & VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT) {
        flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
    }
    if (srcMask & VK_ACCESS_INDIRECT_COMMAND_READ_BIT) {
        flags |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
    }
    if (srcMask & VK_ACCESS_HOST_READ_BIT || srcMask & VK_ACCESS_HOST_WRITE_BIT) {
        flags |= VK_PIPELINE_STAGE_HOST_BIT;
    }

    return flags;
}

}  // namespace skgpu::graphite