1 //
2 // Copyright 2016 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // VertexArrayVk.cpp:
7 // Implements the class methods for VertexArrayVk.
8 //
9
10 #include "libANGLE/renderer/vulkan/VertexArrayVk.h"
11
12 #include "common/debug.h"
13 #include "common/utilities.h"
14 #include "libANGLE/Context.h"
15 #include "libANGLE/renderer/vulkan/BufferVk.h"
16 #include "libANGLE/renderer/vulkan/ContextVk.h"
17 #include "libANGLE/renderer/vulkan/FramebufferVk.h"
18 #include "libANGLE/renderer/vulkan/vk_format_utils.h"
19 #include "libANGLE/renderer/vulkan/vk_renderer.h"
20 #include "libANGLE/renderer/vulkan/vk_resource.h"
21
22 namespace rx
23 {
24 namespace
25 {
26 constexpr int kStreamIndexBufferCachedIndexCount = 6;
27 constexpr int kMaxCachedStreamIndexBuffers = 4;
28 constexpr size_t kDefaultValueSize = sizeof(gl::VertexAttribCurrentValueData::Values);
29
BindingIsAligned(const angle::Format & angleFormat,VkDeviceSize offset,GLuint stride)30 ANGLE_INLINE bool BindingIsAligned(const angle::Format &angleFormat,
31 VkDeviceSize offset,
32 GLuint stride)
33 {
34 ASSERT(stride != 0);
35 GLuint mask = angleFormat.componentAlignmentMask;
36 if (mask != std::numeric_limits<GLuint>::max())
37 {
38 return ((offset & mask) == 0 && (stride & mask) == 0);
39 }
40 else
41 {
42 // To perform the GPU conversion for formats with components that aren't byte-aligned
43 // (for example, A2BGR10 or RGB10A2), one element has to be placed in 4 bytes to perform
44 // the compute shader. So, binding offset and stride has to be aligned to formatSize.
45 unsigned int formatSize = angleFormat.pixelBytes;
46 return (offset % formatSize == 0) && (stride % formatSize == 0);
47 }
48 }
49
ClientBindingAligned(const gl::VertexAttribute & attrib,GLuint stride,size_t alignment)50 ANGLE_INLINE bool ClientBindingAligned(const gl::VertexAttribute &attrib,
51 GLuint stride,
52 size_t alignment)
53 {
54 return reinterpret_cast<intptr_t>(attrib.pointer) % alignment == 0 && stride % alignment == 0;
55 }
56
ShouldCombineAttributes(vk::Renderer * renderer,const gl::VertexAttribute & attrib,const gl::VertexBinding & binding)57 bool ShouldCombineAttributes(vk::Renderer *renderer,
58 const gl::VertexAttribute &attrib,
59 const gl::VertexBinding &binding)
60 {
61 if (!renderer->getFeatures().enableMergeClientAttribBuffer.enabled)
62 {
63 return false;
64 }
65 const vk::Format &vertexFormat = renderer->getFormat(attrib.format->id);
66 return !vertexFormat.getVertexLoadRequiresConversion(false) && binding.getDivisor() == 0 &&
67 ClientBindingAligned(attrib, binding.getStride(),
68 vertexFormat.getVertexInputAlignment(false));
69 }
70
WarnOnVertexFormatConversion(ContextVk * contextVk,const vk::Format & vertexFormat,bool compressed,bool insertEventMarker)71 void WarnOnVertexFormatConversion(ContextVk *contextVk,
72 const vk::Format &vertexFormat,
73 bool compressed,
74 bool insertEventMarker)
75 {
76 if (!vertexFormat.getVertexLoadRequiresConversion(compressed))
77 {
78 return;
79 }
80
81 ANGLE_VK_PERF_WARNING(
82 contextVk, GL_DEBUG_SEVERITY_LOW,
83 "The Vulkan driver does not support vertex attribute format 0x%04X, emulating with 0x%04X",
84 vertexFormat.getIntendedFormat().glInternalFormat,
85 vertexFormat.getActualBufferFormat(compressed).glInternalFormat);
86 }
87
StreamVertexData(ContextVk * contextVk,vk::BufferHelper * dstBufferHelper,const uint8_t * srcData,size_t bytesToCopy,size_t dstOffset,size_t vertexCount,size_t srcStride,VertexCopyFunction vertexLoadFunction)88 angle::Result StreamVertexData(ContextVk *contextVk,
89 vk::BufferHelper *dstBufferHelper,
90 const uint8_t *srcData,
91 size_t bytesToCopy,
92 size_t dstOffset,
93 size_t vertexCount,
94 size_t srcStride,
95 VertexCopyFunction vertexLoadFunction)
96 {
97 vk::Renderer *renderer = contextVk->getRenderer();
98
99 // If the source pointer is null, it should not be accessed.
100 if (srcData == nullptr)
101 {
102 return angle::Result::Continue;
103 }
104
105 uint8_t *dst = dstBufferHelper->getMappedMemory() + dstOffset;
106
107 if (vertexLoadFunction != nullptr)
108 {
109 vertexLoadFunction(srcData, srcStride, vertexCount, dst);
110 }
111 else
112 {
113 memcpy(dst, srcData, bytesToCopy);
114 }
115
116 ANGLE_TRY(dstBufferHelper->flush(renderer));
117
118 return angle::Result::Continue;
119 }
120
// Streams instanced-attribute data from |srcData| into the already-mapped |dstBufferHelper|,
// emitting each source vertex |divisor| times (one converted vertex per instance).
// |bytesToAllocate| is the destination capacity; the copy is clamped so at most
// |numSrcVertices| source vertices are read.  The remaining tail is zero-filled when a
// robustness extension is enabled.
angle::Result StreamVertexDataWithDivisor(ContextVk *contextVk,
                                          vk::BufferHelper *dstBufferHelper,
                                          const uint8_t *srcData,
                                          size_t bytesToAllocate,
                                          size_t srcStride,
                                          size_t dstStride,
                                          VertexCopyFunction vertexLoadFunction,
                                          uint32_t divisor,
                                          size_t numSrcVertices)
{
    vk::Renderer *renderer = contextVk->getRenderer();

    uint8_t *dst = dstBufferHelper->getMappedMemory();

    // Each source vertex is used `divisor` times before advancing. Clamp to avoid OOB reads.
    size_t clampedSize = std::min(numSrcVertices * dstStride * divisor, bytesToAllocate);

    ASSERT(clampedSize % dstStride == 0);
    ASSERT(divisor > 0);

    // Convert one vertex at a time, re-reading the same source vertex until it has been
    // emitted |divisor| times, then advance to the next source vertex.
    uint32_t srcVertexUseCount = 0;
    for (size_t dataCopied = 0; dataCopied < clampedSize; dataCopied += dstStride)
    {
        vertexLoadFunction(srcData, srcStride, 1, dst);
        srcVertexUseCount++;
        if (srcVertexUseCount == divisor)
        {
            srcData += srcStride;
            srcVertexUseCount = 0;
        }
        dst += dstStride;
    }

    // Satisfy robustness constraints (only if extension enabled): zero the destination bytes
    // that received no vertex data so reads past the streamed range are well defined.
    if (contextVk->getExtensions().robustnessAny())
    {
        if (clampedSize < bytesToAllocate)
        {
            memset(dst, 0, bytesToAllocate - clampedSize);
        }
    }

    ANGLE_TRY(dstBufferHelper->flush(renderer));

    return angle::Result::Continue;
}
167
GetVertexCountForRange(GLint64 srcBufferBytes,uint32_t srcFormatSize,uint32_t srcVertexStride)168 size_t GetVertexCountForRange(GLint64 srcBufferBytes,
169 uint32_t srcFormatSize,
170 uint32_t srcVertexStride)
171 {
172 ASSERT(srcVertexStride != 0);
173 ASSERT(srcFormatSize != 0);
174
175 if (srcBufferBytes < srcFormatSize)
176 {
177 return 0;
178 }
179
180 size_t numVertices =
181 static_cast<size_t>(srcBufferBytes + srcVertexStride - 1) / srcVertexStride;
182 return numVertices;
183 }
184
GetVertexCount(BufferVk * srcBuffer,const gl::VertexBinding & binding,uint32_t srcFormatSize)185 size_t GetVertexCount(BufferVk *srcBuffer, const gl::VertexBinding &binding, uint32_t srcFormatSize)
186 {
187 // Bytes usable for vertex data.
188 GLint64 bytes = srcBuffer->getSize() - binding.getOffset();
189 GLuint stride = binding.getStride();
190 if (stride == 0)
191 {
192 stride = srcFormatSize;
193 }
194 return GetVertexCountForRange(bytes, srcFormatSize, stride);
195 }
196
CalculateMaxVertexCountForConversion(ContextVk * contextVk,BufferVk * srcBuffer,VertexConversionBuffer * conversion,const angle::Format & srcFormat,const angle::Format & dstFormat,size_t * maxNumVerticesOut)197 angle::Result CalculateMaxVertexCountForConversion(ContextVk *contextVk,
198 BufferVk *srcBuffer,
199 VertexConversionBuffer *conversion,
200 const angle::Format &srcFormat,
201 const angle::Format &dstFormat,
202 size_t *maxNumVerticesOut)
203 {
204 // Initialize numVertices to 0
205 *maxNumVerticesOut = 0;
206
207 unsigned srcFormatSize = srcFormat.pixelBytes;
208 unsigned dstFormatSize = dstFormat.pixelBytes;
209
210 uint32_t srcStride = conversion->getCacheKey().stride;
211 uint32_t dstStride = dstFormatSize;
212
213 ASSERT(srcStride != 0);
214 ASSERT(conversion->dirty());
215
216 // Start the range with the range from the the beginning of the buffer to the end of
217 // buffer. Then scissor it with the dirtyRange.
218 size_t srcOffset = conversion->getCacheKey().offset;
219 GLint64 srcLength = srcBuffer->getSize() - srcOffset;
220
221 // The max number of vertices from binding to the end of the buffer
222 size_t maxNumVertices = GetVertexCountForRange(srcLength, srcFormatSize, srcStride);
223 if (maxNumVertices == 0)
224 {
225 return angle::Result::Continue;
226 }
227
228 // Allocate buffer for results
229 vk::MemoryHostVisibility hostVisible = conversion->getCacheKey().hostVisible
230 ? vk::MemoryHostVisibility::Visible
231 : vk::MemoryHostVisibility::NonVisible;
232 ANGLE_TRY(contextVk->initBufferForVertexConversion(conversion, maxNumVertices * dstStride,
233 hostVisible));
234
235 // Calculate numVertices to convert
236 *maxNumVerticesOut = GetVertexCountForRange(srcLength, srcFormatSize, srcStride);
237
238 return angle::Result::Continue;
239 }
240
// Given a dirty byte range of the source buffer, computes the source offset, destination offset
// and vertex count that must be (re)converted to cover it.  The offsets are then walked
// backwards in whole vertices until the destination offset is 4-byte aligned, because the GPU
// convert path writes whole uint32_t values.
void CalculateOffsetAndVertexCountForDirtyRange(BufferVk *bufferVk,
                                                VertexConversionBuffer *conversion,
                                                const angle::Format &srcFormat,
                                                const angle::Format &dstFormat,
                                                const RangeDeviceSize &dirtyRange,
                                                uint32_t *srcOffsetOut,
                                                uint32_t *dstOffsetOut,
                                                uint32_t *numVerticesOut)
{
    ASSERT(!dirtyRange.empty());
    unsigned srcFormatSize = srcFormat.pixelBytes;
    unsigned dstFormatSize = dstFormat.pixelBytes;

    uint32_t srcStride = conversion->getCacheKey().stride;
    // The converted buffer is tightly packed: destination stride equals the format size.
    uint32_t dstStride = dstFormatSize;

    ASSERT(srcStride != 0);
    ASSERT(conversion->dirty());

    // Start the range with the range from the beginning of the buffer to the end of
    // buffer. Then scissor it with the dirtyRange.
    size_t srcOffset = conversion->getCacheKey().offset;
    size_t dstOffset = 0;

    GLint64 srcLength = bufferVk->getSize() - srcOffset;

    // Adjust offset to the beginning of the dirty range, advancing in whole vertices.
    if (dirtyRange.low() > srcOffset)
    {
        size_t vertexCountToSkip = (static_cast<size_t>(dirtyRange.low()) - srcOffset) / srcStride;
        size_t srcBytesToSkip = vertexCountToSkip * srcStride;
        size_t dstBytesToSkip = vertexCountToSkip * dstStride;
        srcOffset += srcBytesToSkip;
        srcLength -= srcBytesToSkip;
        dstOffset += dstBytesToSkip;
    }

    // Adjust dstOffset to align to 4 bytes. The GPU convert code path always write a uint32_t and
    // must aligned at 4 bytes. We could possibly make it able to store at unaligned uint32_t but
    // performance will be worse than just convert a few extra data.
    // Note: dstOffset is a multiple of dstStride here, so this loop terminates at 0 in the
    // worst case and never underflows.
    while ((dstOffset % 4) != 0)
    {
        dstOffset -= dstStride;
        srcOffset -= srcStride;
        srcLength += srcStride;
    }

    // Adjust length: clamp to the end of the dirty range when it ends before the buffer does.
    if (dirtyRange.high() < static_cast<VkDeviceSize>(bufferVk->getSize()))
    {
        srcLength = dirtyRange.high() - srcOffset;
    }

    // Calculate numVertices to convert
    size_t numVertices = GetVertexCountForRange(srcLength, srcFormatSize, srcStride);

    *numVerticesOut = static_cast<uint32_t>(numVertices);
    *srcOffsetOut = static_cast<uint32_t>(srcOffset);
    *dstOffsetOut = static_cast<uint32_t>(dstOffset);
}
301 } // anonymous namespace
302
// Constructor: binds every attribute slot to the context's shared "empty buffer" so each slot
// has a valid Vulkan buffer even before the application supplies data, and precomputes which
// dirty bits require a graphics pipeline update.
VertexArrayVk::VertexArrayVk(ContextVk *contextVk, const gl::VertexArrayState &state)
    : VertexArrayImpl(state),
      mCurrentArrayBufferHandles{},
      mCurrentArrayBufferOffsets{},
      mCurrentArrayBufferRelativeOffsets{},
      mCurrentArrayBuffers{},
      mCurrentArrayBufferStrides{},
      mCurrentArrayBufferDivisors{},
      mCurrentElementArrayBuffer(nullptr),
      mLineLoopHelper(contextVk->getRenderer()),
      mDirtyLineLoopTranslation(true)
{
    vk::BufferHelper &emptyBuffer = contextVk->getEmptyBuffer();

    mCurrentArrayBufferHandles.fill(emptyBuffer.getBuffer().getHandle());
    mCurrentArrayBufferOffsets.fill(0);
    mCurrentArrayBufferRelativeOffsets.fill(0);
    mCurrentArrayBuffers.fill(&emptyBuffer);
    mCurrentArrayBufferStrides.fill(0);
    mCurrentArrayBufferDivisors.fill(0);

    // Divisor changes always require a pipeline update; stride changes only do when the
    // dynamic vertex-input-binding-stride state is unavailable.
    mBindingDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_BINDING_DIVISOR);
    if (!contextVk->getFeatures().useVertexInputBindingStrideDynamicState.enabled)
    {
        mBindingDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_BINDING_STRIDE);
    }

    // All but DIRTY_ATTRIB_POINTER_BUFFER requires graphics pipeline update
    mAttribDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_ATTRIB_ENABLED);
    mAttribDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_ATTRIB_POINTER);
    mAttribDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_ATTRIB_FORMAT);
    mAttribDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_ATTRIB_BINDING);
}
336
~VertexArrayVk()337 VertexArrayVk::~VertexArrayVk() {}
338
// Releases all buffer resources owned by this vertex array back to the renderer.
void VertexArrayVk::destroy(const gl::Context *context)
{
    ContextVk *contextVk = vk::GetImpl(context);

    vk::Renderer *renderer = contextVk->getRenderer();

    // Small cached index buffers created by convertIndexBufferCPU.
    for (std::unique_ptr<vk::BufferHelper> &buffer : mCachedStreamIndexBuffers)
    {
        buffer->release(renderer);
    }

    mStreamedIndexData.release(renderer);
    mTranslatedByteIndexData.release(renderer);
    mTranslatedByteIndirectData.release(renderer);
    mLineLoopHelper.release(contextVk);
}
355
// Expands an unsigned-byte index buffer to unsigned-short on the GPU (unsigned bytes lack
// direct support in Vulkan — see convertIndexBufferCPU).  |indices| is the byte offset into
// |bufferVk| expressed as a pointer, per GL convention.  The translated buffer becomes
// mCurrentElementArrayBuffer.
angle::Result VertexArrayVk::convertIndexBufferGPU(ContextVk *contextVk,
                                                   BufferVk *bufferVk,
                                                   const void *indices)
{
    intptr_t offsetIntoSrcData = reinterpret_cast<intptr_t>(indices);
    size_t srcDataSize = static_cast<size_t>(bufferVk->getSize()) - offsetIntoSrcData;

    // Allocate buffer for results (each byte index becomes a GLushort).
    ANGLE_TRY(contextVk->initBufferForVertexConversion(&mTranslatedByteIndexData,
                                                       sizeof(GLushort) * srcDataSize,
                                                       vk::MemoryHostVisibility::NonVisible));
    mCurrentElementArrayBuffer = mTranslatedByteIndexData.getBuffer();

    vk::BufferHelper *dst = mTranslatedByteIndexData.getBuffer();
    vk::BufferHelper *src = &bufferVk->getBuffer();

    // Copy relevant section of the source into destination at allocated offset. Note that the
    // offset returned by allocate() above is in bytes. As is the indices offset pointer.
    UtilsVk::ConvertIndexParameters params = {};
    params.srcOffset = static_cast<uint32_t>(offsetIntoSrcData);
    params.dstOffset = 0;
    params.maxIndex = static_cast<uint32_t>(bufferVk->getSize());

    ANGLE_TRY(contextVk->getUtils().convertIndexBuffer(contextVk, dst, src, params));
    // The translated buffer now holds valid contents.
    mTranslatedByteIndexData.clearDirty();

    return angle::Result::Continue;
}
384
// Indirect-draw variant of convertIndexBufferGPU: expands the bound byte index buffer to
// GLushort and rewrites the indirect draw parameters to match.  The new index buffer becomes
// mCurrentElementArrayBuffer and the new indirect buffer is returned via |indirectBufferVkOut|.
angle::Result VertexArrayVk::convertIndexBufferIndirectGPU(ContextVk *contextVk,
                                                           vk::BufferHelper *srcIndirectBuf,
                                                           VkDeviceSize srcIndirectBufOffset,
                                                           vk::BufferHelper **indirectBufferVkOut)
{
    size_t srcDataSize = static_cast<size_t>(mCurrentElementArrayBuffer->getSize());
    ASSERT(mCurrentElementArrayBuffer ==
           &vk::GetImpl(getState().getElementArrayBuffer())->getBuffer());

    vk::BufferHelper *srcIndexBuf = mCurrentElementArrayBuffer;

    // Allocate buffer for results (each byte index becomes a GLushort).
    ANGLE_TRY(contextVk->initBufferForVertexConversion(&mTranslatedByteIndexData,
                                                       sizeof(GLushort) * srcDataSize,
                                                       vk::MemoryHostVisibility::NonVisible));
    vk::BufferHelper *dstIndexBuf = mTranslatedByteIndexData.getBuffer();

    // One translated VkDrawIndexedIndirectCommand for the rewritten draw.
    ANGLE_TRY(contextVk->initBufferForVertexConversion(&mTranslatedByteIndirectData,
                                                       sizeof(VkDrawIndexedIndirectCommand),
                                                       vk::MemoryHostVisibility::NonVisible));
    vk::BufferHelper *dstIndirectBuf = mTranslatedByteIndirectData.getBuffer();

    // Save new element array buffer
    mCurrentElementArrayBuffer = dstIndexBuf;
    // Tell caller what new indirect buffer is
    *indirectBufferVkOut = dstIndirectBuf;

    // Copy relevant section of the source into destination at allocated offset. Note that the
    // offset returned by allocate() above is in bytes. As is the indices offset pointer.
    UtilsVk::ConvertIndexIndirectParameters params = {};
    params.srcIndirectBufOffset = static_cast<uint32_t>(srcIndirectBufOffset);
    params.srcIndexBufOffset = 0;
    params.dstIndexBufOffset = 0;
    params.maxIndex = static_cast<uint32_t>(srcDataSize);
    params.dstIndirectBufOffset = 0;

    ANGLE_TRY(contextVk->getUtils().convertIndexIndirectBuffer(
        contextVk, srcIndirectBuf, srcIndexBuf, dstIndirectBuf, dstIndexBuf, params));

    // Both translated buffers now hold valid contents.
    mTranslatedByteIndexData.clearDirty();
    mTranslatedByteIndirectData.clearDirty();

    return angle::Result::Continue;
}
429
handleLineLoopIndexIndirect(ContextVk * contextVk,gl::DrawElementsType glIndexType,vk::BufferHelper * srcIndexBuffer,vk::BufferHelper * srcIndirectBuffer,VkDeviceSize indirectBufferOffset,vk::BufferHelper ** indexBufferOut,vk::BufferHelper ** indirectBufferOut)430 angle::Result VertexArrayVk::handleLineLoopIndexIndirect(ContextVk *contextVk,
431 gl::DrawElementsType glIndexType,
432 vk::BufferHelper *srcIndexBuffer,
433 vk::BufferHelper *srcIndirectBuffer,
434 VkDeviceSize indirectBufferOffset,
435 vk::BufferHelper **indexBufferOut,
436 vk::BufferHelper **indirectBufferOut)
437 {
438 return mLineLoopHelper.streamIndicesIndirect(contextVk, glIndexType, srcIndexBuffer,
439 srcIndirectBuffer, indirectBufferOffset,
440 indexBufferOut, indirectBufferOut);
441 }
442
handleLineLoopIndirectDraw(const gl::Context * context,vk::BufferHelper * indirectBufferVk,VkDeviceSize indirectBufferOffset,vk::BufferHelper ** indexBufferOut,vk::BufferHelper ** indirectBufferOut)443 angle::Result VertexArrayVk::handleLineLoopIndirectDraw(const gl::Context *context,
444 vk::BufferHelper *indirectBufferVk,
445 VkDeviceSize indirectBufferOffset,
446 vk::BufferHelper **indexBufferOut,
447 vk::BufferHelper **indirectBufferOut)
448 {
449 size_t maxVertexCount = 0;
450 ContextVk *contextVk = vk::GetImpl(context);
451 const gl::AttributesMask activeAttribs =
452 context->getStateCache().getActiveBufferedAttribsMask();
453
454 const auto &attribs = mState.getVertexAttributes();
455 const auto &bindings = mState.getVertexBindings();
456
457 for (size_t attribIndex : activeAttribs)
458 {
459 const gl::VertexAttribute &attrib = attribs[attribIndex];
460 ASSERT(attrib.enabled);
461 VkDeviceSize bufSize = getCurrentArrayBuffers()[attribIndex]->getSize();
462 const gl::VertexBinding &binding = bindings[attrib.bindingIndex];
463 size_t stride = binding.getStride();
464 size_t vertexCount = static_cast<size_t>(bufSize / stride);
465 if (vertexCount > maxVertexCount)
466 {
467 maxVertexCount = vertexCount;
468 }
469 }
470 ANGLE_TRY(mLineLoopHelper.streamArrayIndirect(contextVk, maxVertexCount + 1, indirectBufferVk,
471 indirectBufferOffset, indexBufferOut,
472 indirectBufferOut));
473
474 return angle::Result::Continue;
475 }
476
// Streams client-memory index data into an internal buffer on the CPU.  Unsigned-byte indices
// are expanded to GLushort (Vulkan lacks direct uint8 index support on this path), including
// primitive-restart value translation (0xFF -> 0xFFFF).  Six-index unsigned-short draws (the
// common two-triangle quad) are cached in a small pool of pre-created buffers to avoid
// re-streaming.  |*bindingDirty| reports whether the element array buffer binding changed.
angle::Result VertexArrayVk::convertIndexBufferCPU(ContextVk *contextVk,
                                                   gl::DrawElementsType indexType,
                                                   size_t indexCount,
                                                   const void *sourcePointer,
                                                   BufferBindingDirty *bindingDirty)
{
    // Only client-memory indices, or byte indices (which need expansion), reach this path.
    ASSERT(!mState.getElementArrayBuffer() || indexType == gl::DrawElementsType::UnsignedByte);
    vk::Renderer *renderer = contextVk->getRenderer();
    size_t elementSize = contextVk->getVkIndexTypeSize(indexType);
    const size_t amount = elementSize * indexCount;

    // Applications often time draw a quad with two triangles. This is try to catch all the
    // common used element array buffer with pre-created BufferHelper objects to improve
    // performance.
    if (indexCount == kStreamIndexBufferCachedIndexCount &&
        indexType == gl::DrawElementsType::UnsignedShort)
    {
        for (std::unique_ptr<vk::BufferHelper> &buffer : mCachedStreamIndexBuffers)
        {
            void *ptr = buffer->getMappedMemory();
            if (memcmp(sourcePointer, ptr, amount) == 0)
            {
                // Found a matching cached buffer, use the cached internal index buffer.
                *bindingDirty = mCurrentElementArrayBuffer == buffer.get()
                                    ? BufferBindingDirty::No
                                    : BufferBindingDirty::Yes;
                mCurrentElementArrayBuffer = buffer.get();
                return angle::Result::Continue;
            }
        }

        // If we still have capacity, cache this index buffer for future use.
        if (mCachedStreamIndexBuffers.size() < kMaxCachedStreamIndexBuffers)
        {
            std::unique_ptr<vk::BufferHelper> buffer = std::make_unique<vk::BufferHelper>();
            ANGLE_TRY(contextVk->initBufferAllocation(
                buffer.get(),
                renderer->getVertexConversionBufferMemoryTypeIndex(
                    vk::MemoryHostVisibility::Visible),
                amount, renderer->getVertexConversionBufferAlignment(), BufferUsageType::Static));
            memcpy(buffer->getMappedMemory(), sourcePointer, amount);
            ANGLE_TRY(buffer->flush(renderer));

            mCachedStreamIndexBuffers.push_back(std::move(buffer));

            *bindingDirty = BufferBindingDirty::Yes;
            mCurrentElementArrayBuffer = mCachedStreamIndexBuffers.back().get();
            return angle::Result::Continue;
        }
    }

    // Fall back to streaming into the shared conversion buffer.
    ANGLE_TRY(contextVk->initBufferForVertexConversion(&mStreamedIndexData, amount,
                                                       vk::MemoryHostVisibility::Visible));
    mCurrentElementArrayBuffer = mStreamedIndexData.getBuffer();
    GLubyte *dst = mCurrentElementArrayBuffer->getMappedMemory();
    *bindingDirty = BufferBindingDirty::Yes;

    if (contextVk->shouldConvertUint8VkIndexType(indexType))
    {
        // Unsigned bytes don't have direct support in Vulkan so we have to expand the
        // memory to a GLushort.
        const GLubyte *in = static_cast<const GLubyte *>(sourcePointer);
        GLushort *expandedDst = reinterpret_cast<GLushort *>(dst);
        bool primitiveRestart = contextVk->getState().isPrimitiveRestartEnabled();

        constexpr GLubyte kUnsignedByteRestartValue = 0xFF;
        constexpr GLushort kUnsignedShortRestartValue = 0xFFFF;

        if (primitiveRestart)
        {
            for (size_t index = 0; index < indexCount; index++)
            {
                GLushort value = static_cast<GLushort>(in[index]);
                if (in[index] == kUnsignedByteRestartValue)
                {
                    // Convert from 8-bit restart value to 16-bit restart value
                    value = kUnsignedShortRestartValue;
                }
                expandedDst[index] = value;
            }
        }
        else
        {
            // Fast path for common case.
            for (size_t index = 0; index < indexCount; index++)
            {
                expandedDst[index] = static_cast<GLushort>(in[index]);
            }
        }
    }
    else
    {
        // The primitive restart value is the same for OpenGL and Vulkan,
        // so there's no need to perform any conversion.
        memcpy(dst, sourcePointer, amount);
    }

    // The streamed buffer now holds valid contents.
    mStreamedIndexData.clearDirty();

    return mCurrentElementArrayBuffer->flush(contextVk->getRenderer());
}
578
// We assume the buffer is completely full of the same kind of data and convert
// and/or align it as we copy it to a buffer. The assumption could be wrong
// but the alternative of copying it piecemeal on each draw would have a lot more
// overhead.
//
// Converts |srcBuffer| from |srcFormat| to |dstFormat| on the GPU into |conversion|'s buffer.
// When the whole conversion is dirty a single dispatch covers the entire range; otherwise the
// dirty ranges are consolidated and converted individually (first range via |params|, the rest
// via |additionalOffsetVertexCounts|).
angle::Result VertexArrayVk::convertVertexBufferGPU(ContextVk *contextVk,
                                                    BufferVk *srcBuffer,
                                                    VertexConversionBuffer *conversion,
                                                    const angle::Format &srcFormat,
                                                    const angle::Format &dstFormat)
{
    uint32_t srcStride = conversion->getCacheKey().stride;
    // Stride must be a whole number of components for the GPU convert path.
    ASSERT(srcStride % (srcFormat.pixelBytes / srcFormat.channelCount) == 0);

    size_t maxNumVertices;
    ANGLE_TRY(CalculateMaxVertexCountForConversion(contextVk, srcBuffer, conversion, srcFormat,
                                                   dstFormat, &maxNumVertices));
    if (maxNumVertices == 0)
    {
        return angle::Result::Continue;
    }

    vk::BufferHelper *srcBufferHelper = &srcBuffer->getBuffer();
    vk::BufferHelper *dstBuffer = conversion->getBuffer();

    UtilsVk::OffsetAndVertexCounts additionalOffsetVertexCounts;

    UtilsVk::ConvertVertexParameters params;
    params.srcFormat = &srcFormat;
    params.dstFormat = &dstFormat;
    params.srcStride = srcStride;
    // vertexCount == 0 marks |params| as not-yet-populated below.
    params.vertexCount = 0;

    if (conversion->isEntireBufferDirty())
    {
        params.vertexCount = static_cast<uint32_t>(maxNumVertices);
        params.srcOffset = static_cast<uint32_t>(conversion->getCacheKey().offset);
        params.dstOffset = 0;
    }
    else
    {
        // dirtyRanges may overlap with each other. Try to do a quick merge to reduce the number of
        // dispatch calls as well as avoid redundant conversion in the overlapped area.
        conversion->consolidateDirtyRanges();

        const std::vector<RangeDeviceSize> &dirtyRanges = conversion->getDirtyBufferRanges();
        additionalOffsetVertexCounts.reserve(dirtyRanges.size());

        for (const RangeDeviceSize &dirtyRange : dirtyRanges)
        {
            if (dirtyRange.empty())
            {
                // consolidateDirtyRanges may end up with invalid range if it gets merged.
                continue;
            }

            uint32_t srcOffset, dstOffset, numVertices;
            CalculateOffsetAndVertexCountForDirtyRange(srcBuffer, conversion, srcFormat, dstFormat,
                                                       dirtyRange, &srcOffset, &dstOffset,
                                                       &numVertices);
            if (params.vertexCount == 0)
            {
                // First non-empty range goes in |params|.
                params.vertexCount = numVertices;
                params.srcOffset = srcOffset;
                params.dstOffset = dstOffset;
            }
            else
            {
                // Subsequent ranges are passed as additional offsets/counts.
                additionalOffsetVertexCounts.emplace_back();
                additionalOffsetVertexCounts.back().srcOffset = srcOffset;
                additionalOffsetVertexCounts.back().dstOffset = dstOffset;
                additionalOffsetVertexCounts.back().vertexCount = numVertices;
            }
        }
    }
    ANGLE_TRY(contextVk->getUtils().convertVertexBuffer(contextVk, dstBuffer, srcBufferHelper,
                                                        params, additionalOffsetVertexCounts));
    conversion->clearDirty();

    return angle::Result::Continue;
}
659
convertVertexBufferCPU(ContextVk * contextVk,BufferVk * srcBuffer,VertexConversionBuffer * conversion,const angle::Format & srcFormat,const angle::Format & dstFormat,const VertexCopyFunction vertexLoadFunction)660 angle::Result VertexArrayVk::convertVertexBufferCPU(ContextVk *contextVk,
661 BufferVk *srcBuffer,
662 VertexConversionBuffer *conversion,
663 const angle::Format &srcFormat,
664 const angle::Format &dstFormat,
665 const VertexCopyFunction vertexLoadFunction)
666 {
667 ANGLE_TRACE_EVENT0("gpu.angle", "VertexArrayVk::convertVertexBufferCpu");
668
669 size_t maxNumVertices;
670 ANGLE_TRY(CalculateMaxVertexCountForConversion(contextVk, srcBuffer, conversion, srcFormat,
671 dstFormat, &maxNumVertices));
672 if (maxNumVertices == 0)
673 {
674 return angle::Result::Continue;
675 }
676
677 uint8_t *src = nullptr;
678 ANGLE_TRY(srcBuffer->mapImpl(contextVk, GL_MAP_READ_BIT, reinterpret_cast<void **>(&src)));
679 uint32_t srcStride = conversion->getCacheKey().stride;
680
681 if (conversion->isEntireBufferDirty())
682 {
683 size_t srcOffset = conversion->getCacheKey().offset;
684 size_t dstOffset = 0;
685 const uint8_t *srcBytes = src + srcOffset;
686 size_t bytesToCopy = maxNumVertices * dstFormat.pixelBytes;
687 ANGLE_TRY(StreamVertexData(contextVk, conversion->getBuffer(), srcBytes, bytesToCopy,
688 dstOffset, maxNumVertices, srcStride, vertexLoadFunction));
689 }
690 else
691 {
692 // dirtyRanges may overlap with each other. Try to do a quick merge to avoid redundant
693 // conversion in the overlapped area.
694 conversion->consolidateDirtyRanges();
695
696 const std::vector<RangeDeviceSize> &dirtyRanges = conversion->getDirtyBufferRanges();
697 for (const RangeDeviceSize &dirtyRange : dirtyRanges)
698 {
699 if (dirtyRange.empty())
700 {
701 // consolidateDirtyRanges may end up with invalid range if it gets merged.
702 continue;
703 }
704
705 uint32_t srcOffset, dstOffset, numVertices;
706 CalculateOffsetAndVertexCountForDirtyRange(srcBuffer, conversion, srcFormat, dstFormat,
707 dirtyRange, &srcOffset, &dstOffset,
708 &numVertices);
709
710 if (numVertices > 0)
711 {
712 const uint8_t *srcBytes = src + srcOffset;
713 size_t bytesToCopy = maxNumVertices * dstFormat.pixelBytes;
714 ANGLE_TRY(StreamVertexData(contextVk, conversion->getBuffer(), srcBytes,
715 bytesToCopy, dstOffset, maxNumVertices, srcStride,
716 vertexLoadFunction));
717 }
718 }
719 }
720
721 conversion->clearDirty();
722 ANGLE_TRY(srcBuffer->unmapImpl(contextVk));
723
724 return angle::Result::Continue;
725 }
726
updateCurrentElementArrayBuffer()727 void VertexArrayVk::updateCurrentElementArrayBuffer()
728 {
729 ASSERT(mState.getElementArrayBuffer() != nullptr);
730 ASSERT(mState.getElementArrayBuffer()->getSize() > 0);
731
732 BufferVk *bufferVk = vk::GetImpl(mState.getElementArrayBuffer());
733 mCurrentElementArrayBuffer = &bufferVk->getBuffer();
734 }
735
syncState(const gl::Context * context,const gl::VertexArray::DirtyBits & dirtyBits,gl::VertexArray::DirtyAttribBitsArray * attribBits,gl::VertexArray::DirtyBindingBitsArray * bindingBits)736 angle::Result VertexArrayVk::syncState(const gl::Context *context,
737 const gl::VertexArray::DirtyBits &dirtyBits,
738 gl::VertexArray::DirtyAttribBitsArray *attribBits,
739 gl::VertexArray::DirtyBindingBitsArray *bindingBits)
740 {
741 ASSERT(dirtyBits.any());
742
743 ContextVk *contextVk = vk::GetImpl(context);
744 contextVk->getPerfCounters().vertexArraySyncStateCalls++;
745
746 const std::vector<gl::VertexAttribute> &attribs = mState.getVertexAttributes();
747 const std::vector<gl::VertexBinding> &bindings = mState.getVertexBindings();
748
749 for (auto iter = dirtyBits.begin(), endIter = dirtyBits.end(); iter != endIter; ++iter)
750 {
751 size_t dirtyBit = *iter;
752 switch (dirtyBit)
753 {
754 case gl::VertexArray::DIRTY_BIT_LOST_OBSERVATION:
755 {
756 // If vertex array was not observing while unbound, we need to check buffer's
757 // internal storage and take action if buffer storage has changed while not
758 // observing.
759 if (contextVk->getFeatures().compressVertexData.enabled ||
760 mContentsObservers->any())
761 {
762 // We may have lost buffer content change when it became non-current. In that
763 // case we always assume buffer has changed. If compressVertexData.enabled is
764 // true, it also depends on buffer usage which may have changed.
765 iter.setLaterBits(
766 gl::VertexArray::DirtyBits(mState.getBufferBindingMask().to_ulong()
767 << gl::VertexArray::DIRTY_BIT_BINDING_0));
768 }
769 else
770 {
771 for (size_t bindingIndex : mState.getBufferBindingMask())
772 {
773 const gl::Buffer *bufferGL = bindings[bindingIndex].getBuffer().get();
774 vk::BufferSerial bufferSerial = vk::GetImpl(bufferGL)->getBufferSerial();
775 for (size_t attribIndex : bindings[bindingIndex].getBoundAttributesMask())
776 {
777 if (attribs[attribIndex].enabled &&
778 (!bufferSerial.valid() ||
779 bufferSerial != mCurrentArrayBufferSerial[attribIndex]))
780 {
781 iter.setLaterBit(gl::VertexArray::DIRTY_BIT_BINDING_0 +
782 bindingIndex);
783 break;
784 }
785 }
786 }
787 }
788 break;
789 }
790
791 case gl::VertexArray::DIRTY_BIT_ELEMENT_ARRAY_BUFFER:
792 case gl::VertexArray::DIRTY_BIT_ELEMENT_ARRAY_BUFFER_DATA:
793 {
794 gl::Buffer *bufferGL = mState.getElementArrayBuffer();
795 if (bufferGL && bufferGL->getSize() > 0)
796 {
797 // Note that just updating buffer data may still result in a new
798 // vk::BufferHelper allocation.
799 updateCurrentElementArrayBuffer();
800 }
801 else
802 {
803 mCurrentElementArrayBuffer = nullptr;
804 }
805
806 mLineLoopBufferFirstIndex.reset();
807 mLineLoopBufferLastIndex.reset();
808 ANGLE_TRY(contextVk->onIndexBufferChange(mCurrentElementArrayBuffer));
809 mDirtyLineLoopTranslation = true;
810 break;
811 }
812
813 #define ANGLE_VERTEX_DIRTY_ATTRIB_FUNC(INDEX) \
814 case gl::VertexArray::DIRTY_BIT_ATTRIB_0 + INDEX: \
815 { \
816 gl::VertexArray::DirtyAttribBits dirtyAttribBitsRequiresPipelineUpdate = \
817 (*attribBits)[INDEX] & mAttribDirtyBitsRequiresPipelineUpdate; \
818 const bool bufferOnly = dirtyAttribBitsRequiresPipelineUpdate.none(); \
819 ANGLE_TRY(syncDirtyAttrib(contextVk, attribs[INDEX], \
820 bindings[attribs[INDEX].bindingIndex], INDEX, bufferOnly)); \
821 (*attribBits)[INDEX].reset(); \
822 break; \
823 }
824
825 ANGLE_VERTEX_INDEX_CASES(ANGLE_VERTEX_DIRTY_ATTRIB_FUNC)
826
827 // Since BINDING already implies DATA and ATTRIB change, we remove these here to avoid redundant
828 // processing.
829 #define ANGLE_VERTEX_DIRTY_BINDING_FUNC(INDEX) \
830 case gl::VertexArray::DIRTY_BIT_BINDING_0 + INDEX: \
831 { \
832 gl::VertexArray::DirtyBindingBits dirtyBindingBitsRequirePipelineUpdate = \
833 (*bindingBits)[INDEX] & mBindingDirtyBitsRequiresPipelineUpdate; \
834 \
835 for (size_t attribIndex : bindings[INDEX].getBoundAttributesMask()) \
836 { \
837 gl::VertexArray::DirtyAttribBits dirtyAttribBitsRequiresPipelineUpdate = \
838 (*attribBits)[attribIndex] & mAttribDirtyBitsRequiresPipelineUpdate; \
839 const bool bufferOnly = dirtyBindingBitsRequirePipelineUpdate.none() && \
840 dirtyAttribBitsRequiresPipelineUpdate.none(); \
841 ANGLE_TRY(syncDirtyAttrib(contextVk, attribs[attribIndex], bindings[INDEX], \
842 attribIndex, bufferOnly)); \
843 iter.resetLaterBit(gl::VertexArray::DIRTY_BIT_BUFFER_DATA_0 + attribIndex); \
844 iter.resetLaterBit(gl::VertexArray::DIRTY_BIT_ATTRIB_0 + attribIndex); \
845 (*attribBits)[attribIndex].reset(); \
846 } \
847 (*bindingBits)[INDEX].reset(); \
848 break; \
849 }
850
851 ANGLE_VERTEX_INDEX_CASES(ANGLE_VERTEX_DIRTY_BINDING_FUNC)
852
853 #define ANGLE_VERTEX_DIRTY_BUFFER_DATA_FUNC(INDEX) \
854 case gl::VertexArray::DIRTY_BIT_BUFFER_DATA_0 + INDEX: \
855 ANGLE_TRY(syncDirtyAttrib(contextVk, attribs[INDEX], \
856 bindings[attribs[INDEX].bindingIndex], INDEX, false)); \
857 iter.resetLaterBit(gl::VertexArray::DIRTY_BIT_ATTRIB_0 + INDEX); \
858 (*attribBits)[INDEX].reset(); \
859 break;
860
861 ANGLE_VERTEX_INDEX_CASES(ANGLE_VERTEX_DIRTY_BUFFER_DATA_FUNC)
862
863 default:
864 UNREACHABLE();
865 break;
866 }
867 }
868
869 return angle::Result::Continue;
}
871
872 #undef ANGLE_VERTEX_DIRTY_ATTRIB_FUNC
873 #undef ANGLE_VERTEX_DIRTY_BINDING_FUNC
874 #undef ANGLE_VERTEX_DIRTY_BUFFER_DATA_FUNC
875
setDefaultPackedInput(ContextVk * contextVk,size_t attribIndex,angle::FormatID * formatOut)876 ANGLE_INLINE angle::Result VertexArrayVk::setDefaultPackedInput(ContextVk *contextVk,
877 size_t attribIndex,
878 angle::FormatID *formatOut)
879 {
880 const gl::State &glState = contextVk->getState();
881 const gl::VertexAttribCurrentValueData &defaultValue =
882 glState.getVertexAttribCurrentValues()[attribIndex];
883
884 *formatOut = GetCurrentValueFormatID(defaultValue.Type);
885
886 return contextVk->onVertexAttributeChange(attribIndex, 0, 0, *formatOut, false, 0, nullptr);
887 }
888
updateActiveAttribInfo(ContextVk * contextVk)889 angle::Result VertexArrayVk::updateActiveAttribInfo(ContextVk *contextVk)
890 {
891 const std::vector<gl::VertexAttribute> &attribs = mState.getVertexAttributes();
892 const std::vector<gl::VertexBinding> &bindings = mState.getVertexBindings();
893
894 // Update pipeline cache with current active attribute info
895 for (size_t attribIndex : mState.getEnabledAttributesMask())
896 {
897 const gl::VertexAttribute &attrib = attribs[attribIndex];
898 const gl::VertexBinding &binding = bindings[attribs[attribIndex].bindingIndex];
899 const angle::FormatID format = attrib.format->id;
900
901 ANGLE_TRY(contextVk->onVertexAttributeChange(
902 attribIndex, mCurrentArrayBufferStrides[attribIndex], binding.getDivisor(), format,
903 mCurrentArrayBufferCompressed.test(attribIndex),
904 mCurrentArrayBufferRelativeOffsets[attribIndex], mCurrentArrayBuffers[attribIndex]));
905
906 mCurrentArrayBufferFormats[attribIndex] = format;
907 }
908
909 return angle::Result::Continue;
910 }
911
// Synchronizes one dirty vertex attribute into the Vulkan-facing cached arrays
// (mCurrentArrayBuffer*).  Depending on the attribute's state this either:
//   - uses the GL buffer directly,
//   - converts the buffer contents (GPU or CPU path) when the format or alignment requires it,
//   - marks the attribute for streaming (client arrays or emulated divisors), or
//   - falls back to the default-value path when the attribute is disabled.
// When |bufferOnly| is true only the buffer binding changed, so a cheaper pipeline-preserving
// notification is used — unless the sync discovers the change actually affects vertex input
// state, in which case it clears |bufferOnly| itself.
angle::Result VertexArrayVk::syncDirtyAttrib(ContextVk *contextVk,
                                             const gl::VertexAttribute &attrib,
                                             const gl::VertexBinding &binding,
                                             size_t attribIndex,
                                             bool bufferOnly)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    if (attrib.enabled)
    {
        const vk::Format &vertexFormat = renderer->getFormat(attrib.format->id);

        // Init attribute offset to the front-end value
        mCurrentArrayBufferRelativeOffsets[attribIndex] = attrib.relativeOffset;
        gl::Buffer *bufferGL = binding.getBuffer().get();
        // Emulated and/or client-side attribs will be streamed
        bool isStreamingVertexAttrib =
            (binding.getDivisor() > renderer->getMaxVertexAttribDivisor()) || (bufferGL == nullptr);
        // If we are switching between streaming and buffer mode, set bufferOnly to false since we
        // are actually changing the buffer.
        if (bufferOnly && isStreamingVertexAttrib != mStreamingVertexAttribsMask.test(attribIndex))
        {
            bufferOnly = false;
        }
        mStreamingVertexAttribsMask.set(attribIndex, isStreamingVertexAttrib);
        bool compressed = false;

        if (bufferGL)
        {
            // Stop observing buffer content changes until we know a conversion buffer is needed.
            mContentsObservers->disableForBuffer(bufferGL, static_cast<uint32_t>(attribIndex));
        }

        // Note: when not streaming, bufferGL is guaranteed non-null (see isStreamingVertexAttrib).
        if (!isStreamingVertexAttrib && bufferGL->getSize() > 0)
        {
            BufferVk *bufferVk = vk::GetImpl(bufferGL);
            const angle::Format &srcFormat = vertexFormat.getIntendedFormat();
            unsigned srcFormatSize = srcFormat.pixelBytes;
            // A zero GL stride means tightly packed; use the source format size instead.
            uint32_t srcStride = binding.getStride() == 0 ? srcFormatSize : binding.getStride();
            size_t numVertices = GetVertexCount(bufferVk, binding, srcFormatSize);
            bool bindingIsAligned =
                BindingIsAligned(srcFormat, binding.getOffset() + attrib.relativeOffset, srcStride);

            // Static buffers may optionally be compressed to a smaller format to save memory.
            if (renderer->getFeatures().compressVertexData.enabled &&
                gl::IsStaticBufferUsage(bufferGL->getUsage()) &&
                vertexFormat.canCompressBufferData())
            {
                compressed = true;
            }

            // Conversion is needed if the format cannot be loaded by Vulkan directly or the
            // binding is misaligned for the format's components.
            bool needsConversion =
                numVertices > 0 &&
                (vertexFormat.getVertexLoadRequiresConversion(compressed) || !bindingIsAligned);

            if (needsConversion)
            {
                const angle::Format &dstFormat = vertexFormat.getActualBufferFormat(compressed);
                // Converted buffer is tightly packed
                uint32_t dstStride = dstFormat.pixelBytes;

                ASSERT(vertexFormat.getVertexInputAlignment(compressed) <=
                       vk::kVertexBufferAlignment);

                // We keep a converted copy, so we must be told when the source buffer changes.
                mContentsObservers->enableForBuffer(bufferGL, static_cast<uint32_t>(attribIndex));

                WarnOnVertexFormatConversion(contextVk, vertexFormat, compressed, true);

                const VertexConversionBuffer::CacheKey cacheKey{
                    srcFormat.id, srcStride,
                    static_cast<size_t>(binding.getOffset()) + attrib.relativeOffset,
                    !bindingIsAligned, false};

                VertexConversionBuffer *conversion =
                    bufferVk->getVertexConversionBuffer(renderer, cacheKey);

                // Converted attribs are packed in their own VK buffer so offset is relative to the
                // binding and conversion's offset. The conversion buffer tries to reuse the
                // existing buffer as much as possible to reduce the amount of data that has to be
                // converted. When binding's offset changes, it will check if the new offset and
                // the existing buffer's offset are a multiple of strides apart. If yes it will
                // reuse. If the new offset is larger, all existing data are still valid. If the
                // new offset is smaller it will mark the newly exposed range dirty and then rely
                // on ContextVk::initBufferForVertexConversion to decide whether buffer's size is
                // big enough or not and reallocate (and mark entire buffer dirty) if needed.
                //
                // bufferVk:-----------------------------------------------------------------------
                //          |           |
                //          |           binding.offset + attrib.relativeOffset.
                //          conversion->getCacheKey().offset
                //
                // conversion.buffer: --------------------------------------------------------------
                //                                |
                //                                dstRelativeOffset
                size_t srcRelativeOffset =
                    binding.getOffset() + attrib.relativeOffset - conversion->getCacheKey().offset;
                size_t numberOfVerticesToSkip = srcRelativeOffset / srcStride;
                size_t dstRelativeOffset = numberOfVerticesToSkip * dstStride;

                if (conversion->dirty())
                {
                    if (compressed)
                    {
                        INFO() << "Compressing vertex data in buffer " << bufferGL->id().value
                               << " from " << ToUnderlying(srcFormat.id) << " to "
                               << ToUnderlying(dstFormat.id) << ".";
                    }

                    if (bindingIsAligned)
                    {
                        // Aligned data can be converted on the GPU with a compute pass.
                        ANGLE_TRY(convertVertexBufferGPU(contextVk, bufferVk, conversion, srcFormat,
                                                         dstFormat));
                    }
                    else
                    {
                        ANGLE_VK_PERF_WARNING(
                            contextVk, GL_DEBUG_SEVERITY_HIGH,
                            "GPU stall due to vertex format conversion of unaligned data");

                        // Unaligned data must be read back and converted on the CPU.
                        ANGLE_TRY(convertVertexBufferCPU(
                            contextVk, bufferVk, conversion, srcFormat, dstFormat,
                            vertexFormat.getVertexLoadFunction(compressed)));
                    }

                    // If conversion happens, the destination buffer stride may be changed,
                    // therefore an attribute change needs to be called. Note that it may trigger
                    // unnecessary vulkan PSO update when the destination buffer stride does not
                    // change, but for simplicity just make it conservative
                    bufferOnly = false;
                }

                vk::BufferHelper *bufferHelper = conversion->getBuffer();
                mCurrentArrayBuffers[attribIndex] = bufferHelper;
                mCurrentArrayBufferSerial[attribIndex] = bufferHelper->getBufferSerial();
                VkDeviceSize bufferOffset;
                mCurrentArrayBufferHandles[attribIndex] =
                    bufferHelper
                        ->getBufferForVertexArray(contextVk, bufferHelper->getSize(), &bufferOffset)
                        .getHandle();
                ASSERT(BindingIsAligned(dstFormat, bufferOffset + dstRelativeOffset, dstStride));
                mCurrentArrayBufferOffsets[attribIndex] = bufferOffset + dstRelativeOffset;
                // The conversion output is tightly packed, so the relative offset is consumed.
                mCurrentArrayBufferRelativeOffsets[attribIndex] = 0;
                mCurrentArrayBufferStrides[attribIndex] = dstStride;
            }
            else
            {
                if (numVertices == 0)
                {
                    // No vertices to draw from this binding; point at the shared empty buffer so
                    // the Vulkan binding stays valid.
                    vk::BufferHelper &emptyBuffer = contextVk->getEmptyBuffer();

                    mCurrentArrayBuffers[attribIndex] = &emptyBuffer;
                    mCurrentArrayBufferSerial[attribIndex] = emptyBuffer.getBufferSerial();
                    mCurrentArrayBufferHandles[attribIndex] = emptyBuffer.getBuffer().getHandle();
                    mCurrentArrayBufferOffsets[attribIndex] = emptyBuffer.getOffset();
                    mCurrentArrayBufferStrides[attribIndex] = 0;
                }
                else
                {
                    // The GL buffer can be used directly without conversion.
                    vk::BufferHelper &bufferHelper = bufferVk->getBuffer();
                    mCurrentArrayBuffers[attribIndex] = &bufferHelper;
                    mCurrentArrayBufferSerial[attribIndex] = bufferHelper.getBufferSerial();
                    VkDeviceSize bufferOffset;
                    mCurrentArrayBufferHandles[attribIndex] =
                        bufferHelper
                            .getBufferForVertexArray(contextVk, bufferVk->getSize(), &bufferOffset)
                            .getHandle();

                    // Vulkan requires the offset is within the buffer. We use robust access
                    // behaviour to reset the offset if it starts outside the buffer.
                    mCurrentArrayBufferOffsets[attribIndex] =
                        binding.getOffset() < static_cast<GLint64>(bufferVk->getSize())
                            ? binding.getOffset() + bufferOffset
                            : bufferOffset;

                    mCurrentArrayBufferStrides[attribIndex] = binding.getStride();
                }
            }
        }
        else
        {
            // Streaming attribute (client array or emulated divisor): the real data is uploaded
            // later by updateStreamedAttribs; bind the empty buffer for now.
            vk::BufferHelper &emptyBuffer = contextVk->getEmptyBuffer();
            mCurrentArrayBuffers[attribIndex] = &emptyBuffer;
            mCurrentArrayBufferSerial[attribIndex] = emptyBuffer.getBufferSerial();
            mCurrentArrayBufferHandles[attribIndex] = emptyBuffer.getBuffer().getHandle();
            mCurrentArrayBufferOffsets[attribIndex] = emptyBuffer.getOffset();

            // Combined (interleaved) client attribs keep the app's stride; otherwise the streamed
            // copy will be tightly packed in the destination format.
            bool combined = ShouldCombineAttributes(renderer, attrib, binding);
            mCurrentArrayBufferStrides[attribIndex] =
                combined ? binding.getStride()
                         : vertexFormat.getActualBufferFormat(compressed).pixelBytes;
        }

        if (bufferOnly)
        {
            // Only the buffer binding changed; no pipeline (vertex input) update is required.
            ANGLE_TRY(contextVk->onVertexBufferChange(mCurrentArrayBuffers[attribIndex]));
        }
        else
        {
            const angle::FormatID format = attrib.format->id;
            ANGLE_TRY(contextVk->onVertexAttributeChange(
                attribIndex, mCurrentArrayBufferStrides[attribIndex], binding.getDivisor(), format,
                compressed, mCurrentArrayBufferRelativeOffsets[attribIndex],
                mCurrentArrayBuffers[attribIndex]));

            mCurrentArrayBufferFormats[attribIndex] = format;
            mCurrentArrayBufferCompressed[attribIndex] = compressed;
            mCurrentArrayBufferDivisors[attribIndex] = binding.getDivisor();
        }
    }
    else
    {
        // Disabled attribute: use the current (default) attribute value instead.
        contextVk->invalidateDefaultAttribute(attribIndex);

        // These will be filled out by the ContextVk.
        vk::BufferHelper &emptyBuffer = contextVk->getEmptyBuffer();
        mCurrentArrayBuffers[attribIndex] = &emptyBuffer;
        mCurrentArrayBufferSerial[attribIndex] = emptyBuffer.getBufferSerial();
        mCurrentArrayBufferHandles[attribIndex] = emptyBuffer.getBuffer().getHandle();
        mCurrentArrayBufferOffsets[attribIndex] = emptyBuffer.getOffset();
        mCurrentArrayBufferStrides[attribIndex] = 0;
        mCurrentArrayBufferDivisors[attribIndex] = 0;
        mCurrentArrayBufferCompressed[attribIndex] = false;
        mCurrentArrayBufferRelativeOffsets[attribIndex] = 0;

        ANGLE_TRY(setDefaultPackedInput(contextVk, attribIndex,
                                        &mCurrentArrayBufferFormats[attribIndex]));
    }

    return angle::Result::Continue;
}
1139
// Computes, for each active streamed (client-memory) attribute, the byte range of client memory
// that must be uploaded, and merges overlapping ranges of "combinable" (interleaved) attributes
// so they can share a single streamed buffer copy.
//
// Outputs:
//   mergeRangesOut[i]  - the (possibly merged) address range for attribute i.
//   mergedIndexesOut[i] - the representative attribute index whose buffer attribute i will use
//                         (i itself when not merged).
// Returns the mask of attributes that participated in merging.
gl::AttributesMask VertexArrayVk::mergeClientAttribsRange(
    vk::Renderer *renderer,
    const gl::AttributesMask activeStreamedAttribs,
    size_t startVertex,
    size_t endVertex,
    std::array<AttributeRange, gl::MAX_VERTEX_ATTRIBS> &mergeRangesOut,
    std::array<size_t, gl::MAX_VERTEX_ATTRIBS> &mergedIndexesOut) const
{
    const std::vector<gl::VertexAttribute> &attribs = mState.getVertexAttributes();
    const std::vector<gl::VertexBinding> &bindings = mState.getVertexBindings();
    gl::AttributesMask attributeMaskCanCombine;
    angle::FixedVector<size_t, gl::MAX_VERTEX_ATTRIBS> combinedIndexes;
    for (size_t attribIndex : activeStreamedAttribs)
    {
        const gl::VertexAttribute &attrib = attribs[attribIndex];
        ASSERT(attrib.enabled);
        const gl::VertexBinding &binding = bindings[attrib.bindingIndex];
        const vk::Format &vertexFormat = renderer->getFormat(attrib.format->id);
        bool combined = ShouldCombineAttributes(renderer, attrib, binding);
        attributeMaskCanCombine.set(attribIndex, combined);
        if (combined)
        {
            combinedIndexes.push_back(attribIndex);
        }
        GLuint pixelBytes = vertexFormat.getActualBufferFormat(false).pixelBytes;
        // Combined attribs keep the app's stride (interleaved copy); otherwise the streamed
        // destination is tightly packed.
        size_t destStride = combined ? binding.getStride() : pixelBytes;
        uintptr_t startAddress = reinterpret_cast<uintptr_t>(attrib.pointer);
        mergeRangesOut[attribIndex].startAddr = startAddress;
        // End address covers the last vertex's full element.
        mergeRangesOut[attribIndex].endAddr =
            startAddress + (endVertex - 1) * destStride + pixelBytes;
        // Copy starts at the first vertex actually drawn, not at the attrib pointer.
        mergeRangesOut[attribIndex].copyStartAddr =
            startAddress + startVertex * binding.getStride();
        mergedIndexesOut[attribIndex] = attribIndex;
    }
    if (attributeMaskCanCombine.none())
    {
        return attributeMaskCanCombine;
    }
    auto comp = [&mergeRangesOut](size_t a, size_t b) -> bool {
        return mergeRangesOut[a] < mergeRangesOut[b];
    };
    // Only sort combined range indexes.
    std::sort(combinedIndexes.begin(), combinedIndexes.end(), comp);
    // Merge combined range span.
    auto next = combinedIndexes.begin();
    auto cur = next++;
    while (next != combinedIndexes.end() || (cur != next))
    {
        // Cur and next overlaps: merge next into cur and move next.
        if (next != combinedIndexes.end() &&
            mergeRangesOut[*cur].endAddr >= mergeRangesOut[*next].startAddr)
        {
            mergeRangesOut[*cur].endAddr =
                std::max(mergeRangesOut[*cur].endAddr, mergeRangesOut[*next].endAddr);
            mergeRangesOut[*cur].copyStartAddr =
                std::min(mergeRangesOut[*cur].copyStartAddr, mergeRangesOut[*next].copyStartAddr);
            // next now shares cur's merged range/buffer.
            mergedIndexesOut[*next] = mergedIndexesOut[*cur];
            ++next;
        }
        else
        {
            // No overlap: advance cur; propagate the merged range to the new cur position so
            // every merged member carries the full merged range.
            ++cur;
            if (cur != next)
            {
                mergeRangesOut[*cur] = mergeRangesOut[*(cur - 1)];
            }
            else if (next != combinedIndexes.end())
            {
                ++next;
            }
        }
    }
    return attributeMaskCanCombine;
}
1214
1215 // Handle copying client attribs and/or expanding attrib buffer in case where attribute
1216 // divisor value has to be emulated.
// Uploads client-memory attributes, and expands buffered attributes whose divisor exceeds the
// device limit, into per-draw streamed Vulkan buffers.  Overlapping interleaved client ranges
// (computed by mergeClientAttribsRange) share a single streamed buffer.  On success the
// mCurrentArrayBuffer* caches point at the freshly streamed data.
angle::Result VertexArrayVk::updateStreamedAttribs(const gl::Context *context,
                                                   GLint firstVertex,
                                                   GLsizei vertexOrIndexCount,
                                                   GLsizei instanceCount,
                                                   gl::DrawElementsType indexTypeOrInvalid,
                                                   const void *indices)
{
    ContextVk *contextVk = vk::GetImpl(context);
    vk::Renderer *renderer = contextVk->getRenderer();

    const gl::AttributesMask activeAttribs =
        context->getStateCache().getActiveClientAttribsMask() |
        context->getStateCache().getActiveBufferedAttribsMask();
    const gl::AttributesMask activeStreamedAttribs = mStreamingVertexAttribsMask & activeAttribs;

    // Early return for corner case where emulated buffered attribs are not active
    if (!activeStreamedAttribs.any())
    {
        return angle::Result::Continue;
    }

    // For indexed draws this scans the index data to find the vertex range actually referenced.
    GLint startVertex;
    size_t vertexCount;
    ANGLE_TRY(GetVertexRangeInfo(context, firstVertex, vertexOrIndexCount, indexTypeOrInvalid,
                                 indices, 0, &startVertex, &vertexCount));

    ASSERT(vertexCount > 0);
    const auto &attribs = mState.getVertexAttributes();
    const auto &bindings = mState.getVertexBindings();

    std::array<size_t, gl::MAX_VERTEX_ATTRIBS> mergedIndexes;
    std::array<AttributeRange, gl::MAX_VERTEX_ATTRIBS> mergeRanges;
    // One streamed buffer per merged range; nullptr until allocated by the first member.
    std::array<vk::BufferHelper *, gl::MAX_VERTEX_ATTRIBS> attribBufferHelper = {};
    auto mergeAttribMask =
        mergeClientAttribsRange(renderer, activeStreamedAttribs, startVertex,
                                startVertex + vertexCount, mergeRanges, mergedIndexes);

    for (size_t attribIndex : activeStreamedAttribs)
    {
        const gl::VertexAttribute &attrib = attribs[attribIndex];
        ASSERT(attrib.enabled);
        const gl::VertexBinding &binding = bindings[attrib.bindingIndex];

        const vk::Format &vertexFormat = renderer->getFormat(attrib.format->id);
        const angle::Format &dstFormat = vertexFormat.getActualBufferFormat(false);
        GLuint pixelBytes = dstFormat.pixelBytes;

        // Streamed data is never compressed.
        const bool compressed = false;
        ASSERT(vertexFormat.getVertexInputAlignment(false) <= vk::kVertexBufferAlignment);

        vk::BufferHelper *vertexDataBuffer = nullptr;
        const uint8_t *src = static_cast<const uint8_t *>(attrib.pointer);
        const uint32_t divisor = binding.getDivisor();

        // Combined (interleaved) attribs keep the app stride; otherwise pack tightly.
        bool combined = mergeAttribMask.test(attribIndex);
        GLuint stride = combined ? binding.getStride() : pixelBytes;
        VkDeviceSize startOffset = 0;
        if (divisor > 0)
        {
            // Instanced attrib
            if (divisor > renderer->getMaxVertexAttribDivisor())
            {
                // Divisor will be set to 1 & so update buffer to have 1 attrib per instance
                size_t bytesToAllocate = instanceCount * stride;

                // Allocate buffer for results
                ANGLE_TRY(contextVk->allocateStreamedVertexBuffer(attribIndex, bytesToAllocate,
                                                                  &vertexDataBuffer));

                gl::Buffer *bufferGL = binding.getBuffer().get();
                if (bufferGL != nullptr)
                {
                    // Only do the data copy if src buffer is valid.
                    if (bufferGL->getSize() > 0)
                    {
                        // Map buffer to expand attribs for divisor emulation
                        BufferVk *bufferVk = vk::GetImpl(binding.getBuffer().get());
                        void *buffSrc = nullptr;
                        ANGLE_TRY(bufferVk->mapImpl(contextVk, GL_MAP_READ_BIT, &buffSrc));
                        src = reinterpret_cast<const uint8_t *>(buffSrc) + binding.getOffset();

                        uint32_t srcAttributeSize =
                            static_cast<uint32_t>(ComputeVertexAttributeTypeSize(attrib));

                        size_t numVertices = GetVertexCount(bufferVk, binding, srcAttributeSize);

                        // Replicate each source element |divisor| times so the attribute can be
                        // bound with an effective divisor of 1.
                        ANGLE_TRY(StreamVertexDataWithDivisor(
                            contextVk, vertexDataBuffer, src, bytesToAllocate, binding.getStride(),
                            stride, vertexFormat.getVertexLoadFunction(compressed), divisor,
                            numVertices));

                        ANGLE_TRY(bufferVk->unmapImpl(contextVk));
                    }
                    else if (contextVk->getExtensions().robustnessAny())
                    {
                        // Satisfy robustness constraints (only if extension enabled)
                        uint8_t *dst = vertexDataBuffer->getMappedMemory();
                        memset(dst, 0, bytesToAllocate);
                    }
                }
                else
                {
                    // Client-memory source: expand directly from the app pointer.
                    size_t numVertices = instanceCount;
                    ANGLE_TRY(StreamVertexDataWithDivisor(
                        contextVk, vertexDataBuffer, src, bytesToAllocate, binding.getStride(),
                        stride, vertexFormat.getVertexLoadFunction(compressed), divisor,
                        numVertices));
                }
            }
            else
            {
                // Divisor is supported natively; only client arrays reach this path.
                ASSERT(binding.getBuffer().get() == nullptr);
                size_t count = UnsignedCeilDivide(instanceCount, divisor);
                size_t bytesToAllocate = count * stride;

                // Allocate buffer for results
                ANGLE_TRY(contextVk->allocateStreamedVertexBuffer(attribIndex, bytesToAllocate,
                                                                  &vertexDataBuffer));

                ANGLE_TRY(StreamVertexData(contextVk, vertexDataBuffer, src, bytesToAllocate, 0,
                                           count, binding.getStride(),
                                           vertexFormat.getVertexLoadFunction(compressed)));
            }
        }
        else
        {
            // Non-instanced client attribute; may share a buffer with merged siblings.
            ASSERT(binding.getBuffer().get() == nullptr);
            size_t mergedAttribIdx = mergedIndexes[attribIndex];
            const AttributeRange &range = mergeRanges[attribIndex];
            // Allocate and fill the shared buffer only once per merged group.
            if (attribBufferHelper[mergedAttribIdx] == nullptr)
            {
                size_t destOffset =
                    combined ? range.copyStartAddr - range.startAddr : startVertex * stride;
                size_t bytesToAllocate = range.endAddr - range.startAddr;
                ANGLE_TRY(contextVk->allocateStreamedVertexBuffer(
                    mergedAttribIdx, bytesToAllocate, &attribBufferHelper[mergedAttribIdx]));
                // A null load function means a raw interleaved memcpy of the merged range.
                ANGLE_TRY(StreamVertexData(
                    contextVk, attribBufferHelper[mergedAttribIdx],
                    (const uint8_t *)range.copyStartAddr, bytesToAllocate - destOffset, destOffset,
                    vertexCount, binding.getStride(),
                    combined ? nullptr : vertexFormat.getVertexLoadFunction(compressed)));
            }
            vertexDataBuffer = attribBufferHelper[mergedAttribIdx];
            // Merged attribs start at their original offset within the merged range.
            startOffset = combined ? (uintptr_t)attrib.pointer - range.startAddr : 0;
        }
        ASSERT(vertexDataBuffer != nullptr);
        mCurrentArrayBuffers[attribIndex] = vertexDataBuffer;
        mCurrentArrayBufferSerial[attribIndex] = vertexDataBuffer->getBufferSerial();
        VkDeviceSize bufferOffset;
        mCurrentArrayBufferHandles[attribIndex] =
            vertexDataBuffer
                ->getBufferForVertexArray(contextVk, vertexDataBuffer->getSize(), &bufferOffset)
                .getHandle();
        mCurrentArrayBufferOffsets[attribIndex] = bufferOffset + startOffset;
        mCurrentArrayBufferStrides[attribIndex] = stride;
        mCurrentArrayBufferDivisors[attribIndex] = divisor;
        ASSERT(BindingIsAligned(dstFormat, mCurrentArrayBufferOffsets[attribIndex],
                                mCurrentArrayBufferStrides[attribIndex]));
    }

    return angle::Result::Continue;
}
1379
// Vulkan has no GL_LINE_LOOP primitive, so line loops are emulated by drawing indexed lines with
// an extra closing index.  This builds (or reuses) the emulation index buffer for either a
// drawElements call (translate the app's indices) or a drawArrays call (generate sequential
// indices), returning the buffer and the index count to draw with.
angle::Result VertexArrayVk::handleLineLoop(ContextVk *contextVk,
                                            GLint firstVertex,
                                            GLsizei vertexOrIndexCount,
                                            gl::DrawElementsType indexTypeOrInvalid,
                                            const void *indices,
                                            vk::BufferHelper **indexBufferOut,
                                            uint32_t *indexCountOut)
{
    if (indexTypeOrInvalid != gl::DrawElementsType::InvalidEnum)
    {
        // Handle GL_LINE_LOOP drawElements.
        if (mDirtyLineLoopTranslation)
        {
            gl::Buffer *elementArrayBuffer = mState.getElementArrayBuffer();

            if (!elementArrayBuffer)
            {
                // Client-memory indices: stream them with the closing index appended.
                ANGLE_TRY(mLineLoopHelper.streamIndices(
                    contextVk, indexTypeOrInvalid, vertexOrIndexCount,
                    reinterpret_cast<const uint8_t *>(indices), indexBufferOut, indexCountOut));
            }
            else
            {
                // When using an element array buffer, 'indices' is an offset to the first element.
                intptr_t offset = reinterpret_cast<intptr_t>(indices);
                BufferVk *elementArrayBufferVk = vk::GetImpl(elementArrayBuffer);
                ANGLE_TRY(mLineLoopHelper.getIndexBufferForElementArrayBuffer(
                    contextVk, elementArrayBufferVk, indexTypeOrInvalid, vertexOrIndexCount, offset,
                    indexBufferOut, indexCountOut));
            }
        }

        // If we've had a drawArrays call with a line loop before, we want to make sure this is
        // invalidated the next time drawArrays is called since we use the same index buffer for
        // both calls.
        mLineLoopBufferFirstIndex.reset();
        mLineLoopBufferLastIndex.reset();
        return angle::Result::Continue;
    }

    // Note: Vertex indexes can be arbitrarily large.
    uint32_t clampedVertexCount = gl::clampCast<uint32_t>(vertexOrIndexCount);

    // Handle GL_LINE_LOOP drawArrays.
    size_t lastVertex = static_cast<size_t>(firstVertex + clampedVertexCount);
    // Regenerate the index buffer only if the requested range differs from the cached one.
    if (!mLineLoopBufferFirstIndex.valid() || !mLineLoopBufferLastIndex.valid() ||
        mLineLoopBufferFirstIndex != firstVertex || mLineLoopBufferLastIndex != lastVertex)
    {
        ANGLE_TRY(mLineLoopHelper.getIndexBufferForDrawArrays(contextVk, clampedVertexCount,
                                                              firstVertex, indexBufferOut));

        mLineLoopBufferFirstIndex = firstVertex;
        mLineLoopBufferLastIndex  = lastVertex;
    }
    else
    {
        // Cached range matches; reuse the previously generated index buffer.
        *indexBufferOut = mLineLoopHelper.getCurrentIndexBuffer();
    }
    // One extra index closes the loop back to the first vertex.
    *indexCountOut = vertexOrIndexCount + 1;

    return angle::Result::Continue;
}
1442
updateDefaultAttrib(ContextVk * contextVk,size_t attribIndex)1443 angle::Result VertexArrayVk::updateDefaultAttrib(ContextVk *contextVk, size_t attribIndex)
1444 {
1445 if (!mState.getEnabledAttributesMask().test(attribIndex))
1446 {
1447 vk::BufferHelper *bufferHelper;
1448 ANGLE_TRY(
1449 contextVk->allocateStreamedVertexBuffer(attribIndex, kDefaultValueSize, &bufferHelper));
1450
1451 const gl::VertexAttribCurrentValueData &defaultValue =
1452 contextVk->getState().getVertexAttribCurrentValues()[attribIndex];
1453 uint8_t *ptr = bufferHelper->getMappedMemory();
1454 memcpy(ptr, &defaultValue.Values, kDefaultValueSize);
1455 ANGLE_TRY(bufferHelper->flush(contextVk->getRenderer()));
1456
1457 VkDeviceSize bufferOffset;
1458 mCurrentArrayBufferHandles[attribIndex] =
1459 bufferHelper->getBufferForVertexArray(contextVk, kDefaultValueSize, &bufferOffset)
1460 .getHandle();
1461 mCurrentArrayBufferOffsets[attribIndex] = bufferOffset;
1462 mCurrentArrayBuffers[attribIndex] = bufferHelper;
1463 mCurrentArrayBufferSerial[attribIndex] = bufferHelper->getBufferSerial();
1464 mCurrentArrayBufferStrides[attribIndex] = 0;
1465 mCurrentArrayBufferDivisors[attribIndex] = 0;
1466
1467 ANGLE_TRY(setDefaultPackedInput(contextVk, attribIndex,
1468 &mCurrentArrayBufferFormats[attribIndex]));
1469 }
1470
1471 return angle::Result::Continue;
1472 }
1473 } // namespace rx
1474