1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Renderer.hpp"
16
17 #include "Clipper.hpp"
18 #include "Polygon.hpp"
19 #include "Primitive.hpp"
20 #include "Vertex.hpp"
21 #include "Pipeline/Constants.hpp"
22 #include "Pipeline/SpirvShader.hpp"
23 #include "Reactor/Reactor.hpp"
24 #include "System/Debug.hpp"
25 #include "System/Half.hpp"
26 #include "System/Math.hpp"
27 #include "System/Memory.hpp"
28 #include "System/Timer.hpp"
29 #include "Vulkan/VkConfig.hpp"
30 #include "Vulkan/VkDescriptorSet.hpp"
31 #include "Vulkan/VkDevice.hpp"
32 #include "Vulkan/VkFence.hpp"
33 #include "Vulkan/VkImageView.hpp"
34 #include "Vulkan/VkPipelineLayout.hpp"
35 #include "Vulkan/VkQueryPool.hpp"
36
37 #include "marl/containers.h"
38 #include "marl/defer.h"
39 #include "marl/trace.h"
40
41 #undef max
42
// Debug-only globals: they are compiled only when NDEBUG is not defined and
// have external linkage. Nothing in the visible part of this file reads them.
// NOTE(review): presumably meant to be inspected or overridden from a debugger
// or another translation unit to bound primitive counts - confirm before removing.
#ifndef NDEBUG
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1 << 21;
#endif
47
48 namespace sw {
49
50 template<typename T>
setBatchIndices(unsigned int batch[128][3],VkPrimitiveTopology topology,VkProvokingVertexModeEXT provokingVertexMode,T indices,unsigned int start,unsigned int triangleCount)51 inline bool setBatchIndices(unsigned int batch[128][3], VkPrimitiveTopology topology, VkProvokingVertexModeEXT provokingVertexMode, T indices, unsigned int start, unsigned int triangleCount)
52 {
53 bool provokeFirst = (provokingVertexMode == VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT);
54
55 switch(topology)
56 {
57 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
58 {
59 auto index = start;
60 auto pointBatch = &(batch[0][0]);
61 for(unsigned int i = 0; i < triangleCount; i++)
62 {
63 *pointBatch++ = indices[index++];
64 }
65
66 // Repeat the last index to allow for SIMD width overrun.
67 index--;
68 for(unsigned int i = 0; i < 3; i++)
69 {
70 *pointBatch++ = indices[index];
71 }
72 }
73 break;
74 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
75 {
76 auto index = 2 * start;
77 for(unsigned int i = 0; i < triangleCount; i++)
78 {
79 batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
80 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
81 batch[i][2] = indices[index + 1];
82
83 index += 2;
84 }
85 }
86 break;
87 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
88 {
89 auto index = start;
90 for(unsigned int i = 0; i < triangleCount; i++)
91 {
92 batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
93 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
94 batch[i][2] = indices[index + 1];
95
96 index += 1;
97 }
98 }
99 break;
100 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
101 {
102 auto index = 3 * start;
103 for(unsigned int i = 0; i < triangleCount; i++)
104 {
105 batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
106 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
107 batch[i][2] = indices[index + (provokeFirst ? 2 : 1)];
108
109 index += 3;
110 }
111 }
112 break;
113 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
114 {
115 auto index = start;
116 for(unsigned int i = 0; i < triangleCount; i++)
117 {
118 batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
119 batch[i][1] = indices[index + ((start + i) & 1) + (provokeFirst ? 1 : 0)];
120 batch[i][2] = indices[index + (~(start + i) & 1) + (provokeFirst ? 1 : 0)];
121
122 index += 1;
123 }
124 }
125 break;
126 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
127 {
128 auto index = start + 1;
129 for(unsigned int i = 0; i < triangleCount; i++)
130 {
131 batch[i][provokeFirst ? 0 : 2] = indices[index + 0];
132 batch[i][provokeFirst ? 1 : 0] = indices[index + 1];
133 batch[i][provokeFirst ? 2 : 1] = indices[0];
134
135 index += 1;
136 }
137 }
138 break;
139 default:
140 ASSERT(false);
141 return false;
142 }
143
144 return true;
145 }
146
DrawCall()147 DrawCall::DrawCall()
148 {
149 // TODO(b/140991626): Use allocateUninitialized() instead of allocateZeroOrPoison() to improve startup peformance.
150 data = (DrawData *)sw::allocateZeroOrPoison(sizeof(DrawData));
151 }
152
// Hands the DrawData storage pointed to by 'data' back to sw::freeMemory().
DrawCall::~DrawCall()
{
	sw::freeMemory(data);
}
157
Renderer(vk::Device * device)158 Renderer::Renderer(vk::Device *device)
159 : device(device)
160 {
161 vertexProcessor.setRoutineCacheSize(1024);
162 pixelProcessor.setRoutineCacheSize(1024);
163 setupProcessor.setRoutineCacheSize(1024);
164 }
165
~Renderer()166 Renderer::~Renderer()
167 {
168 drawTickets.take().wait();
169 }
170
171 // Renderer objects have to be mem aligned to the alignment provided in the class declaration
operator new(size_t size)172 void *Renderer::operator new(size_t size)
173 {
174 ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
175 return vk::allocateHostMemory(sizeof(Renderer), alignof(Renderer), vk::NULL_ALLOCATION_CALLBACKS, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
176 }
177
// Counterpart of Renderer::operator new: releases 'mem' through
// vk::freeHostMemory() using the same null allocation callbacks.
void Renderer::operator delete(void *mem)
{
	vk::freeHostMemory(mem, vk::NULL_ALLOCATION_CALLBACKS);
}
182
draw(const vk::GraphicsPipeline * pipeline,const vk::DynamicState & dynamicState,unsigned int count,int baseVertex,CountedEvent * events,int instanceID,int layer,void * indexBuffer,const VkRect2D & renderArea,const vk::Pipeline::PushConstantStorage & pushConstants,bool update)183 void Renderer::draw(const vk::GraphicsPipeline *pipeline, const vk::DynamicState &dynamicState, unsigned int count, int baseVertex,
184 CountedEvent *events, int instanceID, int layer, void *indexBuffer, const VkRect2D &renderArea,
185 const vk::Pipeline::PushConstantStorage &pushConstants, bool update)
186 {
187 if(count == 0) { return; }
188
189 auto id = nextDrawID++;
190 MARL_SCOPED_EVENT("draw %d", id);
191
192 marl::Pool<sw::DrawCall>::Loan draw;
193 {
194 MARL_SCOPED_EVENT("drawCallPool.borrow()");
195 draw = drawCallPool.borrow();
196 }
197 draw->id = id;
198
199 const vk::GraphicsState &pipelineState = pipeline->getCombinedState(dynamicState);
200
201 // A graphics pipeline must always be "complete" before it can be used for drawing. A
202 // complete graphics pipeline always includes the vertex input interface and
203 // pre-rasterization subsets, but only includes fragment and fragment output interface
204 // subsets if rasterizer discard is not enabled.
205 //
206 // Note that in the following, the setupPrimitives, setupRoutine and pixelRoutine functions
207 // are only called when rasterizer discard is not enabled. If rasterizer discard is
208 // enabled, these functions and state for the latter two states are not set.
209 const vk::VertexInputInterfaceState &vertexInputInterfaceState = pipelineState.getVertexInputInterfaceState();
210 const vk::PreRasterizationState &preRasterizationState = pipelineState.getPreRasterizationState();
211 const vk::FragmentState *fragmentState = nullptr;
212 const vk::FragmentOutputInterfaceState *fragmentOutputInterfaceState = nullptr;
213
214 const bool hasRasterizerDiscard = preRasterizationState.hasRasterizerDiscard();
215 if(!hasRasterizerDiscard)
216 {
217 fragmentState = &pipelineState.getFragmentState();
218 fragmentOutputInterfaceState = &pipelineState.getFragmentOutputInterfaceState();
219
220 pixelProcessor.setBlendConstant(fragmentOutputInterfaceState->getBlendConstants());
221 }
222
223 const vk::Inputs &inputs = pipeline->getInputs();
224
225 if(update)
226 {
227 MARL_SCOPED_EVENT("update");
228
229 const sw::SpirvShader *fragmentShader = pipeline->getShader(VK_SHADER_STAGE_FRAGMENT_BIT).get();
230 const sw::SpirvShader *vertexShader = pipeline->getShader(VK_SHADER_STAGE_VERTEX_BIT).get();
231
232 const vk::Attachments attachments = pipeline->getAttachments();
233
234 vertexState = vertexProcessor.update(pipelineState, vertexShader, inputs);
235 vertexRoutine = vertexProcessor.routine(vertexState, preRasterizationState.getPipelineLayout(), vertexShader, inputs.getDescriptorSets());
236
237 if(!hasRasterizerDiscard)
238 {
239 setupState = setupProcessor.update(pipelineState, fragmentShader, vertexShader, attachments);
240 setupRoutine = setupProcessor.routine(setupState);
241
242 pixelState = pixelProcessor.update(pipelineState, fragmentShader, vertexShader, attachments, hasOcclusionQuery());
243 pixelRoutine = pixelProcessor.routine(pixelState, fragmentState->getPipelineLayout(), fragmentShader, attachments, inputs.getDescriptorSets());
244 }
245 }
246
247 draw->preRasterizationContainsImageWrite = pipeline->preRasterizationContainsImageWrite();
248 draw->fragmentContainsImageWrite = pipeline->fragmentContainsImageWrite();
249
250 // The sample count affects the batch size even if rasterization is disabled.
251 // TODO(b/147812380): Eliminate the dependency between multisampling and batch size.
252 int ms = hasRasterizerDiscard ? 1 : fragmentOutputInterfaceState->getSampleCount();
253 ASSERT(ms > 0);
254
255 unsigned int numPrimitivesPerBatch = MaxBatchSize / ms;
256
257 DrawData *data = draw->data;
258 draw->occlusionQuery = occlusionQuery;
259 draw->batchDataPool = &batchDataPool;
260 draw->numPrimitives = count;
261 draw->numPrimitivesPerBatch = numPrimitivesPerBatch;
262 draw->numBatches = (count + draw->numPrimitivesPerBatch - 1) / draw->numPrimitivesPerBatch;
263 draw->topology = vertexInputInterfaceState.getTopology();
264 draw->provokingVertexMode = preRasterizationState.getProvokingVertexMode();
265 draw->lineRasterizationMode = preRasterizationState.getLineRasterizationMode();
266 draw->descriptorSetObjects = inputs.getDescriptorSetObjects();
267 draw->preRasterizationPipelineLayout = preRasterizationState.getPipelineLayout();
268 draw->depthClipEnable = preRasterizationState.getDepthClipEnable();
269 draw->depthClipNegativeOneToOne = preRasterizationState.getDepthClipNegativeOneToOne();
270 data->lineWidth = preRasterizationState.getLineWidth();
271 data->rasterizerDiscard = hasRasterizerDiscard;
272
273 data->descriptorSets = inputs.getDescriptorSets();
274 data->descriptorDynamicOffsets = inputs.getDescriptorDynamicOffsets();
275
276 for(int i = 0; i < MAX_INTERFACE_COMPONENTS / 4; i++)
277 {
278 const sw::Stream &stream = inputs.getStream(i);
279 data->input[i] = stream.buffer;
280 data->robustnessSize[i] = stream.robustnessSize;
281 data->stride[i] = inputs.getVertexStride(i);
282 }
283
284 data->indices = indexBuffer;
285 data->layer = layer;
286 data->instanceID = instanceID;
287 data->baseVertex = baseVertex;
288 draw->indexType = indexBuffer ? pipeline->getIndexBuffer().getIndexType() : VK_INDEX_TYPE_UINT16;
289
290 draw->vertexRoutine = vertexRoutine;
291
292 vk::DescriptorSet::PrepareForSampling(draw->descriptorSetObjects, draw->preRasterizationPipelineLayout, device);
293
294 // Viewport
295 {
296 const VkViewport &viewport = preRasterizationState.getViewport();
297
298 float W = 0.5f * viewport.width;
299 float H = 0.5f * viewport.height;
300 float X0 = viewport.x + W;
301 float Y0 = viewport.y + H;
302 float N = viewport.minDepth;
303 float F = viewport.maxDepth;
304 float Z = F - N;
305 constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
306
307 data->WxF = W * subPixF;
308 data->HxF = H * subPixF;
309 data->X0xF = X0 * subPixF - subPixF / 2;
310 data->Y0xF = Y0 * subPixF - subPixF / 2;
311 data->halfPixelX = 0.5f / W;
312 data->halfPixelY = 0.5f / H;
313 data->depthRange = Z;
314 data->depthNear = N;
315 data->constantDepthBias = preRasterizationState.getConstantDepthBias();
316 data->slopeDepthBias = preRasterizationState.getSlopeDepthBias();
317 data->depthBiasClamp = preRasterizationState.getDepthBiasClamp();
318
319 // Adjust viewport transform based on the negativeOneToOne state.
320 if(preRasterizationState.getDepthClipNegativeOneToOne())
321 {
322 data->depthRange = Z * 0.5f;
323 data->depthNear = (F + N) * 0.5f;
324 }
325 }
326
327 // Scissor
328 {
329 const VkRect2D &scissor = preRasterizationState.getScissor();
330
331 int x0 = renderArea.offset.x;
332 int y0 = renderArea.offset.y;
333 int x1 = x0 + renderArea.extent.width;
334 int y1 = y0 + renderArea.extent.height;
335 data->scissorX0 = clamp<int>(scissor.offset.x, x0, x1);
336 data->scissorX1 = clamp<int>(scissor.offset.x + scissor.extent.width, x0, x1);
337 data->scissorY0 = clamp<int>(scissor.offset.y, y0, y1);
338 data->scissorY1 = clamp<int>(scissor.offset.y + scissor.extent.height, y0, y1);
339 }
340
341 if(!hasRasterizerDiscard)
342 {
343 const VkPolygonMode polygonMode = preRasterizationState.getPolygonMode();
344
345 DrawCall::SetupFunction setupPrimitives = nullptr;
346 if(vertexInputInterfaceState.isDrawTriangle(false, polygonMode))
347 {
348 switch(preRasterizationState.getPolygonMode())
349 {
350 case VK_POLYGON_MODE_FILL:
351 setupPrimitives = &DrawCall::setupSolidTriangles;
352 break;
353 case VK_POLYGON_MODE_LINE:
354 setupPrimitives = &DrawCall::setupWireframeTriangles;
355 numPrimitivesPerBatch /= 3;
356 break;
357 case VK_POLYGON_MODE_POINT:
358 setupPrimitives = &DrawCall::setupPointTriangles;
359 numPrimitivesPerBatch /= 3;
360 break;
361 default:
362 UNSUPPORTED("polygon mode: %d", int(preRasterizationState.getPolygonMode()));
363 return;
364 }
365 }
366 else if(vertexInputInterfaceState.isDrawLine(false, polygonMode))
367 {
368 setupPrimitives = &DrawCall::setupLines;
369 }
370 else // Point primitive topology
371 {
372 setupPrimitives = &DrawCall::setupPoints;
373 }
374
375 draw->setupState = setupState;
376 draw->setupRoutine = setupRoutine;
377 draw->pixelRoutine = pixelRoutine;
378 draw->setupPrimitives = setupPrimitives;
379 draw->fragmentPipelineLayout = fragmentState->getPipelineLayout();
380
381 if(pixelState.stencilActive)
382 {
383 data->stencil[0].set(fragmentState->getFrontStencil().reference, fragmentState->getFrontStencil().compareMask, fragmentState->getFrontStencil().writeMask);
384 data->stencil[1].set(fragmentState->getBackStencil().reference, fragmentState->getBackStencil().compareMask, fragmentState->getBackStencil().writeMask);
385 }
386
387 data->factor = pixelProcessor.factor;
388
389 if(pixelState.alphaToCoverage)
390 {
391 if(ms == 4)
392 {
393 data->a2c0 = 0.2f;
394 data->a2c1 = 0.4f;
395 data->a2c2 = 0.6f;
396 data->a2c3 = 0.8f;
397 }
398 else if(ms == 2)
399 {
400 data->a2c0 = 0.25f;
401 data->a2c1 = 0.75f;
402 }
403 else if(ms == 1)
404 {
405 data->a2c0 = 0.5f;
406 }
407 else
408 ASSERT(false);
409 }
410
411 if(pixelState.occlusionEnabled)
412 {
413 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
414 {
415 data->occlusion[cluster] = 0;
416 }
417 }
418
419 // Viewport
420 {
421 const vk::Attachments attachments = pipeline->getAttachments();
422 if(attachments.depthBuffer)
423 {
424 switch(attachments.depthBuffer->getFormat(VK_IMAGE_ASPECT_DEPTH_BIT))
425 {
426 case VK_FORMAT_D16_UNORM:
427 // Minimum is 1 unit, but account for potential floating-point rounding errors
428 data->minimumResolvableDepthDifference = 1.01f / 0xFFFF;
429 break;
430 case VK_FORMAT_D32_SFLOAT:
431 // The minimum resolvable depth difference is determined per-polygon for floating-point depth
432 // buffers. DrawData::minimumResolvableDepthDifference is unused.
433 break;
434 default:
435 UNSUPPORTED("Depth format: %d", int(attachments.depthBuffer->getFormat(VK_IMAGE_ASPECT_DEPTH_BIT)));
436 }
437 }
438 }
439
440 // Target
441 {
442 const vk::Attachments attachments = pipeline->getAttachments();
443
444 for(int index = 0; index < MAX_COLOR_BUFFERS; index++)
445 {
446 draw->colorBuffer[index] = attachments.colorBuffer[index];
447
448 if(draw->colorBuffer[index])
449 {
450 data->colorBuffer[index] = (unsigned int *)attachments.colorBuffer[index]->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_COLOR_BIT, 0, data->layer);
451 data->colorPitchB[index] = attachments.colorBuffer[index]->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
452 data->colorSliceB[index] = attachments.colorBuffer[index]->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
453 }
454 }
455
456 draw->depthBuffer = attachments.depthBuffer;
457 draw->stencilBuffer = attachments.stencilBuffer;
458
459 if(draw->depthBuffer)
460 {
461 data->depthBuffer = (float *)attachments.depthBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_DEPTH_BIT, 0, data->layer);
462 data->depthPitchB = attachments.depthBuffer->rowPitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
463 data->depthSliceB = attachments.depthBuffer->slicePitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
464 }
465
466 if(draw->stencilBuffer)
467 {
468 data->stencilBuffer = (unsigned char *)attachments.stencilBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_STENCIL_BIT, 0, data->layer);
469 data->stencilPitchB = attachments.stencilBuffer->rowPitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
470 data->stencilSliceB = attachments.stencilBuffer->slicePitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
471 }
472 }
473
474 if(draw->fragmentPipelineLayout != draw->preRasterizationPipelineLayout)
475 {
476 vk::DescriptorSet::PrepareForSampling(draw->descriptorSetObjects, draw->fragmentPipelineLayout, device);
477 }
478 }
479
480 // Push constants
481 {
482 data->pushConstants = pushConstants;
483 }
484
485 draw->events = events;
486
487 DrawCall::run(device, draw, &drawTickets, clusterQueues);
488 }
489
setup()490 void DrawCall::setup()
491 {
492 if(occlusionQuery != nullptr)
493 {
494 occlusionQuery->start();
495 }
496
497 if(events)
498 {
499 events->add();
500 }
501 }
502
teardown(vk::Device * device)503 void DrawCall::teardown(vk::Device *device)
504 {
505 if(events)
506 {
507 events->done();
508 events = nullptr;
509 }
510
511 vertexRoutine = {};
512 setupRoutine = {};
513 pixelRoutine = {};
514
515 if(preRasterizationContainsImageWrite)
516 {
517 vk::DescriptorSet::ContentsChanged(descriptorSetObjects, preRasterizationPipelineLayout, device);
518 }
519
520 if(!data->rasterizerDiscard)
521 {
522 if(occlusionQuery != nullptr)
523 {
524 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
525 {
526 occlusionQuery->add(data->occlusion[cluster]);
527 }
528 occlusionQuery->finish();
529 }
530
531 for(auto *target : colorBuffer)
532 {
533 if(target)
534 {
535 target->contentsChanged(vk::Image::DIRECT_MEMORY_ACCESS);
536 }
537 }
538
539 // If pre-rasterization and fragment use the same pipeline, and pre-rasterization
540 // also contains image writes, don't double-notify the descriptor set.
541 const bool descSetAlreadyNotified = preRasterizationContainsImageWrite && fragmentPipelineLayout == preRasterizationPipelineLayout;
542 if(fragmentContainsImageWrite && !descSetAlreadyNotified)
543 {
544 vk::DescriptorSet::ContentsChanged(descriptorSetObjects, fragmentPipelineLayout, device);
545 }
546 }
547 }
548
run(vk::Device * device,const marl::Loan<DrawCall> & draw,marl::Ticket::Queue * tickets,marl::Ticket::Queue clusterQueues[MaxClusterCount])549 void DrawCall::run(vk::Device *device, const marl::Loan<DrawCall> &draw, marl::Ticket::Queue *tickets, marl::Ticket::Queue clusterQueues[MaxClusterCount])
550 {
551 draw->setup();
552
553 const auto numPrimitives = draw->numPrimitives;
554 const auto numPrimitivesPerBatch = draw->numPrimitivesPerBatch;
555 const auto numBatches = draw->numBatches;
556
557 auto ticket = tickets->take();
558 auto finally = marl::make_shared_finally([device, draw, ticket] {
559 MARL_SCOPED_EVENT("FINISH draw %d", draw->id);
560 draw->teardown(device);
561 ticket.done();
562 });
563
564 for(unsigned int batchId = 0; batchId < numBatches; batchId++)
565 {
566 auto batch = draw->batchDataPool->borrow();
567 batch->id = batchId;
568 batch->firstPrimitive = batch->id * numPrimitivesPerBatch;
569 batch->numPrimitives = std::min(batch->firstPrimitive + numPrimitivesPerBatch, numPrimitives) - batch->firstPrimitive;
570
571 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
572 {
573 batch->clusterTickets[cluster] = std::move(clusterQueues[cluster].take());
574 }
575
576 marl::schedule([device, draw, batch, finally] {
577 processVertices(device, draw.get(), batch.get());
578
579 if(!draw->data->rasterizerDiscard)
580 {
581 processPrimitives(device, draw.get(), batch.get());
582
583 if(batch->numVisible > 0)
584 {
585 processPixels(device, draw, batch, finally);
586 return;
587 }
588 }
589
590 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
591 {
592 batch->clusterTickets[cluster].done();
593 }
594 });
595 }
596 }
597
processVertices(vk::Device * device,DrawCall * draw,BatchData * batch)598 void DrawCall::processVertices(vk::Device *device, DrawCall *draw, BatchData *batch)
599 {
600 MARL_SCOPED_EVENT("VERTEX draw %d, batch %d", draw->id, batch->id);
601
602 unsigned int triangleIndices[MaxBatchSize + 1][3]; // One extra for SIMD width overrun. TODO: Adjust to dynamic batch size.
603 {
604 MARL_SCOPED_EVENT("processPrimitiveVertices");
605 processPrimitiveVertices(
606 triangleIndices,
607 draw->data->indices,
608 draw->indexType,
609 batch->firstPrimitive,
610 batch->numPrimitives,
611 draw->topology,
612 draw->provokingVertexMode);
613 }
614
615 auto &vertexTask = batch->vertexTask;
616 vertexTask.primitiveStart = batch->firstPrimitive;
617 // We're only using batch compaction for points, not lines
618 vertexTask.vertexCount = batch->numPrimitives * ((draw->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) ? 1 : 3);
619 if(vertexTask.vertexCache.drawCall != draw->id)
620 {
621 vertexTask.vertexCache.clear();
622 vertexTask.vertexCache.drawCall = draw->id;
623 }
624
625 draw->vertexRoutine(device, &batch->triangles.front().v0, &triangleIndices[0][0], &vertexTask, draw->data);
626 }
627
processPrimitives(vk::Device * device,DrawCall * draw,BatchData * batch)628 void DrawCall::processPrimitives(vk::Device *device, DrawCall *draw, BatchData *batch)
629 {
630 MARL_SCOPED_EVENT("PRIMITIVES draw %d batch %d", draw->id, batch->id);
631 auto triangles = &batch->triangles[0];
632 auto primitives = &batch->primitives[0];
633 batch->numVisible = draw->setupPrimitives(device, triangles, primitives, draw, batch->numPrimitives);
634 }
635
processPixels(vk::Device * device,const marl::Loan<DrawCall> & draw,const marl::Loan<BatchData> & batch,const std::shared_ptr<marl::Finally> & finally)636 void DrawCall::processPixels(vk::Device *device, const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
637 {
638 struct Data
639 {
640 Data(const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
641 : draw(draw)
642 , batch(batch)
643 , finally(finally)
644 {}
645 marl::Loan<DrawCall> draw;
646 marl::Loan<BatchData> batch;
647 std::shared_ptr<marl::Finally> finally;
648 };
649 auto data = std::make_shared<Data>(draw, batch, finally);
650 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
651 {
652 batch->clusterTickets[cluster].onCall([device, data, cluster] {
653 auto &draw = data->draw;
654 auto &batch = data->batch;
655 MARL_SCOPED_EVENT("PIXEL draw %d, batch %d, cluster %d", draw->id, batch->id, cluster);
656 draw->pixelRoutine(device, &batch->primitives.front(), batch->numVisible, cluster, MaxClusterCount, draw->data);
657 batch->clusterTickets[cluster].done();
658 });
659 }
660 }
661
// Takes a new ticket from the draw ticket queue, blocks until that ticket is
// called, refreshes the device's sampling routine snapshot cache while holding
// the ticket, and finally marks the ticket as done.
// NOTE(review): presumably a ticket is only called once all tickets taken
// before it are done, which would make this a wait for every draw submitted so
// far - confirm against marl::Ticket::Queue.
void Renderer::synchronize()
{
	MARL_SCOPED_EVENT("synchronize");
	auto ticket = drawTickets.take();
	ticket.wait();
	// Runs after the wait and before done(), i.e. while this ticket is held.
	device->updateSamplingRoutineSnapshotCache();
	ticket.done();
}
670
processPrimitiveVertices(unsigned int triangleIndicesOut[MaxBatchSize+1][3],const void * primitiveIndices,VkIndexType indexType,unsigned int start,unsigned int triangleCount,VkPrimitiveTopology topology,VkProvokingVertexModeEXT provokingVertexMode)671 void DrawCall::processPrimitiveVertices(
672 unsigned int triangleIndicesOut[MaxBatchSize + 1][3],
673 const void *primitiveIndices,
674 VkIndexType indexType,
675 unsigned int start,
676 unsigned int triangleCount,
677 VkPrimitiveTopology topology,
678 VkProvokingVertexModeEXT provokingVertexMode)
679 {
680 if(!primitiveIndices)
681 {
682 struct LinearIndex
683 {
684 unsigned int operator[](unsigned int i) { return i; }
685 };
686
687 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, LinearIndex(), start, triangleCount))
688 {
689 return;
690 }
691 }
692 else
693 {
694 switch(indexType)
695 {
696 case VK_INDEX_TYPE_UINT16:
697 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint16_t *>(primitiveIndices), start, triangleCount))
698 {
699 return;
700 }
701 break;
702 case VK_INDEX_TYPE_UINT32:
703 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint32_t *>(primitiveIndices), start, triangleCount))
704 {
705 return;
706 }
707 break;
708 break;
709 default:
710 ASSERT(false);
711 return;
712 }
713 }
714
715 // setBatchIndices() takes care of the point case, since it's different due to the compaction
716 if(topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
717 {
718 // Repeat the last index to allow for SIMD width overrun.
719 triangleIndicesOut[triangleCount][0] = triangleIndicesOut[triangleCount - 1][2];
720 triangleIndicesOut[triangleCount][1] = triangleIndicesOut[triangleCount - 1][2];
721 triangleIndicesOut[triangleCount][2] = triangleIndicesOut[triangleCount - 1][2];
722 }
723 }
724
setupSolidTriangles(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)725 int DrawCall::setupSolidTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
726 {
727 auto &state = drawCall->setupState;
728
729 int ms = state.multiSampleCount;
730 const DrawData *data = drawCall->data;
731 int visible = 0;
732
733 for(int i = 0; i < count; i++, triangles++)
734 {
735 Vertex &v0 = triangles->v0;
736 Vertex &v1 = triangles->v1;
737 Vertex &v2 = triangles->v2;
738
739 Polygon polygon(&v0.position, &v1.position, &v2.position);
740
741 if((v0.cullMask | v1.cullMask | v2.cullMask) == 0)
742 {
743 continue;
744 }
745
746 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) != Clipper::CLIP_FINITE)
747 {
748 continue;
749 }
750
751 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags;
752 if(clipFlagsOr != Clipper::CLIP_FINITE)
753 {
754 if(!Clipper::Clip(polygon, clipFlagsOr, *drawCall))
755 {
756 continue;
757 }
758 }
759
760 if(drawCall->setupRoutine(device, primitives, triangles, &polygon, data))
761 {
762 primitives += ms;
763 visible++;
764 }
765 }
766
767 return visible;
768 }
769
setupWireframeTriangles(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)770 int DrawCall::setupWireframeTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
771 {
772 auto &state = drawCall->setupState;
773
774 int ms = state.multiSampleCount;
775 int visible = 0;
776
777 for(int i = 0; i < count; i++)
778 {
779 const Vertex &v0 = triangles[i].v0;
780 const Vertex &v1 = triangles[i].v1;
781 const Vertex &v2 = triangles[i].v2;
782
783 float A = ((float)v0.projected.y - (float)v2.projected.y) * (float)v1.projected.x +
784 ((float)v2.projected.y - (float)v1.projected.y) * (float)v0.projected.x +
785 ((float)v1.projected.y - (float)v0.projected.y) * (float)v2.projected.x; // Area
786
787 int w0w1w2 = bit_cast<int>(v0.w) ^
788 bit_cast<int>(v1.w) ^
789 bit_cast<int>(v2.w);
790
791 A = w0w1w2 < 0 ? -A : A;
792
793 bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (A >= 0.0f) : (A <= 0.0f);
794
795 if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
796 {
797 if(frontFacing) continue;
798 }
799 if(state.cullMode & VK_CULL_MODE_BACK_BIT)
800 {
801 if(!frontFacing) continue;
802 }
803
804 Triangle lines[3];
805 lines[0].v0 = v0;
806 lines[0].v1 = v1;
807 lines[1].v0 = v1;
808 lines[1].v1 = v2;
809 lines[2].v0 = v2;
810 lines[2].v1 = v0;
811
812 for(int i = 0; i < 3; i++)
813 {
814 if(setupLine(device, *primitives, lines[i], *drawCall))
815 {
816 primitives += ms;
817 visible++;
818 }
819 }
820 }
821
822 return visible;
823 }
824
setupPointTriangles(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)825 int DrawCall::setupPointTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
826 {
827 auto &state = drawCall->setupState;
828
829 int ms = state.multiSampleCount;
830 int visible = 0;
831
832 for(int i = 0; i < count; i++)
833 {
834 const Vertex &v0 = triangles[i].v0;
835 const Vertex &v1 = triangles[i].v1;
836 const Vertex &v2 = triangles[i].v2;
837
838 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w +
839 (v0.x * v2.y - v0.y * v2.x) * v1.w +
840 (v2.x * v1.y - v1.x * v2.y) * v0.w;
841
842 bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0);
843 if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
844 {
845 if(frontFacing) continue;
846 }
847 if(state.cullMode & VK_CULL_MODE_BACK_BIT)
848 {
849 if(!frontFacing) continue;
850 }
851
852 Triangle points[3];
853 points[0].v0 = v0;
854 points[1].v0 = v1;
855 points[2].v0 = v2;
856
857 for(int i = 0; i < 3; i++)
858 {
859 if(setupPoint(device, *primitives, points[i], *drawCall))
860 {
861 primitives += ms;
862 visible++;
863 }
864 }
865 }
866
867 return visible;
868 }
869
setupLines(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)870 int DrawCall::setupLines(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
871 {
872 auto &state = drawCall->setupState;
873
874 int visible = 0;
875 int ms = state.multiSampleCount;
876
877 for(int i = 0; i < count; i++)
878 {
879 if(setupLine(device, *primitives, *triangles, *drawCall))
880 {
881 primitives += ms;
882 visible++;
883 }
884
885 triangles++;
886 }
887
888 return visible;
889 }
890
setupPoints(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)891 int DrawCall::setupPoints(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
892 {
893 auto &state = drawCall->setupState;
894
895 int visible = 0;
896 int ms = state.multiSampleCount;
897
898 for(int i = 0; i < count; i++)
899 {
900 if(setupPoint(device, *primitives, *triangles, *drawCall))
901 {
902 primitives += ms;
903 visible++;
904 }
905
906 triangles++;
907 }
908
909 return visible;
910 }
911
setupLine(vk::Device * device,Primitive & primitive,Triangle & triangle,const DrawCall & draw)912 bool DrawCall::setupLine(vk::Device *device, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
913 {
914 const Vertex &v0 = triangle.v0;
915 const Vertex &v1 = triangle.v1;
916
917 if((v0.cullMask | v1.cullMask) == 0)
918 {
919 return false;
920 }
921
922 const float4 &P0 = v0.position;
923 const float4 &P1 = v1.position;
924
925 if(P0.w <= 0 && P1.w <= 0)
926 {
927 return false;
928 }
929
930 const DrawData &data = *draw.data;
931 const float lineWidth = data.lineWidth;
932 const int clipFlags = draw.depthClipEnable ? Clipper::CLIP_FRUSTUM : Clipper::CLIP_SIDES;
933 constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
934
935 const float W = data.WxF * (1.0f / subPixF);
936 const float H = data.HxF * (1.0f / subPixF);
937
938 float dx = W * (P1.x / P1.w - P0.x / P0.w);
939 float dy = H * (P1.y / P1.w - P0.y / P0.w);
940
941 if(dx == 0 && dy == 0)
942 {
943 return false;
944 }
945
946 if(draw.lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT)
947 {
948 // Rectangle centered on the line segment
949
950 float4 P[4];
951
952 P[0] = P0;
953 P[1] = P1;
954 P[2] = P1;
955 P[3] = P0;
956
957 float scale = lineWidth * 0.5f / sqrt(dx * dx + dy * dy);
958
959 dx *= scale;
960 dy *= scale;
961
962 float dx0h = dx * P0.w / H;
963 float dy0w = dy * P0.w / W;
964
965 float dx1h = dx * P1.w / H;
966 float dy1w = dy * P1.w / W;
967
968 P[0].x += -dy0w;
969 P[0].y += +dx0h;
970
971 P[1].x += -dy1w;
972 P[1].y += +dx1h;
973
974 P[2].x += +dy1w;
975 P[2].y += -dx1h;
976
977 P[3].x += +dy0w;
978 P[3].y += -dx0h;
979
980 Polygon polygon(P, 4);
981
982 if(!Clipper::Clip(polygon, clipFlags, draw))
983 {
984 return false;
985 }
986
987 return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
988 }
989 else if(false) // TODO(b/80135519): Deprecate
990 {
991 // Connecting diamonds polygon
992 // This shape satisfies the diamond test convention, except for the exit rule part.
993 // Line segments with overlapping endpoints have duplicate fragments.
994 // The ideal algorithm requires half-open line rasterization (b/80135519).
995
996 float4 P[8];
997
998 P[0] = P0;
999 P[1] = P0;
1000 P[2] = P0;
1001 P[3] = P0;
1002 P[4] = P1;
1003 P[5] = P1;
1004 P[6] = P1;
1005 P[7] = P1;
1006
1007 float dx0 = lineWidth * 0.5f * P0.w / W;
1008 float dy0 = lineWidth * 0.5f * P0.w / H;
1009
1010 float dx1 = lineWidth * 0.5f * P1.w / W;
1011 float dy1 = lineWidth * 0.5f * P1.w / H;
1012
1013 P[0].x += -dx0;
1014 P[1].y += +dy0;
1015 P[2].x += +dx0;
1016 P[3].y += -dy0;
1017 P[4].x += -dx1;
1018 P[5].y += +dy1;
1019 P[6].x += +dx1;
1020 P[7].y += -dy1;
1021
1022 float4 L[6];
1023
1024 if(dx > -dy)
1025 {
1026 if(dx > dy) // Right
1027 {
1028 L[0] = P[0];
1029 L[1] = P[1];
1030 L[2] = P[5];
1031 L[3] = P[6];
1032 L[4] = P[7];
1033 L[5] = P[3];
1034 }
1035 else // Down
1036 {
1037 L[0] = P[0];
1038 L[1] = P[4];
1039 L[2] = P[5];
1040 L[3] = P[6];
1041 L[4] = P[2];
1042 L[5] = P[3];
1043 }
1044 }
1045 else
1046 {
1047 if(dx > dy) // Up
1048 {
1049 L[0] = P[0];
1050 L[1] = P[1];
1051 L[2] = P[2];
1052 L[3] = P[6];
1053 L[4] = P[7];
1054 L[5] = P[4];
1055 }
1056 else // Left
1057 {
1058 L[0] = P[1];
1059 L[1] = P[2];
1060 L[2] = P[3];
1061 L[3] = P[7];
1062 L[4] = P[4];
1063 L[5] = P[5];
1064 }
1065 }
1066
1067 Polygon polygon(L, 6);
1068
1069 if(!Clipper::Clip(polygon, clipFlags, draw))
1070 {
1071 return false;
1072 }
1073
1074 return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
1075 }
1076 else
1077 {
1078 // Parallelogram approximating Bresenham line
1079 // This algorithm does not satisfy the ideal diamond-exit rule, but does avoid the
1080 // duplicate fragment rasterization problem and satisfies all of Vulkan's minimum
1081 // requirements for Bresenham line segment rasterization.
1082
1083 float4 P[8];
1084 P[0] = P0;
1085 P[1] = P0;
1086 P[2] = P0;
1087 P[3] = P0;
1088 P[4] = P1;
1089 P[5] = P1;
1090 P[6] = P1;
1091 P[7] = P1;
1092
1093 float dx0 = lineWidth * 0.5f * P0.w / W;
1094 float dy0 = lineWidth * 0.5f * P0.w / H;
1095
1096 float dx1 = lineWidth * 0.5f * P1.w / W;
1097 float dy1 = lineWidth * 0.5f * P1.w / H;
1098
1099 P[0].x += -dx0;
1100 P[1].y += +dy0;
1101 P[2].x += +dx0;
1102 P[3].y += -dy0;
1103 P[4].x += -dx1;
1104 P[5].y += +dy1;
1105 P[6].x += +dx1;
1106 P[7].y += -dy1;
1107
1108 float4 L[4];
1109
1110 if(dx > -dy)
1111 {
1112 if(dx > dy) // Right
1113 {
1114 L[0] = P[1];
1115 L[1] = P[5];
1116 L[2] = P[7];
1117 L[3] = P[3];
1118 }
1119 else // Down
1120 {
1121 L[0] = P[0];
1122 L[1] = P[4];
1123 L[2] = P[6];
1124 L[3] = P[2];
1125 }
1126 }
1127 else
1128 {
1129 if(dx > dy) // Up
1130 {
1131 L[0] = P[0];
1132 L[1] = P[2];
1133 L[2] = P[6];
1134 L[3] = P[4];
1135 }
1136 else // Left
1137 {
1138 L[0] = P[1];
1139 L[1] = P[3];
1140 L[2] = P[7];
1141 L[3] = P[5];
1142 }
1143 }
1144
1145 Polygon polygon(L, 4);
1146
1147 if(!Clipper::Clip(polygon, clipFlags, draw))
1148 {
1149 return false;
1150 }
1151
1152 return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
1153 }
1154
1155 return false;
1156 }
1157
setupPoint(vk::Device * device,Primitive & primitive,Triangle & triangle,const DrawCall & draw)1158 bool DrawCall::setupPoint(vk::Device *device, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1159 {
1160 const Vertex &v = triangle.v0;
1161
1162 if(v.cullMask == 0)
1163 {
1164 return false;
1165 }
1166
1167 const DrawData &data = *draw.data;
1168 const int clipFlags = draw.depthClipEnable ? Clipper::CLIP_FRUSTUM : Clipper::CLIP_SIDES;
1169
1170 const float pSize = clamp(v.pointSize, 1.0f, static_cast<float>(vk::MAX_POINT_SIZE));
1171 const float X = pSize * v.position.w * data.halfPixelX;
1172 const float Y = pSize * v.position.w * data.halfPixelY;
1173
1174 float4 P[4];
1175
1176 P[0] = v.position;
1177 P[0].x -= X;
1178 P[0].y += Y;
1179
1180 P[1] = v.position;
1181 P[1].x += X;
1182 P[1].y += Y;
1183
1184 P[2] = v.position;
1185 P[2].x += X;
1186 P[2].y -= Y;
1187
1188 P[3] = v.position;
1189 P[3].x -= X;
1190 P[3].y -= Y;
1191
1192 Polygon polygon(P, 4);
1193
1194 if(!Clipper::Clip(polygon, clipFlags, draw))
1195 {
1196 return false;
1197 }
1198
1199 primitive.pointSizeInv = 1.0f / pSize;
1200
1201 return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
1202 }
1203
// Registers 'query' as the renderer's occlusion query.
// Asserts that 'query' is an occlusion query and that no occlusion query is
// currently registered.
void Renderer::addQuery(vk::Query *query)
{
	ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
	ASSERT(!occlusionQuery);

	occlusionQuery = query;
}
1211
// Unregisters the renderer's occlusion query.
// Asserts that 'query' is an occlusion query and that it is the one currently
// registered.
void Renderer::removeQuery(vk::Query *query)
{
	ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
	ASSERT(occlusionQuery == query);

	occlusionQuery = nullptr;
}
1219
1220 } // namespace sw
1221