xref: /aosp_15_r20/external/swiftshader/src/Device/Renderer.cpp (revision 03ce13f70fcc45d86ee91b7ee4cab1936a95046e)
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Renderer.hpp"
16 
17 #include "Clipper.hpp"
18 #include "Polygon.hpp"
19 #include "Primitive.hpp"
20 #include "Vertex.hpp"
21 #include "Pipeline/Constants.hpp"
22 #include "Pipeline/SpirvShader.hpp"
23 #include "Reactor/Reactor.hpp"
24 #include "System/Debug.hpp"
25 #include "System/Half.hpp"
26 #include "System/Math.hpp"
27 #include "System/Memory.hpp"
28 #include "System/Timer.hpp"
29 #include "Vulkan/VkConfig.hpp"
30 #include "Vulkan/VkDescriptorSet.hpp"
31 #include "Vulkan/VkDevice.hpp"
32 #include "Vulkan/VkFence.hpp"
33 #include "Vulkan/VkImageView.hpp"
34 #include "Vulkan/VkPipelineLayout.hpp"
35 #include "Vulkan/VkQueryPool.hpp"
36 
37 #include "marl/containers.h"
38 #include "marl/defer.h"
39 #include "marl/trace.h"
40 
41 #undef max
42 
#ifndef NDEBUG
// Debug-build-only globals with external linkage.
// NOTE(review): no use of either value is visible in this part of the file.
unsigned int minPrimitives = 1u;
unsigned int maxPrimitives = 1u << 21;
#endif
47 
48 namespace sw {
49 
50 template<typename T>
setBatchIndices(unsigned int batch[128][3],VkPrimitiveTopology topology,VkProvokingVertexModeEXT provokingVertexMode,T indices,unsigned int start,unsigned int triangleCount)51 inline bool setBatchIndices(unsigned int batch[128][3], VkPrimitiveTopology topology, VkProvokingVertexModeEXT provokingVertexMode, T indices, unsigned int start, unsigned int triangleCount)
52 {
53 	bool provokeFirst = (provokingVertexMode == VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT);
54 
55 	switch(topology)
56 	{
57 	case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
58 		{
59 			auto index = start;
60 			auto pointBatch = &(batch[0][0]);
61 			for(unsigned int i = 0; i < triangleCount; i++)
62 			{
63 				*pointBatch++ = indices[index++];
64 			}
65 
66 			// Repeat the last index to allow for SIMD width overrun.
67 			index--;
68 			for(unsigned int i = 0; i < 3; i++)
69 			{
70 				*pointBatch++ = indices[index];
71 			}
72 		}
73 		break;
74 	case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
75 		{
76 			auto index = 2 * start;
77 			for(unsigned int i = 0; i < triangleCount; i++)
78 			{
79 				batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
80 				batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
81 				batch[i][2] = indices[index + 1];
82 
83 				index += 2;
84 			}
85 		}
86 		break;
87 	case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
88 		{
89 			auto index = start;
90 			for(unsigned int i = 0; i < triangleCount; i++)
91 			{
92 				batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
93 				batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
94 				batch[i][2] = indices[index + 1];
95 
96 				index += 1;
97 			}
98 		}
99 		break;
100 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
101 		{
102 			auto index = 3 * start;
103 			for(unsigned int i = 0; i < triangleCount; i++)
104 			{
105 				batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
106 				batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
107 				batch[i][2] = indices[index + (provokeFirst ? 2 : 1)];
108 
109 				index += 3;
110 			}
111 		}
112 		break;
113 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
114 		{
115 			auto index = start;
116 			for(unsigned int i = 0; i < triangleCount; i++)
117 			{
118 				batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
119 				batch[i][1] = indices[index + ((start + i) & 1) + (provokeFirst ? 1 : 0)];
120 				batch[i][2] = indices[index + (~(start + i) & 1) + (provokeFirst ? 1 : 0)];
121 
122 				index += 1;
123 			}
124 		}
125 		break;
126 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
127 		{
128 			auto index = start + 1;
129 			for(unsigned int i = 0; i < triangleCount; i++)
130 			{
131 				batch[i][provokeFirst ? 0 : 2] = indices[index + 0];
132 				batch[i][provokeFirst ? 1 : 0] = indices[index + 1];
133 				batch[i][provokeFirst ? 2 : 1] = indices[0];
134 
135 				index += 1;
136 			}
137 		}
138 		break;
139 	default:
140 		ASSERT(false);
141 		return false;
142 	}
143 
144 	return true;
145 }
146 
DrawCall()147 DrawCall::DrawCall()
148 {
149 	// TODO(b/140991626): Use allocateUninitialized() instead of allocateZeroOrPoison() to improve startup peformance.
150 	data = (DrawData *)sw::allocateZeroOrPoison(sizeof(DrawData));
151 }
152 
~DrawCall()153 DrawCall::~DrawCall()
154 {
155 	sw::freeMemory(data);
156 }
157 
Renderer(vk::Device * device)158 Renderer::Renderer(vk::Device *device)
159     : device(device)
160 {
161 	vertexProcessor.setRoutineCacheSize(1024);
162 	pixelProcessor.setRoutineCacheSize(1024);
163 	setupProcessor.setRoutineCacheSize(1024);
164 }
165 
~Renderer()166 Renderer::~Renderer()
167 {
168 	drawTickets.take().wait();
169 }
170 
171 // Renderer objects have to be mem aligned to the alignment provided in the class declaration
operator new(size_t size)172 void *Renderer::operator new(size_t size)
173 {
174 	ASSERT(size == sizeof(Renderer));  // This operator can't be called from a derived class
175 	return vk::allocateHostMemory(sizeof(Renderer), alignof(Renderer), vk::NULL_ALLOCATION_CALLBACKS, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
176 }
177 
operator delete(void * mem)178 void Renderer::operator delete(void *mem)
179 {
180 	vk::freeHostMemory(mem, vk::NULL_ALLOCATION_CALLBACKS);
181 }
182 
draw(const vk::GraphicsPipeline * pipeline,const vk::DynamicState & dynamicState,unsigned int count,int baseVertex,CountedEvent * events,int instanceID,int layer,void * indexBuffer,const VkRect2D & renderArea,const vk::Pipeline::PushConstantStorage & pushConstants,bool update)183 void Renderer::draw(const vk::GraphicsPipeline *pipeline, const vk::DynamicState &dynamicState, unsigned int count, int baseVertex,
184                     CountedEvent *events, int instanceID, int layer, void *indexBuffer, const VkRect2D &renderArea,
185                     const vk::Pipeline::PushConstantStorage &pushConstants, bool update)
186 {
187 	if(count == 0) { return; }
188 
189 	auto id = nextDrawID++;
190 	MARL_SCOPED_EVENT("draw %d", id);
191 
192 	marl::Pool<sw::DrawCall>::Loan draw;
193 	{
194 		MARL_SCOPED_EVENT("drawCallPool.borrow()");
195 		draw = drawCallPool.borrow();
196 	}
197 	draw->id = id;
198 
199 	const vk::GraphicsState &pipelineState = pipeline->getCombinedState(dynamicState);
200 
201 	// A graphics pipeline must always be "complete" before it can be used for drawing.  A
202 	// complete graphics pipeline always includes the vertex input interface and
203 	// pre-rasterization subsets, but only includes fragment and fragment output interface
204 	// subsets if rasterizer discard is not enabled.
205 	//
206 	// Note that in the following, the setupPrimitives, setupRoutine and pixelRoutine functions
207 	// are only called when rasterizer discard is not enabled.  If rasterizer discard is
208 	// enabled, these functions and state for the latter two states are not set.
209 	const vk::VertexInputInterfaceState &vertexInputInterfaceState = pipelineState.getVertexInputInterfaceState();
210 	const vk::PreRasterizationState &preRasterizationState = pipelineState.getPreRasterizationState();
211 	const vk::FragmentState *fragmentState = nullptr;
212 	const vk::FragmentOutputInterfaceState *fragmentOutputInterfaceState = nullptr;
213 
214 	const bool hasRasterizerDiscard = preRasterizationState.hasRasterizerDiscard();
215 	if(!hasRasterizerDiscard)
216 	{
217 		fragmentState = &pipelineState.getFragmentState();
218 		fragmentOutputInterfaceState = &pipelineState.getFragmentOutputInterfaceState();
219 
220 		pixelProcessor.setBlendConstant(fragmentOutputInterfaceState->getBlendConstants());
221 	}
222 
223 	const vk::Inputs &inputs = pipeline->getInputs();
224 
225 	if(update)
226 	{
227 		MARL_SCOPED_EVENT("update");
228 
229 		const sw::SpirvShader *fragmentShader = pipeline->getShader(VK_SHADER_STAGE_FRAGMENT_BIT).get();
230 		const sw::SpirvShader *vertexShader = pipeline->getShader(VK_SHADER_STAGE_VERTEX_BIT).get();
231 
232 		const vk::Attachments attachments = pipeline->getAttachments();
233 
234 		vertexState = vertexProcessor.update(pipelineState, vertexShader, inputs);
235 		vertexRoutine = vertexProcessor.routine(vertexState, preRasterizationState.getPipelineLayout(), vertexShader, inputs.getDescriptorSets());
236 
237 		if(!hasRasterizerDiscard)
238 		{
239 			setupState = setupProcessor.update(pipelineState, fragmentShader, vertexShader, attachments);
240 			setupRoutine = setupProcessor.routine(setupState);
241 
242 			pixelState = pixelProcessor.update(pipelineState, fragmentShader, vertexShader, attachments, hasOcclusionQuery());
243 			pixelRoutine = pixelProcessor.routine(pixelState, fragmentState->getPipelineLayout(), fragmentShader, attachments, inputs.getDescriptorSets());
244 		}
245 	}
246 
247 	draw->preRasterizationContainsImageWrite = pipeline->preRasterizationContainsImageWrite();
248 	draw->fragmentContainsImageWrite = pipeline->fragmentContainsImageWrite();
249 
250 	// The sample count affects the batch size even if rasterization is disabled.
251 	// TODO(b/147812380): Eliminate the dependency between multisampling and batch size.
252 	int ms = hasRasterizerDiscard ? 1 : fragmentOutputInterfaceState->getSampleCount();
253 	ASSERT(ms > 0);
254 
255 	unsigned int numPrimitivesPerBatch = MaxBatchSize / ms;
256 
257 	DrawData *data = draw->data;
258 	draw->occlusionQuery = occlusionQuery;
259 	draw->batchDataPool = &batchDataPool;
260 	draw->numPrimitives = count;
261 	draw->numPrimitivesPerBatch = numPrimitivesPerBatch;
262 	draw->numBatches = (count + draw->numPrimitivesPerBatch - 1) / draw->numPrimitivesPerBatch;
263 	draw->topology = vertexInputInterfaceState.getTopology();
264 	draw->provokingVertexMode = preRasterizationState.getProvokingVertexMode();
265 	draw->lineRasterizationMode = preRasterizationState.getLineRasterizationMode();
266 	draw->descriptorSetObjects = inputs.getDescriptorSetObjects();
267 	draw->preRasterizationPipelineLayout = preRasterizationState.getPipelineLayout();
268 	draw->depthClipEnable = preRasterizationState.getDepthClipEnable();
269 	draw->depthClipNegativeOneToOne = preRasterizationState.getDepthClipNegativeOneToOne();
270 	data->lineWidth = preRasterizationState.getLineWidth();
271 	data->rasterizerDiscard = hasRasterizerDiscard;
272 
273 	data->descriptorSets = inputs.getDescriptorSets();
274 	data->descriptorDynamicOffsets = inputs.getDescriptorDynamicOffsets();
275 
276 	for(int i = 0; i < MAX_INTERFACE_COMPONENTS / 4; i++)
277 	{
278 		const sw::Stream &stream = inputs.getStream(i);
279 		data->input[i] = stream.buffer;
280 		data->robustnessSize[i] = stream.robustnessSize;
281 		data->stride[i] = inputs.getVertexStride(i);
282 	}
283 
284 	data->indices = indexBuffer;
285 	data->layer = layer;
286 	data->instanceID = instanceID;
287 	data->baseVertex = baseVertex;
288 	draw->indexType = indexBuffer ? pipeline->getIndexBuffer().getIndexType() : VK_INDEX_TYPE_UINT16;
289 
290 	draw->vertexRoutine = vertexRoutine;
291 
292 	vk::DescriptorSet::PrepareForSampling(draw->descriptorSetObjects, draw->preRasterizationPipelineLayout, device);
293 
294 	// Viewport
295 	{
296 		const VkViewport &viewport = preRasterizationState.getViewport();
297 
298 		float W = 0.5f * viewport.width;
299 		float H = 0.5f * viewport.height;
300 		float X0 = viewport.x + W;
301 		float Y0 = viewport.y + H;
302 		float N = viewport.minDepth;
303 		float F = viewport.maxDepth;
304 		float Z = F - N;
305 		constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
306 
307 		data->WxF = W * subPixF;
308 		data->HxF = H * subPixF;
309 		data->X0xF = X0 * subPixF - subPixF / 2;
310 		data->Y0xF = Y0 * subPixF - subPixF / 2;
311 		data->halfPixelX = 0.5f / W;
312 		data->halfPixelY = 0.5f / H;
313 		data->depthRange = Z;
314 		data->depthNear = N;
315 		data->constantDepthBias = preRasterizationState.getConstantDepthBias();
316 		data->slopeDepthBias = preRasterizationState.getSlopeDepthBias();
317 		data->depthBiasClamp = preRasterizationState.getDepthBiasClamp();
318 
319 		// Adjust viewport transform based on the negativeOneToOne state.
320 		if(preRasterizationState.getDepthClipNegativeOneToOne())
321 		{
322 			data->depthRange = Z * 0.5f;
323 			data->depthNear = (F + N) * 0.5f;
324 		}
325 	}
326 
327 	// Scissor
328 	{
329 		const VkRect2D &scissor = preRasterizationState.getScissor();
330 
331 		int x0 = renderArea.offset.x;
332 		int y0 = renderArea.offset.y;
333 		int x1 = x0 + renderArea.extent.width;
334 		int y1 = y0 + renderArea.extent.height;
335 		data->scissorX0 = clamp<int>(scissor.offset.x, x0, x1);
336 		data->scissorX1 = clamp<int>(scissor.offset.x + scissor.extent.width, x0, x1);
337 		data->scissorY0 = clamp<int>(scissor.offset.y, y0, y1);
338 		data->scissorY1 = clamp<int>(scissor.offset.y + scissor.extent.height, y0, y1);
339 	}
340 
341 	if(!hasRasterizerDiscard)
342 	{
343 		const VkPolygonMode polygonMode = preRasterizationState.getPolygonMode();
344 
345 		DrawCall::SetupFunction setupPrimitives = nullptr;
346 		if(vertexInputInterfaceState.isDrawTriangle(false, polygonMode))
347 		{
348 			switch(preRasterizationState.getPolygonMode())
349 			{
350 			case VK_POLYGON_MODE_FILL:
351 				setupPrimitives = &DrawCall::setupSolidTriangles;
352 				break;
353 			case VK_POLYGON_MODE_LINE:
354 				setupPrimitives = &DrawCall::setupWireframeTriangles;
355 				numPrimitivesPerBatch /= 3;
356 				break;
357 			case VK_POLYGON_MODE_POINT:
358 				setupPrimitives = &DrawCall::setupPointTriangles;
359 				numPrimitivesPerBatch /= 3;
360 				break;
361 			default:
362 				UNSUPPORTED("polygon mode: %d", int(preRasterizationState.getPolygonMode()));
363 				return;
364 			}
365 		}
366 		else if(vertexInputInterfaceState.isDrawLine(false, polygonMode))
367 		{
368 			setupPrimitives = &DrawCall::setupLines;
369 		}
370 		else  // Point primitive topology
371 		{
372 			setupPrimitives = &DrawCall::setupPoints;
373 		}
374 
375 		draw->setupState = setupState;
376 		draw->setupRoutine = setupRoutine;
377 		draw->pixelRoutine = pixelRoutine;
378 		draw->setupPrimitives = setupPrimitives;
379 		draw->fragmentPipelineLayout = fragmentState->getPipelineLayout();
380 
381 		if(pixelState.stencilActive)
382 		{
383 			data->stencil[0].set(fragmentState->getFrontStencil().reference, fragmentState->getFrontStencil().compareMask, fragmentState->getFrontStencil().writeMask);
384 			data->stencil[1].set(fragmentState->getBackStencil().reference, fragmentState->getBackStencil().compareMask, fragmentState->getBackStencil().writeMask);
385 		}
386 
387 		data->factor = pixelProcessor.factor;
388 
389 		if(pixelState.alphaToCoverage)
390 		{
391 			if(ms == 4)
392 			{
393 				data->a2c0 = 0.2f;
394 				data->a2c1 = 0.4f;
395 				data->a2c2 = 0.6f;
396 				data->a2c3 = 0.8f;
397 			}
398 			else if(ms == 2)
399 			{
400 				data->a2c0 = 0.25f;
401 				data->a2c1 = 0.75f;
402 			}
403 			else if(ms == 1)
404 			{
405 				data->a2c0 = 0.5f;
406 			}
407 			else
408 				ASSERT(false);
409 		}
410 
411 		if(pixelState.occlusionEnabled)
412 		{
413 			for(int cluster = 0; cluster < MaxClusterCount; cluster++)
414 			{
415 				data->occlusion[cluster] = 0;
416 			}
417 		}
418 
419 		// Viewport
420 		{
421 			const vk::Attachments attachments = pipeline->getAttachments();
422 			if(attachments.depthBuffer)
423 			{
424 				switch(attachments.depthBuffer->getFormat(VK_IMAGE_ASPECT_DEPTH_BIT))
425 				{
426 				case VK_FORMAT_D16_UNORM:
427 					// Minimum is 1 unit, but account for potential floating-point rounding errors
428 					data->minimumResolvableDepthDifference = 1.01f / 0xFFFF;
429 					break;
430 				case VK_FORMAT_D32_SFLOAT:
431 					// The minimum resolvable depth difference is determined per-polygon for floating-point depth
432 					// buffers. DrawData::minimumResolvableDepthDifference is unused.
433 					break;
434 				default:
435 					UNSUPPORTED("Depth format: %d", int(attachments.depthBuffer->getFormat(VK_IMAGE_ASPECT_DEPTH_BIT)));
436 				}
437 			}
438 		}
439 
440 		// Target
441 		{
442 			const vk::Attachments attachments = pipeline->getAttachments();
443 
444 			for(int index = 0; index < MAX_COLOR_BUFFERS; index++)
445 			{
446 				draw->colorBuffer[index] = attachments.colorBuffer[index];
447 
448 				if(draw->colorBuffer[index])
449 				{
450 					data->colorBuffer[index] = (unsigned int *)attachments.colorBuffer[index]->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_COLOR_BIT, 0, data->layer);
451 					data->colorPitchB[index] = attachments.colorBuffer[index]->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
452 					data->colorSliceB[index] = attachments.colorBuffer[index]->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
453 				}
454 			}
455 
456 			draw->depthBuffer = attachments.depthBuffer;
457 			draw->stencilBuffer = attachments.stencilBuffer;
458 
459 			if(draw->depthBuffer)
460 			{
461 				data->depthBuffer = (float *)attachments.depthBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_DEPTH_BIT, 0, data->layer);
462 				data->depthPitchB = attachments.depthBuffer->rowPitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
463 				data->depthSliceB = attachments.depthBuffer->slicePitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
464 			}
465 
466 			if(draw->stencilBuffer)
467 			{
468 				data->stencilBuffer = (unsigned char *)attachments.stencilBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_STENCIL_BIT, 0, data->layer);
469 				data->stencilPitchB = attachments.stencilBuffer->rowPitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
470 				data->stencilSliceB = attachments.stencilBuffer->slicePitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
471 			}
472 		}
473 
474 		if(draw->fragmentPipelineLayout != draw->preRasterizationPipelineLayout)
475 		{
476 			vk::DescriptorSet::PrepareForSampling(draw->descriptorSetObjects, draw->fragmentPipelineLayout, device);
477 		}
478 	}
479 
480 	// Push constants
481 	{
482 		data->pushConstants = pushConstants;
483 	}
484 
485 	draw->events = events;
486 
487 	DrawCall::run(device, draw, &drawTickets, clusterQueues);
488 }
489 
setup()490 void DrawCall::setup()
491 {
492 	if(occlusionQuery != nullptr)
493 	{
494 		occlusionQuery->start();
495 	}
496 
497 	if(events)
498 	{
499 		events->add();
500 	}
501 }
502 
teardown(vk::Device * device)503 void DrawCall::teardown(vk::Device *device)
504 {
505 	if(events)
506 	{
507 		events->done();
508 		events = nullptr;
509 	}
510 
511 	vertexRoutine = {};
512 	setupRoutine = {};
513 	pixelRoutine = {};
514 
515 	if(preRasterizationContainsImageWrite)
516 	{
517 		vk::DescriptorSet::ContentsChanged(descriptorSetObjects, preRasterizationPipelineLayout, device);
518 	}
519 
520 	if(!data->rasterizerDiscard)
521 	{
522 		if(occlusionQuery != nullptr)
523 		{
524 			for(int cluster = 0; cluster < MaxClusterCount; cluster++)
525 			{
526 				occlusionQuery->add(data->occlusion[cluster]);
527 			}
528 			occlusionQuery->finish();
529 		}
530 
531 		for(auto *target : colorBuffer)
532 		{
533 			if(target)
534 			{
535 				target->contentsChanged(vk::Image::DIRECT_MEMORY_ACCESS);
536 			}
537 		}
538 
539 		// If pre-rasterization and fragment use the same pipeline, and pre-rasterization
540 		// also contains image writes, don't double-notify the descriptor set.
541 		const bool descSetAlreadyNotified = preRasterizationContainsImageWrite && fragmentPipelineLayout == preRasterizationPipelineLayout;
542 		if(fragmentContainsImageWrite && !descSetAlreadyNotified)
543 		{
544 			vk::DescriptorSet::ContentsChanged(descriptorSetObjects, fragmentPipelineLayout, device);
545 		}
546 	}
547 }
548 
run(vk::Device * device,const marl::Loan<DrawCall> & draw,marl::Ticket::Queue * tickets,marl::Ticket::Queue clusterQueues[MaxClusterCount])549 void DrawCall::run(vk::Device *device, const marl::Loan<DrawCall> &draw, marl::Ticket::Queue *tickets, marl::Ticket::Queue clusterQueues[MaxClusterCount])
550 {
551 	draw->setup();
552 
553 	const auto numPrimitives = draw->numPrimitives;
554 	const auto numPrimitivesPerBatch = draw->numPrimitivesPerBatch;
555 	const auto numBatches = draw->numBatches;
556 
557 	auto ticket = tickets->take();
558 	auto finally = marl::make_shared_finally([device, draw, ticket] {
559 		MARL_SCOPED_EVENT("FINISH draw %d", draw->id);
560 		draw->teardown(device);
561 		ticket.done();
562 	});
563 
564 	for(unsigned int batchId = 0; batchId < numBatches; batchId++)
565 	{
566 		auto batch = draw->batchDataPool->borrow();
567 		batch->id = batchId;
568 		batch->firstPrimitive = batch->id * numPrimitivesPerBatch;
569 		batch->numPrimitives = std::min(batch->firstPrimitive + numPrimitivesPerBatch, numPrimitives) - batch->firstPrimitive;
570 
571 		for(int cluster = 0; cluster < MaxClusterCount; cluster++)
572 		{
573 			batch->clusterTickets[cluster] = std::move(clusterQueues[cluster].take());
574 		}
575 
576 		marl::schedule([device, draw, batch, finally] {
577 			processVertices(device, draw.get(), batch.get());
578 
579 			if(!draw->data->rasterizerDiscard)
580 			{
581 				processPrimitives(device, draw.get(), batch.get());
582 
583 				if(batch->numVisible > 0)
584 				{
585 					processPixels(device, draw, batch, finally);
586 					return;
587 				}
588 			}
589 
590 			for(int cluster = 0; cluster < MaxClusterCount; cluster++)
591 			{
592 				batch->clusterTickets[cluster].done();
593 			}
594 		});
595 	}
596 }
597 
processVertices(vk::Device * device,DrawCall * draw,BatchData * batch)598 void DrawCall::processVertices(vk::Device *device, DrawCall *draw, BatchData *batch)
599 {
600 	MARL_SCOPED_EVENT("VERTEX draw %d, batch %d", draw->id, batch->id);
601 
602 	unsigned int triangleIndices[MaxBatchSize + 1][3];  // One extra for SIMD width overrun. TODO: Adjust to dynamic batch size.
603 	{
604 		MARL_SCOPED_EVENT("processPrimitiveVertices");
605 		processPrimitiveVertices(
606 		    triangleIndices,
607 		    draw->data->indices,
608 		    draw->indexType,
609 		    batch->firstPrimitive,
610 		    batch->numPrimitives,
611 		    draw->topology,
612 		    draw->provokingVertexMode);
613 	}
614 
615 	auto &vertexTask = batch->vertexTask;
616 	vertexTask.primitiveStart = batch->firstPrimitive;
617 	// We're only using batch compaction for points, not lines
618 	vertexTask.vertexCount = batch->numPrimitives * ((draw->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) ? 1 : 3);
619 	if(vertexTask.vertexCache.drawCall != draw->id)
620 	{
621 		vertexTask.vertexCache.clear();
622 		vertexTask.vertexCache.drawCall = draw->id;
623 	}
624 
625 	draw->vertexRoutine(device, &batch->triangles.front().v0, &triangleIndices[0][0], &vertexTask, draw->data);
626 }
627 
processPrimitives(vk::Device * device,DrawCall * draw,BatchData * batch)628 void DrawCall::processPrimitives(vk::Device *device, DrawCall *draw, BatchData *batch)
629 {
630 	MARL_SCOPED_EVENT("PRIMITIVES draw %d batch %d", draw->id, batch->id);
631 	auto triangles = &batch->triangles[0];
632 	auto primitives = &batch->primitives[0];
633 	batch->numVisible = draw->setupPrimitives(device, triangles, primitives, draw, batch->numPrimitives);
634 }
635 
processPixels(vk::Device * device,const marl::Loan<DrawCall> & draw,const marl::Loan<BatchData> & batch,const std::shared_ptr<marl::Finally> & finally)636 void DrawCall::processPixels(vk::Device *device, const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
637 {
638 	struct Data
639 	{
640 		Data(const marl::Loan<DrawCall> &draw, const marl::Loan<BatchData> &batch, const std::shared_ptr<marl::Finally> &finally)
641 		    : draw(draw)
642 		    , batch(batch)
643 		    , finally(finally)
644 		{}
645 		marl::Loan<DrawCall> draw;
646 		marl::Loan<BatchData> batch;
647 		std::shared_ptr<marl::Finally> finally;
648 	};
649 	auto data = std::make_shared<Data>(draw, batch, finally);
650 	for(int cluster = 0; cluster < MaxClusterCount; cluster++)
651 	{
652 		batch->clusterTickets[cluster].onCall([device, data, cluster] {
653 			auto &draw = data->draw;
654 			auto &batch = data->batch;
655 			MARL_SCOPED_EVENT("PIXEL draw %d, batch %d, cluster %d", draw->id, batch->id, cluster);
656 			draw->pixelRoutine(device, &batch->primitives.front(), batch->numVisible, cluster, MaxClusterCount, draw->data);
657 			batch->clusterTickets[cluster].done();
658 		});
659 	}
660 }
661 
synchronize()662 void Renderer::synchronize()
663 {
664 	MARL_SCOPED_EVENT("synchronize");
665 	auto ticket = drawTickets.take();
666 	ticket.wait();
667 	device->updateSamplingRoutineSnapshotCache();
668 	ticket.done();
669 }
670 
processPrimitiveVertices(unsigned int triangleIndicesOut[MaxBatchSize+1][3],const void * primitiveIndices,VkIndexType indexType,unsigned int start,unsigned int triangleCount,VkPrimitiveTopology topology,VkProvokingVertexModeEXT provokingVertexMode)671 void DrawCall::processPrimitiveVertices(
672     unsigned int triangleIndicesOut[MaxBatchSize + 1][3],
673     const void *primitiveIndices,
674     VkIndexType indexType,
675     unsigned int start,
676     unsigned int triangleCount,
677     VkPrimitiveTopology topology,
678     VkProvokingVertexModeEXT provokingVertexMode)
679 {
680 	if(!primitiveIndices)
681 	{
682 		struct LinearIndex
683 		{
684 			unsigned int operator[](unsigned int i) { return i; }
685 		};
686 
687 		if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, LinearIndex(), start, triangleCount))
688 		{
689 			return;
690 		}
691 	}
692 	else
693 	{
694 		switch(indexType)
695 		{
696 		case VK_INDEX_TYPE_UINT16:
697 			if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint16_t *>(primitiveIndices), start, triangleCount))
698 			{
699 				return;
700 			}
701 			break;
702 		case VK_INDEX_TYPE_UINT32:
703 			if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint32_t *>(primitiveIndices), start, triangleCount))
704 			{
705 				return;
706 			}
707 			break;
708 			break;
709 		default:
710 			ASSERT(false);
711 			return;
712 		}
713 	}
714 
715 	// setBatchIndices() takes care of the point case, since it's different due to the compaction
716 	if(topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
717 	{
718 		// Repeat the last index to allow for SIMD width overrun.
719 		triangleIndicesOut[triangleCount][0] = triangleIndicesOut[triangleCount - 1][2];
720 		triangleIndicesOut[triangleCount][1] = triangleIndicesOut[triangleCount - 1][2];
721 		triangleIndicesOut[triangleCount][2] = triangleIndicesOut[triangleCount - 1][2];
722 	}
723 }
724 
setupSolidTriangles(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)725 int DrawCall::setupSolidTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
726 {
727 	auto &state = drawCall->setupState;
728 
729 	int ms = state.multiSampleCount;
730 	const DrawData *data = drawCall->data;
731 	int visible = 0;
732 
733 	for(int i = 0; i < count; i++, triangles++)
734 	{
735 		Vertex &v0 = triangles->v0;
736 		Vertex &v1 = triangles->v1;
737 		Vertex &v2 = triangles->v2;
738 
739 		Polygon polygon(&v0.position, &v1.position, &v2.position);
740 
741 		if((v0.cullMask | v1.cullMask | v2.cullMask) == 0)
742 		{
743 			continue;
744 		}
745 
746 		if((v0.clipFlags & v1.clipFlags & v2.clipFlags) != Clipper::CLIP_FINITE)
747 		{
748 			continue;
749 		}
750 
751 		int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags;
752 		if(clipFlagsOr != Clipper::CLIP_FINITE)
753 		{
754 			if(!Clipper::Clip(polygon, clipFlagsOr, *drawCall))
755 			{
756 				continue;
757 			}
758 		}
759 
760 		if(drawCall->setupRoutine(device, primitives, triangles, &polygon, data))
761 		{
762 			primitives += ms;
763 			visible++;
764 		}
765 	}
766 
767 	return visible;
768 }
769 
setupWireframeTriangles(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)770 int DrawCall::setupWireframeTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
771 {
772 	auto &state = drawCall->setupState;
773 
774 	int ms = state.multiSampleCount;
775 	int visible = 0;
776 
777 	for(int i = 0; i < count; i++)
778 	{
779 		const Vertex &v0 = triangles[i].v0;
780 		const Vertex &v1 = triangles[i].v1;
781 		const Vertex &v2 = triangles[i].v2;
782 
783 		float A = ((float)v0.projected.y - (float)v2.projected.y) * (float)v1.projected.x +
784 		          ((float)v2.projected.y - (float)v1.projected.y) * (float)v0.projected.x +
785 		          ((float)v1.projected.y - (float)v0.projected.y) * (float)v2.projected.x;  // Area
786 
787 		int w0w1w2 = bit_cast<int>(v0.w) ^
788 		             bit_cast<int>(v1.w) ^
789 		             bit_cast<int>(v2.w);
790 
791 		A = w0w1w2 < 0 ? -A : A;
792 
793 		bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (A >= 0.0f) : (A <= 0.0f);
794 
795 		if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
796 		{
797 			if(frontFacing) continue;
798 		}
799 		if(state.cullMode & VK_CULL_MODE_BACK_BIT)
800 		{
801 			if(!frontFacing) continue;
802 		}
803 
804 		Triangle lines[3];
805 		lines[0].v0 = v0;
806 		lines[0].v1 = v1;
807 		lines[1].v0 = v1;
808 		lines[1].v1 = v2;
809 		lines[2].v0 = v2;
810 		lines[2].v1 = v0;
811 
812 		for(int i = 0; i < 3; i++)
813 		{
814 			if(setupLine(device, *primitives, lines[i], *drawCall))
815 			{
816 				primitives += ms;
817 				visible++;
818 			}
819 		}
820 	}
821 
822 	return visible;
823 }
824 
setupPointTriangles(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)825 int DrawCall::setupPointTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
826 {
827 	auto &state = drawCall->setupState;
828 
829 	int ms = state.multiSampleCount;
830 	int visible = 0;
831 
832 	for(int i = 0; i < count; i++)
833 	{
834 		const Vertex &v0 = triangles[i].v0;
835 		const Vertex &v1 = triangles[i].v1;
836 		const Vertex &v2 = triangles[i].v2;
837 
838 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w +
839 		          (v0.x * v2.y - v0.y * v2.x) * v1.w +
840 		          (v2.x * v1.y - v1.x * v2.y) * v0.w;
841 
842 		bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0);
843 		if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
844 		{
845 			if(frontFacing) continue;
846 		}
847 		if(state.cullMode & VK_CULL_MODE_BACK_BIT)
848 		{
849 			if(!frontFacing) continue;
850 		}
851 
852 		Triangle points[3];
853 		points[0].v0 = v0;
854 		points[1].v0 = v1;
855 		points[2].v0 = v2;
856 
857 		for(int i = 0; i < 3; i++)
858 		{
859 			if(setupPoint(device, *primitives, points[i], *drawCall))
860 			{
861 				primitives += ms;
862 				visible++;
863 			}
864 		}
865 	}
866 
867 	return visible;
868 }
869 
setupLines(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)870 int DrawCall::setupLines(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
871 {
872 	auto &state = drawCall->setupState;
873 
874 	int visible = 0;
875 	int ms = state.multiSampleCount;
876 
877 	for(int i = 0; i < count; i++)
878 	{
879 		if(setupLine(device, *primitives, *triangles, *drawCall))
880 		{
881 			primitives += ms;
882 			visible++;
883 		}
884 
885 		triangles++;
886 	}
887 
888 	return visible;
889 }
890 
setupPoints(vk::Device * device,Triangle * triangles,Primitive * primitives,const DrawCall * drawCall,int count)891 int DrawCall::setupPoints(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
892 {
893 	auto &state = drawCall->setupState;
894 
895 	int visible = 0;
896 	int ms = state.multiSampleCount;
897 
898 	for(int i = 0; i < count; i++)
899 	{
900 		if(setupPoint(device, *primitives, *triangles, *drawCall))
901 		{
902 			primitives += ms;
903 			visible++;
904 		}
905 
906 		triangles++;
907 	}
908 
909 	return visible;
910 }
911 
setupLine(vk::Device * device,Primitive & primitive,Triangle & triangle,const DrawCall & draw)912 bool DrawCall::setupLine(vk::Device *device, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
913 {
914 	const Vertex &v0 = triangle.v0;
915 	const Vertex &v1 = triangle.v1;
916 
917 	if((v0.cullMask | v1.cullMask) == 0)
918 	{
919 		return false;
920 	}
921 
922 	const float4 &P0 = v0.position;
923 	const float4 &P1 = v1.position;
924 
925 	if(P0.w <= 0 && P1.w <= 0)
926 	{
927 		return false;
928 	}
929 
930 	const DrawData &data = *draw.data;
931 	const float lineWidth = data.lineWidth;
932 	const int clipFlags = draw.depthClipEnable ? Clipper::CLIP_FRUSTUM : Clipper::CLIP_SIDES;
933 	constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
934 
935 	const float W = data.WxF * (1.0f / subPixF);
936 	const float H = data.HxF * (1.0f / subPixF);
937 
938 	float dx = W * (P1.x / P1.w - P0.x / P0.w);
939 	float dy = H * (P1.y / P1.w - P0.y / P0.w);
940 
941 	if(dx == 0 && dy == 0)
942 	{
943 		return false;
944 	}
945 
946 	if(draw.lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT)
947 	{
948 		// Rectangle centered on the line segment
949 
950 		float4 P[4];
951 
952 		P[0] = P0;
953 		P[1] = P1;
954 		P[2] = P1;
955 		P[3] = P0;
956 
957 		float scale = lineWidth * 0.5f / sqrt(dx * dx + dy * dy);
958 
959 		dx *= scale;
960 		dy *= scale;
961 
962 		float dx0h = dx * P0.w / H;
963 		float dy0w = dy * P0.w / W;
964 
965 		float dx1h = dx * P1.w / H;
966 		float dy1w = dy * P1.w / W;
967 
968 		P[0].x += -dy0w;
969 		P[0].y += +dx0h;
970 
971 		P[1].x += -dy1w;
972 		P[1].y += +dx1h;
973 
974 		P[2].x += +dy1w;
975 		P[2].y += -dx1h;
976 
977 		P[3].x += +dy0w;
978 		P[3].y += -dx0h;
979 
980 		Polygon polygon(P, 4);
981 
982 		if(!Clipper::Clip(polygon, clipFlags, draw))
983 		{
984 			return false;
985 		}
986 
987 		return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
988 	}
989 	else if(false)  // TODO(b/80135519): Deprecate
990 	{
991 		// Connecting diamonds polygon
992 		// This shape satisfies the diamond test convention, except for the exit rule part.
993 		// Line segments with overlapping endpoints have duplicate fragments.
994 		// The ideal algorithm requires half-open line rasterization (b/80135519).
995 
996 		float4 P[8];
997 
998 		P[0] = P0;
999 		P[1] = P0;
1000 		P[2] = P0;
1001 		P[3] = P0;
1002 		P[4] = P1;
1003 		P[5] = P1;
1004 		P[6] = P1;
1005 		P[7] = P1;
1006 
1007 		float dx0 = lineWidth * 0.5f * P0.w / W;
1008 		float dy0 = lineWidth * 0.5f * P0.w / H;
1009 
1010 		float dx1 = lineWidth * 0.5f * P1.w / W;
1011 		float dy1 = lineWidth * 0.5f * P1.w / H;
1012 
1013 		P[0].x += -dx0;
1014 		P[1].y += +dy0;
1015 		P[2].x += +dx0;
1016 		P[3].y += -dy0;
1017 		P[4].x += -dx1;
1018 		P[5].y += +dy1;
1019 		P[6].x += +dx1;
1020 		P[7].y += -dy1;
1021 
1022 		float4 L[6];
1023 
1024 		if(dx > -dy)
1025 		{
1026 			if(dx > dy)  // Right
1027 			{
1028 				L[0] = P[0];
1029 				L[1] = P[1];
1030 				L[2] = P[5];
1031 				L[3] = P[6];
1032 				L[4] = P[7];
1033 				L[5] = P[3];
1034 			}
1035 			else  // Down
1036 			{
1037 				L[0] = P[0];
1038 				L[1] = P[4];
1039 				L[2] = P[5];
1040 				L[3] = P[6];
1041 				L[4] = P[2];
1042 				L[5] = P[3];
1043 			}
1044 		}
1045 		else
1046 		{
1047 			if(dx > dy)  // Up
1048 			{
1049 				L[0] = P[0];
1050 				L[1] = P[1];
1051 				L[2] = P[2];
1052 				L[3] = P[6];
1053 				L[4] = P[7];
1054 				L[5] = P[4];
1055 			}
1056 			else  // Left
1057 			{
1058 				L[0] = P[1];
1059 				L[1] = P[2];
1060 				L[2] = P[3];
1061 				L[3] = P[7];
1062 				L[4] = P[4];
1063 				L[5] = P[5];
1064 			}
1065 		}
1066 
1067 		Polygon polygon(L, 6);
1068 
1069 		if(!Clipper::Clip(polygon, clipFlags, draw))
1070 		{
1071 			return false;
1072 		}
1073 
1074 		return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
1075 	}
1076 	else
1077 	{
1078 		// Parallelogram approximating Bresenham line
1079 		// This algorithm does not satisfy the ideal diamond-exit rule, but does avoid the
1080 		// duplicate fragment rasterization problem and satisfies all of Vulkan's minimum
1081 		// requirements for Bresenham line segment rasterization.
1082 
1083 		float4 P[8];
1084 		P[0] = P0;
1085 		P[1] = P0;
1086 		P[2] = P0;
1087 		P[3] = P0;
1088 		P[4] = P1;
1089 		P[5] = P1;
1090 		P[6] = P1;
1091 		P[7] = P1;
1092 
1093 		float dx0 = lineWidth * 0.5f * P0.w / W;
1094 		float dy0 = lineWidth * 0.5f * P0.w / H;
1095 
1096 		float dx1 = lineWidth * 0.5f * P1.w / W;
1097 		float dy1 = lineWidth * 0.5f * P1.w / H;
1098 
1099 		P[0].x += -dx0;
1100 		P[1].y += +dy0;
1101 		P[2].x += +dx0;
1102 		P[3].y += -dy0;
1103 		P[4].x += -dx1;
1104 		P[5].y += +dy1;
1105 		P[6].x += +dx1;
1106 		P[7].y += -dy1;
1107 
1108 		float4 L[4];
1109 
1110 		if(dx > -dy)
1111 		{
1112 			if(dx > dy)  // Right
1113 			{
1114 				L[0] = P[1];
1115 				L[1] = P[5];
1116 				L[2] = P[7];
1117 				L[3] = P[3];
1118 			}
1119 			else  // Down
1120 			{
1121 				L[0] = P[0];
1122 				L[1] = P[4];
1123 				L[2] = P[6];
1124 				L[3] = P[2];
1125 			}
1126 		}
1127 		else
1128 		{
1129 			if(dx > dy)  // Up
1130 			{
1131 				L[0] = P[0];
1132 				L[1] = P[2];
1133 				L[2] = P[6];
1134 				L[3] = P[4];
1135 			}
1136 			else  // Left
1137 			{
1138 				L[0] = P[1];
1139 				L[1] = P[3];
1140 				L[2] = P[7];
1141 				L[3] = P[5];
1142 			}
1143 		}
1144 
1145 		Polygon polygon(L, 4);
1146 
1147 		if(!Clipper::Clip(polygon, clipFlags, draw))
1148 		{
1149 			return false;
1150 		}
1151 
1152 		return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
1153 	}
1154 
1155 	return false;
1156 }
1157 
setupPoint(vk::Device * device,Primitive & primitive,Triangle & triangle,const DrawCall & draw)1158 bool DrawCall::setupPoint(vk::Device *device, Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1159 {
1160 	const Vertex &v = triangle.v0;
1161 
1162 	if(v.cullMask == 0)
1163 	{
1164 		return false;
1165 	}
1166 
1167 	const DrawData &data = *draw.data;
1168 	const int clipFlags = draw.depthClipEnable ? Clipper::CLIP_FRUSTUM : Clipper::CLIP_SIDES;
1169 
1170 	const float pSize = clamp(v.pointSize, 1.0f, static_cast<float>(vk::MAX_POINT_SIZE));
1171 	const float X = pSize * v.position.w * data.halfPixelX;
1172 	const float Y = pSize * v.position.w * data.halfPixelY;
1173 
1174 	float4 P[4];
1175 
1176 	P[0] = v.position;
1177 	P[0].x -= X;
1178 	P[0].y += Y;
1179 
1180 	P[1] = v.position;
1181 	P[1].x += X;
1182 	P[1].y += Y;
1183 
1184 	P[2] = v.position;
1185 	P[2].x += X;
1186 	P[2].y -= Y;
1187 
1188 	P[3] = v.position;
1189 	P[3].x -= X;
1190 	P[3].y -= Y;
1191 
1192 	Polygon polygon(P, 4);
1193 
1194 	if(!Clipper::Clip(polygon, clipFlags, draw))
1195 	{
1196 		return false;
1197 	}
1198 
1199 	primitive.pointSizeInv = 1.0f / pSize;
1200 
1201 	return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data);
1202 }
1203 
// Registers `query` as the renderer's occlusion query.
// Asserts that `query` is of type VK_QUERY_TYPE_OCCLUSION and that no occlusion
// query is registered yet.
void Renderer::addQuery(vk::Query *query)
{
	ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
	ASSERT(!occlusionQuery);

	occlusionQuery = query;
}
1211 
// Unregisters the renderer's occlusion query by resetting it to nullptr.
// Asserts that `query` is of type VK_QUERY_TYPE_OCCLUSION and that it is the
// query currently registered.
void Renderer::removeQuery(vk::Query *query)
{
	ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
	ASSERT(occlusionQuery == query);

	occlusionQuery = nullptr;
}
1219 
1220 }  // namespace sw
1221