xref: /aosp_15_r20/external/swiftshader/src/Vulkan/VkDevice.cpp (revision 03ce13f70fcc45d86ee91b7ee4cab1936a95046e)
1 // Copyright 2018 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "VkDevice.hpp"
16 
17 #include "VkConfig.hpp"
18 #include "VkDescriptorSetLayout.hpp"
19 #include "VkFence.hpp"
20 #include "VkQueue.hpp"
21 #include "VkSemaphore.hpp"
22 #include "VkStringify.hpp"
23 #include "VkTimelineSemaphore.hpp"
24 #include "Debug/Context.hpp"
25 #include "Debug/Server.hpp"
26 #include "Device/Blitter.hpp"
27 #include "System/Debug.hpp"
28 
29 #include <chrono>
30 #include <climits>
31 #include <new>  // Must #include this to use "placement new"
32 
namespace {

// Nanosecond-resolution point in time on the system clock; every wait deadline in this file uses it.
using time_point = std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;

// Returns the current system-clock time converted to nanosecond resolution.
time_point now()
{
	const auto current = std::chrono::system_clock::now();
	return std::chrono::time_point_cast<std::chrono::nanoseconds>(current);
}

// Computes the deadline for a wait that begins now and lasts |timeout| nanoseconds.
//
// The deadline is stored as a signed 64-bit nanosecond count, so the longest wait that can be
// represented is the distance from now to LLONG_MAX. A |timeout| beyond that distance cannot be
// expressed as a deadline: |infiniteTimeout| is then set to true and the returned time point is
// clamped to the largest representable value. Otherwise |infiniteTimeout| is set to false and the
// returned time point is exactly now + |timeout|.
const time_point getEndTimePoint(uint64_t timeout, bool &infiniteTimeout)
{
	const time_point begin = now();
	const uint64_t longestWait = (LLONG_MAX - begin.time_since_epoch().count());

	infiniteTimeout = (timeout > longestWait);
	const uint64_t clampedWait = infiniteTimeout ? longestWait : timeout;

	return begin + std::chrono::nanoseconds(clampedWait);
}

}  // anonymous namespace
51 
52 namespace vk {
53 
updateSnapshot()54 void Device::SamplingRoutineCache::updateSnapshot()
55 {
56 	marl::lock lock(mutex);
57 
58 	if(snapshotNeedsUpdate)
59 	{
60 		snapshot.clear();
61 
62 		for(auto it : cache)
63 		{
64 			snapshot[it.key()] = it.data();
65 		}
66 
67 		snapshotNeedsUpdate = false;
68 	}
69 }
70 
~SamplerIndexer()71 Device::SamplerIndexer::~SamplerIndexer()
72 {
73 	ASSERT(map.empty());
74 }
75 
index(const SamplerState & samplerState)76 uint32_t Device::SamplerIndexer::index(const SamplerState &samplerState)
77 {
78 	marl::lock lock(mutex);
79 
80 	auto it = map.find(samplerState);
81 
82 	if(it != map.end())
83 	{
84 		it->second.count++;
85 		return it->second.id;
86 	}
87 
88 	nextID++;
89 
90 	map.emplace(samplerState, Identifier{ nextID, 1 });
91 
92 	return nextID;
93 }
94 
remove(const SamplerState & samplerState)95 void Device::SamplerIndexer::remove(const SamplerState &samplerState)
96 {
97 	marl::lock lock(mutex);
98 
99 	auto it = map.find(samplerState);
100 	ASSERT(it != map.end());
101 
102 	auto count = --it->second.count;
103 	if(count == 0)
104 	{
105 		map.erase(it);
106 	}
107 }
108 
find(uint32_t id)109 const SamplerState *Device::SamplerIndexer::find(uint32_t id)
110 {
111 	marl::lock lock(mutex);
112 
113 	auto it = std::find_if(std::begin(map), std::end(map),
114 	                       [&id](auto &&p) { return p.second.id == id; });
115 
116 	return (it != std::end(map)) ? &(it->first) : nullptr;
117 }
118 
Device(const VkDeviceCreateInfo * pCreateInfo,void * mem,PhysicalDevice * physicalDevice,const VkPhysicalDeviceFeatures * enabledFeatures,const std::shared_ptr<marl::Scheduler> & scheduler)119 Device::Device(const VkDeviceCreateInfo *pCreateInfo, void *mem, PhysicalDevice *physicalDevice, const VkPhysicalDeviceFeatures *enabledFeatures, const std::shared_ptr<marl::Scheduler> &scheduler)
120     : physicalDevice(physicalDevice)
121     , queues(reinterpret_cast<Queue *>(mem))
122     , enabledExtensionCount(pCreateInfo->enabledExtensionCount)
123     , enabledFeatures(enabledFeatures ? *enabledFeatures : VkPhysicalDeviceFeatures{})  // "Setting pEnabledFeatures to NULL and not including a VkPhysicalDeviceFeatures2 in the pNext member of VkDeviceCreateInfo is equivalent to setting all members of the structure to VK_FALSE."
124     , scheduler(scheduler)
125 {
126 	for(uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
127 	{
128 		const VkDeviceQueueCreateInfo &queueCreateInfo = pCreateInfo->pQueueCreateInfos[i];
129 		queueCount += queueCreateInfo.queueCount;
130 	}
131 
132 	uint32_t queueID = 0;
133 	for(uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
134 	{
135 		const VkDeviceQueueCreateInfo &queueCreateInfo = pCreateInfo->pQueueCreateInfos[i];
136 
137 		for(uint32_t j = 0; j < queueCreateInfo.queueCount; j++, queueID++)
138 		{
139 			new(&queues[queueID]) Queue(this, scheduler.get());
140 		}
141 	}
142 
143 	extensions = reinterpret_cast<ExtensionName *>(static_cast<uint8_t *>(mem) + (sizeof(Queue) * queueCount));
144 	for(uint32_t i = 0; i < enabledExtensionCount; i++)
145 	{
146 		strncpy(extensions[i], pCreateInfo->ppEnabledExtensionNames[i], VK_MAX_EXTENSION_NAME_SIZE);
147 	}
148 
149 	if(pCreateInfo->enabledLayerCount)
150 	{
151 		// "The ppEnabledLayerNames and enabledLayerCount members of VkDeviceCreateInfo are deprecated and their values must be ignored by implementations."
152 		UNSUPPORTED("enabledLayerCount");
153 	}
154 
155 	// TODO(b/119409619): use an allocator here so we can control all memory allocations
156 	blitter.reset(new sw::Blitter());
157 	samplingRoutineCache.reset(new SamplingRoutineCache());
158 	samplerIndexer.reset(new SamplerIndexer());
159 
160 #ifdef SWIFTSHADER_DEVICE_MEMORY_REPORT
161 	const auto *deviceMemoryReportCreateInfo = GetExtendedStruct<VkDeviceDeviceMemoryReportCreateInfoEXT>(pCreateInfo->pNext, VK_STRUCTURE_TYPE_DEVICE_DEVICE_MEMORY_REPORT_CREATE_INFO_EXT);
162 	if(deviceMemoryReportCreateInfo && deviceMemoryReportCreateInfo->pfnUserCallback != nullptr)
163 	{
164 		deviceMemoryReportCallbacks.emplace_back(deviceMemoryReportCreateInfo->pfnUserCallback, deviceMemoryReportCreateInfo->pUserData);
165 	}
166 #endif  // SWIFTSHADER_DEVICE_MEMORY_REPORT
167 }
168 
destroy(const VkAllocationCallbacks * pAllocator)169 void Device::destroy(const VkAllocationCallbacks *pAllocator)
170 {
171 	for(uint32_t i = 0; i < queueCount; i++)
172 	{
173 		queues[i].~Queue();
174 	}
175 
176 	vk::freeHostMemory(queues, pAllocator);
177 }
178 
ComputeRequiredAllocationSize(const VkDeviceCreateInfo * pCreateInfo)179 size_t Device::ComputeRequiredAllocationSize(const VkDeviceCreateInfo *pCreateInfo)
180 {
181 	uint32_t queueCount = 0;
182 	for(uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
183 	{
184 		queueCount += pCreateInfo->pQueueCreateInfos[i].queueCount;
185 	}
186 
187 	return (sizeof(Queue) * queueCount) + (pCreateInfo->enabledExtensionCount * sizeof(ExtensionName));
188 }
189 
hasExtension(const char * extensionName) const190 bool Device::hasExtension(const char *extensionName) const
191 {
192 	for(uint32_t i = 0; i < enabledExtensionCount; i++)
193 	{
194 		if(strncmp(extensions[i], extensionName, VK_MAX_EXTENSION_NAME_SIZE) == 0)
195 		{
196 			return true;
197 		}
198 	}
199 	return false;
200 }
201 
getQueue(uint32_t queueFamilyIndex,uint32_t queueIndex) const202 VkQueue Device::getQueue(uint32_t queueFamilyIndex, uint32_t queueIndex) const
203 {
204 	ASSERT(queueFamilyIndex == 0);
205 
206 	return queues[queueIndex];
207 }
208 
waitForFences(uint32_t fenceCount,const VkFence * pFences,VkBool32 waitAll,uint64_t timeout)209 VkResult Device::waitForFences(uint32_t fenceCount, const VkFence *pFences, VkBool32 waitAll, uint64_t timeout)
210 {
211 	bool infiniteTimeout = false;
212 	const time_point end_ns = getEndTimePoint(timeout, infiniteTimeout);
213 
214 	if(waitAll != VK_FALSE)  // All fences must be signaled
215 	{
216 		for(uint32_t i = 0; i < fenceCount; i++)
217 		{
218 			if(timeout == 0)
219 			{
220 				if(Cast(pFences[i])->getStatus() != VK_SUCCESS)  // At least one fence is not signaled
221 				{
222 					return VK_TIMEOUT;
223 				}
224 			}
225 			else if(infiniteTimeout)
226 			{
227 				if(Cast(pFences[i])->wait() != VK_SUCCESS)  // At least one fence is not signaled
228 				{
229 					return VK_TIMEOUT;
230 				}
231 			}
232 			else
233 			{
234 				if(Cast(pFences[i])->wait(end_ns) != VK_SUCCESS)  // At least one fence is not signaled
235 				{
236 					return VK_TIMEOUT;
237 				}
238 			}
239 		}
240 
241 		return VK_SUCCESS;
242 	}
243 	else  // At least one fence must be signaled
244 	{
245 		marl::containers::vector<marl::Event, 8> events;
246 		for(uint32_t i = 0; i < fenceCount; i++)
247 		{
248 			events.push_back(Cast(pFences[i])->getCountedEvent()->event());
249 		}
250 
251 		auto any = marl::Event::any(events.begin(), events.end());
252 
253 		if(timeout == 0)
254 		{
255 			return any.isSignalled() ? VK_SUCCESS : VK_TIMEOUT;
256 		}
257 		else if(infiniteTimeout)
258 		{
259 			any.wait();
260 			return VK_SUCCESS;
261 		}
262 		else
263 		{
264 			return any.wait_until(end_ns) ? VK_SUCCESS : VK_TIMEOUT;
265 		}
266 	}
267 }
268 
waitForSemaphores(const VkSemaphoreWaitInfo * pWaitInfo,uint64_t timeout)269 VkResult Device::waitForSemaphores(const VkSemaphoreWaitInfo *pWaitInfo, uint64_t timeout)
270 {
271 	bool infiniteTimeout = false;
272 	const time_point end_ns = getEndTimePoint(timeout, infiniteTimeout);
273 
274 	if(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT)
275 	{
276 		TimelineSemaphore::WaitForAny any(pWaitInfo);
277 		if(infiniteTimeout)
278 		{
279 			any.wait();
280 			return VK_SUCCESS;
281 		}
282 		return any.wait(end_ns);
283 	}
284 	else
285 	{
286 		ASSERT(pWaitInfo->flags == 0);
287 		for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++)
288 		{
289 			TimelineSemaphore *semaphore = DynamicCast<TimelineSemaphore>(pWaitInfo->pSemaphores[i]);
290 			uint64_t value = pWaitInfo->pValues[i];
291 			if(infiniteTimeout)
292 			{
293 				semaphore->wait(value);
294 			}
295 			else if(semaphore->wait(pWaitInfo->pValues[i], end_ns) != VK_SUCCESS)
296 			{
297 				return VK_TIMEOUT;
298 			}
299 		}
300 		return VK_SUCCESS;
301 	}
302 }
303 
waitIdle()304 VkResult Device::waitIdle()
305 {
306 	for(uint32_t i = 0; i < queueCount; i++)
307 	{
308 		queues[i].waitIdle();
309 	}
310 
311 	return VK_SUCCESS;
312 }
313 
getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateInfo * pCreateInfo,VkDescriptorSetLayoutSupport * pSupport) const314 void Device::getDescriptorSetLayoutSupport(const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
315                                            VkDescriptorSetLayoutSupport *pSupport) const
316 {
317 	// From Vulkan Spec 13.2.1 Descriptor Set Layout, in description of vkGetDescriptorSetLayoutSupport:
318 	// "This command does not consider other limits such as maxPerStageDescriptor*, and so a descriptor
319 	// set layout that is supported according to this command must still satisfy the pipeline layout limits
320 	// such as maxPerStageDescriptor* in order to be used in a pipeline layout."
321 
322 	// We have no "strange" limitations to enforce beyond the device limits, so we can safely always claim support.
323 	pSupport->supported = VK_TRUE;
324 
325 	if(pCreateInfo->bindingCount > 0)
326 	{
327 		bool hasVariableSizedDescriptor = false;
328 
329 		const VkBaseInStructure *layoutInfo = reinterpret_cast<const VkBaseInStructure *>(pCreateInfo->pNext);
330 		while(layoutInfo && !hasVariableSizedDescriptor)
331 		{
332 			if(layoutInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO)
333 			{
334 				const VkDescriptorSetLayoutBindingFlagsCreateInfo *bindingFlagsCreateInfo =
335 				    reinterpret_cast<const VkDescriptorSetLayoutBindingFlagsCreateInfo *>(layoutInfo);
336 
337 				for(uint32_t i = 0; i < bindingFlagsCreateInfo->bindingCount; i++)
338 				{
339 					if(bindingFlagsCreateInfo->pBindingFlags[i] & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)
340 					{
341 						hasVariableSizedDescriptor = true;
342 						break;
343 					}
344 				}
345 			}
346 			else
347 			{
348 				UNSUPPORTED("layoutInfo->sType = %s", vk::Stringify(layoutInfo->sType).c_str());
349 			}
350 
351 			layoutInfo = layoutInfo->pNext;
352 		}
353 
354 		const auto &highestNumberedBinding = pCreateInfo->pBindings[pCreateInfo->bindingCount - 1];
355 
356 		VkBaseOutStructure *layoutSupport = reinterpret_cast<VkBaseOutStructure *>(pSupport->pNext);
357 		while(layoutSupport)
358 		{
359 			if(layoutSupport->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT)
360 			{
361 				VkDescriptorSetVariableDescriptorCountLayoutSupport *variableDescriptorCountLayoutSupport =
362 				    reinterpret_cast<VkDescriptorSetVariableDescriptorCountLayoutSupport *>(layoutSupport);
363 
364 				// If the VkDescriptorSetLayoutCreateInfo structure does not include a variable-sized descriptor,
365 				// [...] then maxVariableDescriptorCount is set to zero.
366 				variableDescriptorCountLayoutSupport->maxVariableDescriptorCount =
367 				    hasVariableSizedDescriptor ? ((highestNumberedBinding.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) ? vk::MAX_INLINE_UNIFORM_BLOCK_SIZE : vk::MAX_UPDATE_AFTER_BIND_DESCRIPTORS) : 0;
368 			}
369 			else
370 			{
371 				UNSUPPORTED("layoutSupport->sType = %s", vk::Stringify(layoutSupport->sType).c_str());
372 			}
373 
374 			layoutSupport = layoutSupport->pNext;
375 		}
376 	}
377 }
378 
updateDescriptorSets(uint32_t descriptorWriteCount,const VkWriteDescriptorSet * pDescriptorWrites,uint32_t descriptorCopyCount,const VkCopyDescriptorSet * pDescriptorCopies)379 void Device::updateDescriptorSets(uint32_t descriptorWriteCount, const VkWriteDescriptorSet *pDescriptorWrites,
380                                   uint32_t descriptorCopyCount, const VkCopyDescriptorSet *pDescriptorCopies)
381 {
382 	for(uint32_t i = 0; i < descriptorWriteCount; i++)
383 	{
384 		DescriptorSetLayout::WriteDescriptorSet(this, pDescriptorWrites[i]);
385 	}
386 
387 	for(uint32_t i = 0; i < descriptorCopyCount; i++)
388 	{
389 		DescriptorSetLayout::CopyDescriptorSet(pDescriptorCopies[i]);
390 	}
391 }
392 
getRequirements(VkMemoryDedicatedRequirements * requirements) const393 void Device::getRequirements(VkMemoryDedicatedRequirements *requirements) const
394 {
395 	requirements->prefersDedicatedAllocation = VK_FALSE;
396 	requirements->requiresDedicatedAllocation = VK_FALSE;
397 }
398 
getSamplingRoutineCache() const399 Device::SamplingRoutineCache *Device::getSamplingRoutineCache() const
400 {
401 	return samplingRoutineCache.get();
402 }
403 
updateSamplingRoutineSnapshotCache()404 void Device::updateSamplingRoutineSnapshotCache()
405 {
406 	samplingRoutineCache->updateSnapshot();
407 }
408 
indexSampler(const SamplerState & samplerState)409 uint32_t Device::indexSampler(const SamplerState &samplerState)
410 {
411 	return samplerIndexer->index(samplerState);
412 }
413 
removeSampler(const SamplerState & samplerState)414 void Device::removeSampler(const SamplerState &samplerState)
415 {
416 	samplerIndexer->remove(samplerState);
417 }
418 
findSampler(uint32_t samplerId) const419 const SamplerState *Device::findSampler(uint32_t samplerId) const
420 {
421 	return samplerIndexer->find(samplerId);
422 }
423 
setDebugUtilsObjectName(const VkDebugUtilsObjectNameInfoEXT * pNameInfo)424 VkResult Device::setDebugUtilsObjectName(const VkDebugUtilsObjectNameInfoEXT *pNameInfo)
425 {
426 	// Optionally maps user-friendly name to an object
427 	return VK_SUCCESS;
428 }
429 
setDebugUtilsObjectTag(const VkDebugUtilsObjectTagInfoEXT * pTagInfo)430 VkResult Device::setDebugUtilsObjectTag(const VkDebugUtilsObjectTagInfoEXT *pTagInfo)
431 {
432 	// Optionally attach arbitrary data to an object
433 	return VK_SUCCESS;
434 }
435 
registerImageView(ImageView * imageView)436 void Device::registerImageView(ImageView *imageView)
437 {
438 	if(imageView != nullptr)
439 	{
440 		marl::lock lock(imageViewSetMutex);
441 		imageViewSet.insert(imageView);
442 	}
443 }
444 
unregisterImageView(ImageView * imageView)445 void Device::unregisterImageView(ImageView *imageView)
446 {
447 	if(imageView != nullptr)
448 	{
449 		marl::lock lock(imageViewSetMutex);
450 		auto it = imageViewSet.find(imageView);
451 		if(it != imageViewSet.end())
452 		{
453 			imageViewSet.erase(it);
454 		}
455 	}
456 }
457 
prepareForSampling(ImageView * imageView)458 void Device::prepareForSampling(ImageView *imageView)
459 {
460 	if(imageView != nullptr)
461 	{
462 		marl::lock lock(imageViewSetMutex);
463 
464 		auto it = imageViewSet.find(imageView);
465 		if(it != imageViewSet.end())
466 		{
467 			imageView->prepareForSampling();
468 		}
469 	}
470 }
471 
contentsChanged(ImageView * imageView,Image::ContentsChangedContext context)472 void Device::contentsChanged(ImageView *imageView, Image::ContentsChangedContext context)
473 {
474 	if(imageView != nullptr)
475 	{
476 		marl::lock lock(imageViewSetMutex);
477 
478 		auto it = imageViewSet.find(imageView);
479 		if(it != imageViewSet.end())
480 		{
481 			imageView->contentsChanged(context);
482 		}
483 	}
484 }
485 
setPrivateData(VkObjectType objectType,uint64_t objectHandle,const PrivateData * privateDataSlot,uint64_t data)486 VkResult Device::setPrivateData(VkObjectType objectType, uint64_t objectHandle, const PrivateData *privateDataSlot, uint64_t data)
487 {
488 	marl::lock lock(privateDataMutex);
489 
490 	auto &privateDataSlotMap = privateData[privateDataSlot];
491 	const PrivateDataObject privateDataObject = { objectType, objectHandle };
492 	privateDataSlotMap[privateDataObject] = data;
493 	return VK_SUCCESS;
494 }
495 
getPrivateData(VkObjectType objectType,uint64_t objectHandle,const PrivateData * privateDataSlot,uint64_t * data)496 void Device::getPrivateData(VkObjectType objectType, uint64_t objectHandle, const PrivateData *privateDataSlot, uint64_t *data)
497 {
498 	marl::lock lock(privateDataMutex);
499 
500 	*data = 0;
501 	auto it = privateData.find(privateDataSlot);
502 	if(it != privateData.end())
503 	{
504 		auto &privateDataSlotMap = it->second;
505 		const PrivateDataObject privateDataObject = { objectType, objectHandle };
506 		auto it2 = privateDataSlotMap.find(privateDataObject);
507 		if(it2 != privateDataSlotMap.end())
508 		{
509 			*data = it2->second;
510 		}
511 	}
512 }
513 
removePrivateDataSlot(const PrivateData * privateDataSlot)514 void Device::removePrivateDataSlot(const PrivateData *privateDataSlot)
515 {
516 	marl::lock lock(privateDataMutex);
517 
518 	privateData.erase(privateDataSlot);
519 }
520 
521 #ifdef SWIFTSHADER_DEVICE_MEMORY_REPORT
emitDeviceMemoryReport(VkDeviceMemoryReportEventTypeEXT type,uint64_t memoryObjectId,VkDeviceSize size,VkObjectType objectType,uint64_t objectHandle,uint32_t heapIndex)522 void Device::emitDeviceMemoryReport(VkDeviceMemoryReportEventTypeEXT type, uint64_t memoryObjectId, VkDeviceSize size, VkObjectType objectType, uint64_t objectHandle, uint32_t heapIndex)
523 {
524 	if(deviceMemoryReportCallbacks.empty()) return;
525 
526 	const VkDeviceMemoryReportCallbackDataEXT callbackData = {
527 		VK_STRUCTURE_TYPE_DEVICE_MEMORY_REPORT_CALLBACK_DATA_EXT,  // sType
528 		nullptr,                                                   // pNext
529 		0,                                                         // flags
530 		type,                                                      // type
531 		memoryObjectId,                                            // memoryObjectId
532 		size,                                                      // size
533 		objectType,                                                // objectType
534 		objectHandle,                                              // objectHandle
535 		heapIndex,                                                 // heapIndex
536 	};
537 	for(const auto &callback : deviceMemoryReportCallbacks)
538 	{
539 		callback.first(&callbackData, callback.second);
540 	}
541 }
542 #endif  // SWIFTSHADER_DEVICE_MEMORY_REPORT
543 
544 }  // namespace vk
545