1 // Copyright (c) 2016 The vulkano developers
2 // Licensed under the Apache License, Version 2.0
3 // <LICENSE-APACHE or
4 // https://www.apache.org/licenses/LICENSE-2.0> or the MIT
5 // license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
6 // at your option. All files in the project carrying such
7 // notice may not be copied, modified, or distributed except
8 // according to those terms.
9
10 //! Suballocators are used to divide a *region* into smaller *suballocations*.
11 //!
12 //! See also [the parent module] for details about memory allocation in Vulkan.
13 //!
14 //! [the parent module]: super
15
16 use self::host::SlotId;
17 use super::{
18 align_down, align_up, array_vec::ArrayVec, AllocationCreationError, DeviceAlignment,
19 DeviceLayout,
20 };
21 use crate::{
22 device::{Device, DeviceOwned},
23 image::ImageTiling,
24 memory::{is_aligned, DeviceMemory, MemoryPropertyFlags},
25 DeviceSize, NonZeroDeviceSize, VulkanError, VulkanObject,
26 };
27 use crossbeam_queue::ArrayQueue;
28 use parking_lot::Mutex;
29 use std::{
30 cell::Cell,
31 cmp,
32 error::Error,
33 ffi::c_void,
34 fmt::{self, Display},
35 mem::{self, ManuallyDrop, MaybeUninit},
36 ops::Range,
37 ptr::{self, NonNull},
38 slice,
39 sync::{
40 atomic::{AtomicU64, Ordering},
41 Arc,
42 },
43 };
44
45 /// Memory allocations are portions of memory that are reserved for a specific resource or purpose.
46 ///
/// There are a few ways you can obtain a `MemoryAlloc` in Vulkano. Most commonly you will probably
48 /// want to use a [memory allocator]. If you already have a [`DeviceMemory`] block on hand that you
49 /// would like to turn into an allocation, you can use [the constructor]. Lastly, you can use a
50 /// [suballocator] if you want to create multiple smaller allocations out of a bigger one.
51 ///
52 /// [memory allocator]: super::MemoryAllocator
53 /// [the constructor]: Self::new
54 /// [suballocator]: Suballocator
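///
/// # Examples
///
/// A minimal sketch of [the constructor], assuming `device_memory` is a block you allocated
/// beforehand:
///
/// ```
/// use vulkano::memory::allocator::MemoryAlloc;
/// # use vulkano::memory::DeviceMemory;
/// # let device_memory: DeviceMemory = return;
///
/// let alloc = MemoryAlloc::new(device_memory).unwrap();
/// assert!(alloc.is_root() || alloc.is_dedicated());
/// ```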
55 #[derive(Debug)]
56 pub struct MemoryAlloc {
57 offset: DeviceSize,
58 size: DeviceSize,
59 // Needed when binding resources to the allocation in order to avoid aliasing memory.
60 allocation_type: AllocationType,
// Mapped pointer to the start of the allocation, or `None` if the memory is not host-visible.
62 mapped_ptr: Option<NonNull<c_void>>,
63 // Used by the suballocators to align allocations to the non-coherent atom size when the memory
64 // type is host-visible but not host-coherent. This will be `None` for any other memory type.
65 atom_size: Option<DeviceAlignment>,
66 // Used in the `Drop` impl to free the allocation if required.
67 parent: AllocParent,
68 }
69
70 #[derive(Debug)]
71 enum AllocParent {
72 FreeList {
73 allocator: Arc<FreeListAllocator>,
74 id: SlotId,
75 },
76 Buddy {
77 allocator: Arc<BuddyAllocator>,
78 order: usize,
79 offset: DeviceSize,
80 },
81 Pool {
82 allocator: Arc<PoolAllocatorInner>,
83 index: DeviceSize,
84 },
85 Bump(Arc<BumpAllocator>),
86 Root(Arc<DeviceMemory>),
87 Dedicated(DeviceMemory),
88 }
89
// It is safe to share `mapped_ptr` between threads because the user would have to use unsafe
// code themselves to get UB in the first place.
92 unsafe impl Send for MemoryAlloc {}
93 unsafe impl Sync for MemoryAlloc {}
94
95 impl MemoryAlloc {
96 /// Creates a new `MemoryAlloc`.
97 ///
98 /// The memory is mapped automatically if it's host-visible.
99 #[inline]
pub fn new(device_memory: DeviceMemory) -> Result<Self, AllocationCreationError> {
101 // Sanity check: this would lead to UB when suballocating.
102 assert!(device_memory.allocation_size() <= DeviceLayout::MAX_SIZE);
103
104 let device = device_memory.device();
105 let physical_device = device.physical_device();
106 let memory_type_index = device_memory.memory_type_index();
107 let property_flags = &physical_device.memory_properties().memory_types
108 [memory_type_index as usize]
109 .property_flags;
110
111 let mapped_ptr = if property_flags.intersects(MemoryPropertyFlags::HOST_VISIBLE) {
112 // Sanity check: this would lead to UB when calculating pointer offsets.
113 assert!(device_memory.allocation_size() <= isize::MAX.try_into().unwrap());
114
115 let fns = device.fns();
116 let mut output = MaybeUninit::uninit();
117 // This is always valid because we are mapping the whole range.
118 unsafe {
119 (fns.v1_0.map_memory)(
120 device.handle(),
121 device_memory.handle(),
122 0,
123 ash::vk::WHOLE_SIZE,
124 ash::vk::MemoryMapFlags::empty(),
125 output.as_mut_ptr(),
126 )
127 .result()
128 .map_err(VulkanError::from)?;
129
130 Some(NonNull::new(output.assume_init()).unwrap())
131 }
132 } else {
133 None
134 };
135
136 let atom_size = (property_flags.intersects(MemoryPropertyFlags::HOST_VISIBLE)
137 && !property_flags.intersects(MemoryPropertyFlags::HOST_COHERENT))
138 .then_some(physical_device.properties().non_coherent_atom_size);
139
140 Ok(MemoryAlloc {
141 offset: 0,
142 size: device_memory.allocation_size(),
143 allocation_type: AllocationType::Unknown,
144 mapped_ptr,
145 atom_size,
146 parent: if device_memory.is_dedicated() {
147 AllocParent::Dedicated(device_memory)
148 } else {
149 AllocParent::Root(Arc::new(device_memory))
150 },
151 })
152 }
153
154 /// Returns the offset of the allocation within the [`DeviceMemory`] block.
155 #[inline]
pub fn offset(&self) -> DeviceSize {
157 self.offset
158 }
159
160 /// Returns the size of the allocation.
161 #[inline]
pub fn size(&self) -> DeviceSize {
163 self.size
164 }
165
166 /// Returns the type of resources that can be bound to this allocation.
167 #[inline]
pub fn allocation_type(&self) -> AllocationType {
169 self.allocation_type
170 }
171
172 /// Returns the mapped pointer to the start of the allocation if the memory is host-visible,
173 /// otherwise returns [`None`].
174 #[inline]
pub fn mapped_ptr(&self) -> Option<NonNull<c_void>> {
176 self.mapped_ptr
177 }
178
179 /// Returns a mapped slice to the data within the allocation if the memory is host-visible,
180 /// otherwise returns [`None`].
181 ///
182 /// # Safety
183 ///
184 /// - While the returned slice exists, there must be no operations pending or executing in a
185 /// GPU queue that write to the same memory.
186 #[inline]
pub unsafe fn mapped_slice(&self) -> Option<&[u8]> {
188 self.mapped_ptr
189 .map(|ptr| slice::from_raw_parts(ptr.as_ptr().cast(), self.size as usize))
190 }
191
192 /// Returns a mapped mutable slice to the data within the allocation if the memory is
193 /// host-visible, otherwise returns [`None`].
194 ///
195 /// # Safety
196 ///
197 /// - While the returned slice exists, there must be no operations pending or executing in a
198 /// GPU queue that access the same memory.
199 #[inline]
pub unsafe fn mapped_slice_mut(&mut self) -> Option<&mut [u8]> {
201 self.mapped_ptr
202 .map(|ptr| slice::from_raw_parts_mut(ptr.as_ptr().cast(), self.size as usize))
203 }
204
pub(crate) fn atom_size(&self) -> Option<DeviceAlignment> {
206 self.atom_size
207 }
208
209 /// Invalidates the host (CPU) cache for a range of the allocation.
210 ///
211 /// You must call this method before the memory is read by the host, if the device previously
212 /// wrote to the memory. It has no effect if the memory is not mapped or if the memory is
213 /// [host-coherent].
214 ///
215 /// `range` is specified in bytes relative to the start of the allocation. The start and end of
216 /// `range` must be a multiple of the [`non_coherent_atom_size`] device property, but
/// `range.end` can also be equal to `self.size()`.
218 ///
219 /// # Safety
220 ///
221 /// - If there are memory writes by the GPU that have not been propagated into the CPU cache,
222 /// then there must not be any references in Rust code to the specified `range` of the memory.
223 ///
224 /// # Panics
225 ///
226 /// - Panics if `range` is empty.
227 /// - Panics if `range.end` exceeds `self.size`.
228 /// - Panics if `range.start` or `range.end` are not a multiple of the `non_coherent_atom_size`.
229 ///
230 /// [host-coherent]: crate::memory::MemoryPropertyFlags::HOST_COHERENT
231 /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
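///
/// # Examples
///
/// A minimal sketch of reading back data that the device has written, assuming `alloc` is a
/// mapped, host-visible `MemoryAlloc` and that all device work writing to it has completed:
///
/// ```
/// # use vulkano::memory::allocator::MemoryAlloc;
/// # let alloc: MemoryAlloc = return;
/// unsafe {
///     // Make the device's writes visible to the host before reading from the mapping.
///     alloc.invalidate_range(0..alloc.size()).unwrap();
///
///     let _data = alloc.mapped_slice().unwrap();
/// }
/// ```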
232 #[inline]
pub unsafe fn invalidate_range(&self, range: Range<DeviceSize>) -> Result<(), VulkanError> {
234 // VUID-VkMappedMemoryRange-memory-00684
235 if let Some(atom_size) = self.atom_size {
236 let range = self.create_memory_range(range, atom_size);
237 let device = self.device();
238 let fns = device.fns();
239 (fns.v1_0.invalidate_mapped_memory_ranges)(device.handle(), 1, &range)
240 .result()
241 .map_err(VulkanError::from)?;
242 } else {
243 self.debug_validate_memory_range(&range);
244 }
245
246 Ok(())
247 }
248
249 /// Flushes the host (CPU) cache for a range of the allocation.
250 ///
251 /// You must call this method after writing to the memory from the host, if the device is going
252 /// to read the memory. It has no effect if the memory is not mapped or if the memory is
253 /// [host-coherent].
254 ///
255 /// `range` is specified in bytes relative to the start of the allocation. The start and end of
256 /// `range` must be a multiple of the [`non_coherent_atom_size`] device property, but
/// `range.end` can also be equal to `self.size()`.
258 ///
259 /// # Safety
260 ///
261 /// - There must be no operations pending or executing in a GPU queue that access the specified
262 /// `range` of the memory.
263 ///
264 /// # Panics
265 ///
266 /// - Panics if `range` is empty.
267 /// - Panics if `range.end` exceeds `self.size`.
268 /// - Panics if `range.start` or `range.end` are not a multiple of the `non_coherent_atom_size`.
269 ///
270 /// [host-coherent]: crate::memory::MemoryPropertyFlags::HOST_COHERENT
271 /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
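///
/// # Examples
///
/// A minimal sketch of writing data from the host, assuming `alloc` is a mapped, host-visible
/// `MemoryAlloc` and that no device work is accessing the memory:
///
/// ```
/// # use vulkano::memory::allocator::MemoryAlloc;
/// # let mut alloc: MemoryAlloc = return;
/// unsafe {
///     if let Some(data) = alloc.mapped_slice_mut() {
///         data.fill(42);
///     }
///
///     // Make the host's writes visible to the device.
///     alloc.flush_range(0..alloc.size()).unwrap();
/// }
/// ```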
272 #[inline]
pub unsafe fn flush_range(&self, range: Range<DeviceSize>) -> Result<(), VulkanError> {
274 // VUID-VkMappedMemoryRange-memory-00684
275 if let Some(atom_size) = self.atom_size {
276 let range = self.create_memory_range(range, atom_size);
277 let device = self.device();
278 let fns = device.fns();
279 (fns.v1_0.flush_mapped_memory_ranges)(device.handle(), 1, &range)
280 .result()
281 .map_err(VulkanError::from)?;
282 } else {
283 self.debug_validate_memory_range(&range);
284 }
285
286 Ok(())
287 }
288
fn create_memory_range(
290 &self,
291 range: Range<DeviceSize>,
292 atom_size: DeviceAlignment,
293 ) -> ash::vk::MappedMemoryRange {
294 assert!(!range.is_empty() && range.end <= self.size);
295
296 // VUID-VkMappedMemoryRange-size-00685
297 // Guaranteed because we always map the entire `DeviceMemory`.
298
299 // VUID-VkMappedMemoryRange-offset-00687
300 // VUID-VkMappedMemoryRange-size-01390
301 assert!(
302 is_aligned(range.start, atom_size)
303 && (is_aligned(range.end, atom_size) || range.end == self.size)
304 );
305
306 // VUID-VkMappedMemoryRange-offset-00687
307 // Guaranteed as long as `range.start` is aligned because the suballocators always align
308 // `self.offset` to the non-coherent atom size for non-coherent host-visible memory.
309 let offset = self.offset + range.start;
310
311 let mut size = range.end - range.start;
312 let device_memory = self.device_memory();
313
314 // VUID-VkMappedMemoryRange-size-01390
315 if offset + size < device_memory.allocation_size() {
316 // We align the size in case `range.end == self.size`. We can do this without aliasing
317 // other allocations because the suballocators ensure that all allocations are aligned
318 // to the atom size for non-coherent host-visible memory.
319 size = align_up(size, atom_size);
320 }
321
322 ash::vk::MappedMemoryRange {
323 memory: device_memory.handle(),
324 offset,
325 size,
326 ..Default::default()
327 }
328 }
329
330 /// This exists because even if no cache control is required, the parameters should still be
331 /// valid, otherwise you might have bugs in your code forever just because your memory happens
332 /// to be host-coherent.
fn debug_validate_memory_range(&self, range: &Range<DeviceSize>) {
334 debug_assert!(!range.is_empty() && range.end <= self.size);
335
336 let atom_size = self
337 .device()
338 .physical_device()
339 .properties()
340 .non_coherent_atom_size;
341 debug_assert!(
342 is_aligned(range.start, atom_size)
343 && (is_aligned(range.end, atom_size) || range.end == self.size),
344 "attempted to invalidate or flush a memory range that is not aligned to the \
345 non-coherent atom size",
346 );
347 }
348
349 /// Returns the underlying block of [`DeviceMemory`].
350 #[inline]
pub fn device_memory(&self) -> &DeviceMemory {
352 match &self.parent {
353 AllocParent::FreeList { allocator, .. } => &allocator.device_memory,
354 AllocParent::Buddy { allocator, .. } => &allocator.device_memory,
355 AllocParent::Pool { allocator, .. } => &allocator.device_memory,
356 AllocParent::Bump(allocator) => &allocator.device_memory,
357 AllocParent::Root(device_memory) => device_memory,
358 AllocParent::Dedicated(device_memory) => device_memory,
359 }
360 }
361
362 /// Returns the parent allocation if this allocation is a [suballocation], otherwise returns
363 /// [`None`].
364 ///
365 /// [suballocation]: Suballocator
366 #[inline]
pub fn parent_allocation(&self) -> Option<&Self> {
368 match &self.parent {
369 AllocParent::FreeList { allocator, .. } => Some(&allocator.region),
370 AllocParent::Buddy { allocator, .. } => Some(&allocator.region),
371 AllocParent::Pool { allocator, .. } => Some(&allocator.region),
372 AllocParent::Bump(allocator) => Some(&allocator.region),
373 AllocParent::Root(_) => None,
374 AllocParent::Dedicated(_) => None,
375 }
376 }
377
378 /// Returns `true` if this allocation is the root of the [memory hierarchy].
379 ///
380 /// [memory hierarchy]: Suballocator#memory-hierarchies
381 #[inline]
pub fn is_root(&self) -> bool {
383 matches!(&self.parent, AllocParent::Root(_))
384 }
385
386 /// Returns `true` if this allocation is a [dedicated allocation].
387 ///
388 /// [dedicated allocation]: crate::memory::MemoryAllocateInfo#structfield.dedicated_allocation
389 #[inline]
pub fn is_dedicated(&self) -> bool {
391 matches!(&self.parent, AllocParent::Dedicated(_))
392 }
393
394 /// Returns the underlying block of [`DeviceMemory`] if this allocation [is the root
395 /// allocation] and is not [aliased], otherwise returns the allocation back wrapped in [`Err`].
396 ///
397 /// [is the root allocation]: Self::is_root
398 /// [aliased]: Self::alias
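///
/// # Examples
///
/// A minimal sketch, assuming `alloc` is an allocation you created earlier:
///
/// ```
/// # use vulkano::memory::allocator::MemoryAlloc;
/// # let alloc: MemoryAlloc = return;
/// match alloc.try_unwrap() {
///     // You own the `DeviceMemory` block again and can reuse it as you please.
///     Ok(_device_memory) => {}
///     // The allocation was a suballocation, a dedicated allocation, or aliased.
///     Err(_alloc) => {}
/// }
/// ```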
399 #[inline]
pub fn try_unwrap(self) -> Result<DeviceMemory, Self> {
401 let this = ManuallyDrop::new(self);
402
403 // SAFETY: This is safe because even if a panic happens, `self.parent` can not be
404 // double-freed since `self` was wrapped in `ManuallyDrop`. If we fail to unwrap the
405 // `DeviceMemory`, the copy of `self.parent` is forgotten and only then is the
406 // `ManuallyDrop` wrapper removed from `self`.
407 match unsafe { ptr::read(&this.parent) } {
408 AllocParent::Root(device_memory) => {
409 Arc::try_unwrap(device_memory).map_err(|device_memory| {
410 mem::forget(device_memory);
411 ManuallyDrop::into_inner(this)
412 })
413 }
414 parent => {
415 mem::forget(parent);
416 Err(ManuallyDrop::into_inner(this))
417 }
418 }
419 }
420
421 /// Duplicates the allocation, creating aliased memory. Returns [`None`] if the allocation [is
422 /// a dedicated allocation].
423 ///
424 /// You might consider using this method if you want to optimize memory usage by aliasing
425 /// render targets for example, in which case you will have to double and triple check that the
426 /// memory is not used concurrently unless it only involves reading. You are highly discouraged
427 /// from doing this unless you have a reason to.
428 ///
429 /// # Safety
430 ///
431 /// - You must ensure memory accesses are synchronized yourself.
432 ///
433 /// [memory hierarchy]: Suballocator#memory-hierarchies
434 /// [is a dedicated allocation]: Self::is_dedicated
435 #[inline]
pub unsafe fn alias(&self) -> Option<Self> {
437 self.root().map(|device_memory| MemoryAlloc {
438 parent: AllocParent::Root(device_memory.clone()),
439 ..*self
440 })
441 }
442
fn root(&self) -> Option<&Arc<DeviceMemory>> {
444 match &self.parent {
445 AllocParent::FreeList { allocator, .. } => Some(&allocator.device_memory),
446 AllocParent::Buddy { allocator, .. } => Some(&allocator.device_memory),
447 AllocParent::Pool { allocator, .. } => Some(&allocator.device_memory),
448 AllocParent::Bump(allocator) => Some(&allocator.device_memory),
449 AllocParent::Root(device_memory) => Some(device_memory),
450 AllocParent::Dedicated(_) => None,
451 }
452 }
453
454 /// Increases the offset of the allocation by the specified `amount` and shrinks its size by
455 /// the same amount.
456 ///
457 /// # Panics
458 ///
459 /// - Panics if the `amount` exceeds the size of the allocation.
460 #[inline]
pub fn shift(&mut self, amount: DeviceSize) {
462 assert!(amount <= self.size);
463
464 unsafe { self.set_offset(self.offset + amount) };
465 self.size -= amount;
466 }
467
468 /// Shrinks the size of the allocation to the specified `new_size`.
469 ///
470 /// # Panics
471 ///
472 /// - Panics if the `new_size` exceeds the current size of the allocation.
473 #[inline]
pub fn shrink(&mut self, new_size: DeviceSize) {
475 assert!(new_size <= self.size);
476
477 self.size = new_size;
478 }
479
480 /// Sets the offset of the allocation without checking for memory aliasing.
481 ///
482 /// See also [`shift`], which moves the offset safely.
483 ///
484 /// # Safety
485 ///
486 /// - You must ensure that the allocation doesn't alias any other allocations within the
487 /// [`DeviceMemory`] block, and if it does, then you must ensure memory accesses are
488 /// synchronized yourself.
489 /// - You must ensure the allocation still fits inside the `DeviceMemory` block.
490 ///
491 /// [`shift`]: Self::shift
492 #[inline]
pub unsafe fn set_offset(&mut self, new_offset: DeviceSize) {
494 if let Some(ptr) = self.mapped_ptr.as_mut() {
495 *ptr = NonNull::new_unchecked(
496 ptr.as_ptr()
497 .offset(new_offset as isize - self.offset as isize),
498 );
499 }
500 self.offset = new_offset;
501 }
502
503 /// Sets the size of the allocation without checking for memory aliasing.
504 ///
505 /// See also [`shrink`], which sets the size safely.
506 ///
507 /// # Safety
508 ///
509 /// - You must ensure that the allocation doesn't alias any other allocations within the
510 /// [`DeviceMemory`] block, and if it does, then you must ensure memory accesses are
511 /// synchronized yourself.
512 /// - You must ensure the allocation still fits inside the `DeviceMemory` block.
513 ///
514 /// [`shrink`]: Self::shrink
515 #[inline]
pub unsafe fn set_size(&mut self, new_size: DeviceSize) {
517 self.size = new_size;
518 }
519
520 /// Sets the allocation type.
521 ///
522 /// This might cause memory aliasing due to [buffer-image granularity] conflicts if the
523 /// allocation type is [`Linear`] or [`NonLinear`] and is changed to a different one.
524 ///
525 /// # Safety
526 ///
527 /// - You must ensure that the allocation doesn't alias any other allocations within the
528 /// [`DeviceMemory`] block, and if it does, then you must ensure memory accesses are
529 /// synchronized yourself.
530 ///
531 /// [buffer-image granularity]: super#buffer-image-granularity
532 /// [`Linear`]: AllocationType::Linear
533 /// [`NonLinear`]: AllocationType::NonLinear
534 #[inline]
pub unsafe fn set_allocation_type(&mut self, new_type: AllocationType) {
536 self.allocation_type = new_type;
537 }
538 }
539
540 impl Drop for MemoryAlloc {
541 #[inline]
fn drop(&mut self) {
543 match &self.parent {
544 AllocParent::FreeList { allocator, id } => {
545 unsafe { allocator.free(*id) };
546 }
547 AllocParent::Buddy {
548 allocator,
549 order,
550 offset,
551 } => {
552 unsafe { allocator.free(*order, *offset) };
553 }
554 AllocParent::Pool { allocator, index } => {
555 unsafe { allocator.free(*index) };
556 }
557 // The bump allocator can't free individually, but we need to keep a reference to it so
// it doesn't get reset or dropped while in use.
559 AllocParent::Bump(_) => {}
560 // A root allocation frees itself once all references to the `DeviceMemory` are dropped.
561 AllocParent::Root(_) => {}
562 // Dedicated allocations free themselves when the `DeviceMemory` is dropped.
563 AllocParent::Dedicated(_) => {}
564 }
565 }
566 }
567
568 unsafe impl DeviceOwned for MemoryAlloc {
569 #[inline]
fn device(&self) -> &Arc<Device> {
571 self.device_memory().device()
572 }
573 }
574
575 /// Suballocators are used to divide a *region* into smaller *suballocations*.
576 ///
577 /// # Regions
578 ///
579 /// As the name implies, a region is a contiguous portion of memory. It may be the whole dedicated
580 /// block of [`DeviceMemory`], or only a part of it. Regions are just [allocations] like any other,
581 /// but we use this term to refer specifically to an allocation that is to be suballocated. Every
582 /// suballocator is created with a region to work with.
583 ///
584 /// # Free-lists
585 ///
586 /// A free-list, also kind of predictably, refers to a list of (sub)allocations within a region
587 /// that are currently free. Every (sub)allocator that can free allocations dynamically (in any
588 /// order) needs to keep a free-list of some sort. This list is then consulted when new allocations
589 /// are made, and can be used to coalesce neighboring allocations that are free into bigger ones.
590 ///
591 /// # Memory hierarchies
592 ///
593 /// Different applications have wildly different allocation needs, and there's no way to cover them
594 /// all with a single type of allocator. Furthermore, different allocators have different
595 /// trade-offs and are best suited to specific tasks. To account for all possible use-cases,
596 /// Vulkano offers the ability to create *memory hierarchies*. We refer to the [`DeviceMemory`] as
597 /// the root of any such hierarchy, even though technically the driver has levels that are further
598 /// up, because those `DeviceMemory` blocks need to be allocated from physical memory [pages]
599 /// themselves, but since those levels are not accessible to us we don't need to consider them. You
600 /// can create any number of levels/branches from there, bounded only by the amount of available
601 /// memory within a `DeviceMemory` block. You can suballocate the root into regions, which are then
602 /// suballocated into further regions and so on, creating hierarchies of arbitrary height.
603 ///
604 /// As an added bonus, memory hierarchies lend themselves perfectly to the concept of composability
605 /// we all love so much, making them a natural fit for Rust. For one, a region can be allocated any
606 /// way, and fed into any suballocator. Also, once you are done with a branch of a hierarchy,
607 /// meaning there are no more suballocations in use within the region of that branch, and you would
608 /// like to reuse the region, you can do so safely! All suballocators have a `try_into_region`
609 /// method for this purpose. This means that you can replace one suballocator with another without
610 /// consulting any of the higher levels in the hierarchy.
611 ///
612 /// # Examples
613 ///
/// Allocating a region to suballocate:
615 ///
616 /// ```
617 /// use vulkano::memory::{DeviceMemory, MemoryAllocateInfo, MemoryPropertyFlags, MemoryType};
618 /// use vulkano::memory::allocator::MemoryAlloc;
619 /// # let device: std::sync::Arc<vulkano::device::Device> = return;
620 ///
621 /// // First you need to find a suitable memory type.
622 /// let memory_type_index = device
623 /// .physical_device()
624 /// .memory_properties()
625 /// .memory_types
626 /// .iter()
627 /// .enumerate()
628 /// // In a real-world scenario, you would probably want to rank the memory types based on your
629 /// // requirements, instead of picking the first one that satisfies them. Also, you have to
630 /// // take the requirements of the resources you want to allocate memory for into consideration.
631 /// .find_map(|(index, MemoryType { property_flags, .. })| {
632 /// property_flags.intersects(MemoryPropertyFlags::DEVICE_LOCAL).then_some(index)
633 /// })
634 /// .unwrap() as u32;
635 ///
636 /// let region = MemoryAlloc::new(
637 /// DeviceMemory::allocate(
638 /// device.clone(),
639 /// MemoryAllocateInfo {
640 /// allocation_size: 64 * 1024 * 1024,
641 /// memory_type_index,
642 /// ..Default::default()
643 /// },
644 /// )
645 /// .unwrap(),
646 /// )
647 /// .unwrap();
648 ///
649 /// // You can now feed `region` into any suballocator.
650 /// ```
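///
/// Suballocating the region and later reclaiming it, a sketch assuming `region` comes from the
/// snippet above and that every suballocation is dropped before the region is reused:
///
/// ```
/// use vulkano::memory::allocator::{
///     DeviceLayout, FreeListAllocator, MemoryAlloc, SuballocationCreateInfo, Suballocator,
/// };
/// # let region: MemoryAlloc = return;
///
/// let allocator = FreeListAllocator::new(region);
///
/// let allocation = allocator
///     .allocate(SuballocationCreateInfo {
///         layout: DeviceLayout::from_size_alignment(1024, 64).unwrap(),
///         ..Default::default()
///     })
///     .unwrap();
///
/// // ... bind a resource to `allocation` and use it ...
///
/// // Once all suballocations are dropped, the allocator holds the only reference to the
/// // region, so it can be reclaimed and fed into another suballocator.
/// drop(allocation);
/// let _region = allocator.try_into_region().unwrap();
/// ```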
651 ///
652 /// # Implementing the trait
653 ///
654 /// Please don't.
655 ///
656 /// [allocations]: MemoryAlloc
657 /// [pages]: super#pages
658 pub unsafe trait Suballocator: DeviceOwned {
659 /// Whether this allocator needs to block or not.
660 ///
661 /// This is used by the [`GenericMemoryAllocator`] to specialize the allocation strategy to the
662 /// suballocator at compile time.
663 ///
664 /// [`GenericMemoryAllocator`]: super::GenericMemoryAllocator
665 const IS_BLOCKING: bool;
666
667 /// Whether the allocator needs [`cleanup`] to be called before memory can be released.
668 ///
669 /// This is used by the [`GenericMemoryAllocator`] to specialize the allocation strategy to the
670 /// suballocator at compile time.
671 ///
672 /// [`cleanup`]: Self::cleanup
673 /// [`GenericMemoryAllocator`]: super::GenericMemoryAllocator
674 const NEEDS_CLEANUP: bool;
675
676 /// Creates a new suballocator for the given [region].
677 ///
678 /// [region]: Self#regions
fn new(region: MemoryAlloc) -> Self
680 where
681 Self: Sized;
682
683 /// Creates a new suballocation within the [region].
684 ///
685 /// [region]: Self#regions
fn allocate(
687 &self,
688 create_info: SuballocationCreateInfo,
689 ) -> Result<MemoryAlloc, SuballocationCreationError>;
690
691 /// Returns a reference to the underlying [region].
692 ///
693 /// [region]: Self#regions
fn region(&self) -> &MemoryAlloc;
695
696 /// Returns the underlying [region] if there are no other strong references to the allocator,
697 /// otherwise hands you back the allocator wrapped in [`Err`]. Allocations made with the
698 /// allocator count as references for as long as they are alive.
699 ///
700 /// [region]: Self#regions
fn try_into_region(self) -> Result<MemoryAlloc, Self>
702 where
703 Self: Sized;
704
705 /// Returns the total amount of free space that is left in the [region].
706 ///
707 /// [region]: Self#regions
fn free_size(&self) -> DeviceSize;
709
710 /// Tries to free some space, if applicable.
fn cleanup(&mut self);
712 }
713
714 /// Parameters to create a new [allocation] using a [suballocator].
715 ///
716 /// [allocation]: MemoryAlloc
717 /// [suballocator]: Suballocator
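///
/// # Examples
///
/// A sketch of filling in the parameters, assuming you need 256 bytes aligned to 64 bytes for a
/// linear resource:
///
/// ```
/// use vulkano::memory::allocator::{AllocationType, DeviceLayout, SuballocationCreateInfo};
///
/// let create_info = SuballocationCreateInfo {
///     layout: DeviceLayout::from_size_alignment(256, 64).unwrap(),
///     allocation_type: AllocationType::Linear,
///     ..Default::default()
/// };
/// # let _ = create_info;
/// ```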
718 #[derive(Clone, Debug)]
719 pub struct SuballocationCreateInfo {
720 /// Memory layout required for the allocation.
721 ///
722 /// The default value is a layout with size [`DeviceLayout::MAX_SIZE`] and alignment
723 /// [`DeviceAlignment::MIN`], which must be overridden.
724 pub layout: DeviceLayout,
725
726 /// Type of resources that can be bound to the allocation.
727 ///
728 /// The default value is [`AllocationType::Unknown`].
729 pub allocation_type: AllocationType,
730
731 pub _ne: crate::NonExhaustive,
732 }
733
734 impl Default for SuballocationCreateInfo {
735 #[inline]
fn default() -> Self {
737 SuballocationCreateInfo {
738 layout: DeviceLayout::new(
739 NonZeroDeviceSize::new(DeviceLayout::MAX_SIZE).unwrap(),
740 DeviceAlignment::MIN,
741 )
742 .unwrap(),
743 allocation_type: AllocationType::Unknown,
744 _ne: crate::NonExhaustive(()),
745 }
746 }
747 }
748
749 /// Tells the [suballocator] what type of resource will be bound to the allocation, so that it can
750 /// optimize memory usage while still respecting the [buffer-image granularity].
751 ///
752 /// [suballocator]: Suballocator
753 /// [buffer-image granularity]: super#buffer-image-granularity
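///
/// # Examples
///
/// For instance, the allocation type to use for an image follows from its [`ImageTiling`]:
///
/// ```
/// use vulkano::image::ImageTiling;
/// use vulkano::memory::allocator::AllocationType;
///
/// assert_eq!(AllocationType::from(ImageTiling::Optimal), AllocationType::NonLinear);
/// assert_eq!(AllocationType::from(ImageTiling::Linear), AllocationType::Linear);
/// ```
///
/// [`ImageTiling`]: crate::image::ImageTiling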
754 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
755 #[non_exhaustive]
756 pub enum AllocationType {
/// The type of resource is unknown; it might be either linear or non-linear. This means that
/// allocations created with this type must always be aligned to the buffer-image granularity.
760 Unknown = 0,
761
762 /// The resource is linear, e.g. buffers, linear images. A linear allocation following another
763 /// linear allocation never needs to be aligned to the buffer-image granularity.
764 Linear = 1,
765
766 /// The resource is non-linear, e.g. optimal images. A non-linear allocation following another
767 /// non-linear allocation never needs to be aligned to the buffer-image granularity.
768 NonLinear = 2,
769 }
770
771 impl From<ImageTiling> for AllocationType {
772 #[inline]
fn from(tiling: ImageTiling) -> Self {
774 match tiling {
775 ImageTiling::Optimal => AllocationType::NonLinear,
776 ImageTiling::Linear => AllocationType::Linear,
777 ImageTiling::DrmFormatModifier => AllocationType::Linear, // TODO: improve
778 }
779 }
780 }
781
782 /// Error that can be returned when using a [suballocator].
783 ///
784 /// [suballocator]: Suballocator
785 #[derive(Clone, Debug, PartialEq, Eq)]
786 pub enum SuballocationCreationError {
787 /// There is no more space available in the region.
788 OutOfRegionMemory,
789
790 /// The region has enough free space to satisfy the request but is too fragmented.
791 FragmentedRegion,
792
/// The allocation was larger than the allocator's block size, meaning that this error would
/// arise with these parameters no matter what state the allocator was in.
795 ///
796 /// This can be used to let the [`GenericMemoryAllocator`] know that allocating a new block of
797 /// [`DeviceMemory`] and trying to suballocate it with the same parameters would not solve the
798 /// issue.
799 ///
800 /// [`GenericMemoryAllocator`]: super::GenericMemoryAllocator
801 BlockSizeExceeded,
802 }
803
804 impl Error for SuballocationCreationError {}
805
806 impl Display for SuballocationCreationError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
808 write!(
809 f,
810 "{}",
811 match self {
812 Self::OutOfRegionMemory => "out of region memory",
813 Self::FragmentedRegion => "the region is too fragmented",
814 Self::BlockSizeExceeded =>
815 "the allocation size was greater than the suballocator's block size",
816 }
817 )
818 }
819 }
820
821 /// A [suballocator] that uses the most generic [free-list].
822 ///
823 /// The strength of this allocator is that it can create and free allocations completely
824 /// dynamically, which means they can be any size and created/freed in any order. The downside is
825 /// that this always leads to horrific [external fragmentation] the more such dynamic allocations
826 /// are made. Therefore, this allocator is best suited for long-lived allocations. If you need
827 /// to create allocations of various sizes, but can't afford this fragmentation, then the
828 /// [`BuddyAllocator`] is your best buddy. If you need to create allocations which share a similar
829 /// size, consider the [`PoolAllocator`]. Lastly, if you need to allocate very often, then
830 /// [`BumpAllocator`] is best suited.
831 ///
832 /// See also [the `Suballocator` implementation].
833 ///
834 /// # Algorithm
835 ///
836 /// The free-list stores suballocations which can have any offset and size. When an allocation
837 /// request is made, the list is searched using the best-fit strategy, meaning that the smallest
838 /// suballocation that fits the request is chosen. If required, the chosen suballocation is trimmed
839 /// at the ends and the ends are returned to the free-list. As such, no [internal fragmentation]
840 /// occurs. The front might need to be trimmed because of [alignment requirements] and the end
841 /// because of a larger than required size. When an allocation is freed, the allocator checks if
842 /// the adjacent suballocations are free, and if so it coalesces them into a bigger one before
843 /// putting it in the free-list.
844 ///
845 /// # Efficiency
846 ///
847 /// The allocator is synchronized internally with a lock, which is held only for a very short
848 /// period each time an allocation is created and freed. The free-list is sorted by size, which
849 /// means that when allocating, finding a best-fit is always possible in *O*(log(*n*)) time in the
850 /// worst case. When freeing, the coalescing requires us to remove the adjacent free suballocations
851 /// from the free-list which is *O*(log(*n*)), and insert the possibly coalesced suballocation into
852 /// the free-list which has the same time complexity, so in total freeing is *O*(log(*n*)).
853 ///
854 /// There is one notable edge-case: after the allocator finds a best-fit, it is possible that it
855 /// needs to align the suballocation's offset to a higher value, after which the requested size
856 /// might no longer fit. In such a case, the next free suballocation in sorted order is tried until
857 /// a fit is successful. If this issue is encountered with all candidates, then the time complexity
858 /// would be *O*(*n*). However, this scenario is extremely unlikely which is why we are not
859 /// considering it in the above analysis. Additionally, if your free-list is filled with
860 /// allocations that all have the same size then that seems pretty sus. Sounds like you're in dire
861 /// need of a `PoolAllocator`.
862 ///
863 /// # Examples
864 ///
865 /// Most commonly you will not want to use this suballocator directly but rather use it within
866 /// [`GenericMemoryAllocator`], having one global [`StandardMemoryAllocator`] for most if not all
867 /// of your allocation needs.
868 ///
869 /// Basic usage as a global allocator for long-lived resources:
870 ///
871 /// ```
872 /// use vulkano::format::Format;
873 /// use vulkano::image::{ImageDimensions, ImmutableImage};
874 /// use vulkano::memory::allocator::StandardMemoryAllocator;
875 /// # let device: std::sync::Arc<vulkano::device::Device> = return;
876 ///
877 /// let memory_allocator = StandardMemoryAllocator::new_default(device.clone());
878 ///
879 /// # fn read_textures() -> Vec<Vec<u8>> { Vec::new() }
880 /// # let mut command_buffer_builder: vulkano::command_buffer::AutoCommandBufferBuilder<vulkano::command_buffer::PrimaryAutoCommandBuffer<vulkano::command_buffer::allocator::StandardCommandBufferAlloc>> = return;
881 /// // Allocate some resources.
882 /// let textures_data: Vec<Vec<u8>> = read_textures();
883 /// let textures = textures_data.into_iter().map(|data| {
884 /// ImmutableImage::from_iter(
885 /// &memory_allocator,
886 /// data,
887 /// ImageDimensions::Dim2d {
888 /// width: 1024,
889 /// height: 1024,
890 /// array_layers: 1,
891 /// },
892 /// 1.into(),
893 /// Format::R8G8B8A8_UNORM,
894 /// &mut command_buffer_builder,
895 /// )
896 /// .unwrap()
897 /// });
898 /// ```
899 ///
900 /// For use in allocating arenas for [`SubbufferAllocator`]:
901 ///
902 /// ```
903 /// use std::sync::Arc;
904 /// use vulkano::buffer::allocator::SubbufferAllocator;
905 /// use vulkano::memory::allocator::StandardMemoryAllocator;
906 /// # let device: std::sync::Arc<vulkano::device::Device> = return;
907 ///
908 /// // We need to wrap the allocator in an `Arc` so that we can share ownership of it.
909 /// let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone()));
910 /// let buffer_allocator = SubbufferAllocator::new(memory_allocator.clone(), Default::default());
911 ///
912 /// // You can continue using `memory_allocator` for other things.
913 /// ```
914 ///
/// Sometimes, it is necessary to suballocate an allocation. If you don't want to allocate new
916 /// [`DeviceMemory`] blocks to suballocate, perhaps because of concerns of memory wastage or
917 /// allocation efficiency, you can use your existing global `StandardMemoryAllocator` to allocate
918 /// regions for your suballocation needs:
919 ///
920 /// ```
921 /// use vulkano::memory::allocator::{
922 /// DeviceLayout, MemoryAllocator, StandardMemoryAllocator, SuballocationCreateInfo,
923 /// };
924 ///
925 /// # let device: std::sync::Arc<vulkano::device::Device> = return;
926 /// let memory_allocator = StandardMemoryAllocator::new_default(device.clone());
927 ///
928 /// # let memory_type_index = 0;
929 /// let region = memory_allocator.allocate_from_type(
930 /// // When choosing the index, you have to make sure that the memory type is allowed for the
931 /// // type of resource that you want to bind the suballocations to.
932 /// memory_type_index,
933 /// SuballocationCreateInfo {
934 /// layout: DeviceLayout::from_size_alignment(
935 /// // This will be the size of your region.
936 /// 16 * 1024 * 1024,
937 /// // It generally does not matter what the alignment is, because you're going to
938 /// // suballocate the allocation anyway, and not bind it directly.
939 /// 1,
940 /// )
941 /// .unwrap(),
942 /// ..Default::default()
943 /// },
944 /// )
945 /// .unwrap();
946 ///
947 /// // You can now feed the `region` into any suballocator.
948 /// ```
949 ///
950 /// [suballocator]: Suballocator
951 /// [free-list]: Suballocator#free-lists
952 /// [external fragmentation]: super#external-fragmentation
953 /// [the `Suballocator` implementation]: Suballocator#impl-Suballocator-for-Arc<FreeListAllocator>
954 /// [internal fragmentation]: super#internal-fragmentation
955 /// [alignment requirements]: super#alignment
956 /// [`GenericMemoryAllocator`]: super::GenericMemoryAllocator
957 /// [`StandardMemoryAllocator`]: super::StandardMemoryAllocator
958 /// [`SubbufferAllocator`]: crate::buffer::allocator::SubbufferAllocator
959 #[derive(Debug)]
960 pub struct FreeListAllocator {
961 region: MemoryAlloc,
962 device_memory: Arc<DeviceMemory>,
963 buffer_image_granularity: DeviceAlignment,
964 atom_size: DeviceAlignment,
965 // Total memory remaining in the region.
966 free_size: AtomicU64,
967 state: Mutex<FreeListAllocatorState>,
968 }
969
970 impl FreeListAllocator {
971 /// Creates a new `FreeListAllocator` for the given [region].
972 ///
973 /// # Panics
974 ///
975 /// - Panics if `region.allocation_type` is not [`AllocationType::Unknown`]. This is done to
976 /// avoid checking for a special case of [buffer-image granularity] conflict.
977 /// - Panics if `region` is a [dedicated allocation].
978 ///
979 /// [region]: Suballocator#regions
980 /// [buffer-image granularity]: super#buffer-image-granularity
981 /// [dedicated allocation]: MemoryAlloc::is_dedicated
pub fn new(region: MemoryAlloc) -> Arc<Self> {
983 // NOTE(Marc): This number was pulled straight out of my a-
984 const AVERAGE_ALLOCATION_SIZE: DeviceSize = 64 * 1024;
985
986 assert!(region.allocation_type == AllocationType::Unknown);
987
988 let device_memory = region
989 .root()
990 .expect("dedicated allocations can't be suballocated")
991 .clone();
992 let buffer_image_granularity = device_memory
993 .device()
994 .physical_device()
995 .properties()
996 .buffer_image_granularity;
997
998 let atom_size = region.atom_size.unwrap_or(DeviceAlignment::MIN);
999 let free_size = AtomicU64::new(region.size);
1000
1001 let capacity = (region.size / AVERAGE_ALLOCATION_SIZE) as usize;
1002 let mut nodes = host::PoolAllocator::new(capacity + 64);
1003 let mut free_list = Vec::with_capacity(capacity / 16 + 16);
1004 let root_id = nodes.allocate(SuballocationListNode {
1005 prev: None,
1006 next: None,
1007 offset: region.offset,
1008 size: region.size,
1009 ty: SuballocationType::Free,
1010 });
1011 free_list.push(root_id);
1012 let state = Mutex::new(FreeListAllocatorState { nodes, free_list });
1013
1014 Arc::new(FreeListAllocator {
1015 region,
1016 device_memory,
1017 buffer_image_granularity,
1018 atom_size,
1019 free_size,
1020 state,
1021 })
1022 }
1023
1024 /// # Safety
1025 ///
1026 /// - `node_id` must refer to an occupied suballocation allocated by `self`.
unsafe fn free(&self, node_id: SlotId) {
1028 let mut state = self.state.lock();
1029 let node = state.nodes.get_mut(node_id);
1030
1031 debug_assert!(node.ty != SuballocationType::Free);
1032
1033 // Suballocation sizes are constrained by the size of the region, so they can't possibly
1034 // overflow when added up.
1035 self.free_size.fetch_add(node.size, Ordering::Release);
1036
1037 node.ty = SuballocationType::Free;
1038 state.coalesce(node_id);
1039 state.free(node_id);
1040 }
1041 }
1042
1043 unsafe impl Suballocator for Arc<FreeListAllocator> {
1044 const IS_BLOCKING: bool = true;
1045
1046 const NEEDS_CLEANUP: bool = false;
1047
1048 #[inline]
fn new(region: MemoryAlloc) -> Self {
1050 FreeListAllocator::new(region)
1051 }
1052
1053 /// Creates a new suballocation within the [region].
1054 ///
1055 /// # Errors
1056 ///
/// - Returns [`OutOfRegionMemory`] if there are no free suballocations large enough to satisfy
1058 /// the request.
1059 /// - Returns [`FragmentedRegion`] if a suballocation large enough to satisfy the request could
1060 /// have been formed, but wasn't because of [external fragmentation].
1061 ///
1062 /// [region]: Suballocator#regions
1063 /// [`allocate`]: Suballocator::allocate
1064 /// [`OutOfRegionMemory`]: SuballocationCreationError::OutOfRegionMemory
1065 /// [`FragmentedRegion`]: SuballocationCreationError::FragmentedRegion
1066 /// [external fragmentation]: super#external-fragmentation
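///
/// # Examples
///
/// A sketch of handling the possible errors, assuming `allocator` suballocates a region you
/// allocated earlier:
///
/// ```
/// # use std::sync::Arc;
/// # use vulkano::memory::allocator::{
/// #     DeviceLayout, FreeListAllocator, SuballocationCreateInfo, SuballocationCreationError,
/// #     Suballocator,
/// # };
/// # let allocator: Arc<FreeListAllocator> = return;
/// let result = allocator.allocate(SuballocationCreateInfo {
///     layout: DeviceLayout::from_size_alignment(4096, 256).unwrap(),
///     ..Default::default()
/// });
///
/// match result {
///     // Bind a resource to the new suballocation.
///     Ok(_allocation) => {}
///     // No free suballocation is large enough; allocate a new region and try again.
///     Err(SuballocationCreationError::OutOfRegionMemory) => {}
///     // There is enough free space in total, but it's too fragmented.
///     Err(SuballocationCreationError::FragmentedRegion) => {}
///     // `FreeListAllocator` itself doesn't return this, but the match must be exhaustive.
///     Err(SuballocationCreationError::BlockSizeExceeded) => {}
/// }
/// ```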
1067 #[inline]
fn allocate(
1069 &self,
1070 create_info: SuballocationCreateInfo,
1071 ) -> Result<MemoryAlloc, SuballocationCreationError> {
1072 fn has_granularity_conflict(prev_ty: SuballocationType, ty: AllocationType) -> bool {
1073 if prev_ty == SuballocationType::Free {
1074 false
1075 } else if prev_ty == SuballocationType::Unknown {
1076 true
1077 } else {
1078 prev_ty != ty.into()
1079 }
1080 }
1081
1082 let SuballocationCreateInfo {
1083 layout,
1084 allocation_type,
1085 _ne: _,
1086 } = create_info;
1087
1088 let size = layout.size();
1089 let alignment = cmp::max(layout.alignment(), self.atom_size);
1090 let mut state = self.state.lock();
1091
1092 unsafe {
1093 match state.free_list.last() {
1094 Some(&last) if state.nodes.get(last).size >= size => {
1095 let index = match state
1096 .free_list
1097 .binary_search_by_key(&size, |&id| state.nodes.get(id).size)
1098 {
1099 // Exact fit.
1100 Ok(index) => index,
// Next-best fit. Note that `index == free_list.len()` can't be the case, because
// we checked that the free-list contains a suballocation that is big enough.
1103 Err(index) => index,
1104 };
1105
1106 for (index, &id) in state.free_list.iter().enumerate().skip(index) {
1107 let suballoc = state.nodes.get(id);
1108
1109 // This can't overflow because suballocation offsets are constrained by
1110 // the size of the root allocation, which can itself not exceed
1111 // `DeviceLayout::MAX_SIZE`.
1112 let mut offset = align_up(suballoc.offset, alignment);
1113
1114 if let Some(prev_id) = suballoc.prev {
1115 let prev = state.nodes.get(prev_id);
1116
1117 if are_blocks_on_same_page(
1118 prev.offset,
1119 prev.size,
1120 offset,
1121 self.buffer_image_granularity,
1122 ) && has_granularity_conflict(prev.ty, allocation_type)
1123 {
1124 // This is overflow-safe for the same reason as above.
1125 offset = align_up(offset, self.buffer_image_granularity);
1126 }
1127 }
1128
1129 if offset + size <= suballoc.offset + suballoc.size {
1130 state.free_list.remove(index);
1131
1132 // SAFETY:
1133 // - `suballoc` is free.
1134 // - `offset` is that of `suballoc`, possibly rounded up.
1135 // - We checked that `offset + size` falls within `suballoc`.
1136 state.split(id, offset, size);
1137 state.nodes.get_mut(id).ty = allocation_type.into();
1138
1139 // This can't overflow because suballocation sizes in the free-list are
1140 // constrained by the remaining size of the region.
1141 self.free_size.fetch_sub(size, Ordering::Release);
1142
1143 let mapped_ptr = self.region.mapped_ptr.map(|ptr| {
1144 // This can't overflow because offsets in the free-list are confined
1145 // to the range [region.offset, region.offset + region.size).
1146 let relative_offset = offset - self.region.offset;
1147
1148 // SAFETY: Allocation sizes are guaranteed to not exceed
1149 // `isize::MAX` when they have a mapped pointer, and the original
1150 // pointer was handed to us from the Vulkan implementation,
1151 // so the offset better be in range.
1152 let ptr = ptr.as_ptr().offset(relative_offset as isize);
1153
1154 // SAFETY: Same as the previous.
1155 NonNull::new_unchecked(ptr)
1156 });
1157
1158 return Ok(MemoryAlloc {
1159 offset,
1160 size,
1161 allocation_type,
1162 mapped_ptr,
1163 atom_size: self.region.atom_size,
1164 parent: AllocParent::FreeList {
1165 allocator: self.clone(),
1166 id,
1167 },
1168 });
1169 }
1170 }
1171
1172 // There is not enough space due to alignment requirements.
1173 Err(SuballocationCreationError::OutOfRegionMemory)
1174 }
1175 // There would be enough space if the region wasn't so fragmented. :(
1176 Some(_) if self.free_size() >= size => {
1177 Err(SuballocationCreationError::FragmentedRegion)
1178 }
1179 // There is not enough space.
1180 Some(_) => Err(SuballocationCreationError::OutOfRegionMemory),
1181 // There is no space at all.
1182 None => Err(SuballocationCreationError::OutOfRegionMemory),
1183 }
1184 }
1185 }
1186
1187 #[inline]
fn region(&self) -> &MemoryAlloc {
1189 &self.region
1190 }
1191
1192 #[inline]
fn try_into_region(self) -> Result<MemoryAlloc, Self> {
1194 Arc::try_unwrap(self).map(|allocator| allocator.region)
1195 }
1196
1197 #[inline]
fn free_size(&self) -> DeviceSize {
1199 self.free_size.load(Ordering::Acquire)
1200 }
1201
1202 #[inline]
fn cleanup(&mut self) {}
1204 }
1205
1206 unsafe impl DeviceOwned for FreeListAllocator {
1207 #[inline]
fn device(&self) -> &Arc<Device> {
1209 self.device_memory.device()
1210 }
1211 }
1212
1213 #[derive(Debug)]
1214 struct FreeListAllocatorState {
1215 nodes: host::PoolAllocator<SuballocationListNode>,
1216 // Free suballocations sorted by size in ascending order. This means we can always find a
1217 // best-fit in *O*(log(*n*)) time in the worst case, and iterating in order is very efficient.
1218 free_list: Vec<SlotId>,
1219 }
1220
1221 #[derive(Clone, Copy, Debug)]
1222 struct SuballocationListNode {
1223 prev: Option<SlotId>,
1224 next: Option<SlotId>,
1225 offset: DeviceSize,
1226 size: DeviceSize,
1227 ty: SuballocationType,
1228 }
1229
1230 /// Tells us if a suballocation is free, and if not, whether it is linear or not. This is needed in
1231 /// order to be able to respect the buffer-image granularity.
1232 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
1233 enum SuballocationType {
1234 Unknown,
1235 Linear,
1236 NonLinear,
1237 Free,
1238 }
1239
1240 impl From<AllocationType> for SuballocationType {
fn from(ty: AllocationType) -> Self {
1242 match ty {
1243 AllocationType::Unknown => SuballocationType::Unknown,
1244 AllocationType::Linear => SuballocationType::Linear,
1245 AllocationType::NonLinear => SuballocationType::NonLinear,
1246 }
1247 }
1248 }
1249
1250 impl FreeListAllocatorState {
1251 /// Removes the target suballocation from the free-list.
1252 ///
1253 /// # Safety
1254 ///
1255 /// - `node_id` must have been allocated by `self`.
1256 /// - `node_id` must be in the free-list.
unsafe fn allocate(&mut self, node_id: SlotId) {
1258 debug_assert!(self.free_list.contains(&node_id));
1259
1260 let node = self.nodes.get(node_id);
1261
1262 match self
1263 .free_list
1264 .binary_search_by_key(&node.size, |&id| self.nodes.get(id).size)
1265 {
1266 Ok(index) => {
1267 // If there are multiple free suballocations with the same size, the search might
1268 // have returned any one, so we need to find the one corresponding to the target ID.
1269 if self.free_list[index] == node_id {
1270 self.free_list.remove(index);
1271 return;
1272 }
1273
1274 // Check all previous indices that point to suballocations with the same size.
1275 {
1276 let mut index = index;
1277 loop {
1278 index = index.wrapping_sub(1);
1279 if let Some(&id) = self.free_list.get(index) {
1280 if id == node_id {
1281 self.free_list.remove(index);
1282 return;
1283 }
1284 if self.nodes.get(id).size != node.size {
1285 break;
1286 }
1287 } else {
1288 break;
1289 }
1290 }
1291 }
1292
1293 // Check all next indices that point to suballocations with the same size.
1294 {
1295 let mut index = index;
1296 loop {
1297 index += 1;
1298 if let Some(&id) = self.free_list.get(index) {
1299 if id == node_id {
1300 self.free_list.remove(index);
1301 return;
1302 }
1303 if self.nodes.get(id).size != node.size {
1304 break;
1305 }
1306 } else {
1307 break;
1308 }
1309 }
1310 }
1311
1312 unreachable!();
1313 }
1314 Err(_) => unreachable!(),
1315 }
1316 }
1317
1318 /// Fits a suballocation inside the target one, splitting the target at the ends if required.
1319 ///
1320 /// # Safety
1321 ///
1322 /// - `node_id` must have been allocated by `self`.
1323 /// - `node_id` must refer to a free suballocation.
1324 /// - `offset` and `size` must refer to a subregion of the given suballocation.
unsafe fn split(&mut self, node_id: SlotId, offset: DeviceSize, size: DeviceSize) {
1326 let node = self.nodes.get(node_id);
1327
1328 debug_assert!(node.ty == SuballocationType::Free);
1329 debug_assert!(offset >= node.offset);
1330 debug_assert!(offset + size <= node.offset + node.size);
1331
1332 // These are guaranteed to not overflow because the caller must uphold that the given
1333 // region is contained within that of `node`.
1334 let padding_front = offset - node.offset;
1335 let padding_back = node.offset + node.size - offset - size;
1336
1337 if padding_front > 0 {
1338 let padding = SuballocationListNode {
1339 prev: node.prev,
1340 next: Some(node_id),
1341 offset: node.offset,
1342 size: padding_front,
1343 ty: SuballocationType::Free,
1344 };
1345 let padding_id = self.nodes.allocate(padding);
1346
1347 if let Some(prev_id) = padding.prev {
1348 self.nodes.get_mut(prev_id).next = Some(padding_id);
1349 }
1350
1351 let node = self.nodes.get_mut(node_id);
1352 node.prev = Some(padding_id);
1353 node.offset = offset;
1354 // The caller must uphold that the given region is contained within that of `node`, and
1355 // it follows that if there is padding, the size of the node must be larger than that
1356 // of the padding, so this can't overflow.
1357 node.size -= padding.size;
1358
1359 self.free(padding_id);
1360 }
1361
1362 if padding_back > 0 {
1363 let padding = SuballocationListNode {
1364 prev: Some(node_id),
1365 next: node.next,
1366 offset: offset + size,
1367 size: padding_back,
1368 ty: SuballocationType::Free,
1369 };
1370 let padding_id = self.nodes.allocate(padding);
1371
1372 if let Some(next_id) = padding.next {
1373 self.nodes.get_mut(next_id).prev = Some(padding_id);
1374 }
1375
1376 let node = self.nodes.get_mut(node_id);
1377 node.next = Some(padding_id);
1378 // This is overflow-safe for the same reason as above.
1379 node.size -= padding.size;
1380
1381 self.free(padding_id);
1382 }
1383 }
1384
1385 /// Inserts the target suballocation into the free-list.
1386 ///
1387 /// # Safety
1388 ///
1389 /// - `node_id` must have been allocated by `self`.
/// - The free-list must not contain the given suballocation already, as that would constitute
1391 /// a double-free.
unsafe fn free(&mut self, node_id: SlotId) {
1393 debug_assert!(!self.free_list.contains(&node_id));
1394
1395 let node = self.nodes.get(node_id);
1396 let (Ok(index) | Err(index)) = self
1397 .free_list
1398 .binary_search_by_key(&node.size, |&id| self.nodes.get(id).size);
1399 self.free_list.insert(index, node_id);
1400 }
1401
1402 /// Coalesces the target (free) suballocation with adjacent ones that are also free.
1403 ///
1404 /// # Safety
1405 ///
1406 /// - `node_id` must have been allocated by `self`.
1407 /// - `node_id` must refer to a free suballocation.
unsafe fn coalesce(&mut self, node_id: SlotId) {
1409 let node = self.nodes.get(node_id);
1410
1411 debug_assert!(node.ty == SuballocationType::Free);
1412
1413 if let Some(prev_id) = node.prev {
1414 let prev = self.nodes.get(prev_id);
1415
1416 if prev.ty == SuballocationType::Free {
1417 // SAFETY: We checked that the suballocation is free.
1418 self.allocate(prev_id);
1419
1420 let node = self.nodes.get_mut(node_id);
1421 node.prev = prev.prev;
1422 node.offset = prev.offset;
1423 // The sizes of suballocations are constrained by that of the parent allocation, so
1424 // they can't possibly overflow when added up.
1425 node.size += prev.size;
1426
1427 if let Some(prev_id) = node.prev {
1428 self.nodes.get_mut(prev_id).next = Some(node_id);
1429 }
1430
1431 // SAFETY:
1432 // - The suballocation is free.
1433 // - The suballocation was removed from the free-list.
1434 // - The next suballocation and possibly a previous suballocation have been updated
1435 // such that they no longer reference the suballocation.
1436 // All of these conditions combined guarantee that `prev_id` can not be used again.
1437 self.nodes.free(prev_id);
1438 }
1439 }
1440
1441 if let Some(next_id) = node.next {
1442 let next = self.nodes.get(next_id);
1443
1444 if next.ty == SuballocationType::Free {
1445 // SAFETY: Same as above.
1446 self.allocate(next_id);
1447
1448 let node = self.nodes.get_mut(node_id);
1449 node.next = next.next;
1450 // This is overflow-safe for the same reason as above.
1451 node.size += next.size;
1452
1453 if let Some(next_id) = node.next {
1454 self.nodes.get_mut(next_id).prev = Some(node_id);
1455 }
1456
1457 // SAFETY: Same as above.
1458 self.nodes.free(next_id);
1459 }
1460 }
1461 }
1462 }
1463
1464 /// A [suballocator] whose structure forms a binary tree of power-of-two-sized suballocations.
1465 ///
1466 /// That is, all allocation sizes are rounded up to the next power of two. This helps reduce
1467 /// [external fragmentation] by a lot, at the expense of possibly severe [internal fragmentation]
1468 /// if you're not careful. For example, if you needed an allocation size of 64MiB, you would be
1469 /// wasting no memory. But with an allocation size of 70MiB, you would use a whole 128MiB instead,
1470 /// wasting 45% of the memory. Use this algorithm if you need to create and free a lot of
1471 /// allocations, which would cause too much external fragmentation when using
1472 /// [`FreeListAllocator`]. However, if the sizes of your allocations are more or less the same,
1473 /// then the [`PoolAllocator`] would be a better choice and would eliminate external fragmentation
1474 /// completely.
1475 ///
1476 /// See also [the `Suballocator` implementation].
1477 ///
1478 /// # Algorithm
1479 ///
1480 /// Say you have a [region] of size 256MiB, and you want to allocate 14MiB. Assuming there are no
1481 /// existing allocations, the `BuddyAllocator` would split the 256MiB root *node* into two 128MiB
1482 /// nodes. These two nodes are called *buddies*. The allocator would then proceed to split the left
1483 /// node recursively until it wouldn't be able to fit the allocation anymore. In this example, that
1484 /// would happen after 4 splits and end up with a node size of 16MiB. Since the allocation
1485 /// requested was 14MiB, 2MiB would become internal fragmentation and be unusable for the lifetime
1486 /// of the allocation. When an allocation is freed, this process is done backwards, checking if the
1487 /// buddy of each node on the way up is free and if so they are coalesced.
1488 ///
1489 /// Each possible node size has an *order*, with the smallest node size being of order 0 and the
1490 /// largest of the highest order. With this notion, node sizes are proportional to 2<sup>*n*</sup>
1491 /// where *n* is the order. The highest order is determined from the size of the region and a
1492 /// constant minimum node size, which we chose to be 16B: log(*region size* / 16) or
1493 /// equivalently log(*region size*) - 4 (assuming
1494 /// *region size* ≥ 16).
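///
/// For instance, the highest order of a 256MiB region, spelled out with plain integers (this
/// mirrors how the allocator computes it from the region size and the minimum node size):
///
/// ```
/// let region_size: u64 = 256 * 1024 * 1024;
/// let min_node_size: u64 = 16;
/// // log(region_size / min_node_size), i.e. log(region_size) - 4.
/// let max_order = (region_size / min_node_size).trailing_zeros();
/// assert_eq!(max_order, 24);
/// ```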
1495 ///
1496 /// It's safe to say that this algorithm works best if you have some level of control over your
1497 /// allocation sizes, so that you don't end up allocating twice as much memory. An example of this
1498 /// would be when you need to allocate regions for other allocators, such as the `PoolAllocator` or
1499 /// the [`BumpAllocator`].
1500 ///
1501 /// # Efficiency
1502 ///
1503 /// The allocator is synchronized internally with a lock, which is held only for a very short
1504 /// period each time an allocation is created or freed. The time complexity of both allocation and
1505 /// freeing is *O*(*m*) in the worst case where *m* is the highest order, which equates to *O*(log
1506 /// (*n*)) where *n* is the size of the region.
1507 ///
1508 /// # Examples
1509 ///
1510 /// Basic usage together with [`GenericMemoryAllocator`], to allocate resources that have a
1511 /// moderately low life span (for example if you have a lot of images, each of which needs to be
1512 /// resized every now and then):
1513 ///
1514 /// ```
1515 /// use std::sync::Arc;
1516 /// use vulkano::memory::allocator::{
1517 /// BuddyAllocator, GenericMemoryAllocator, GenericMemoryAllocatorCreateInfo,
1518 /// };
1519 ///
1520 /// # let device: std::sync::Arc<vulkano::device::Device> = return;
1521 /// let memory_allocator = GenericMemoryAllocator::<Arc<BuddyAllocator>>::new(
1522 /// device.clone(),
1523 /// GenericMemoryAllocatorCreateInfo {
1524 /// // Your block sizes must be powers of two, because `BuddyAllocator` only accepts
1525 /// // power-of-two-sized regions.
1526 /// block_sizes: &[(0, 64 * 1024 * 1024)],
1527 /// ..Default::default()
1528 /// },
1529 /// )
1530 /// .unwrap();
1531 ///
1532 /// // Now you can use `memory_allocator` to allocate whatever it is you need.
1533 /// ```
1534 ///
1535 /// [suballocator]: Suballocator
1536 /// [internal fragmentation]: super#internal-fragmentation
1537 /// [external fragmentation]: super#external-fragmentation
1538 /// [the `Suballocator` implementation]: Suballocator#impl-Suballocator-for-Arc<BuddyAllocator>
1539 /// [region]: Suballocator#regions
1540 /// [`GenericMemoryAllocator`]: super::GenericMemoryAllocator
1541 #[derive(Debug)]
1542 pub struct BuddyAllocator {
1543 region: MemoryAlloc,
1544 device_memory: Arc<DeviceMemory>,
1545 buffer_image_granularity: DeviceAlignment,
1546 atom_size: DeviceAlignment,
1547 // Total memory remaining in the region.
1548 free_size: AtomicU64,
1549 state: Mutex<BuddyAllocatorState>,
1550 }
1551
1552 impl BuddyAllocator {
1553 const MIN_NODE_SIZE: DeviceSize = 16;
1554
1555 /// Arbitrary maximum number of orders, used to avoid a 2D `Vec`. Together with a minimum node
1556 /// size of 16, this is enough for a 32GiB region.
1557 const MAX_ORDERS: usize = 32;
1558
1559 /// Creates a new `BuddyAllocator` for the given [region].
1560 ///
1561 /// # Panics
1562 ///
1563 /// - Panics if `region.allocation_type` is not [`AllocationType::Unknown`]. This is done to
1564 /// avoid checking for a special case of [buffer-image granularity] conflict.
1565 /// - Panics if `region.size` is not a power of two.
1566 /// - Panics if `region.size` is not in the range \[16B, 32GiB\].
1567 /// - Panics if `region` is a [dedicated allocation].
1568 ///
1569 /// [region]: Suballocator#regions
1570 /// [buffer-image granularity]: super#buffer-image-granularity
1571 /// [dedicated allocation]: MemoryAlloc::is_dedicated
1572 #[inline]
1573 pub fn new(region: MemoryAlloc) -> Arc<Self> {
1574 const EMPTY_FREE_LIST: Vec<DeviceSize> = Vec::new();
1575
1576 assert!(region.allocation_type == AllocationType::Unknown);
1577 assert!(region.size.is_power_of_two());
1578 assert!(region.size >= BuddyAllocator::MIN_NODE_SIZE);
1579
1580 let max_order = (region.size / BuddyAllocator::MIN_NODE_SIZE).trailing_zeros() as usize;
1581
1582 assert!(max_order < BuddyAllocator::MAX_ORDERS);
1583
1584 let device_memory = region
1585 .root()
1586 .expect("dedicated allocations can't be suballocated")
1587 .clone();
1588 let buffer_image_granularity = device_memory
1589 .device()
1590 .physical_device()
1591 .properties()
1592 .buffer_image_granularity;
1593 let atom_size = region.atom_size.unwrap_or(DeviceAlignment::MIN);
1594 let free_size = AtomicU64::new(region.size);
1595
1596 let mut free_list =
1597 ArrayVec::new(max_order + 1, [EMPTY_FREE_LIST; BuddyAllocator::MAX_ORDERS]);
1598 // The root node has the lowest offset and highest order, so it's the whole region.
1599 free_list[max_order].push(region.offset);
1600 let state = Mutex::new(BuddyAllocatorState { free_list });
1601
1602 Arc::new(BuddyAllocator {
1603 region,
1604 device_memory,
1605 buffer_image_granularity,
1606 atom_size,
1607 free_size,
1608 state,
1609 })
1610 }
1611
1612 /// # Safety
1613 ///
1614 /// - `order` and `offset` must refer to an occupied suballocation allocated by `self`.
1615 unsafe fn free(&self, order: usize, mut offset: DeviceSize) {
1616 let min_order = order;
1617 let mut state = self.state.lock();
1618
1619 debug_assert!(!state.free_list[order].contains(&offset));
1620
1621 // Try to coalesce nodes while incrementing the order.
1622 for (order, free_list) in state.free_list.iter_mut().enumerate().skip(min_order) {
1623 // This can't discard any bits because `order` is confined to the range
1624 // [0, log(region.size / BuddyAllocator::MIN_NODE_SIZE)].
1625 let size = BuddyAllocator::MIN_NODE_SIZE << order;
1626
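// XORing the relative offset with the node size yields the offset of the buddy: for example,
// with a node size of 16, the buddy of the node at relative offset 32 is at 48, and vice
// versa.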
1627 // This can't overflow because the offsets in the free-list are confined to the range
1628 // [region.offset, region.offset + region.size).
1629 let buddy_offset = ((offset - self.region.offset) ^ size) + self.region.offset;
1630
1631 match free_list.binary_search(&buddy_offset) {
1632 // If the buddy is in the free-list, we can coalesce.
1633 Ok(index) => {
1634 free_list.remove(index);
1635 offset = cmp::min(offset, buddy_offset);
1636 }
1637 // Otherwise free the node.
1638 Err(_) => {
1639 let (Ok(index) | Err(index)) = free_list.binary_search(&offset);
1640 free_list.insert(index, offset);
1641
1642 // This can't discard any bits for the same reason as above.
1643 let size = BuddyAllocator::MIN_NODE_SIZE << min_order;
1644
1645 // The sizes of suballocations allocated by `self` are constrained by that of
1646 // its region, so they can't possibly overflow when added up.
1647 self.free_size.fetch_add(size, Ordering::Release);
1648
1649 break;
1650 }
1651 }
1652 }
1653 }
1654 }
1655
1656 unsafe impl Suballocator for Arc<BuddyAllocator> {
1657 const IS_BLOCKING: bool = true;
1658
1659 const NEEDS_CLEANUP: bool = false;
1660
1661 #[inline]
1662 fn new(region: MemoryAlloc) -> Self {
1663 BuddyAllocator::new(region)
1664 }
1665
1666 /// Creates a new suballocation within the [region].
1667 ///
1668 /// # Errors
1669 ///
1670 /// - Returns [`OutOfRegionMemory`] if there are no free nodes large enough to satisfy the
1671 /// request.
1672 /// - Returns [`FragmentedRegion`] if a node large enough to satisfy the request could have
1673 /// been formed, but wasn't because of [external fragmentation].
1674 ///
1675 /// [region]: Suballocator#regions
1676 /// [`allocate`]: Suballocator::allocate
1677 /// [`OutOfRegionMemory`]: SuballocationCreationError::OutOfRegionMemory
1678 /// [`FragmentedRegion`]: SuballocationCreationError::FragmentedRegion
1679 /// [external fragmentation]: super#external-fragmentation
1680 #[inline]
1681 fn allocate(
1682 &self,
1683 create_info: SuballocationCreateInfo,
1684 ) -> Result<MemoryAlloc, SuballocationCreationError> {
1685 /// Returns the largest power of two smaller than or equal to the input, or zero if the input is
1686 /// zero.
1687 fn prev_power_of_two(val: DeviceSize) -> DeviceSize {
1688 const MAX_POWER_OF_TWO: DeviceSize = DeviceAlignment::MAX.as_devicesize();
1689
1690 if let Some(val) = NonZeroDeviceSize::new(val) {
1691 // This can't overflow because `val` is non-zero, which means it has fewer leading
1692 // zeroes than the total number of bits.
1693 MAX_POWER_OF_TWO >> val.leading_zeros()
1694 } else {
1695 0
1696 }
1697 }
1698
1699 let SuballocationCreateInfo {
1700 layout,
1701 allocation_type,
1702 _ne: _,
1703 } = create_info;
1704
1705 let mut size = layout.size();
1706 let mut alignment = cmp::max(layout.alignment(), self.atom_size);
1707
1708 if allocation_type == AllocationType::Unknown
1709 || allocation_type == AllocationType::NonLinear
1710 {
1711 // This can't overflow because `DeviceLayout` guarantees that `size` doesn't exceed
1712 // `DeviceLayout::MAX_SIZE`.
1713 size = align_up(size, self.buffer_image_granularity);
1714 alignment = cmp::max(alignment, self.buffer_image_granularity);
1715 }
1716
1717 // `DeviceLayout` guarantees that its size does not exceed `DeviceLayout::MAX_SIZE`,
1718 // which means it can't overflow when rounded up to the next power of two.
1719 let size = cmp::max(size, BuddyAllocator::MIN_NODE_SIZE).next_power_of_two();
1720
1721 let min_order = (size / BuddyAllocator::MIN_NODE_SIZE).trailing_zeros() as usize;
1722 let mut state = self.state.lock();
1723
1724 // Start searching at the lowest possible order going up.
1725 for (order, free_list) in state.free_list.iter_mut().enumerate().skip(min_order) {
1726 for (index, &offset) in free_list.iter().enumerate() {
1727 if is_aligned(offset, alignment) {
1728 free_list.remove(index);
1729
1730 // Go in the opposite direction, splitting nodes from higher orders. The lowest
1731 // order doesn't need any splitting.
1732 for (order, free_list) in state
1733 .free_list
1734 .iter_mut()
1735 .enumerate()
1736 .skip(min_order)
1737 .take(order - min_order)
1738 .rev()
1739 {
1740 // This can't discard any bits because `order` is confined to the range
1741 // [0, log(region.size / BuddyAllocator::MIN_NODE_SIZE)].
1742 let size = BuddyAllocator::MIN_NODE_SIZE << order;
1743
1744 // This can't overflow because offsets are confined to the size of the root
1745 // allocation, which can itself not exceed `DeviceLayout::MAX_SIZE`.
1746 let right_child = offset + size;
1747
1748 // Insert the right child in sorted order.
1749 let (Ok(index) | Err(index)) = free_list.binary_search(&right_child);
1750 free_list.insert(index, right_child);
1751
1752 // Repeat splitting for the left child if required in the next loop turn.
1753 }
1754
1755 // This can't overflow because suballocation sizes in the free-list are
1756 // constrained by the remaining size of the region.
1757 self.free_size.fetch_sub(size, Ordering::Release);
1758
1759 let mapped_ptr = self.region.mapped_ptr.map(|ptr| {
1760 // This can't overflow because offsets in the free-list are confined to the
1761 // range [region.offset, region.offset + region.size).
1762 let relative_offset = offset - self.region.offset;
1763
1764 // SAFETY: Allocation sizes are guaranteed to not exceed `isize::MAX` when
1765 // they have a mapped pointer, and the original pointer was handed to us
1766 // from the Vulkan implementation, so the offset better be in range.
1767 let ptr = unsafe { ptr.as_ptr().offset(relative_offset as isize) };
1768
1769 // SAFETY: Same as the previous.
1770 unsafe { NonNull::new_unchecked(ptr) }
1771 });
1772
1773 return Ok(MemoryAlloc {
1774 offset,
1775 size: layout.size(),
1776 allocation_type,
1777 mapped_ptr,
1778 atom_size: self.region.atom_size,
1779 parent: AllocParent::Buddy {
1780 allocator: self.clone(),
1781 order: min_order,
1782 offset, // The offset in the alloc itself can change.
1783 },
1784 });
1785 }
1786 }
1787 }
1788
1789 if prev_power_of_two(self.free_size()) >= layout.size() {
1790 // A node large enough could be formed if the region wasn't so fragmented.
1791 Err(SuballocationCreationError::FragmentedRegion)
1792 } else {
1793 Err(SuballocationCreationError::OutOfRegionMemory)
1794 }
1795 }
1796
1797 #[inline]
1798 fn region(&self) -> &MemoryAlloc {
1799 &self.region
1800 }
1801
1802 #[inline]
1803 fn try_into_region(self) -> Result<MemoryAlloc, Self> {
1804 Arc::try_unwrap(self).map(|allocator| allocator.region)
1805 }
1806
1807 /// Returns the total amount of free space left in the [region] that is available to the
1808 /// allocator, which means that [internal fragmentation] is excluded.
1809 ///
1810 /// [region]: Suballocator#regions
1811 /// [internal fragmentation]: super#internal-fragmentation
1812 #[inline]
1813 fn free_size(&self) -> DeviceSize {
1814 self.free_size.load(Ordering::Acquire)
1815 }
1816
1817 #[inline]
1818 fn cleanup(&mut self) {}
1819 }
1820
1821 unsafe impl DeviceOwned for BuddyAllocator {
1822 #[inline]
1823 fn device(&self) -> &Arc<Device> {
1824 self.device_memory.device()
1825 }
1826 }
1827
1828 #[derive(Debug)]
1829 struct BuddyAllocatorState {
1830 // Every order has its own free-list for convenience, so that we don't have to traverse a tree.
1831 // Each free-list is sorted by offset because we want to find the first-fit as this strategy
1832 // minimizes external fragmentation.
1833 free_list: ArrayVec<Vec<DeviceSize>, { BuddyAllocator::MAX_ORDERS }>,
1834 }
1835
1836 /// A [suballocator] using a pool of fixed-size blocks as a [free-list].
1837 ///
1838 /// Since the size of the blocks is fixed, you can not create allocations bigger than that. You can
1839 /// create smaller ones, though, which leads to more and more [internal fragmentation] the smaller
1840 /// the allocations get. This is generally a good trade-off, as internal fragmentation is nowhere
1841 /// near as hard to deal with as [external fragmentation].
1842 ///
1843 /// See also [the `Suballocator` implementation].
1844 ///
1845 /// # Algorithm
1846 ///
1847 /// The free-list contains indices of blocks in the region that are available, so allocation
1848 /// consists merely of popping an index from the free-list. The same goes for freeing, all that is
1849 /// required is to push the index of the block into the free-list. Note that this is only possible
1850 /// because the blocks have a fixed size. Due to this one fact, the free-list doesn't need to be
1851 /// sorted or traversed. As long as there is a free block, it will do, no matter which block it is.
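///
/// A minimal sketch of these free-list mechanics, with a plain `Vec<u64>` standing in for the
/// lock-free queue that is used internally and made-up values for the block size and region
/// offset:
///
/// ```
/// let region_offset: u64 = 0;
/// let block_size: u64 = 1024;
/// // Initially, every block index is free.
/// let mut free_list: Vec<u64> = (0..4).collect();
///
/// // Allocating: pop any index and turn it into an offset within the region.
/// let index = free_list.pop().unwrap();
/// let offset = region_offset + index * block_size;
/// assert_eq!(offset, 3 * 1024);
///
/// // Freeing: push the index back; no sorting or searching is needed.
/// free_list.push(index);
/// assert_eq!(free_list.len(), 4);
/// ```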
1852 ///
1853 /// Since the `PoolAllocator` doesn't keep a list of suballocations that are currently in use,
1854 /// resolving [buffer-image granularity] conflicts on a case-by-case basis is not possible.
1855 /// Therefore, it is an all or nothing situation:
1856 ///
1857 /// - you use the allocator for only one type of allocation, [`Linear`] or [`NonLinear`], or
1858 /// - you allow both but align the blocks to the granularity so that no conflicts can happen.
1859 ///
1860 /// Both cases rely on the fact that every suballocation inherits the allocation type of the
1861 /// region: the latter is achieved by using a region whose allocation type is [`Unknown`]. You are
1862 /// discouraged from using this type if you can avoid it.
1863 ///
1864 /// The block size can end up bigger than specified if the allocator is created with a region whose
1865 /// allocation type is `Unknown`. In that case all blocks are aligned to the buffer-image
1866 /// granularity, which may or may not cause a significant increase in memory usage. Say for example
1867 /// your driver reports a granularity of 4KiB. If you need a block size of 8KiB, you would waste no
1868 /// memory. On the other hand, if you needed a block size of 6KiB, you would be wasting 25% of the
1869 /// memory. In such a scenario you are highly encouraged to use a different allocation type.
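///
/// To illustrate that rounding, here is a sketch with a plain-integer stand-in for the internal
/// `align_up` helper and a hypothetical granularity of 4KiB (the real value comes from the
/// physical device's properties):
///
/// ```
/// // Round `size` up to the next multiple of `alignment` (a power of two).
/// fn align_up(size: u64, alignment: u64) -> u64 {
///     (size + alignment - 1) & !(alignment - 1)
/// }
///
/// let granularity = 4 * 1024;
/// assert_eq!(align_up(8 * 1024, granularity), 8 * 1024); // no waste
/// assert_eq!(align_up(6 * 1024, granularity), 8 * 1024); // 2KiB (25%) wasted per block
/// ```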
1870 ///
1871 /// The reverse is also true: with an allocation type other than `Unknown`, not all memory within a
1872 /// block may be usable depending on the requested [suballocation]. For instance, with a block size
1873 /// of 1152B (9 * 128B) and a suballocation with `alignment: 256`, a block at an odd index could
1874 /// not utilize its first 128B, reducing its effective size to 1024B. This is usually only relevant
1875 /// with small block sizes, as [alignment requirements] are usually rather small, but it completely
1876 /// depends on the resource and driver.
1877 ///
1878 /// In summary, the block size you choose has a significant impact on internal fragmentation due
1879 /// to the two reasons described above. You need to choose your block size carefully, *especially*
1880 /// if you require small allocations. Some rough guidelines:
1881 ///
1882 /// - Always [align] your blocks to a sufficiently large power of 2. This does **not** mean your
1883 /// block size must be a power of two. For example with a block size of 3KiB, your blocks would
1884 /// be aligned to 1KiB.
1885 /// - Prefer not using the allocation type `Unknown`. You can always create as many
1886 /// `PoolAllocator`s as you like for different allocation types and sizes, and they can all work
1887 /// within the same memory block. You should be safe from fragmentation if your blocks are
1888 /// aligned to 1KiB.
1889 /// - If you must use the allocation type `Unknown`, then you should be safe from fragmentation on
1890 /// pretty much any driver if your blocks are aligned to 64KiB. Keep in mind that this might
1891 /// change any time as new devices appear or new drivers come out. Always look at the properties
1892 /// of the devices you want to support before relying on any such data.
1893 ///
1894 /// # Efficiency
1895 ///
1896 /// In theory, a pool allocator is the ideal one because it causes no external fragmentation, and
1897 /// both allocation and freeing are *O*(1). It also never needs to lock and hence lends itself
1898 /// perfectly to concurrency. But of course, there is the trade-off that block sizes are not
1899 /// dynamic.
1900 ///
1901 /// As you can imagine, the `PoolAllocator` is the perfect fit if you know the sizes of the
1902 /// allocations you will be making, and they are more or less in the same size class. But this
1903 /// allocation algorithm really shines when combined with others, as most do. For one, nothing is
1904 /// stopping you from having multiple `PoolAllocator`s for many different size classes. You could
1905 /// consider a pool of pools, by layering `PoolAllocator` with itself, but this would have the
1906 /// downside that the regions of the pools for all size classes would have to match. Usually this
1907 /// is not desired. If you want pools for different size classes to all have about the same number
1908 /// of blocks, or you even know that some size classes require more or less blocks (because of how
1909 /// many resources you will be allocating for each), then you need an allocator that can allocate
1910 /// regions of different sizes. You can use the [`FreeListAllocator`] for this, if external
1911 /// fragmentation is not an issue, otherwise you might consider using the [`BuddyAllocator`]. On
1912 /// the other hand, you might also want to consider having a `PoolAllocator` at the top of a
1913 /// [hierarchy]. Again, this allocator never needs to lock, making it *the* perfect fit for a global
1914 /// concurrent allocator, which hands out large regions which can then be suballocated locally on a
1915 /// thread, by the [`BumpAllocator`] for example.
1916 ///
1917 /// # Examples
1918 ///
1919 /// Basic usage together with [`GenericMemoryAllocator`]:
1920 ///
1921 /// ```
1922 /// use std::sync::Arc;
1923 /// use vulkano::memory::allocator::{
1924 /// GenericMemoryAllocator, GenericMemoryAllocatorCreateInfo, PoolAllocator,
1925 /// };
1926 ///
1927 /// # let device: std::sync::Arc<vulkano::device::Device> = return;
1928 /// let memory_allocator = GenericMemoryAllocator::<Arc<PoolAllocator<{ 64 * 1024 }>>>::new(
1929 /// device.clone(),
1930 /// GenericMemoryAllocatorCreateInfo {
1931 /// block_sizes: &[(0, 64 * 1024 * 1024)],
1932 /// ..Default::default()
1933 /// },
1934 /// )
1935 /// .unwrap();
1936 ///
1937 /// // Now you can use `memory_allocator` to allocate whatever it is you need.
1938 /// ```
1939 ///
1940 /// [suballocator]: Suballocator
1941 /// [free-list]: Suballocator#free-lists
1942 /// [internal fragmentation]: super#internal-fragmentation
1943 /// [external fragmentation]: super#external-fragmentation
1944 /// [the `Suballocator` implementation]: Suballocator#impl-Suballocator-for-Arc<PoolAllocator<BLOCK_SIZE>>
1945 /// [region]: Suballocator#regions
1946 /// [buffer-image granularity]: super#buffer-image-granularity
1947 /// [`Linear`]: AllocationType::Linear
1948 /// [`NonLinear`]: AllocationType::NonLinear
1949 /// [`Unknown`]: AllocationType::Unknown
1950 /// [suballocation]: SuballocationCreateInfo
1951 /// [alignment requirements]: super#memory-requirements
1952 /// [align]: super#alignment
1953 /// [hierarchy]: Suballocator#memory-hierarchies
1954 /// [`GenericMemoryAllocator`]: super::GenericMemoryAllocator
1955 #[derive(Debug)]
1956 #[repr(transparent)]
1957 pub struct PoolAllocator<const BLOCK_SIZE: DeviceSize> {
1958 inner: PoolAllocatorInner,
1959 }
1960
1961 impl<const BLOCK_SIZE: DeviceSize> PoolAllocator<BLOCK_SIZE> {
1962 /// Creates a new `PoolAllocator` for the given [region].
1963 ///
1964 /// # Panics
1965 ///
1966 /// - Panics if `region.size < BLOCK_SIZE`.
1967 /// - Panics if `region` is a [dedicated allocation].
1968 ///
1969 /// [region]: Suballocator#regions
1970 /// [dedicated allocation]: MemoryAlloc::is_dedicated
1971 #[inline]
1972 pub fn new(
1973 region: MemoryAlloc,
1974 #[cfg(test)] buffer_image_granularity: DeviceAlignment,
1975 ) -> Arc<Self> {
1976 Arc::new(PoolAllocator {
1977 inner: PoolAllocatorInner::new(
1978 region,
1979 BLOCK_SIZE,
1980 #[cfg(test)]
1981 buffer_image_granularity,
1982 ),
1983 })
1984 }
1985
1986 /// Size of a block. Can be bigger than `BLOCK_SIZE` due to alignment requirements.
1987 #[inline]
1988 pub fn block_size(&self) -> DeviceSize {
1989 self.inner.block_size
1990 }
1991
1992 /// Total number of blocks available to the allocator. This is always equal to
1993 /// `self.region().size() / self.block_size()`.
1994 #[inline]
1995 pub fn block_count(&self) -> usize {
1996 self.inner.free_list.capacity()
1997 }
1998
1999 /// Number of free blocks.
2000 #[inline]
2001 pub fn free_count(&self) -> usize {
2002 self.inner.free_list.len()
2003 }
2004 }
2005
2006 unsafe impl<const BLOCK_SIZE: DeviceSize> Suballocator for Arc<PoolAllocator<BLOCK_SIZE>> {
2007 const IS_BLOCKING: bool = false;
2008
2009 const NEEDS_CLEANUP: bool = false;
2010
2011 #[inline]
2012 fn new(region: MemoryAlloc) -> Self {
2013 PoolAllocator::new(
2014 region,
2015 #[cfg(test)]
2016 DeviceAlignment::MIN,
2017 )
2018 }
2019
2020 /// Creates a new suballocation within the [region].
2021 ///
2022 /// > **Note**: `create_info.allocation_type` is silently ignored because all suballocations
2023 /// > inherit the allocation type from the region.
2024 ///
2025 /// # Errors
2026 ///
2027 /// - Returns [`OutOfRegionMemory`] if the [free-list] is empty.
2028 /// - Returns [`OutOfRegionMemory`] if the allocation can't fit inside a block. Only the first
2029 /// block in the free-list is tried, which means that if one block isn't usable due to
2030 /// [internal fragmentation] but a different one would be, you still get this error. See the
2031 /// [type-level documentation] for details on how to properly configure your allocator.
2032 /// - Returns [`BlockSizeExceeded`] if `create_info.size` exceeds `BLOCK_SIZE`.
2033 ///
2034 /// [region]: Suballocator#regions
2035 /// [`allocate`]: Suballocator::allocate
2036 /// [`OutOfRegionMemory`]: SuballocationCreationError::OutOfRegionMemory
2037 /// [free-list]: Suballocator#free-lists
2038 /// [internal fragmentation]: super#internal-fragmentation
2039 /// [type-level documentation]: PoolAllocator
2040 /// [`BlockSizeExceeded`]: SuballocationCreationError::BlockSizeExceeded
2041 #[inline]
2042 fn allocate(
2043 &self,
2044 create_info: SuballocationCreateInfo,
2045 ) -> Result<MemoryAlloc, SuballocationCreationError> {
2046 // SAFETY: `PoolAllocator<BLOCK_SIZE>` and `PoolAllocatorInner` have the same layout.
2047 //
2048 // This is not quite optimal, because we are always cloning the `Arc` even if allocation
2049 // fails, in which case the `Arc` gets cloned and dropped for no reason. Unfortunately,
2050 // there is currently no way to turn `&Arc<T>` into `&Arc<U>` that is sound.
2051 unsafe { Arc::from_raw(Arc::into_raw(self.clone()).cast::<PoolAllocatorInner>()) }
2052 .allocate(create_info)
2053 }
2054
2055 #[inline]
2056 fn region(&self) -> &MemoryAlloc {
2057 &self.inner.region
2058 }
2059
2060 #[inline]
2061 fn try_into_region(self) -> Result<MemoryAlloc, Self> {
2062 Arc::try_unwrap(self).map(|allocator| allocator.inner.region)
2063 }
2064
2065 #[inline]
2066 fn free_size(&self) -> DeviceSize {
2067 self.free_count() as DeviceSize * self.block_size()
2068 }
2069
2070 #[inline]
2071 fn cleanup(&mut self) {}
2072 }
2073
2074 unsafe impl<const BLOCK_SIZE: DeviceSize> DeviceOwned for PoolAllocator<BLOCK_SIZE> {
2075 #[inline]
2076 fn device(&self) -> &Arc<Device> {
2077 self.inner.device_memory.device()
2078 }
2079 }
2080
2081 #[derive(Debug)]
2082 struct PoolAllocatorInner {
2083 region: MemoryAlloc,
2084 device_memory: Arc<DeviceMemory>,
2085 atom_size: DeviceAlignment,
2086 block_size: DeviceSize,
2087 // Unsorted list of free block indices.
2088 free_list: ArrayQueue<DeviceSize>,
2089 }
2090
2091 impl PoolAllocatorInner {
2092 fn new(
2093 region: MemoryAlloc,
2094 mut block_size: DeviceSize,
2095 #[cfg(test)] buffer_image_granularity: DeviceAlignment,
2096 ) -> Self {
2097 let device_memory = region
2098 .root()
2099 .expect("dedicated allocations can't be suballocated")
2100 .clone();
2101 #[cfg(not(test))]
2102 let buffer_image_granularity = device_memory
2103 .device()
2104 .physical_device()
2105 .properties()
2106 .buffer_image_granularity;
2107 let atom_size = region.atom_size.unwrap_or(DeviceAlignment::MIN);
2108 if region.allocation_type == AllocationType::Unknown {
2109 block_size = align_up(block_size, buffer_image_granularity);
2110 }
2111
2112 let block_count = region.size / block_size;
2113 let free_list = ArrayQueue::new(block_count as usize);
2114 for i in 0..block_count {
2115 free_list.push(i).unwrap();
2116 }
2117
2118 PoolAllocatorInner {
2119 region,
2120 device_memory,
2121 atom_size,
2122 block_size,
2123 free_list,
2124 }
2125 }
2126
2127 fn allocate(
2128 self: Arc<Self>,
2129 create_info: SuballocationCreateInfo,
2130 ) -> Result<MemoryAlloc, SuballocationCreationError> {
2131 let SuballocationCreateInfo {
2132 layout,
2133 allocation_type: _,
2134 _ne: _,
2135 } = create_info;
2136
2137 let size = layout.size();
2138 let alignment = cmp::max(layout.alignment(), self.atom_size);
2139 let index = self
2140 .free_list
2141 .pop()
2142 .ok_or(SuballocationCreationError::OutOfRegionMemory)?;
2143
2144 // Indices in the free-list are confined to the range [0, region.size / block_size], so
2145 // this can't overflow.
2146 let relative_offset = index * self.block_size;
2147 // This can't overflow because offsets are confined to the size of the root allocation,
2148 // which can itself not exceed `DeviceLayout::MAX_SIZE`.
2149 let offset = align_up(self.region.offset + relative_offset, alignment);
2150
2151 if offset + size > self.region.offset + relative_offset + self.block_size {
2152 let _ = self.free_list.push(index);
2153
2154 return if size > self.block_size {
2155 Err(SuballocationCreationError::BlockSizeExceeded)
2156 } else {
2157 // There is not enough space due to alignment requirements.
2158 Err(SuballocationCreationError::OutOfRegionMemory)
2159 };
2160 }
2161
2162 let mapped_ptr = self.region.mapped_ptr.map(|ptr| {
2163 // SAFETY: Allocation sizes are guaranteed to not exceed `isize::MAX` when they have a
2164 // mapped pointer, and the original pointer was handed to us from the Vulkan
2165 // implementation, so the offset better be in range.
2166 let ptr = unsafe { ptr.as_ptr().offset(relative_offset as isize) };
2167
2168 // SAFETY: Same as the previous.
2169 unsafe { NonNull::new_unchecked(ptr) }
2170 });
2171
2172 Ok(MemoryAlloc {
2173 offset,
2174 size,
2175 allocation_type: self.region.allocation_type,
2176 mapped_ptr,
2177 atom_size: self.region.atom_size,
2178 parent: AllocParent::Pool {
2179 allocator: self,
2180 index,
2181 },
2182 })
2183 }
2184
2185 /// # Safety
2186 ///
2187 /// - `index` must refer to an occupied suballocation allocated by `self`.
2188 unsafe fn free(&self, index: DeviceSize) {
2189 let _ = self.free_list.push(index);
2190 }
2191 }
2192
2193 /// A [suballocator] which can allocate dynamically, but can only free all allocations at once.
2194 ///
2195 /// With bump allocation, the used up space increases linearly as allocations are made and
2196 /// allocations can never be freed individually, which is why this algorithm is also called *linear
2197 /// allocation*. It is also known as *arena allocation*.
2198 ///
2199 /// `BumpAllocator`s are best suited for very short-lived (say a few frames at best) resources that
2200 /// need to be allocated often (say each frame), to really take advantage of the performance gains.
2201 /// For creating long-lived allocations, [`FreeListAllocator`] is best suited. The way you would
2202 /// typically use this allocator is to have one for each frame in flight. At the start of a frame,
2203 /// you reset it and allocate your resources with it. You write to the resources, render with them,
2204 /// and drop them at the end of the frame.
2205 ///
2206 /// See also [the `Suballocator` implementation].
2207 ///
2208 /// # Algorithm
2209 ///
2210 /// What happens is that every time you make an allocation, you receive one with an offset
2211 /// corresponding to the *free start* within the [region], and then the free start is *bumped*, so
2212 /// that following allocations wouldn't alias it. As you can imagine, this is **extremely fast**,
2213 /// because it doesn't need to keep a [free-list]. It only needs to do a few additions and
2214 /// comparisons. But beware, **fast is about all this is**. It is horribly memory inefficient when
2215 /// used wrong, and is very susceptible to [memory leaks].
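///
/// A sketch of the core bump logic with plain integers (it ignores the buffer-image granularity
/// handling and the atomic state packing that the real allocator uses):
///
/// ```
/// fn bump(free_start: &mut u64, region_size: u64, size: u64, alignment: u64) -> Option<u64> {
///     // Align the current free start up to the requested alignment.
///     let offset = (*free_start + alignment - 1) & !(alignment - 1);
///
///     if offset + size > region_size {
///         return None;
///     }
///
///     // Bump the free start past the new allocation.
///     *free_start = offset + size;
///
///     Some(offset)
/// }
///
/// let mut free_start = 0;
/// assert_eq!(bump(&mut free_start, 1024, 100, 64), Some(0));
/// assert_eq!(bump(&mut free_start, 1024, 100, 64), Some(128));
/// assert_eq!(bump(&mut free_start, 1024, 1000, 64), None);
/// ```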
2216 ///
2217 /// Once you know that you are done with the allocations, meaning you know they have all been
2218 /// dropped, you can safely reset the allocator using the [`try_reset`] method as long as the
2219 /// allocator is not shared between threads. It is hard to safely reset a bump allocator that is
2220 /// used concurrently. In such a scenario it's best not to reset it at all and instead drop it once
2221 /// it reaches the end of the [region], freeing the region to a higher level in the [hierarchy]
2222 /// once all threads have dropped their reference to the allocator. This is one of the reasons you
2223 /// are generally advised to use one `BumpAllocator` per thread if you can.
2224 ///
2225 /// # Efficiency
2226 ///
2227 /// Allocation is *O*(1), and so is resetting the allocator (freeing all allocations). Allocation
2228 /// is always lock-free, and most of the time even wait-free. The only case in which it is not
2229 /// wait-free is if a lot of allocations are made concurrently, which results in CPU-level
2230 /// contention. Therefore, if you for example need to allocate a lot of buffers each frame from
2231 /// multiple threads, you might get better performance by using one `BumpAllocator` per thread.
2232 ///
2233 /// The reason synchronization can be avoided entirely is that the created allocations can be
2234 /// dropped without needing to talk back to the allocator to free anything. The other allocation
2235 /// algorithms all have a free-list which needs to be modified once an allocation is dropped. Since
2236 /// Vulkano's buffers and images are `Sync`, that means that even if the allocator only allocates
2237 /// from one thread, it can still be used to free from multiple threads.
2238 ///
2239 /// [suballocator]: Suballocator
2240 /// [the `Suballocator` implementation]: Suballocator#impl-Suballocator-for-Arc<BumpAllocator>
2241 /// [region]: Suballocator#regions
2242 /// [free-list]: Suballocator#free-lists
2243 /// [memory leaks]: super#leakage
2244 /// [`try_reset`]: Self::try_reset
2245 /// [hierarchy]: Suballocator#memory-hierarchies
2246 #[derive(Debug)]
2247 pub struct BumpAllocator {
2248 region: MemoryAlloc,
2249 device_memory: Arc<DeviceMemory>,
2250 buffer_image_granularity: DeviceAlignment,
2251 atom_size: DeviceAlignment,
2252 // Encodes the previous allocation type in the 2 least significant bits and the free start in
2253 // the rest.
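// For example, a free start of 256 following a `Linear` allocation (discriminant 1) is stored
// as `(256 << 2) | 1`.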
2254 state: AtomicU64,
2255 }
2256
2257 impl BumpAllocator {
2258 /// Creates a new `BumpAllocator` for the given [region].
2259 ///
2260 /// # Panics
2261 ///
2262 /// - Panics if `region` is a [dedicated allocation].
2263 /// - Panics if `region.size` exceeds `DeviceLayout::MAX_SIZE >> 2`.
2264 ///
2265 /// [region]: Suballocator#regions
2266 /// [dedicated allocation]: MemoryAlloc::is_dedicated
2267 pub fn new(region: MemoryAlloc) -> Arc<Self> {
2268 // Sanity check: this would lead to UB because of the left-shifting by 2 needed to encode
2269 // the free-start into the state.
2270 assert!(region.size <= (DeviceLayout::MAX_SIZE >> 2));
2271
2272 let device_memory = region
2273 .root()
2274 .expect("dedicated allocations can't be suballocated")
2275 .clone();
2276 let buffer_image_granularity = device_memory
2277 .device()
2278 .physical_device()
2279 .properties()
2280 .buffer_image_granularity;
2281 let atom_size = region.atom_size.unwrap_or(DeviceAlignment::MIN);
2282 let state = AtomicU64::new(region.allocation_type as DeviceSize);
2283
2284 Arc::new(BumpAllocator {
2285 region,
2286 device_memory,
2287 buffer_image_granularity,
2288 atom_size,
2289 state,
2290 })
2291 }
2292
2293 /// Resets the free-start back to the beginning of the [region] if there are no other strong
2294 /// references to the allocator.
2295 ///
2296 /// [region]: Suballocator#regions
2297 #[inline]
2298 pub fn try_reset(self: &mut Arc<Self>) -> Result<(), BumpAllocatorResetError> {
2299 Arc::get_mut(self)
2300 .map(|allocator| {
2301 *allocator.state.get_mut() = allocator.region.allocation_type as DeviceSize;
2302 })
2303 .ok_or(BumpAllocatorResetError)
2304 }
2305
2306 /// Resets the free-start to the beginning of the [region] without checking if there are other
2307 /// strong references to the allocator.
2308 ///
2309 /// This could be useful if you cloned the [`Arc`] yourself, and can guarantee that no
2310 /// allocations currently hold a reference to it.
2311 ///
2312 /// As a safe alternative, you can let the `Arc` do all the work. Simply drop it once it
2313 /// reaches the end of the region. After all threads do that, the region will be freed to the
2314 /// next level up the [hierarchy]. If you only use the allocator on one thread and need shared
2315 /// ownership, you can use `Rc<RefCell<Arc<BumpAllocator>>>` together with [`try_reset`] for a
2316 /// safe alternative as well.
2317 ///
2318 /// # Safety
2319 ///
2320 /// - All allocations made with the allocator must have been dropped.
2321 ///
2322 /// [region]: Suballocator#regions
2323 /// [hierarchy]: Suballocator#memory-hierarchies
2324 /// [`try_reset`]: Self::try_reset
2325 #[inline]
2326 pub unsafe fn reset_unchecked(&self) {
2327 self.state
2328 .store(self.region.allocation_type as DeviceSize, Ordering::Release);
2329 }
2330 }
2331
2332 unsafe impl Suballocator for Arc<BumpAllocator> {
2333 const IS_BLOCKING: bool = false;
2334
2335 const NEEDS_CLEANUP: bool = true;
2336
2337 #[inline]
2338 fn new(region: MemoryAlloc) -> Self {
2339 BumpAllocator::new(region)
2340 }
2341
2342 /// Creates a new suballocation within the [region].
2343 ///
2344 /// # Errors
2345 ///
2346 /// - Returns [`OutOfRegionMemory`] if the requested allocation can't fit in the free space
2347 /// remaining in the region.
2348 ///
2349 /// [region]: Suballocator#regions
2350 /// [`allocate`]: Suballocator::allocate
2351 /// [`OutOfRegionMemory`]: SuballocationCreationError::OutOfRegionMemory
2352 #[inline]
2353 fn allocate(
2354 &self,
2355 create_info: SuballocationCreateInfo,
2356 ) -> Result<MemoryAlloc, SuballocationCreationError> {
2357 const SPIN_LIMIT: u32 = 6;
2358
2359 // NOTE(Marc): The following code is a minimal version of `Backoff` taken from
2360 // crossbeam_utils v0.8.11, because we didn't want to add a dependency for a couple lines
2361 // that are used in one place only.
2362 /// Original documentation:
2363 /// https://docs.rs/crossbeam-utils/0.8.11/crossbeam_utils/struct.Backoff.html
2364 struct Backoff {
2365 step: Cell<u32>,
2366 }
2367
2368 impl Backoff {
2369 fn new() -> Self {
2370 Backoff { step: Cell::new(0) }
2371 }
2372
2373 fn spin(&self) {
2374 for _ in 0..1 << self.step.get().min(SPIN_LIMIT) {
2375 core::hint::spin_loop();
2376 }
2377
2378 if self.step.get() <= SPIN_LIMIT {
2379 self.step.set(self.step.get() + 1);
2380 }
2381 }
2382 }
2383
2384 fn has_granularity_conflict(prev_ty: AllocationType, ty: AllocationType) -> bool {
2385 prev_ty == AllocationType::Unknown || prev_ty != ty
2386 }
2387
2388 let SuballocationCreateInfo {
2389 layout,
2390 allocation_type,
2391 _ne: _,
2392 } = create_info;
2393
2394 let size = layout.size();
2395 let alignment = cmp::max(layout.alignment(), self.atom_size);
2396 let backoff = Backoff::new();
2397 let mut state = self.state.load(Ordering::Relaxed);
2398
2399 loop {
2400 let free_start = state >> 2;
2401 let prev_alloc_type = match state & 0b11 {
2402 0 => AllocationType::Unknown,
2403 1 => AllocationType::Linear,
2404 2 => AllocationType::NonLinear,
2405 _ => unreachable!(),
2406 };
2407
2408 // These can't overflow because offsets are constrained by the size of the root
2409 // allocation, which can itself not exceed `DeviceLayout::MAX_SIZE`.
2410 let prev_end = self.region.offset + free_start;
2411 let mut offset = align_up(prev_end, alignment);
2412
2413 if prev_end > 0
2414 && are_blocks_on_same_page(0, prev_end, offset, self.buffer_image_granularity)
2415 && has_granularity_conflict(prev_alloc_type, allocation_type)
2416 {
2417 offset = align_up(offset, self.buffer_image_granularity);
2418 }
2419
2420 let relative_offset = offset - self.region.offset;
2421
2422 let free_start = relative_offset + size;
2423
2424 if free_start > self.region.size {
2425 return Err(SuballocationCreationError::OutOfRegionMemory);
2426 }
2427
2428 // This can't discard any bits because we checked that `region.size` does not exceed
2429 // `DeviceLayout::MAX_SIZE >> 2`.
2430 let new_state = free_start << 2 | allocation_type as DeviceSize;
2431
2432 match self.state.compare_exchange_weak(
2433 state,
2434 new_state,
2435 Ordering::Release,
2436 Ordering::Relaxed,
2437 ) {
2438 Ok(_) => {
2439 let mapped_ptr = self.region.mapped_ptr.map(|ptr| {
2440 // SAFETY: Allocation sizes are guaranteed to not exceed `isize::MAX` when
2441 // they have a mapped pointer, and the original pointer was handed to us
2442 // from the Vulkan implementation, so the offset better be in range.
2443 let ptr = unsafe { ptr.as_ptr().offset(relative_offset as isize) };
2444
2445 // SAFETY: Same as the previous.
2446 unsafe { NonNull::new_unchecked(ptr) }
2447 });
2448
2449 return Ok(MemoryAlloc {
2450 offset,
2451 size,
2452 allocation_type,
2453 mapped_ptr,
2454 atom_size: self.region.atom_size,
2455 parent: AllocParent::Bump(self.clone()),
2456 });
2457 }
2458 Err(new_state) => {
2459 state = new_state;
2460 backoff.spin();
2461 }
2462 }
2463 }
2464 }
2465
2466 #[inline]
2467 fn region(&self) -> &MemoryAlloc {
2468 &self.region
2469 }
2470
2471 #[inline]
2472 fn try_into_region(self) -> Result<MemoryAlloc, Self> {
2473 Arc::try_unwrap(self).map(|allocator| allocator.region)
2474 }
2475
2476 #[inline]
2477 fn free_size(&self) -> DeviceSize {
2478 self.region.size - (self.state.load(Ordering::Acquire) >> 2)
2479 }
2480
2481 #[inline]
2482 fn cleanup(&mut self) {
2483 let _ = self.try_reset();
2484 }
2485 }
2486
2487 unsafe impl DeviceOwned for BumpAllocator {
2488 #[inline]
2489 fn device(&self) -> &Arc<Device> {
2490 self.device_memory.device()
2491 }
2492 }
2493
2494 /// Error that can be returned when resetting the [`BumpAllocator`].
2495 #[derive(Clone, Debug, PartialEq, Eq)]
2496 pub struct BumpAllocatorResetError;
2497
2498 impl Error for BumpAllocatorResetError {}
2499
2500 impl Display for BumpAllocatorResetError {
2501 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2502 f.write_str("the allocator is still in use")
2503 }
2504 }
2505
2506 /// Checks if resources A and B share a page.
2507 ///
2508 /// > **Note**: Assumes `a_offset + a_size > 0` and `a_offset + a_size <= b_offset`.
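///
/// For example, with a page size of 4KiB, a resource ending at byte 4095 and one starting at
/// byte 4096 do not share a page, whereas a resource ending at byte 4100 and one starting at
/// byte 4200 do.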
2509 fn are_blocks_on_same_page(
2510 a_offset: DeviceSize,
2511 a_size: DeviceSize,
2512 b_offset: DeviceSize,
2513 page_size: DeviceAlignment,
2514 ) -> bool {
2515 debug_assert!(a_offset + a_size > 0);
2516 debug_assert!(a_offset + a_size <= b_offset);
2517
2518 let a_end = a_offset + a_size - 1;
2519 let a_end_page = align_down(a_end, page_size);
2520 let b_start_page = align_down(b_offset, page_size);
2521
2522 a_end_page == b_start_page
2523 }
2524
2525 /// Allocators for memory on the host, used to speed up the allocators for the device.
2526 mod host {
2527 use std::num::NonZeroUsize;
2528
2529 /// Allocates objects from a pool on the host, which has the following benefits:
2530 ///
2531 /// - Allocation is much faster because there is no need to consult the global allocator or, even
2532 /// worse, the operating system, each time a small object needs to be created.
2533 /// - Freeing is extremely fast, because the whole pool can be dropped at once. This is
2534 /// particularly useful for linked structures, whose nodes need to be freed one-by-one by
2535 /// traversing the whole structure otherwise.
2536 /// - Cache locality is somewhat improved for linked structures with few nodes.
2537 ///
2538 /// The allocator doesn't hand out pointers but rather IDs that are relative to the pool. This
2539 /// simplifies the logic because the pool can easily be moved and hence also resized, but the
2540 /// downside is that the whole pool must be copied when it runs out of memory. It is therefore
2541 /// best to start out with a sufficiently large capacity.
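///
/// A short, illustrative sketch of the intended usage (assuming the safety requirements
/// documented on each method are upheld):
///
/// ```
/// let mut pool = PoolAllocator::new(4);
/// let id = pool.allocate(42u32);
///
/// // SAFETY: `id` was just allocated by `pool` and has not been freed.
/// unsafe {
///     assert_eq!(pool.get(id), 42);
///     pool.free(id);
/// }
/// ```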
2542 #[derive(Debug)]
2543 pub(super) struct PoolAllocator<T> {
2544 pool: Vec<T>,
2545 // Unsorted list of free slots.
2546 free_list: Vec<SlotId>,
2547 }
2548
2549 impl<T> PoolAllocator<T> {
2550 pub fn new(capacity: usize) -> Self {
2551 debug_assert!(capacity > 0);
2552
2553 PoolAllocator {
2554 pool: Vec::with_capacity(capacity),
2555 free_list: Vec::new(),
2556 }
2557 }
2558
2559 /// Allocates a slot and initializes it with the provided value. Returns the ID of the slot.
2560 pub fn allocate(&mut self, val: T) -> SlotId {
2561 if let Some(id) = self.free_list.pop() {
2562 *unsafe { self.get_mut(id) } = val;
2563
2564 id
2565 } else {
2566 self.pool.push(val);
2567
2568 // SAFETY: `self.pool` is guaranteed to be non-empty.
2569 SlotId(unsafe { NonZeroUsize::new_unchecked(self.pool.len()) })
2570 }
2571 }
2572
2573 /// Returns the slot with the given ID to the allocator to be reused.
2574 ///
2575 /// # Safety
2576 ///
2577 /// - `id` must not be freed again, as that would constitute a double-free.
2578 /// - `id` must not be used to access the slot again afterward, as that would constitute
2579 /// a use-after-free.
2580 pub unsafe fn free(&mut self, id: SlotId) {
2581 debug_assert!(!self.free_list.contains(&id));
2582 self.free_list.push(id);
2583 }
2584
2585 /// Returns a mutable reference to the slot with the given ID.
2586 ///
2587 /// # Safety
2588 ///
2589 /// - `SlotId` must have been allocated by `self`.
2590 pub unsafe fn get_mut(&mut self, id: SlotId) -> &mut T {
2591 debug_assert!(!self.free_list.contains(&id));
2592 debug_assert!(id.0.get() <= self.pool.len());
2593
2594 // SAFETY:
2595 // - The caller must uphold that the `SlotId` was allocated with this allocator.
2596 // - The only way to obtain a `SlotId` is through `Self::allocate`.
2597 // - `Self::allocate` returns `SlotId`s in the range [1, self.pool.len()].
2598 // - `self.pool` only grows and never shrinks.
2599 self.pool.get_unchecked_mut(id.0.get() - 1)
2600 }
2601 }
2602
2603 impl<T: Copy> PoolAllocator<T> {
2604 /// Returns a copy of the slot with the given ID.
2605 ///
2606 /// # Safety
2607 ///
2608 /// - `SlotId` must have been allocated by `self`.
2609 pub unsafe fn get(&self, id: SlotId) -> T {
2610 debug_assert!(!self.free_list.contains(&id));
2611 debug_assert!(id.0.get() <= self.pool.len());
2612
2613 // SAFETY: Same as the `get_unchecked_mut` above.
2614 *self.pool.get_unchecked(id.0.get() - 1)
2615 }
2616 }
2617
2618 /// ID of a slot in the pool of the `host::PoolAllocator`. This is used to limit the visibility
2619 /// of the actual ID to this `host` module, making it easier to reason about unsafe code.
2620 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
2621 pub(super) struct SlotId(NonZeroUsize);
2622 }
2623
2624 #[cfg(test)]
2625 mod tests {
2626 use super::*;
2627 use crate::memory::MemoryAllocateInfo;
2628 use std::thread;
2629
2630 #[test]
2631 fn memory_alloc_set_offset() {
2632 let (device, _) = gfx_dev_and_queue!();
2633 let memory_type_index = device
2634 .physical_device()
2635 .memory_properties()
2636 .memory_types
2637 .iter()
2638 .position(|memory_type| {
2639 memory_type
2640 .property_flags
2641 .contains(MemoryPropertyFlags::HOST_VISIBLE)
2642 })
2643 .unwrap() as u32;
2644 let mut alloc = MemoryAlloc::new(
2645 DeviceMemory::allocate(
2646 device,
2647 MemoryAllocateInfo {
2648 memory_type_index,
2649 allocation_size: 1024,
2650 ..Default::default()
2651 },
2652 )
2653 .unwrap(),
2654 )
2655 .unwrap();
2656 let ptr = alloc.mapped_ptr().unwrap().as_ptr();
2657
2658 unsafe {
2659 alloc.set_offset(16);
2660 assert_eq!(alloc.mapped_ptr().unwrap().as_ptr(), ptr.offset(16));
2661 alloc.set_offset(0);
2662 assert_eq!(alloc.mapped_ptr().unwrap().as_ptr(), ptr.offset(0));
2663 alloc.set_offset(32);
2664 assert_eq!(alloc.mapped_ptr().unwrap().as_ptr(), ptr.offset(32));
2665 }
2666 }
2667
2668 #[test]
2669 fn free_list_allocator_capacity() {
2670 const THREADS: DeviceSize = 12;
2671 const ALLOCATIONS_PER_THREAD: DeviceSize = 100;
2672 const ALLOCATION_STEP: DeviceSize = 117;
2673 const REGION_SIZE: DeviceSize =
2674 (ALLOCATION_STEP * (THREADS + 1) * THREADS / 2) * ALLOCATIONS_PER_THREAD;
2675
2676 let allocator = dummy_allocator!(FreeListAllocator, REGION_SIZE);
2677 let allocs = ArrayQueue::new((ALLOCATIONS_PER_THREAD * THREADS) as usize);
2678
2679 // Using threads to randomize allocation order.
2680 thread::scope(|scope| {
2681 for i in 1..=THREADS {
2682 let (allocator, allocs) = (&allocator, &allocs);
2683
2684 scope.spawn(move || {
2685 let info = dummy_info!(i * ALLOCATION_STEP);
2686
2687 for _ in 0..ALLOCATIONS_PER_THREAD {
2688 allocs
2689 .push(allocator.allocate(info.clone()).unwrap())
2690 .unwrap();
2691 }
2692 });
2693 }
2694 });
2695
2696 assert!(allocator.allocate(dummy_info!()).is_err());
2697 assert!(allocator.free_size() == 0);
2698
2699 drop(allocs);
2700 assert!(allocator.free_size() == REGION_SIZE);
2701 assert!(allocator.allocate(dummy_info!(REGION_SIZE)).is_ok());
2702 }
2703
2704 #[test]
2705 fn free_list_allocator_respects_alignment() {
2706 const REGION_SIZE: DeviceSize = 10 * 256;
2707
2708 let info = dummy_info!(1, 256);
2709
2710 let allocator = dummy_allocator!(FreeListAllocator, REGION_SIZE);
2711 let mut allocs = Vec::with_capacity(10);
2712
2713 for _ in 0..10 {
2714 allocs.push(allocator.allocate(info.clone()).unwrap());
2715 }
2716
2717 assert!(allocator.allocate(info).is_err());
2718 assert!(allocator.free_size() == REGION_SIZE - 10);
2719 }
2720
2721 #[test]
2722 fn free_list_allocator_respects_granularity() {
2723 const GRANULARITY: DeviceSize = 16;
2724 const REGION_SIZE: DeviceSize = 2 * GRANULARITY;
2725
2726 let allocator = dummy_allocator!(FreeListAllocator, REGION_SIZE, GRANULARITY);
2727 let mut linear_allocs = Vec::with_capacity(GRANULARITY as usize);
2728 let mut nonlinear_allocs = Vec::with_capacity(GRANULARITY as usize);
2729
2730 for i in 0..REGION_SIZE {
2731 if i % 2 == 0 {
2732 linear_allocs.push(allocator.allocate(dummy_info_linear!()).unwrap());
2733 } else {
2734 nonlinear_allocs.push(allocator.allocate(dummy_info_nonlinear!()).unwrap());
2735 }
2736 }
2737
2738 assert!(allocator.allocate(dummy_info_linear!()).is_err());
2739 assert!(allocator.free_size() == 0);
2740
2741 drop(linear_allocs);
2742 assert!(allocator.allocate(dummy_info!(GRANULARITY)).is_ok());
2743
2744 let _alloc = allocator.allocate(dummy_info!()).unwrap();
2745 assert!(allocator.allocate(dummy_info!()).is_err());
2746 assert!(allocator.allocate(dummy_info_linear!()).is_err());
2747 }
2748
2749 #[test]
2750 fn pool_allocator_capacity() {
2751 const BLOCK_SIZE: DeviceSize = 1024;
2752
2753 fn dummy_allocator(
2754 device: Arc<Device>,
2755 allocation_size: DeviceSize,
2756 ) -> Arc<PoolAllocator<BLOCK_SIZE>> {
2757 let device_memory = DeviceMemory::allocate(
2758 device,
2759 MemoryAllocateInfo {
2760 allocation_size,
2761 memory_type_index: 0,
2762 ..Default::default()
2763 },
2764 )
2765 .unwrap();
2766
2767 PoolAllocator::new(
2768 MemoryAlloc::new(device_memory).unwrap(),
2769 DeviceAlignment::new(1).unwrap(),
2770 )
2771 }
2772
2773 let (device, _) = gfx_dev_and_queue!();
2774
2775 assert_should_panic!({ dummy_allocator(device.clone(), BLOCK_SIZE - 1) });
2776
2777 let allocator = dummy_allocator(device.clone(), 2 * BLOCK_SIZE - 1);
2778 {
2779 let alloc = allocator.allocate(dummy_info!()).unwrap();
2780 assert!(allocator.allocate(dummy_info!()).is_err());
2781
2782 drop(alloc);
2783 let _alloc = allocator.allocate(dummy_info!()).unwrap();
2784 }
2785
2786 let allocator = dummy_allocator(device, 2 * BLOCK_SIZE);
2787 {
2788 let alloc1 = allocator.allocate(dummy_info!()).unwrap();
2789 let alloc2 = allocator.allocate(dummy_info!()).unwrap();
2790 assert!(allocator.allocate(dummy_info!()).is_err());
2791
2792 drop(alloc1);
2793 let alloc1 = allocator.allocate(dummy_info!()).unwrap();
2794 assert!(allocator.allocate(dummy_info!()).is_err());
2795
2796 drop(alloc1);
2797 drop(alloc2);
2798 let _alloc1 = allocator.allocate(dummy_info!()).unwrap();
2799 let _alloc2 = allocator.allocate(dummy_info!()).unwrap();
2800 }
2801 }
2802
2803 #[test]
2804 fn pool_allocator_respects_alignment() {
2805 const BLOCK_SIZE: DeviceSize = 1024 + 128;
2806
2807 let info_a = dummy_info!(BLOCK_SIZE, 256);
2808 let info_b = dummy_info!(1024, 256);
2809
2810 let allocator = {
2811 let (device, _) = gfx_dev_and_queue!();
2812 let device_memory = DeviceMemory::allocate(
2813 device,
2814 MemoryAllocateInfo {
2815 allocation_size: 10 * BLOCK_SIZE,
2816 memory_type_index: 0,
2817 ..Default::default()
2818 },
2819 )
2820 .unwrap();
2821
2822 PoolAllocator::<BLOCK_SIZE>::new(
2823 MemoryAlloc::new(device_memory).unwrap(),
2824 DeviceAlignment::new(1).unwrap(),
2825 )
2826 };
2827
2828 // This uses the fact that block indices are inserted into the free-list in order, so
2829 // the first allocation succeeds because the block has an even index, while the second
2830 // has an odd index.
2831 allocator.allocate(info_a.clone()).unwrap();
2832 assert!(allocator.allocate(info_a.clone()).is_err());
2833 allocator.allocate(info_a.clone()).unwrap();
2834 assert!(allocator.allocate(info_a).is_err());
2835
2836 for _ in 0..10 {
2837 allocator.allocate(info_b.clone()).unwrap();
2838 }
2839 }
2840
2841 #[test]
2842 fn pool_allocator_respects_granularity() {
2843 const BLOCK_SIZE: DeviceSize = 128;
2844
2845 fn dummy_allocator(
2846 device: Arc<Device>,
2847 allocation_type: AllocationType,
2848 ) -> Arc<PoolAllocator<BLOCK_SIZE>> {
2849 let device_memory = DeviceMemory::allocate(
2850 device,
2851 MemoryAllocateInfo {
2852 allocation_size: 1024,
2853 memory_type_index: 0,
2854 ..Default::default()
2855 },
2856 )
2857 .unwrap();
2858 let mut region = MemoryAlloc::new(device_memory).unwrap();
2859 unsafe { region.set_allocation_type(allocation_type) };
2860
2861 PoolAllocator::new(region, DeviceAlignment::new(256).unwrap())
2862 }
2863
2864 let (device, _) = gfx_dev_and_queue!();
2865
2866 let allocator = dummy_allocator(device.clone(), AllocationType::Unknown);
2867 assert!(allocator.block_count() == 4);
2868
2869 let allocator = dummy_allocator(device.clone(), AllocationType::Linear);
2870 assert!(allocator.block_count() == 8);
2871
2872 let allocator = dummy_allocator(device, AllocationType::NonLinear);
2873 assert!(allocator.block_count() == 8);
2874 }
2875
2876 #[test]
2877 fn buddy_allocator_capacity() {
2878 const MAX_ORDER: usize = 10;
2879 const REGION_SIZE: DeviceSize = BuddyAllocator::MIN_NODE_SIZE << MAX_ORDER;
2880
2881 let allocator = dummy_allocator!(BuddyAllocator, REGION_SIZE);
2882 let mut allocs = Vec::with_capacity(1 << MAX_ORDER);
2883
2884 for order in 0..=MAX_ORDER {
2885 let size = BuddyAllocator::MIN_NODE_SIZE << order;
2886
2887 for _ in 0..1 << (MAX_ORDER - order) {
2888 allocs.push(allocator.allocate(dummy_info!(size)).unwrap());
2889 }
2890
2891 assert!(allocator.allocate(dummy_info!()).is_err());
2892 assert!(allocator.free_size() == 0);
2893 allocs.clear();
2894 }
2895
2896 let mut orders = (0..MAX_ORDER).collect::<Vec<_>>();
2897
2898 for mid in 0..MAX_ORDER {
2899 orders.rotate_left(mid);
2900
2901 for &order in &orders {
2902 let size = BuddyAllocator::MIN_NODE_SIZE << order;
2903 allocs.push(allocator.allocate(dummy_info!(size)).unwrap());
2904 }
2905
2906 let _alloc = allocator.allocate(dummy_info!()).unwrap();
2907 assert!(allocator.allocate(dummy_info!()).is_err());
2908 assert!(allocator.free_size() == 0);
2909 allocs.clear();
2910 }
2911 }
2912
    #[test]
    fn buddy_allocator_respects_alignment() {
        const REGION_SIZE: DeviceSize = 4096;

        let allocator = dummy_allocator!(BuddyAllocator, REGION_SIZE);

        {
            let info = dummy_info!(1, 4096);

            let _alloc = allocator.allocate(info.clone()).unwrap();
            assert!(allocator.allocate(info).is_err());
            assert!(allocator.free_size() == REGION_SIZE - BuddyAllocator::MIN_NODE_SIZE);
        }

        {
            let info_a = dummy_info!(1, 256);
            let allocations_a = REGION_SIZE / info_a.layout.alignment().as_devicesize();
            let info_b = dummy_info!(1, 16);
            let allocations_b =
                REGION_SIZE / info_b.layout.alignment().as_devicesize() - allocations_a;

            let mut allocs =
                Vec::with_capacity((REGION_SIZE / BuddyAllocator::MIN_NODE_SIZE) as usize);

            for _ in 0..allocations_a {
                allocs.push(allocator.allocate(info_a.clone()).unwrap());
            }

            assert!(allocator.allocate(info_a).is_err());
            assert!(
                allocator.free_size()
                    == REGION_SIZE - allocations_a * BuddyAllocator::MIN_NODE_SIZE
            );

            for _ in 0..allocations_b {
                allocs.push(allocator.allocate(info_b.clone()).unwrap());
            }

            assert!(allocator.allocate(dummy_info!()).is_err());
            assert!(allocator.free_size() == 0);
        }
    }

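    // As the assertions below show, suballocations of unknown type each occupy a whole
    // granularity-sized (256-byte) node, whereas suballocations that are all linear can be
    // packed into minimum-size nodes.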
    #[test]
    fn buddy_allocator_respects_granularity() {
        const GRANULARITY: DeviceSize = 256;
        const REGION_SIZE: DeviceSize = 2 * GRANULARITY;

        let allocator = dummy_allocator!(BuddyAllocator, REGION_SIZE, GRANULARITY);

        {
            const ALLOCATIONS: DeviceSize = REGION_SIZE / BuddyAllocator::MIN_NODE_SIZE;

            let mut allocs = Vec::with_capacity(ALLOCATIONS as usize);
            for _ in 0..ALLOCATIONS {
                allocs.push(allocator.allocate(dummy_info_linear!()).unwrap());
            }

            assert!(allocator.allocate(dummy_info_linear!()).is_err());
            assert!(allocator.free_size() == 0);
        }

        {
            let _alloc1 = allocator.allocate(dummy_info!()).unwrap();
            let _alloc2 = allocator.allocate(dummy_info!()).unwrap();
            assert!(allocator.allocate(dummy_info!()).is_err());
            assert!(allocator.free_size() == 0);
        }
    }

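    // Fills a 160-byte region with ten size-1, 16-byte-aligned allocations (one per 16-byte
    // slot), then uses unaligned size-1 allocations to consume the padding left in between.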
    #[test]
    fn bump_allocator_respects_alignment() {
        const ALIGNMENT: DeviceSize = 16;

        let info = dummy_info!(1, ALIGNMENT);
        let allocator = dummy_allocator!(BumpAllocator, ALIGNMENT * 10);

        for _ in 0..10 {
            allocator.allocate(info.clone()).unwrap();
        }

        assert!(allocator.allocate(info.clone()).is_err());

        for _ in 0..ALIGNMENT - 1 {
            allocator.allocate(dummy_info!()).unwrap();
        }

        assert!(allocator.allocate(info).is_err());
        assert!(allocator.free_size() == 0);
    }

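    // Each batch of 1024 size-1 allocations of a single type fills exactly one 1024-byte
    // granularity "page", so alternating the type per batch wastes no space. After the reset
    // the type alternates on every allocation, which forces a jump to the next granularity
    // boundary each time and leaves GRANULARITY - 1 bytes unused (see the final assertion).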
    #[test]
    fn bump_allocator_respects_granularity() {
        const ALLOCATIONS: DeviceSize = 10;
        const GRANULARITY: DeviceSize = 1024;

        let mut allocator =
            dummy_allocator!(BumpAllocator, GRANULARITY * ALLOCATIONS, GRANULARITY);

        for i in 0..ALLOCATIONS {
            for _ in 0..GRANULARITY {
                allocator
                    .allocate(SuballocationCreateInfo {
                        allocation_type: if i % 2 == 0 {
                            AllocationType::NonLinear
                        } else {
                            AllocationType::Linear
                        },
                        ..dummy_info!()
                    })
                    .unwrap();
            }
        }

        assert!(allocator.allocate(dummy_info_linear!()).is_err());
        assert!(allocator.free_size() == 0);

        allocator.try_reset().unwrap();

        for i in 0..ALLOCATIONS {
            allocator
                .allocate(SuballocationCreateInfo {
                    allocation_type: if i % 2 == 0 {
                        AllocationType::Linear
                    } else {
                        AllocationType::NonLinear
                    },
                    ..dummy_info!()
                })
                .unwrap();
        }

        assert!(allocator.allocate(dummy_info_linear!()).is_err());
        assert!(allocator.free_size() == GRANULARITY - 1);
    }

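    // REGION_SIZE is the exact total of the bytes requested below (thread i allocates
    // i * ALLOCATION_STEP bytes, ALLOCATIONS_PER_THREAD times), so ending up with a completely
    // full region is evidence that no concurrent allocations were lost or overlapped.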
    #[test]
    fn bump_allocator_syncness() {
        const THREADS: DeviceSize = 12;
        const ALLOCATIONS_PER_THREAD: DeviceSize = 100_000;
        const ALLOCATION_STEP: DeviceSize = 117;
        const REGION_SIZE: DeviceSize =
            (ALLOCATION_STEP * (THREADS + 1) * THREADS / 2) * ALLOCATIONS_PER_THREAD;

        let mut allocator = dummy_allocator!(BumpAllocator, REGION_SIZE);

        thread::scope(|scope| {
            for i in 1..=THREADS {
                let allocator = &allocator;

                scope.spawn(move || {
                    let info = dummy_info!(i * ALLOCATION_STEP);

                    for _ in 0..ALLOCATIONS_PER_THREAD {
                        allocator.allocate(info.clone()).unwrap();
                    }
                });
            }
        });

        assert!(allocator.allocate(dummy_info!()).is_err());
        assert!(allocator.free_size() == 0);

        allocator.try_reset().unwrap();
        assert!(allocator.free_size() == REGION_SIZE);
    }

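    // Test helper: wraps a freshly allocated `DeviceMemory` block of the given size in the
    // given suballocator type, optionally overriding the buffer-image granularity (it defaults
    // to 1 so that granularity does not interfere with tests that are not about it).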
    macro_rules! dummy_allocator {
        ($type:ty, $size:expr) => {
            dummy_allocator!($type, $size, 1)
        };
        ($type:ty, $size:expr, $granularity:expr) => {{
            let (device, _) = gfx_dev_and_queue!();
            let device_memory = DeviceMemory::allocate(
                device,
                MemoryAllocateInfo {
                    allocation_size: $size,
                    memory_type_index: 0,
                    ..Default::default()
                },
            )
            .unwrap();
            let mut allocator = <$type>::new(MemoryAlloc::new(device_memory).unwrap());
            Arc::get_mut(&mut allocator)
                .unwrap()
                .buffer_image_granularity = DeviceAlignment::new($granularity).unwrap();

            allocator
        }};
    }

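    // Test helper: builds a `SuballocationCreateInfo` with the given size and alignment (both
    // default to 1) and an `Unknown` allocation type.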
    macro_rules! dummy_info {
        () => {
            dummy_info!(1)
        };
        ($size:expr) => {
            dummy_info!($size, 1)
        };
        ($size:expr, $alignment:expr) => {
            SuballocationCreateInfo {
                layout: DeviceLayout::new(
                    NonZeroDeviceSize::new($size).unwrap(),
                    DeviceAlignment::new($alignment).unwrap(),
                )
                .unwrap(),
                allocation_type: AllocationType::Unknown,
                ..Default::default()
            }
        };
    }

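    // Variants of `dummy_info!` that fix the allocation type to `Linear` and `NonLinear`
    // respectively.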
    macro_rules! dummy_info_linear {
        ($($args:tt)*) => {
            SuballocationCreateInfo {
                allocation_type: AllocationType::Linear,
                ..dummy_info!($($args)*)
            }
        };
    }

    macro_rules! dummy_info_nonlinear {
        ($($args:tt)*) => {
            SuballocationCreateInfo {
                allocation_type: AllocationType::NonLinear,
                ..dummy_info!($($args)*)
            }
        };
    }

    pub(self) use {dummy_allocator, dummy_info, dummy_info_linear, dummy_info_nonlinear};
}