/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_heap.h"

#include "nvk_device.h"
#include "nvk_physical_device.h"
#include "nvk_queue.h"

#include "util/macros.h"

#include "nv_push.h"
#include "nv_push_cl90b5.h"

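/* A heap suballocates GPU VA from a growing set of nvkmd_mem BOs.  When
 * contiguous is set, an NVK_HEAP_MAX_SIZE VA range is reserved up front
 * and each new BO is bound at the end of what has been allocated so far,
 * so the entire heap occupies a single contiguous GPU address range.
 */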
VkResult
nvk_heap_init(struct nvk_device *dev, struct nvk_heap *heap,
              enum nvkmd_mem_flags mem_flags,
              enum nvkmd_mem_map_flags map_flags,
              uint32_t overalloc, bool contiguous)
{
   VkResult result;

   memset(heap, 0, sizeof(*heap));

   heap->mem_flags = mem_flags;
   if (map_flags)
      heap->mem_flags |= NVKMD_MEM_CAN_MAP;
   heap->map_flags = map_flags;
   heap->overalloc = overalloc;

   if (contiguous) {
      result = nvkmd_dev_alloc_va(dev->nvkmd, &dev->vk.base,
                                  0 /* va_flags */, 0 /* pte_kind */,
                                  NVK_HEAP_MAX_SIZE, 0 /* align_B */,
                                  0 /* fixed_addr */,
                                  &heap->contig_va);
      if (result != VK_SUCCESS)
         return result;
   }

   simple_mtx_init(&heap->mutex, mtx_plain);
   util_vma_heap_init(&heap->heap, 0, 0);

   heap->total_size = 0;
   heap->mem_count = 0;

   return VK_SUCCESS;
}

void
nvk_heap_finish(struct nvk_device *dev, struct nvk_heap *heap)
{
   /* Freeing the VA will unbind all the memory */
   if (heap->contig_va)
      nvkmd_va_free(heap->contig_va);

   for (uint32_t mem_idx = 0; mem_idx < heap->mem_count; mem_idx++)
      nvkmd_mem_unref(heap->mem[mem_idx].mem);

   util_vma_heap_finish(&heap->heap);
   simple_mtx_destroy(&heap->mutex);
}

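/* Offsets handed to the util_vma_heap are synthetic: the BO index (biased
 * by one) lives in bits [63:48] and the byte offset within that BO in bits
 * [47:0].  Biasing the index keeps every valid encoding non-zero, since
 * util_vma_heap_alloc() returns 0 to signal failure.
 */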
static uint64_t
encode_vma(uint32_t mem_idx, uint64_t mem_offset)
{
   assert(mem_idx < UINT16_MAX - 1);
   assert(mem_offset < (1ull << 48));
   return ((uint64_t)(mem_idx + 1) << 48) | mem_offset;
}

static uint32_t
vma_mem_idx(uint64_t offset)
{
   offset = offset >> 48;
   assert(offset > 0);
   return offset - 1;
}

static uint64_t
vma_mem_offset(uint64_t offset)
{
   return offset & BITFIELD64_MASK(48);
}

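/* Adds one BO to the heap.  The first two BOs are NVK_HEAP_MIN_SIZE and
 * each BO after that doubles in size, up to NVK_HEAP_MAX_BO_COUNT BOs in
 * total.  For contiguous heaps, the new BO is bound at the current end of
 * the reserved VA range; otherwise it lives at its own VA.
 */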
static VkResult
nvk_heap_grow_locked(struct nvk_device *dev, struct nvk_heap *heap)
{
   VkResult result;

   if (heap->mem_count >= NVK_HEAP_MAX_BO_COUNT) {
      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                       "Heap has already hit its maximum size");
   }

   /* First two BOs are MIN_SIZE, double after that */
   const uint64_t new_mem_size =
      NVK_HEAP_MIN_SIZE << (MAX2(heap->mem_count, 1) - 1);

   struct nvkmd_mem *mem;
   if (heap->map_flags) {
      result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
                                          new_mem_size, 0, heap->mem_flags,
                                          heap->map_flags, &mem);
   } else {
      result = nvkmd_dev_alloc_mem(dev->nvkmd, &dev->vk.base,
                                   new_mem_size, 0, heap->mem_flags, &mem);
   }
   if (result != VK_SUCCESS)
      return result;

   assert(mem->size_B == new_mem_size);

   uint64_t addr;
   if (heap->contig_va != NULL) {
      result = nvkmd_va_bind_mem(heap->contig_va, &dev->vk.base,
                                 heap->total_size, mem, 0, new_mem_size);
      if (result != VK_SUCCESS) {
         nvkmd_mem_unref(mem);
         return result;
      }
      addr = heap->contig_va->addr + heap->total_size;

      /* For contiguous heaps, we can now free the padding from the previous
       * BO because the BO we just added will provide the needed padding. For
       * non-contiguous heaps, we have to leave each BO padded individually.
       */
      if (heap->mem_count > 0) {
         struct nvkmd_mem *prev_mem = heap->mem[heap->mem_count - 1].mem;
         assert(heap->overalloc < prev_mem->size_B);
         const uint64_t pad_vma =
            encode_vma(heap->mem_count - 1, prev_mem->size_B - heap->overalloc);
         util_vma_heap_free(&heap->heap, pad_vma, heap->overalloc);
      }
   } else {
      addr = mem->va->addr;
   }

   uint64_t vma = encode_vma(heap->mem_count, 0);
   assert(heap->overalloc < new_mem_size);
   util_vma_heap_free(&heap->heap, vma, new_mem_size - heap->overalloc);

   heap->mem[heap->mem_count++] = (struct nvk_heap_mem) {
      .mem = mem,
      .addr = addr,
   };
   heap->total_size += new_mem_size;

   return VK_SUCCESS;
}

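/* Allocates size bytes from the heap, growing it as needed.  On success,
 * *addr_out is the GPU address of the allocation and, if map_out is
 * non-NULL, *map_out is its CPU mapping (or NULL if the backing BO has no
 * map).
 */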
static VkResult
nvk_heap_alloc_locked(struct nvk_device *dev, struct nvk_heap *heap,
                      uint64_t size, uint32_t alignment,
                      uint64_t *addr_out, void **map_out)
{
   while (1) {
      uint64_t vma = util_vma_heap_alloc(&heap->heap, size, alignment);
      if (vma != 0) {
         uint32_t mem_idx = vma_mem_idx(vma);
         uint64_t mem_offset = vma_mem_offset(vma);

         assert(mem_idx < heap->mem_count);
         assert(heap->mem[mem_idx].mem != NULL);
         assert(mem_offset + size <= heap->mem[mem_idx].mem->size_B);

         *addr_out = heap->mem[mem_idx].addr + mem_offset;
         if (map_out != NULL) {
            if (heap->mem[mem_idx].mem->map != NULL)
               *map_out = (char *)heap->mem[mem_idx].mem->map + mem_offset;
            else
               *map_out = NULL;
         }

         return VK_SUCCESS;
      }

      VkResult result = nvk_heap_grow_locked(dev, heap);
      if (result != VK_SUCCESS)
         return result;
   }
}

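/* Returns a range previously handed out by nvk_heap_alloc_locked().  The
 * backing BO is found by address and the range is given back to the VMA
 * heap; BOs themselves are only released in nvk_heap_finish().
 */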
static void
nvk_heap_free_locked(struct nvk_device *dev, struct nvk_heap *heap,
                     uint64_t addr, uint64_t size)
{
   assert(addr + size > addr);

   for (uint32_t mem_idx = 0; mem_idx < heap->mem_count; mem_idx++) {
      if (addr < heap->mem[mem_idx].addr)
         continue;

      uint64_t mem_offset = addr - heap->mem[mem_idx].addr;
      if (mem_offset >= heap->mem[mem_idx].mem->size_B)
         continue;

      assert(mem_offset + size <= heap->mem[mem_idx].mem->size_B);
      uint64_t vma = encode_vma(mem_idx, mem_offset);

      util_vma_heap_free(&heap->heap, vma, size);
      return;
   }
   assert(!"Failed to find heap BO");
}

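/* Thread-safe wrapper around nvk_heap_alloc_locked(). */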
VkResult
nvk_heap_alloc(struct nvk_device *dev, struct nvk_heap *heap,
               uint64_t size, uint32_t alignment,
               uint64_t *addr_out, void **map_out)
{
   simple_mtx_lock(&heap->mutex);
   VkResult result = nvk_heap_alloc_locked(dev, heap, size, alignment,
                                           addr_out, map_out);
   simple_mtx_unlock(&heap->mutex);

   return result;
}

VkResult
nvk_heap_upload(struct nvk_device *dev, struct nvk_heap *heap,
                const void *data, size_t size, uint32_t alignment,
                uint64_t *addr_out)
{
   simple_mtx_lock(&heap->mutex);
   void *map = NULL;
   VkResult result = nvk_heap_alloc_locked(dev, heap, size, alignment,
                                           addr_out, &map);
   simple_mtx_unlock(&heap->mutex);

   if (result != VK_SUCCESS)
      return result;

   if (map != NULL && (heap->map_flags & NVKMD_MEM_MAP_WR)) {
      /* If we have a writable CPU map, copy directly with memcpy */
      memcpy(map, data, size);
   } else {
      /* Otherwise, kick off an upload with the upload queue.
       *
       * This is a queued operation that the driver ensures happens before
       * any more client work via semaphores.  Because this is asynchronous
       * and heap allocations are synchronous, we have to be a bit careful
       * here.  The heap only ever tracks the current known CPU state of
       * everything while the upload queue makes that state valid at some
       * point in the future.
       *
       * This can be especially tricky for very fast upload/free cycles such
       * as if the client compiles a shader, throws it away without using it,
       * and then compiles another shader that ends up at the same address.
       * What makes this all correct is the fact that everything on the
       * upload queue happens in a well-defined device-wide order.  In this
       * case the first shader will get uploaded and then the second will get
       * uploaded over top of it.  As long as we don't free the memory out
       * from under the upload queue, everything will end up in the correct
       * state by the time the client's shaders actually execute.
       */
      result = nvk_upload_queue_upload(dev, &dev->upload, *addr_out, data, size);
      if (result != VK_SUCCESS) {
         nvk_heap_free(dev, heap, *addr_out, size);
         return result;
      }
   }

   return VK_SUCCESS;
}

void
nvk_heap_free(struct nvk_device *dev, struct nvk_heap *heap,
              uint64_t addr, uint64_t size)
{
   simple_mtx_lock(&heap->mutex);
   nvk_heap_free_locked(dev, heap, addr, size);
   simple_mtx_unlock(&heap->mutex);
}
276