/*
 * Copyright © 2024 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */

#include "nvk_upload_queue.h"

#include "nvk_device.h"
#include "nvk_physical_device.h"
#include "nvkmd/nvkmd.h"
#include "vk_alloc.h"

#include "nv_push.h"
#include "nv_push_cl90b5.h"

#define NVK_UPLOAD_MEM_SIZE 64*1024

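/* Upload buffers are used from both ends: copy-engine commands are written
 * from the start of the buffer ([mem_push_start, mem_push_end)) while the
 * data they reference is packed downward from the end (mem_data_start).
 * A buffer is flushed and recycled once the two regions would collide.
 */
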
struct nvk_upload_mem {
   struct nvkmd_mem *mem;

   /** Link in nvk_upload_queue::recycle */
   struct list_head link;

   /** Time point at which this BO will be idle */
   uint64_t idle_time_point;
};

static VkResult
nvk_upload_mem_create(struct nvk_device *dev,
                      struct nvk_upload_mem **mem_out)
{
   struct nvk_upload_mem *mem;
   VkResult result;

   mem = vk_zalloc(&dev->vk.alloc, sizeof(*mem), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (mem == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
                                       NVK_UPLOAD_MEM_SIZE, 0, NVKMD_MEM_GART,
                                       NVKMD_MEM_MAP_WR, &mem->mem);
   if (result != VK_SUCCESS) {
      vk_free(&dev->vk.alloc, mem);
      return result;
   }

   *mem_out = mem;

   return VK_SUCCESS;
}

static void
nvk_upload_mem_destroy(struct nvk_device *dev,
                       struct nvk_upload_mem *mem)
{
   nvkmd_mem_unref(mem->mem);
   vk_free(&dev->vk.alloc, mem);
}

VkResult
nvk_upload_queue_init(struct nvk_device *dev,
                      struct nvk_upload_queue *queue)
{
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   VkResult result;

   memset(queue, 0, sizeof(*queue));

   simple_mtx_init(&queue->mutex, mtx_plain);

   result = nvkmd_dev_create_ctx(dev->nvkmd, &dev->vk.base,
                                 NVKMD_ENGINE_COPY, &queue->ctx);
   if (result != VK_SUCCESS)
      goto fail_mutex;

   const struct vk_sync_type *sync_type = pdev->nvkmd->sync_types[0];
   assert(sync_type->features & VK_SYNC_FEATURE_TIMELINE);

   result = vk_sync_create(&dev->vk, sync_type, VK_SYNC_IS_TIMELINE,
                           0, &queue->sync);
   if (result != VK_SUCCESS)
      goto fail_ctx;

   list_inithead(&queue->recycle);

   return VK_SUCCESS;

fail_ctx:
   nvkmd_ctx_destroy(queue->ctx);
fail_mutex:
   simple_mtx_destroy(&queue->mutex);

   return result;
}

void
nvk_upload_queue_finish(struct nvk_device *dev,
                        struct nvk_upload_queue *queue)
{
   list_for_each_entry_safe(struct nvk_upload_mem, mem, &queue->recycle, link)
      nvk_upload_mem_destroy(dev, mem);

   if (queue->mem != NULL)
      nvk_upload_mem_destroy(dev, queue->mem);

   vk_sync_destroy(&dev->vk, queue->sync);
   nvkmd_ctx_destroy(queue->ctx);
   simple_mtx_destroy(&queue->mutex);
}

static VkResult
nvk_upload_queue_flush_locked(struct nvk_device *dev,
                              struct nvk_upload_queue *queue,
                              uint64_t *time_point_out)
{
   VkResult result;

   if (queue->mem == NULL || queue->mem_push_start == queue->mem_push_end) {
      if (time_point_out != NULL)
         *time_point_out = queue->last_time_point;
      return VK_SUCCESS;
   }

   uint64_t time_point = queue->last_time_point + 1;
   if (time_point == UINT64_MAX)
      abort();

   const struct nvkmd_ctx_exec exec = {
      .addr = queue->mem->mem->va->addr + queue->mem_push_start,
      .size_B = queue->mem_push_end - queue->mem_push_start,
   };
   result = nvkmd_ctx_exec(queue->ctx, &dev->vk.base, 1, &exec);
   if (result != VK_SUCCESS)
      return result;

   const struct vk_sync_signal signal = {
      .sync = queue->sync,
      .stage_mask = ~0,
      .signal_value = time_point,
   };
   result = nvkmd_ctx_signal(queue->ctx, &dev->vk.base, 1, &signal);
   if (result != VK_SUCCESS)
      return result;

   /* Wait until now to update last_time_point so that, if we do fail and lose
    * the device, nvk_upload_queue_sync won't wait forever on a time point
    * that will never signal.
    */
   queue->last_time_point = time_point;

   queue->mem->idle_time_point = time_point;
   queue->mem_push_start = queue->mem_push_end;

   if (time_point_out != NULL)
      *time_point_out = time_point;

   return VK_SUCCESS;
}

VkResult
nvk_upload_queue_flush(struct nvk_device *dev,
                       struct nvk_upload_queue *queue,
                       uint64_t *time_point_out)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_flush_locked(dev, queue, time_point_out);
   simple_mtx_unlock(&queue->mutex);

   return result;
}

static VkResult
nvk_upload_queue_sync_locked(struct nvk_device *dev,
                             struct nvk_upload_queue *queue)
{
   VkResult result;

   result = nvk_upload_queue_flush_locked(dev, queue, NULL);
   if (result != VK_SUCCESS)
      return result;

   if (queue->last_time_point == 0)
      return VK_SUCCESS;

   return vk_sync_wait(&dev->vk, queue->sync, queue->last_time_point,
                       VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
}

VkResult
nvk_upload_queue_sync(struct nvk_device *dev,
                      struct nvk_upload_queue *queue)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_sync_locked(dev, queue);
   simple_mtx_unlock(&queue->mutex);

   return result;
}

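/* Ensure at least min_mem_size bytes are free between the pushed commands
 * and the packed data in the current upload buffer.  If there is not enough
 * room, the buffer is flushed and moved to the recycle list; the oldest
 * recycled buffer is reused once its idle time point has passed, otherwise
 * a new buffer is allocated.
 */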
static VkResult
nvk_upload_queue_reserve(struct nvk_device *dev,
                         struct nvk_upload_queue *queue,
                         uint32_t min_mem_size)
{
   VkResult result;

   assert(min_mem_size <= NVK_UPLOAD_MEM_SIZE);
   assert(queue->mem_push_end <= queue->mem_data_start);

   if (queue->mem != NULL) {
      if (queue->mem_data_start - queue->mem_push_end >= min_mem_size)
         return VK_SUCCESS;

      /* Not enough room in the BO.  Flush and add it to the recycle list */
      result = nvk_upload_queue_flush_locked(dev, queue, NULL);
      if (result != VK_SUCCESS)
         return result;

      assert(queue->mem_push_start == queue->mem_push_end);
      list_addtail(&queue->mem->link, &queue->recycle);
      queue->mem = NULL;
   }

   assert(queue->mem == NULL);
   queue->mem_push_start = queue->mem_push_end = 0;
   queue->mem_data_start = NVK_UPLOAD_MEM_SIZE;

   /* Try to pop an idle BO off the recycle list */
   if (!list_is_empty(&queue->recycle)) {
      uint64_t time_point_passed = 0;
      result = vk_sync_get_value(&dev->vk, queue->sync, &time_point_passed);
      if (result != VK_SUCCESS)
         return result;

      struct nvk_upload_mem *mem =
         list_first_entry(&queue->recycle, struct nvk_upload_mem, link);
      if (time_point_passed >= mem->idle_time_point) {
         list_del(&mem->link);
         queue->mem = mem;
         return VK_SUCCESS;
      }
   }

   return nvk_upload_mem_create(dev, &queue->mem);
}

static VkResult
nvk_upload_queue_upload_locked(struct nvk_device *dev,
                               struct nvk_upload_queue *queue,
                               uint64_t dst_addr,
                               const void *src, size_t size)
{
   VkResult result;

   assert(dst_addr % 4 == 0);
   assert(size % 4 == 0);

   while (size > 0) {
      const uint32_t cmd_size_dw = 12;
      const uint32_t cmd_size = cmd_size_dw * 4;

      /* Don't split the upload for small stuff.  If it's under 1KB and we
       * can't fit it in the current buffer, just get another.
       */
      const uint32_t min_size = cmd_size + MIN2(size, 1024);
      result = nvk_upload_queue_reserve(dev, queue, min_size);
      if (result != VK_SUCCESS)
         return result;

      assert(queue->mem != NULL);
      assert(queue->mem_data_start > queue->mem_push_end);
      const uint32_t avail = queue->mem_data_start - queue->mem_push_end;
      assert(avail >= min_size);

      const uint32_t data_size = MIN2(size, avail - cmd_size);

      const uint32_t data_mem_offset = queue->mem_data_start - data_size;
      assert(queue->mem_push_end + cmd_size <= data_mem_offset);
      const uint64_t data_addr = queue->mem->mem->va->addr + data_mem_offset;
      memcpy(queue->mem->mem->map + data_mem_offset, src, data_size);
      queue->mem_data_start = data_mem_offset;

      struct nv_push p;
      nv_push_init(&p, queue->mem->mem->map + queue->mem_push_end, cmd_size_dw);

      assert(data_size <= (1 << 17));

      P_MTHD(&p, NV90B5, OFFSET_IN_UPPER);
      P_NV90B5_OFFSET_IN_UPPER(&p, data_addr >> 32);
      P_NV90B5_OFFSET_IN_LOWER(&p, data_addr & 0xffffffff);
      P_NV90B5_OFFSET_OUT_UPPER(&p, dst_addr >> 32);
      P_NV90B5_OFFSET_OUT_LOWER(&p, dst_addr & 0xffffffff);
      P_NV90B5_PITCH_IN(&p, data_size);
      P_NV90B5_PITCH_OUT(&p, data_size);
      P_NV90B5_LINE_LENGTH_IN(&p, data_size);
      P_NV90B5_LINE_COUNT(&p, 1);

      P_IMMD(&p, NV90B5, LAUNCH_DMA, {
         .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
         .multi_line_enable = MULTI_LINE_ENABLE_FALSE,
         .flush_enable = FLUSH_ENABLE_TRUE,
         .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
         .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
      });

      assert(nv_push_dw_count(&p) <= cmd_size_dw);
      queue->mem_push_end += nv_push_dw_count(&p) * 4;

      dst_addr += data_size;
      src += data_size;
      size -= data_size;
   }

   return VK_SUCCESS;
}

VkResult
nvk_upload_queue_upload(struct nvk_device *dev,
                        struct nvk_upload_queue *queue,
                        uint64_t dst_addr,
                        const void *src, size_t size)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_upload_locked(dev, queue, dst_addr, src, size);
   simple_mtx_unlock(&queue->mutex);

   return result;
}

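/* Usage sketch (illustrative only; the dev->upload member name is an
 * assumption, not taken from this file).  Uploads are batched until a flush,
 * so a caller that needs the data to be visible must either sync (CPU wait)
 * or flush and make a later submission wait on queue->sync at the returned
 * time point:
 *
 *    const uint32_t zero[4] = { 0 };
 *    VkResult result = nvk_upload_queue_upload(dev, &dev->upload, dst_addr,
 *                                              zero, sizeof(zero));
 *    if (result != VK_SUCCESS)
 *       return result;
 *
 *    // Either block on the CPU until the copy engine is done...
 *    result = nvk_upload_queue_sync(dev, &dev->upload);
 *
 *    // ...or flush and wait on queue->sync at the returned time point.
 *    uint64_t time_point;
 *    result = nvk_upload_queue_flush(dev, &dev->upload, &time_point);
 */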
static VkResult
nvk_upload_queue_fill_locked(struct nvk_device *dev,
                             struct nvk_upload_queue *queue,
                             uint64_t dst_addr, uint32_t data, size_t size)
{
   VkResult result;

   assert(dst_addr % 4 == 0);
   assert(size % 4 == 0);

   while (size > 0) {
      const uint32_t cmd_size_dw = 14;
      const uint32_t cmd_size = cmd_size_dw * 4;

      result = nvk_upload_queue_reserve(dev, queue, cmd_size);
      if (result != VK_SUCCESS)
         return result;

      const uint32_t max_dim = 1 << 17;
      uint32_t width_B, height;
      if (size > max_dim) {
         width_B = max_dim;
         height = MIN2(max_dim, size / width_B);
      } else {
         width_B = size;
         height = 1;
      }
      assert(width_B * height <= size);

      struct nv_push p;
      nv_push_init(&p, queue->mem->mem->map + queue->mem_push_end, cmd_size_dw);

      P_MTHD(&p, NV90B5, OFFSET_OUT_UPPER);
      P_NV90B5_OFFSET_OUT_UPPER(&p, dst_addr >> 32);
      P_NV90B5_OFFSET_OUT_LOWER(&p, dst_addr & 0xffffffff);
      P_NV90B5_PITCH_IN(&p, width_B);
      P_NV90B5_PITCH_OUT(&p, width_B);
      P_NV90B5_LINE_LENGTH_IN(&p, width_B / 4);
      P_NV90B5_LINE_COUNT(&p, height);

      P_IMMD(&p, NV90B5, SET_REMAP_CONST_A, data);
      P_IMMD(&p, NV90B5, SET_REMAP_COMPONENTS, {
         .dst_x = DST_X_CONST_A,
         .dst_y = DST_Y_CONST_A,
         .dst_z = DST_Z_CONST_A,
         .dst_w = DST_W_CONST_A,
         .component_size = COMPONENT_SIZE_FOUR,
         .num_src_components = NUM_SRC_COMPONENTS_ONE,
         .num_dst_components = NUM_DST_COMPONENTS_ONE,
      });

      P_IMMD(&p, NV90B5, LAUNCH_DMA, {
         .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
         .multi_line_enable = height > 1,
         .flush_enable = FLUSH_ENABLE_TRUE,
         .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
         .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
         .remap_enable = REMAP_ENABLE_TRUE,
      });

      assert(nv_push_dw_count(&p) <= cmd_size_dw);
      queue->mem_push_end += nv_push_dw_count(&p) * 4;

      dst_addr += width_B * height;
      size -= width_B * height;
   }

   return VK_SUCCESS;
}

VkResult
nvk_upload_queue_fill(struct nvk_device *dev,
                      struct nvk_upload_queue *queue,
                      uint64_t dst_addr, uint32_t data, size_t size)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_fill_locked(dev, queue, dst_addr, data, size);
   simple_mtx_unlock(&queue->mutex);

   return result;
}