/*
 * Copyright © 2024 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */

#include "nvk_upload_queue.h"

#include "nvk_device.h"
#include "nvk_physical_device.h"
#include "nvkmd/nvkmd.h"
#include "vk_alloc.h"

#include "nv_push.h"
#include "nv_push_cl90b5.h"

#define NVK_UPLOAD_MEM_SIZE 64*1024

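/* The upload queue owns a small pool of NVK_UPLOAD_MEM_SIZE (64 KiB) GART
 * buffers.  The active buffer holds copy-engine push commands at the front
 * and the data they reference at the back; once it fills up it is submitted
 * and parked on a recycle list, tagged with the timeline time point at which
 * the GPU will be done with it.
 */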
struct nvk_upload_mem {
   struct nvkmd_mem *mem;

   /** Link in nvk_upload_queue::recycle */
   struct list_head link;

   /** Time point at which this BO will be idle */
   uint64_t idle_time_point;
};

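/* Allocate a fresh NVK_UPLOAD_MEM_SIZE buffer in GART and map it for CPU
 * writes.  The buffer backs both the push commands and the staged data of
 * the upload queue.
 */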
static VkResult
nvk_upload_mem_create(struct nvk_device *dev,
                      struct nvk_upload_mem **mem_out)
{
   struct nvk_upload_mem *mem;
   VkResult result;

   mem = vk_zalloc(&dev->vk.alloc, sizeof(*mem), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (mem == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
                                       NVK_UPLOAD_MEM_SIZE, 0, NVKMD_MEM_GART,
                                       NVKMD_MEM_MAP_WR, &mem->mem);
   if (result != VK_SUCCESS) {
      vk_free(&dev->vk.alloc, mem);
      return result;
   }

   *mem_out = mem;

   return VK_SUCCESS;
}

static void
nvk_upload_mem_destroy(struct nvk_device *dev,
                       struct nvk_upload_mem *mem)
{
   nvkmd_mem_unref(mem->mem);
   vk_free(&dev->vk.alloc, mem);
}

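/* Set up the upload queue: a dedicated copy-engine context for submissions
 * and a timeline sync object used to track which recycled buffers have gone
 * idle.
 */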
VkResult
nvk_upload_queue_init(struct nvk_device *dev,
                      struct nvk_upload_queue *queue)
{
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   VkResult result;

   memset(queue, 0, sizeof(*queue));

   simple_mtx_init(&queue->mutex, mtx_plain);

   result = nvkmd_dev_create_ctx(dev->nvkmd, &dev->vk.base,
                                 NVKMD_ENGINE_COPY, &queue->ctx);
   if (result != VK_SUCCESS)
      goto fail_mutex;

   const struct vk_sync_type *sync_type = pdev->nvkmd->sync_types[0];
   assert(sync_type->features & VK_SYNC_FEATURE_TIMELINE);

   result = vk_sync_create(&dev->vk, sync_type, VK_SYNC_IS_TIMELINE,
                           0, &queue->sync);
   if (result != VK_SUCCESS)
      goto fail_ctx;

   list_inithead(&queue->recycle);

   return VK_SUCCESS;

fail_ctx:
   nvkmd_ctx_destroy(queue->ctx);
fail_mutex:
   simple_mtx_destroy(&queue->mutex);

   return result;
}

void
nvk_upload_queue_finish(struct nvk_device *dev,
                        struct nvk_upload_queue *queue)
{
   list_for_each_entry_safe(struct nvk_upload_mem, mem, &queue->recycle, link)
      nvk_upload_mem_destroy(dev, mem);

   if (queue->mem != NULL)
      nvk_upload_mem_destroy(dev, queue->mem);

   vk_sync_destroy(&dev->vk, queue->sync);
   nvkmd_ctx_destroy(queue->ctx);
   simple_mtx_destroy(&queue->mutex);
}

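/* Submit any pending push commands on the copy context and signal the
 * timeline at a new time point.  On success, if time_point_out is non-NULL,
 * it receives the timeline value that will have signaled once everything
 * queued so far has executed.
 */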
static VkResult
nvk_upload_queue_flush_locked(struct nvk_device *dev,
                              struct nvk_upload_queue *queue,
                              uint64_t *time_point_out)
{
   VkResult result;

   if (queue->mem == NULL || queue->mem_push_start == queue->mem_push_end) {
      if (time_point_out != NULL)
         *time_point_out = queue->last_time_point;
      return VK_SUCCESS;
   }

   uint64_t time_point = queue->last_time_point + 1;
   if (time_point == UINT64_MAX)
      abort();

   const struct nvkmd_ctx_exec exec = {
      .addr = queue->mem->mem->va->addr + queue->mem_push_start,
      .size_B = queue->mem_push_end - queue->mem_push_start,
   };
   result = nvkmd_ctx_exec(queue->ctx, &dev->vk.base, 1, &exec);
   if (result != VK_SUCCESS)
      return result;

   const struct vk_sync_signal signal = {
      .sync = queue->sync,
      .stage_mask = ~0,
      .signal_value = time_point,
   };
   result = nvkmd_ctx_signal(queue->ctx, &dev->vk.base, 1, &signal);
   if (result != VK_SUCCESS)
      return result;

   /* Wait until now to update last_time_point so that, if we do fail and lose
    * the device, nvk_upload_queue_sync won't wait forever on a time point
    * that will never signal.
    */
   queue->last_time_point = time_point;

   queue->mem->idle_time_point = time_point;
   queue->mem_push_start = queue->mem_push_end;

   if (time_point_out != NULL)
      *time_point_out = time_point;

   return VK_SUCCESS;
}

VkResult
nvk_upload_queue_flush(struct nvk_device *dev,
                       struct nvk_upload_queue *queue,
                       uint64_t *time_point_out)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_flush_locked(dev, queue, time_point_out);
   simple_mtx_unlock(&queue->mutex);

   return result;
}

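/* Flush any pending uploads and block until the last submitted time point
 * has signaled, i.e. until every upload issued so far has landed in memory.
 */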
static VkResult
nvk_upload_queue_sync_locked(struct nvk_device *dev,
                             struct nvk_upload_queue *queue)
{
   VkResult result;

   result = nvk_upload_queue_flush_locked(dev, queue, NULL);
   if (result != VK_SUCCESS)
      return result;

   if (queue->last_time_point == 0)
      return VK_SUCCESS;

   return vk_sync_wait(&dev->vk, queue->sync, queue->last_time_point,
                       VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
}

VkResult
nvk_upload_queue_sync(struct nvk_device *dev,
                      struct nvk_upload_queue *queue)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_sync_locked(dev, queue);
   simple_mtx_unlock(&queue->mutex);

   return result;
}

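/* Make sure the current upload buffer has at least min_mem_size bytes free.
 *
 * Push commands are packed from the start of the buffer (mem_push_end grows
 * upward) while staged data is packed from the end (mem_data_start grows
 * downward), so the free space is the gap between the two.  If the gap is
 * too small, the current buffer is flushed and moved to the recycle list,
 * and we either reuse a recycled buffer whose idle_time_point has already
 * passed on the timeline or allocate a new one.
 */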
static VkResult
nvk_upload_queue_reserve(struct nvk_device *dev,
                         struct nvk_upload_queue *queue,
                         uint32_t min_mem_size)
{
   VkResult result;

   assert(min_mem_size <= NVK_UPLOAD_MEM_SIZE);
   assert(queue->mem_push_end <= queue->mem_data_start);

   if (queue->mem != NULL) {
      if (queue->mem_data_start - queue->mem_push_end >= min_mem_size)
         return VK_SUCCESS;

      /* Not enough room in the BO. Flush and add it to the recycle list */
      result = nvk_upload_queue_flush_locked(dev, queue, NULL);
      if (result != VK_SUCCESS)
         return result;

      assert(queue->mem_push_start == queue->mem_push_end);
      list_addtail(&queue->mem->link, &queue->recycle);
      queue->mem = NULL;
   }

   assert(queue->mem == NULL);
   queue->mem_push_start = queue->mem_push_end = 0;
   queue->mem_data_start = NVK_UPLOAD_MEM_SIZE;

   /* Try to pop an idle BO off the recycle list */
   if (!list_is_empty(&queue->recycle)) {
      uint64_t time_point_passed = 0;
      result = vk_sync_get_value(&dev->vk, queue->sync, &time_point_passed);
      if (result != VK_SUCCESS)
         return result;

      struct nvk_upload_mem *mem =
         list_first_entry(&queue->recycle, struct nvk_upload_mem, link);
      if (time_point_passed >= mem->idle_time_point) {
         list_del(&mem->link);
         queue->mem = mem;
         return VK_SUCCESS;
      }
   }

   return nvk_upload_mem_create(dev, &queue->mem);
}

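/* Stage src into the data area at the back of the current buffer and emit a
 * NV90B5 copy-engine command that copies it to dst_addr.  Large uploads are
 * split into chunks sized to whatever fits alongside the 12-dword command.
 */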
static VkResult
nvk_upload_queue_upload_locked(struct nvk_device *dev,
                               struct nvk_upload_queue *queue,
                               uint64_t dst_addr,
                               const void *src, size_t size)
{
   VkResult result;

   assert(dst_addr % 4 == 0);
   assert(size % 4 == 0);

   while (size > 0) {
      const uint32_t cmd_size_dw = 12;
      const uint32_t cmd_size = cmd_size_dw * 4;

      /* Don't split the upload for small stuff.  If it's under 1KB and we
       * can't fit it in the current buffer, just get another.
       */
      const uint32_t min_size = cmd_size + MIN2(size, 1024);
      result = nvk_upload_queue_reserve(dev, queue, min_size);
      if (result != VK_SUCCESS)
         return result;

      assert(queue->mem != NULL);
      assert(queue->mem_data_start > queue->mem_push_end);
      const uint32_t avail = queue->mem_data_start - queue->mem_push_end;
      assert(avail >= min_size);

      const uint32_t data_size = MIN2(size, avail - cmd_size);

      const uint32_t data_mem_offset = queue->mem_data_start - data_size;
      assert(queue->mem_push_end + cmd_size <= data_mem_offset);
      const uint64_t data_addr = queue->mem->mem->va->addr + data_mem_offset;
      memcpy(queue->mem->mem->map + data_mem_offset, src, data_size);
      queue->mem_data_start = data_mem_offset;

      struct nv_push p;
      nv_push_init(&p, queue->mem->mem->map + queue->mem_push_end, cmd_size_dw);

      assert(data_size <= (1 << 17));

      P_MTHD(&p, NV90B5, OFFSET_IN_UPPER);
      P_NV90B5_OFFSET_IN_UPPER(&p, data_addr >> 32);
      P_NV90B5_OFFSET_IN_LOWER(&p, data_addr & 0xffffffff);
      P_NV90B5_OFFSET_OUT_UPPER(&p, dst_addr >> 32);
      P_NV90B5_OFFSET_OUT_LOWER(&p, dst_addr & 0xffffffff);
      P_NV90B5_PITCH_IN(&p, data_size);
      P_NV90B5_PITCH_OUT(&p, data_size);
      P_NV90B5_LINE_LENGTH_IN(&p, data_size);
      P_NV90B5_LINE_COUNT(&p, 1);

      P_IMMD(&p, NV90B5, LAUNCH_DMA, {
         .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
         .multi_line_enable = MULTI_LINE_ENABLE_FALSE,
         .flush_enable = FLUSH_ENABLE_TRUE,
         .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
         .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
      });

      assert(nv_push_dw_count(&p) <= cmd_size_dw);
      queue->mem_push_end += nv_push_dw_count(&p) * 4;

      dst_addr += data_size;
      src += data_size;
      size -= data_size;
   }

   return VK_SUCCESS;
}

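/* Thread-safe entry point for uploading CPU data to a GPU virtual address.
 * Both dst_addr and size must be 4-byte aligned.  The copy is only submitted
 * to the GPU once the queue is flushed; use nvk_upload_queue_flush() or
 * nvk_upload_queue_sync() to order it against other work.
 *
 * Sketch of a caller (hypothetical; assumes the device embeds the queue as
 * dev->upload):
 *
 *    uint32_t zero[4] = { 0, 0, 0, 0 };
 *    result = nvk_upload_queue_upload(dev, &dev->upload, dst_addr,
 *                                     zero, sizeof(zero));
 *    if (result != VK_SUCCESS)
 *       return result;
 */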
VkResult
nvk_upload_queue_upload(struct nvk_device *dev,
                        struct nvk_upload_queue *queue,
                        uint64_t dst_addr,
                        const void *src, size_t size)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_upload_locked(dev, queue, dst_addr, src, size);
   simple_mtx_unlock(&queue->mutex);

   return result;
}

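/* Fill size bytes at dst_addr with a repeating 32-bit pattern using the copy
 * engine's remap unit: the constant is loaded into REMAP_CONST_A and used as
 * the source of a pitch-linear copy.  LINE_LENGTH_IN is given in 4-byte
 * components (hence width_B / 4), and large fills are emitted as width_B x
 * height rectangles (each dimension capped at 1 << 17) until the full size
 * is covered.
 */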
static VkResult
nvk_upload_queue_fill_locked(struct nvk_device *dev,
                             struct nvk_upload_queue *queue,
                             uint64_t dst_addr, uint32_t data, size_t size)
{
   VkResult result;

   assert(dst_addr % 4 == 0);
   assert(size % 4 == 0);

   while (size > 0) {
      const uint32_t cmd_size_dw = 14;
      const uint32_t cmd_size = cmd_size_dw * 4;

      result = nvk_upload_queue_reserve(dev, queue, cmd_size);
      if (result != VK_SUCCESS)
         return result;

      const uint32_t max_dim = 1 << 17;
      uint32_t width_B, height;
      if (size > max_dim) {
         width_B = max_dim;
         height = MIN2(max_dim, size / width_B);
      } else {
         width_B = size;
         height = 1;
      }
      assert(width_B * height <= size);

      struct nv_push p;
      nv_push_init(&p, queue->mem->mem->map + queue->mem_push_end, cmd_size_dw);

      P_MTHD(&p, NV90B5, OFFSET_OUT_UPPER);
      P_NV90B5_OFFSET_OUT_UPPER(&p, dst_addr >> 32);
      P_NV90B5_OFFSET_OUT_LOWER(&p, dst_addr & 0xffffffff);
      P_NV90B5_PITCH_IN(&p, width_B);
      P_NV90B5_PITCH_OUT(&p, width_B);
      P_NV90B5_LINE_LENGTH_IN(&p, width_B / 4);
      P_NV90B5_LINE_COUNT(&p, height);

      P_IMMD(&p, NV90B5, SET_REMAP_CONST_A, data);
      P_IMMD(&p, NV90B5, SET_REMAP_COMPONENTS, {
         .dst_x = DST_X_CONST_A,
         .dst_y = DST_Y_CONST_A,
         .dst_z = DST_Z_CONST_A,
         .dst_w = DST_W_CONST_A,
         .component_size = COMPONENT_SIZE_FOUR,
         .num_src_components = NUM_SRC_COMPONENTS_ONE,
         .num_dst_components = NUM_DST_COMPONENTS_ONE,
      });

      P_IMMD(&p, NV90B5, LAUNCH_DMA, {
         .data_transfer_type = DATA_TRANSFER_TYPE_NON_PIPELINED,
         .multi_line_enable = height > 1,
         .flush_enable = FLUSH_ENABLE_TRUE,
         .src_memory_layout = SRC_MEMORY_LAYOUT_PITCH,
         .dst_memory_layout = DST_MEMORY_LAYOUT_PITCH,
         .remap_enable = REMAP_ENABLE_TRUE,
      });

      assert(nv_push_dw_count(&p) <= cmd_size_dw);
      queue->mem_push_end += nv_push_dw_count(&p) * 4;

      dst_addr += width_B * height;
      size -= width_B * height;
   }

   return VK_SUCCESS;
}

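/* Thread-safe entry point for filling GPU memory with a 32-bit value.  As
 * with uploads, dst_addr and size must be 4-byte aligned and the work is
 * only submitted to the GPU when the queue is flushed.
 */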
VkResult
nvk_upload_queue_fill(struct nvk_device *dev,
                      struct nvk_upload_queue *queue,
                      uint64_t dst_addr, uint32_t data, size_t size)
{
   VkResult result;

   simple_mtx_lock(&queue->mutex);
   result = nvk_upload_queue_fill_locked(dev, queue, dst_addr, data, size);
   simple_mtx_unlock(&queue->mutex);

   return result;
}