// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
 */

#include <drm/amdxdna_accel.h>
#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <linux/xarray.h>
#include <trace/events/amdxdna.h>

#include "amdxdna_ctx.h"
#include "amdxdna_gem.h"
#include "amdxdna_pci_drv.h"

#define MAX_HWCTX_ID		255
#define MAX_ARG_COUNT		4095

struct amdxdna_fence {
	struct dma_fence	base;
	spinlock_t		lock; /* for base */
	struct amdxdna_hwctx	*hwctx;
};

static const char *amdxdna_fence_get_driver_name(struct dma_fence *fence)
{
	return KBUILD_MODNAME;
}

static const char *amdxdna_fence_get_timeline_name(struct dma_fence *fence)
{
	struct amdxdna_fence *xdna_fence;

	xdna_fence = container_of(fence, struct amdxdna_fence, base);

	return xdna_fence->hwctx->name;
}

static const struct dma_fence_ops fence_ops = {
	.get_driver_name = amdxdna_fence_get_driver_name,
	.get_timeline_name = amdxdna_fence_get_timeline_name,
};

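/*
 * Create a job fence on the hwctx timeline; the hwctx ID serves as the
 * dma-fence context. Returns NULL on allocation failure.
 */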
static struct dma_fence *amdxdna_fence_create(struct amdxdna_hwctx *hwctx)
{
	struct amdxdna_fence *fence;

	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
	if (!fence)
		return NULL;

	fence->hwctx = hwctx;
	spin_lock_init(&fence->lock);
	dma_fence_init(&fence->base, &fence_ops, &fence->lock, hwctx->id, 0);
	return &fence->base;
}

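/*
 * Suspend every hardware context owned by @client; amdxdna_hwctx_resume()
 * below is the mirror operation. The caller must hold xdna->dev_lock, and
 * hwctx_lock is taken here to walk the client's hwctx xarray.
 */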
void amdxdna_hwctx_suspend(struct amdxdna_client *client)
{
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_hwctx *hwctx;
	unsigned long hwctx_id;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	mutex_lock(&client->hwctx_lock);
	amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
		xdna->dev_info->ops->hwctx_suspend(hwctx);
	mutex_unlock(&client->hwctx_lock);
}

void amdxdna_hwctx_resume(struct amdxdna_client *client)
{
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_hwctx *hwctx;
	unsigned long hwctx_id;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
	mutex_lock(&client->hwctx_lock);
	amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
		xdna->dev_info->ops->hwctx_resume(hwctx);
	mutex_unlock(&client->hwctx_lock);
}

static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx,
				      struct srcu_struct *ss)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	synchronize_srcu(ss);

	/* At this point, user is not able to submit new commands */
	mutex_lock(&xdna->dev_lock);
	xdna->dev_info->ops->hwctx_fini(hwctx);
	mutex_unlock(&xdna->dev_lock);

	kfree(hwctx->name);
	kfree(hwctx);
}

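/*
 * A command buffer holds a header word followed by data words. For regular
 * commands, the first 1 + EXTRA_CU_MASK data words are CU masks; a command
 * chain (ERT_CMD_CHAIN) carries none. COUNT is the total number of data
 * words, so the payload that follows the masks is (COUNT - num_masks)
 * 32-bit words long.
 */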
void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
{
	struct amdxdna_cmd *cmd = abo->mem.kva;
	u32 num_masks, count;

	if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
		num_masks = 0;
	else
		num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);

	if (size) {
		count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header);
		if (unlikely(count <= num_masks)) {
			*size = 0;
			return NULL;
		}
		*size = (count - num_masks) * sizeof(u32);
	}
	return &cmd->data[num_masks];
}

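/*
 * Return the zero-based index of the first CU selected in the CU masks,
 * or -1 for command chains and commands that select no CU.
 */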
int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
{
	struct amdxdna_cmd *cmd = abo->mem.kva;
	u32 num_masks, i;
	u32 *cu_mask;

	if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
		return -1;

	num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
	cu_mask = cmd->data;
	for (i = 0; i < num_masks; i++) {
		if (cu_mask[i])
			return ffs(cu_mask[i]) - 1;
	}

	return -1;
}

/*
 * This should be called in close() and remove(). DO NOT call in other
 * syscalls. It guarantees that the hwctx and its resources are released
 * even if the user never calls amdxdna_drm_destroy_hwctx_ioctl. The
 * hwctx_lock is dropped around each destroy because
 * amdxdna_hwctx_destroy_rcu() sleeps in synchronize_srcu() and takes the
 * device lock.
 */
void amdxdna_hwctx_remove_all(struct amdxdna_client *client)
{
	struct amdxdna_hwctx *hwctx;
	unsigned long hwctx_id;

	mutex_lock(&client->hwctx_lock);
	amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
		XDNA_DBG(client->xdna, "PID %d close HW context %d",
			 client->pid, hwctx->id);
		xa_erase(&client->hwctx_xa, hwctx->id);
		mutex_unlock(&client->hwctx_lock);
		amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
		mutex_lock(&client->hwctx_lock);
	}
	mutex_unlock(&client->hwctx_lock);
}

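/*
 * Create a hardware context from the QoS and sizing parameters supplied by
 * userspace. A minimal userspace sketch follows; the values are hypothetical
 * and the struct/ioctl names are assumed from the amdxdna_accel.h uAPI
 * header:
 *
 *	struct amdxdna_qos_info qos = {};
 *	struct amdxdna_drm_create_hwctx args = {};
 *
 *	args.qos_p = (uintptr_t)&qos;
 *	args.num_tiles = num_tiles;
 *	if (!ioctl(fd, DRM_IOCTL_AMDXDNA_CREATE_HWCTX, &args))
 *		hwctx_handle = args.handle;
 */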
int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdxdna_client *client = filp->driver_priv;
	struct amdxdna_drm_create_hwctx *args = data;
	struct amdxdna_dev *xdna = to_xdna_dev(dev);
	struct amdxdna_hwctx *hwctx;
	int ret, idx;

	if (args->ext || args->ext_flags)
		return -EINVAL;

	if (!drm_dev_enter(dev, &idx))
		return -ENODEV;

	hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
	if (!hwctx) {
		ret = -ENOMEM;
		goto exit;
	}

	if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) {
		XDNA_ERR(xdna, "Access QoS info failed");
		ret = -EFAULT;
		goto free_hwctx;
	}

	hwctx->client = client;
	hwctx->fw_ctx_id = -1;
	hwctx->num_tiles = args->num_tiles;
	hwctx->mem_size = args->mem_size;
	hwctx->max_opc = args->max_opc;
	ret = xa_alloc_cyclic(&client->hwctx_xa, &hwctx->id, hwctx,
			      XA_LIMIT(AMDXDNA_INVALID_CTX_HANDLE + 1, MAX_HWCTX_ID),
			      &client->next_hwctxid, GFP_KERNEL);
	if (ret < 0) {
		XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
		goto free_hwctx;
	}

	hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id);
	if (!hwctx->name) {
		ret = -ENOMEM;
		goto rm_id;
	}

	mutex_lock(&xdna->dev_lock);
	ret = xdna->dev_info->ops->hwctx_init(hwctx);
	if (ret) {
		mutex_unlock(&xdna->dev_lock);
		XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
		goto free_name;
	}
	args->handle = hwctx->id;
	args->syncobj_handle = hwctx->syncobj_hdl;
	mutex_unlock(&xdna->dev_lock);

	XDNA_DBG(xdna, "PID %d create HW context %d, ret %d", client->pid, args->handle, ret);
	drm_dev_exit(idx);
	return 0;

free_name:
	kfree(hwctx->name);
rm_id:
	xa_erase(&client->hwctx_xa, hwctx->id);
free_hwctx:
	kfree(hwctx);
exit:
	drm_dev_exit(idx);
	return ret;
}

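/*
 * Destroy the hardware context named by @args->handle. The context is
 * erased from the xarray first, so no new submission can look it up, then
 * torn down after an SRCU grace period.
 */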
int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdxdna_client *client = filp->driver_priv;
	struct amdxdna_drm_destroy_hwctx *args = data;
	struct amdxdna_dev *xdna = to_xdna_dev(dev);
	struct amdxdna_hwctx *hwctx;
	int ret = 0, idx;

	if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad)))
		return -EINVAL;

	if (!drm_dev_enter(dev, &idx))
		return -ENODEV;

	hwctx = xa_erase(&client->hwctx_xa, args->handle);
	if (!hwctx) {
		ret = -EINVAL;
		XDNA_DBG(xdna, "PID %d HW context %d does not exist",
			 client->pid, args->handle);
		goto out;
	}

	/*
	 * Jobs already pushed are handled by the DRM scheduler during destroy.
	 * SRCU synchronizes against the exec command ioctls.
	 */
	amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);

	XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle);
out:
	drm_dev_exit(idx);
	return ret;
}

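/*
 * Configure an existing hardware context. For DRM_AMDXDNA_HWCTX_CONFIG_CU,
 * param_val is a user pointer to a buffer of param_val_size bytes; for the
 * debug BO types it is an immediate value passed through to the device
 * layer.
 */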
int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdxdna_client *client = filp->driver_priv;
	struct amdxdna_drm_config_hwctx *args = data;
	struct amdxdna_dev *xdna = to_xdna_dev(dev);
	struct amdxdna_hwctx *hwctx;
	int ret, idx;
	u32 buf_size;
	void *buf;
	u64 val;

	if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad)))
		return -EINVAL;

	if (!xdna->dev_info->ops->hwctx_config)
		return -EOPNOTSUPP;

	val = args->param_val;
	buf_size = args->param_val_size;

	switch (args->param_type) {
	case DRM_AMDXDNA_HWCTX_CONFIG_CU:
		/* For these param types, param_val is a user pointer */
		if (buf_size > PAGE_SIZE) {
			XDNA_ERR(xdna, "Config CU param buffer too large");
			return -E2BIG;
		}

		/* Hwctx needs to keep buf */
		buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;

		if (copy_from_user(buf, u64_to_user_ptr(val), buf_size)) {
			kfree(buf);
			return -EFAULT;
		}

		break;
	case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
	case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
		/* For these param types, param_val is an immediate value */
		buf = NULL;
		buf_size = 0;
		break;
	default:
		XDNA_DBG(xdna, "Unknown HW context config type %d", args->param_type);
		return -EINVAL;
	}

	mutex_lock(&xdna->dev_lock);
	idx = srcu_read_lock(&client->hwctx_srcu);
	hwctx = xa_load(&client->hwctx_xa, args->handle);
	if (!hwctx) {
		XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle);
		ret = -EINVAL;
		goto unlock_srcu;
	}

	ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size);

unlock_srcu:
	srcu_read_unlock(&client->hwctx_srcu, idx);
	mutex_unlock(&xdna->dev_lock);
	kfree(buf);
	return ret;
}

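/*
 * Drop the GEM references taken by amdxdna_arg_bos_lookup(). The bos array
 * is filled front to back, so stop at the first empty slot.
 */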
static void
amdxdna_arg_bos_put(struct amdxdna_sched_job *job)
{
	int i;

	for (i = 0; i < job->bo_cnt; i++) {
		if (!job->bos[i])
			break;
		drm_gem_object_put(job->bos[i]);
	}
}

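/*
 * Translate argument BO handles into GEM objects and pin their backing
 * memory, taking one reference per handle. On failure, every reference
 * taken so far is dropped.
 */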
static int
amdxdna_arg_bos_lookup(struct amdxdna_client *client,
		       struct amdxdna_sched_job *job,
		       u32 *bo_hdls, u32 bo_cnt)
{
	struct drm_gem_object *gobj;
	int i, ret;

	job->bo_cnt = bo_cnt;
	for (i = 0; i < job->bo_cnt; i++) {
		struct amdxdna_gem_obj *abo;

		gobj = drm_gem_object_lookup(client->filp, bo_hdls[i]);
		if (!gobj) {
			ret = -ENOENT;
			goto put_shmem_bo;
		}
		abo = to_xdna_obj(gobj);

		mutex_lock(&abo->lock);
		if (abo->pinned) {
			mutex_unlock(&abo->lock);
			job->bos[i] = gobj;
			continue;
		}

		ret = amdxdna_gem_pin_nolock(abo);
		if (ret) {
			mutex_unlock(&abo->lock);
			drm_gem_object_put(gobj);
			goto put_shmem_bo;
		}
		abo->pinned = true;
		mutex_unlock(&abo->lock);

		job->bos[i] = gobj;
	}

	return 0;

put_shmem_bo:
	amdxdna_arg_bos_put(job);
	return ret;
}

void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job)
{
	trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release");
	amdxdna_arg_bos_put(job);
	amdxdna_gem_put_obj(job->cmd_bo);
}

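/*
 * Build a job from the command BO and argument BOs and hand it to the
 * device layer for scheduling. The SRCU read lock keeps the hwctx alive
 * between lookup and submission; on success, *seq carries the command
 * sequence number back to the caller.
 */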
int amdxdna_cmd_submit(struct amdxdna_client *client,
		       u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt,
		       u32 hwctx_hdl, u64 *seq)
{
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_sched_job *job;
	struct amdxdna_hwctx *hwctx;
	int ret, idx;

	XDNA_DBG(xdna, "Command BO hdl %d, Arg BO count %d", cmd_bo_hdl, arg_bo_cnt);
	job = kzalloc(struct_size(job, bos, arg_bo_cnt), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) {
		job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD);
		if (!job->cmd_bo) {
			XDNA_ERR(xdna, "Failed to get cmd bo from %d", cmd_bo_hdl);
			ret = -EINVAL;
			goto free_job;
		}
	} else {
		job->cmd_bo = NULL;
	}

	ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt);
	if (ret) {
		XDNA_ERR(xdna, "Argument BOs lookup failed, ret %d", ret);
		goto cmd_put;
	}

	idx = srcu_read_lock(&client->hwctx_srcu);
	hwctx = xa_load(&client->hwctx_xa, hwctx_hdl);
	if (!hwctx) {
		XDNA_DBG(xdna, "PID %d failed to get hwctx %d",
			 client->pid, hwctx_hdl);
		ret = -EINVAL;
		goto unlock_srcu;
	}

	if (hwctx->status != HWCTX_STAT_READY) {
		XDNA_ERR(xdna, "HW Context is not ready");
		ret = -EINVAL;
		goto unlock_srcu;
	}

	job->hwctx = hwctx;
	job->mm = current->mm;

	job->fence = amdxdna_fence_create(hwctx);
	if (!job->fence) {
		XDNA_ERR(xdna, "Failed to create fence");
		ret = -ENOMEM;
		goto unlock_srcu;
	}
	kref_init(&job->refcnt);

	ret = xdna->dev_info->ops->cmd_submit(hwctx, job, seq);
	if (ret)
		goto put_fence;

	/*
	 * amdxdna_hwctx_destroy_rcu() releases the hwctx and its associated
	 * resources only after synchronize_srcu(). Jobs already submitted are
	 * handled by the queue in the device layer, for example the DRM
	 * scheduler, so it is safe to unlock the SRCU here.
	 */
	srcu_read_unlock(&client->hwctx_srcu, idx);
	trace_amdxdna_debug_point(hwctx->name, *seq, "job pushed");

	return 0;

put_fence:
	dma_fence_put(job->fence);
unlock_srcu:
	srcu_read_unlock(&client->hwctx_srcu, idx);
	amdxdna_arg_bos_put(job);
cmd_put:
	amdxdna_gem_put_obj(job->cmd_bo);
free_job:
	kfree(job);
	return ret;
}

/*
 * The submit command ioctl submits a command to firmware. One firmware command
 * may contain multiple command BOs which are processed as a whole.
 * The returned command sequence number can be used with the wait command ioctl.
 */
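/*
 * A minimal userspace sketch of a single-command submission; the values are
 * hypothetical and the ioctl macro is assumed from the amdxdna_accel.h uAPI
 * header:
 *
 *	struct amdxdna_drm_exec_cmd args = {};
 *
 *	args.type = AMDXDNA_CMD_SUBMIT_EXEC_BUF;
 *	args.hwctx = hwctx_handle;
 *	args.cmd_count = 1;
 *	args.cmd_handles = cmd_bo_handle;
 *	args.arg_count = 1;
 *	args.args = (uintptr_t)&arg_bo_handle;
 *	if (!ioctl(fd, DRM_IOCTL_AMDXDNA_EXEC_CMD, &args))
 *		seq = args.seq;
 */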
static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
				      struct amdxdna_drm_exec_cmd *args)
{
	struct amdxdna_dev *xdna = client->xdna;
	u32 *arg_bo_hdls;
	u32 cmd_bo_hdl;
	int ret;

	if (!args->arg_count || args->arg_count > MAX_ARG_COUNT) {
		XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count);
		return -EINVAL;
	}

	/* Only support single command for now. */
	if (args->cmd_count != 1) {
		XDNA_ERR(xdna, "Invalid cmd bo count %d", args->cmd_count);
		return -EINVAL;
	}

	cmd_bo_hdl = (u32)args->cmd_handles;
	arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL);
	if (!arg_bo_hdls)
		return -ENOMEM;
	ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args),
			     args->arg_count * sizeof(u32));
	if (ret) {
		ret = -EFAULT;
		goto free_cmd_bo_hdls;
	}

	ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls,
				 args->arg_count, args->hwctx, &args->seq);
	if (ret)
		XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret);

free_cmd_bo_hdls:
	kfree(arg_bo_hdls);
	if (!ret)
		XDNA_DBG(xdna, "Pushed cmd %lld to scheduler", args->seq);
	return ret;
}

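/*
 * Dispatch a submit ioctl by its type; AMDXDNA_CMD_SUBMIT_EXEC_BUF is the
 * only type supported so far.
 */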
int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdxdna_client *client = filp->driver_priv;
	struct amdxdna_drm_exec_cmd *args = data;

	if (args->ext || args->ext_flags)
		return -EINVAL;

	switch (args->type) {
	case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
		return amdxdna_drm_submit_execbuf(client, args);
	}

	XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
	return -EINVAL;
}