xref: /aosp_15_r20/external/mesa3d/src/nouveau/mme/tests/mme_runner.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Collabora Ltd.
3  * SPDX-License-Identifier: MIT
4  */
5 #include "mme_runner.h"
6 
7 #include <fcntl.h>
8 #include <string.h>
9 #include <xf86drm.h>
10 
11 #include "mme_fermi_sim.h"
12 #include "mme_tu104_sim.h"
13 
14 #include "nv_push_clc597.h"
15 
16 #include "nouveau_bo.h"
17 #include "nouveau_context.h"
18 
19 /* nouveau_drm.h isn't C++-friendly */
20 #define class cls
21 #include "drm-uapi/nouveau_drm.h"
22 #undef class
23 
mme_runner()24 mme_runner::mme_runner() :
25   devinfo(NULL), data_addr(0), data(NULL)
26 { }
27 
~mme_runner()28 mme_runner::~mme_runner()
29 { }
30 
mme_hw_runner()31 mme_hw_runner::mme_hw_runner() :
32   mme_runner(), p(NULL), dev(NULL), ctx(NULL),
33   data_bo(NULL), push_bo(NULL),
34   syncobj(0),
35   push_map(NULL)
36 {
37    memset(&push, 0, sizeof(push));
38 }
39 
40 void
mme_store_data(mme_builder * b,uint32_t dw_idx,mme_value data,bool free_reg)41 mme_runner::mme_store_data(mme_builder *b, uint32_t dw_idx,
42                            mme_value data, bool free_reg)
43 {
44    mme_store_imm_addr(b, data_addr + dw_idx * 4, data, free_reg);
45 }
46 
~mme_hw_runner()47 mme_hw_runner::~mme_hw_runner()
48 {
49    if (syncobj)
50       drmSyncobjDestroy(dev->fd, syncobj);
51    if (data_bo)
52       nouveau_ws_bo_destroy(data_bo);
53    if (push_bo) {
54       nouveau_ws_bo_unmap(push_bo, push_map);
55       nouveau_ws_bo_destroy(push_bo);
56    }
57    if (ctx)
58       nouveau_ws_context_destroy(ctx);
59    if (dev)
60       nouveau_ws_device_destroy(dev);
61 }
62 
/* Size in bytes of the push buffer.  Parenthesized so that the macro can be
 * used safely inside larger expressions (e.g. as a divisor).
 */
#define PUSH_SIZE (64 * 4096)

/* GPU virtual addresses at which the data and push buffers are bound */
#define DATA_BO_ADDR 0x100000
#define PUSH_BO_ADDR 0x200000
67 
68 bool
set_up_hw(uint16_t min_cls,uint16_t max_cls)69 mme_hw_runner::set_up_hw(uint16_t min_cls, uint16_t max_cls)
70 {
71    drmDevicePtr devices[8];
72    int max_devices = drmGetDevices2(0, devices, 8);
73 
74    int i;
75    for (i = 0; i < max_devices; i++) {
76       if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
77           devices[i]->bustype == DRM_BUS_PCI &&
78           devices[i]->deviceinfo.pci->vendor_id == 0x10de) {
79          dev = nouveau_ws_device_new(devices[i]);
80          if (dev == NULL)
81             continue;
82 
83          if (dev->info.cls_eng3d < min_cls || dev->info.cls_eng3d > max_cls) {
84             nouveau_ws_device_destroy(dev);
85             dev = NULL;
86             continue;
87          }
88 
89          /* Found a Turning+ device */
90          break;
91       }
92    }
93 
94    if (dev == NULL)
95       return false;
96 
97    devinfo = &dev->info;
98 
99    int ret = nouveau_ws_context_create(dev, NOUVEAU_WS_ENGINE_3D, &ctx);
100    if (ret)
101       return false;
102 
103    uint32_t data_bo_flags = NOUVEAU_WS_BO_GART | NOUVEAU_WS_BO_MAP;
104    data_bo = nouveau_ws_bo_new_mapped(dev, DATA_BO_SIZE, 0,
105                                       (nouveau_ws_bo_flags)data_bo_flags,
106                                       NOUVEAU_WS_BO_RDWR, (void **)&data);
107    if (data_bo == NULL)
108       return false;
109 
110    memset(data, 139, DATA_BO_SIZE);
111 
112    assert(DATA_BO_ADDR + DATA_BO_SIZE < PUSH_BO_ADDR);
113    nouveau_ws_bo_bind_vma(dev, data_bo, DATA_BO_ADDR, DATA_BO_SIZE, 0, 0);
114    data_addr = DATA_BO_ADDR;
115 
116    uint32_t push_bo_flags = NOUVEAU_WS_BO_GART | NOUVEAU_WS_BO_MAP;
117    push_bo = nouveau_ws_bo_new_mapped(dev, PUSH_SIZE, 0,
118                                       (nouveau_ws_bo_flags)push_bo_flags,
119                                       NOUVEAU_WS_BO_WR, &push_map);
120    if (push_bo == NULL)
121       return false;
122 
123    nouveau_ws_bo_bind_vma(dev, push_bo, PUSH_BO_ADDR, PUSH_SIZE, 0, 0);
124 
125    ret = drmSyncobjCreate(dev->fd, 0, &syncobj);
126    if (ret < 0)
127       return false;
128 
129    reset_push();
130 
131    return true;
132 }
133 
134 void
reset_push()135 mme_hw_runner::reset_push()
136 {
137    nv_push_init(&push, (uint32_t *)push_map, PUSH_SIZE / 4);
138    p = &push;
139 
140    P_MTHD(p, NV9097, SET_OBJECT);
141    P_NV9097_SET_OBJECT(p, {
142       .class_id = dev->info.cls_eng3d,
143       .engine_id = 0,
144    });
145 }
146 
147 void
submit_push()148 mme_hw_runner::submit_push()
149 {
150    struct drm_nouveau_exec_push push = {
151       .va = PUSH_BO_ADDR,
152       .va_len = (uint32_t)nv_push_dw_count(&this->push) * 4,
153    };
154 
155    struct drm_nouveau_sync sync = {
156       .flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
157       .handle = syncobj,
158       .timeline_value = 0,
159    };
160 
161    struct drm_nouveau_exec req = {
162       .channel = (uint32_t)ctx->channel,
163       .push_count = 1,
164       .sig_count = 1,
165       .sig_ptr = (uintptr_t)&sync,
166       .push_ptr = (uintptr_t)&push,
167    };
168 
169    int ret = drmCommandWriteRead(dev->fd, DRM_NOUVEAU_EXEC,
170                                  &req, sizeof(req));
171    ASSERT_EQ(ret, 0);
172 
173    ret = drmSyncobjWait(dev->fd, &syncobj, 1, INT64_MAX,
174                         DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL);
175    ASSERT_EQ(ret, 0);
176 }
177 
178 void
push_macro(uint32_t id,const std::vector<uint32_t> & macro)179 mme_hw_runner::push_macro(uint32_t id, const std::vector<uint32_t> &macro)
180 {
181    P_MTHD(p, NV9097, LOAD_MME_START_ADDRESS_RAM_POINTER);
182    P_NV9097_LOAD_MME_START_ADDRESS_RAM_POINTER(p, id);
183    P_NV9097_LOAD_MME_START_ADDRESS_RAM(p, 0);
184    P_1INC(p, NV9097, LOAD_MME_INSTRUCTION_RAM_POINTER);
185    P_NV9097_LOAD_MME_INSTRUCTION_RAM_POINTER(p, 0);
186    P_INLINE_ARRAY(p, &macro[0], macro.size());
187 }
188 
189 void
run_macro(const std::vector<uint32_t> & macro,const std::vector<uint32_t> & params)190 mme_hw_runner::run_macro(const std::vector<uint32_t>& macro,
191                          const std::vector<uint32_t>& params)
192 {
193    push_macro(0, macro);
194 
195    P_1INC(p, NV9097, CALL_MME_MACRO(0));
196    if (params.empty()) {
197       P_NV9097_CALL_MME_MACRO(p, 0, 0);
198    } else {
199       P_INLINE_ARRAY(p, &params[0], params.size());
200    }
201 
202    submit_push();
203 }
204 
mme_fermi_sim_runner(uint64_t data_addr)205 mme_fermi_sim_runner::mme_fermi_sim_runner(uint64_t data_addr)
206 {
207    memset(&info, 0, sizeof(info));
208    info.cls_eng3d = FERMI_A;
209 
210    memset(data_store, 0, sizeof(data_store));
211 
212    this->devinfo = &info;
213    this->data_addr = data_addr,
214    this->data = data_store;
215 }
216 
~mme_fermi_sim_runner()217 mme_fermi_sim_runner::~mme_fermi_sim_runner()
218 { }
219 
220 void
run_macro(const std::vector<uint32_t> & macro,const std::vector<uint32_t> & params)221 mme_fermi_sim_runner::run_macro(const std::vector<uint32_t>& macro,
222                                 const std::vector<uint32_t>& params)
223 {
224    std::vector<mme_fermi_inst> insts(macro.size());
225    mme_fermi_decode(&insts[0], &macro[0], macro.size());
226 
227    /* First, make a copy of the data and simulate the macro */
228    mme_fermi_sim_mem sim_mem = {
229       .addr = data_addr,
230       .data = data,
231       .size = DATA_BO_SIZE,
232    };
233    const uint32_t* p_params = params.size() ? &params[0] : NULL;
234    mme_fermi_sim(insts.size(), &insts[0],
235                  params.size(), p_params,
236                  1, &sim_mem);
237 }
238 
mme_tu104_sim_runner(uint64_t data_addr)239 mme_tu104_sim_runner::mme_tu104_sim_runner(uint64_t data_addr)
240 {
241    memset(&info, 0, sizeof(info));
242    info.cls_eng3d = TURING_A;
243 
244    memset(data_store, 0, sizeof(data_store));
245 
246    this->devinfo = &info;
247    this->data_addr = data_addr,
248    this->data = data_store;
249 }
250 
~mme_tu104_sim_runner()251 mme_tu104_sim_runner::~mme_tu104_sim_runner()
252 { }
253 
254 void
run_macro(const std::vector<uint32_t> & macro,const std::vector<uint32_t> & params)255 mme_tu104_sim_runner::run_macro(const std::vector<uint32_t>& macro,
256                                const std::vector<uint32_t>& params)
257 {
258    std::vector<mme_tu104_inst> insts(macro.size());
259    mme_tu104_decode(&insts[0], &macro[0], macro.size() / 3);
260 
261    /* First, make a copy of the data and simulate the macro */
262    mme_tu104_sim_mem sim_mem = {
263       .addr = data_addr,
264       .data = data,
265       .size = DATA_BO_SIZE,
266    };
267    const uint32_t* p_params = params.size() ? &params[0] : NULL;
268    mme_tu104_sim(insts.size(), &insts[0],
269                  params.size(), p_params,
270                  1, &sim_mem);
271 }
272