1 /*
2 * Copyright © 2022 Collabora Ltd.
3 * SPDX-License-Identifier: MIT
4 */
5 #include "mme_runner.h"
6
7 #include <fcntl.h>
8 #include <string.h>
9 #include <xf86drm.h>
10
11 #include "mme_fermi_sim.h"
12 #include "mme_tu104_sim.h"
13
14 #include "nv_push_clc597.h"
15
16 #include "nouveau_bo.h"
17 #include "nouveau_context.h"
18
19 /* nouveau_drm.h isn't C++-friendly */
20 #define class cls
21 #include "drm-uapi/nouveau_drm.h"
22 #undef class
23
mme_runner()24 mme_runner::mme_runner() :
25 devinfo(NULL), data_addr(0), data(NULL)
26 { }
27
~mme_runner()28 mme_runner::~mme_runner()
29 { }
30
mme_hw_runner()31 mme_hw_runner::mme_hw_runner() :
32 mme_runner(), p(NULL), dev(NULL), ctx(NULL),
33 data_bo(NULL), push_bo(NULL),
34 syncobj(0),
35 push_map(NULL)
36 {
37 memset(&push, 0, sizeof(push));
38 }
39
40 void
mme_store_data(mme_builder * b,uint32_t dw_idx,mme_value data,bool free_reg)41 mme_runner::mme_store_data(mme_builder *b, uint32_t dw_idx,
42 mme_value data, bool free_reg)
43 {
44 mme_store_imm_addr(b, data_addr + dw_idx * 4, data, free_reg);
45 }
46
~mme_hw_runner()47 mme_hw_runner::~mme_hw_runner()
48 {
49 if (syncobj)
50 drmSyncobjDestroy(dev->fd, syncobj);
51 if (data_bo)
52 nouveau_ws_bo_destroy(data_bo);
53 if (push_bo) {
54 nouveau_ws_bo_unmap(push_bo, push_map);
55 nouveau_ws_bo_destroy(push_bo);
56 }
57 if (ctx)
58 nouveau_ws_context_destroy(ctx);
59 if (dev)
60 nouveau_ws_device_destroy(dev);
61 }
62
/* Size in bytes of the push buffer BO.  Parenthesized so the macro
 * expands safely inside larger expressions (e.g. `x / PUSH_SIZE`).
 */
#define PUSH_SIZE (64 * 4096)

/* Fixed GPU virtual addresses at which the data and push BOs are bound. */
#define DATA_BO_ADDR 0x100000
#define PUSH_BO_ADDR 0x200000
67
68 bool
set_up_hw(uint16_t min_cls,uint16_t max_cls)69 mme_hw_runner::set_up_hw(uint16_t min_cls, uint16_t max_cls)
70 {
71 drmDevicePtr devices[8];
72 int max_devices = drmGetDevices2(0, devices, 8);
73
74 int i;
75 for (i = 0; i < max_devices; i++) {
76 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
77 devices[i]->bustype == DRM_BUS_PCI &&
78 devices[i]->deviceinfo.pci->vendor_id == 0x10de) {
79 dev = nouveau_ws_device_new(devices[i]);
80 if (dev == NULL)
81 continue;
82
83 if (dev->info.cls_eng3d < min_cls || dev->info.cls_eng3d > max_cls) {
84 nouveau_ws_device_destroy(dev);
85 dev = NULL;
86 continue;
87 }
88
89 /* Found a Turning+ device */
90 break;
91 }
92 }
93
94 if (dev == NULL)
95 return false;
96
97 devinfo = &dev->info;
98
99 int ret = nouveau_ws_context_create(dev, NOUVEAU_WS_ENGINE_3D, &ctx);
100 if (ret)
101 return false;
102
103 uint32_t data_bo_flags = NOUVEAU_WS_BO_GART | NOUVEAU_WS_BO_MAP;
104 data_bo = nouveau_ws_bo_new_mapped(dev, DATA_BO_SIZE, 0,
105 (nouveau_ws_bo_flags)data_bo_flags,
106 NOUVEAU_WS_BO_RDWR, (void **)&data);
107 if (data_bo == NULL)
108 return false;
109
110 memset(data, 139, DATA_BO_SIZE);
111
112 assert(DATA_BO_ADDR + DATA_BO_SIZE < PUSH_BO_ADDR);
113 nouveau_ws_bo_bind_vma(dev, data_bo, DATA_BO_ADDR, DATA_BO_SIZE, 0, 0);
114 data_addr = DATA_BO_ADDR;
115
116 uint32_t push_bo_flags = NOUVEAU_WS_BO_GART | NOUVEAU_WS_BO_MAP;
117 push_bo = nouveau_ws_bo_new_mapped(dev, PUSH_SIZE, 0,
118 (nouveau_ws_bo_flags)push_bo_flags,
119 NOUVEAU_WS_BO_WR, &push_map);
120 if (push_bo == NULL)
121 return false;
122
123 nouveau_ws_bo_bind_vma(dev, push_bo, PUSH_BO_ADDR, PUSH_SIZE, 0, 0);
124
125 ret = drmSyncobjCreate(dev->fd, 0, &syncobj);
126 if (ret < 0)
127 return false;
128
129 reset_push();
130
131 return true;
132 }
133
134 void
reset_push()135 mme_hw_runner::reset_push()
136 {
137 nv_push_init(&push, (uint32_t *)push_map, PUSH_SIZE / 4);
138 p = &push;
139
140 P_MTHD(p, NV9097, SET_OBJECT);
141 P_NV9097_SET_OBJECT(p, {
142 .class_id = dev->info.cls_eng3d,
143 .engine_id = 0,
144 });
145 }
146
147 void
submit_push()148 mme_hw_runner::submit_push()
149 {
150 struct drm_nouveau_exec_push push = {
151 .va = PUSH_BO_ADDR,
152 .va_len = (uint32_t)nv_push_dw_count(&this->push) * 4,
153 };
154
155 struct drm_nouveau_sync sync = {
156 .flags = DRM_NOUVEAU_SYNC_SYNCOBJ,
157 .handle = syncobj,
158 .timeline_value = 0,
159 };
160
161 struct drm_nouveau_exec req = {
162 .channel = (uint32_t)ctx->channel,
163 .push_count = 1,
164 .sig_count = 1,
165 .sig_ptr = (uintptr_t)&sync,
166 .push_ptr = (uintptr_t)&push,
167 };
168
169 int ret = drmCommandWriteRead(dev->fd, DRM_NOUVEAU_EXEC,
170 &req, sizeof(req));
171 ASSERT_EQ(ret, 0);
172
173 ret = drmSyncobjWait(dev->fd, &syncobj, 1, INT64_MAX,
174 DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL);
175 ASSERT_EQ(ret, 0);
176 }
177
178 void
push_macro(uint32_t id,const std::vector<uint32_t> & macro)179 mme_hw_runner::push_macro(uint32_t id, const std::vector<uint32_t> ¯o)
180 {
181 P_MTHD(p, NV9097, LOAD_MME_START_ADDRESS_RAM_POINTER);
182 P_NV9097_LOAD_MME_START_ADDRESS_RAM_POINTER(p, id);
183 P_NV9097_LOAD_MME_START_ADDRESS_RAM(p, 0);
184 P_1INC(p, NV9097, LOAD_MME_INSTRUCTION_RAM_POINTER);
185 P_NV9097_LOAD_MME_INSTRUCTION_RAM_POINTER(p, 0);
186 P_INLINE_ARRAY(p, ¯o[0], macro.size());
187 }
188
189 void
run_macro(const std::vector<uint32_t> & macro,const std::vector<uint32_t> & params)190 mme_hw_runner::run_macro(const std::vector<uint32_t>& macro,
191 const std::vector<uint32_t>& params)
192 {
193 push_macro(0, macro);
194
195 P_1INC(p, NV9097, CALL_MME_MACRO(0));
196 if (params.empty()) {
197 P_NV9097_CALL_MME_MACRO(p, 0, 0);
198 } else {
199 P_INLINE_ARRAY(p, ¶ms[0], params.size());
200 }
201
202 submit_push();
203 }
204
mme_fermi_sim_runner(uint64_t data_addr)205 mme_fermi_sim_runner::mme_fermi_sim_runner(uint64_t data_addr)
206 {
207 memset(&info, 0, sizeof(info));
208 info.cls_eng3d = FERMI_A;
209
210 memset(data_store, 0, sizeof(data_store));
211
212 this->devinfo = &info;
213 this->data_addr = data_addr,
214 this->data = data_store;
215 }
216
~mme_fermi_sim_runner()217 mme_fermi_sim_runner::~mme_fermi_sim_runner()
218 { }
219
220 void
run_macro(const std::vector<uint32_t> & macro,const std::vector<uint32_t> & params)221 mme_fermi_sim_runner::run_macro(const std::vector<uint32_t>& macro,
222 const std::vector<uint32_t>& params)
223 {
224 std::vector<mme_fermi_inst> insts(macro.size());
225 mme_fermi_decode(&insts[0], ¯o[0], macro.size());
226
227 /* First, make a copy of the data and simulate the macro */
228 mme_fermi_sim_mem sim_mem = {
229 .addr = data_addr,
230 .data = data,
231 .size = DATA_BO_SIZE,
232 };
233 const uint32_t* p_params = params.size() ? ¶ms[0] : NULL;
234 mme_fermi_sim(insts.size(), &insts[0],
235 params.size(), p_params,
236 1, &sim_mem);
237 }
238
mme_tu104_sim_runner(uint64_t data_addr)239 mme_tu104_sim_runner::mme_tu104_sim_runner(uint64_t data_addr)
240 {
241 memset(&info, 0, sizeof(info));
242 info.cls_eng3d = TURING_A;
243
244 memset(data_store, 0, sizeof(data_store));
245
246 this->devinfo = &info;
247 this->data_addr = data_addr,
248 this->data = data_store;
249 }
250
~mme_tu104_sim_runner()251 mme_tu104_sim_runner::~mme_tu104_sim_runner()
252 { }
253
254 void
run_macro(const std::vector<uint32_t> & macro,const std::vector<uint32_t> & params)255 mme_tu104_sim_runner::run_macro(const std::vector<uint32_t>& macro,
256 const std::vector<uint32_t>& params)
257 {
258 std::vector<mme_tu104_inst> insts(macro.size());
259 mme_tu104_decode(&insts[0], ¯o[0], macro.size() / 3);
260
261 /* First, make a copy of the data and simulate the macro */
262 mme_tu104_sim_mem sim_mem = {
263 .addr = data_addr,
264 .data = data,
265 .size = DATA_BO_SIZE,
266 };
267 const uint32_t* p_params = params.size() ? ¶ms[0] : NULL;
268 mme_tu104_sim(insts.size(), &insts[0],
269 params.size(), p_params,
270 1, &sim_mem);
271 }
272