1 /*
2 * Copyright © 2022 Collabora Ltd.
3 * SPDX-License-Identifier: MIT
4 */
5 #include "mme_runner.h"
6 #include "mme_tu104_sim.h"
7
8 #include "nv_push_clc597.h"
9
10 class mme_tu104_sim_test : public ::testing::Test, public mme_hw_runner {
11 public:
12 mme_tu104_sim_test();
13 ~mme_tu104_sim_test();
14
15 void SetUp();
16 void test_macro(const mme_builder *b,
17 const std::vector<uint32_t>& macro,
18 const std::vector<uint32_t>& params);
19 };
20
mme_tu104_sim_test()21 mme_tu104_sim_test::mme_tu104_sim_test() :
22 ::testing::Test(),
23 mme_hw_runner()
24 { }
25
~mme_tu104_sim_test()26 mme_tu104_sim_test::~mme_tu104_sim_test()
27 { }
28
29 void
SetUp()30 mme_tu104_sim_test::SetUp()
31 {
32 ASSERT_TRUE(set_up_hw(TURING_A, UINT16_MAX));
33 }
34
35 void
test_macro(const mme_builder * b,const std::vector<uint32_t> & macro,const std::vector<uint32_t> & params)36 mme_tu104_sim_test::test_macro(const mme_builder *b,
37 const std::vector<uint32_t>& macro,
38 const std::vector<uint32_t>& params)
39 {
40 const uint32_t data_dwords = DATA_BO_SIZE / sizeof(uint32_t);
41
42 std::vector<mme_tu104_inst> insts(macro.size() / 3);
43 mme_tu104_decode(&insts[0], ¯o[0], macro.size() / 3);
44
45 /* First, make a copy of the data and simulate the macro */
46 std::vector<uint32_t> sim_data(data, data + (DATA_BO_SIZE / 4));
47 mme_tu104_sim_mem sim_mem = {
48 .addr = data_addr,
49 .data = &sim_data[0],
50 .size = DATA_BO_SIZE,
51 };
52 mme_tu104_sim(insts.size(), &insts[0],
53 params.size(), params.size() ? ¶ms[0] : NULL,
54 1, &sim_mem);
55
56 /* Now run the macro on the GPU */
57 push_macro(0, macro);
58
59 P_1INC(p, NVC597, CALL_MME_MACRO(0));
60 if (params.empty()) {
61 P_NVC597_CALL_MME_MACRO(p, 0, 0);
62 } else {
63 P_INLINE_ARRAY(p, ¶ms[0], params.size());
64 }
65
66 submit_push();
67
68 /* Check the results */
69 for (uint32_t i = 0; i < data_dwords; i++)
70 ASSERT_EQ(data[i], sim_data[i]);
71 }
72
73 static mme_tu104_reg
mme_value_as_reg(mme_value val)74 mme_value_as_reg(mme_value val)
75 {
76 assert(val.type == MME_VALUE_TYPE_REG);
77 return (mme_tu104_reg)(MME_TU104_REG_R0 + val.reg);
78 }
79
/* Smallest possible macro: store one immediate at data_addr and check that
 * the simulator and the GPU agree.
 */
TEST_F(mme_tu104_sim_test, sanity)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   /* A recognizable constant written to the start of the data buffer. */
   const uint32_t marker = 0xc0ffee01;
   mme_store_imm_addr(&b, data_addr, mme_imm(marker));

   const auto macro = mme_builder_finish_vec(&b);

   /* The macro reads no parameters. */
   const std::vector<uint32_t> no_params{};
   test_macro(&b, macro, no_params);
}
94
/* Reads two parameters within a single instruction, through both ALU
 * sources and both output emits, then stores everything for comparison.
 */
TEST_F(mme_tu104_sim_test, multi_param)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value from_load1 = mme_alloc_reg(&b);
   mme_value from_load0 = mme_alloc_reg(&b);

   mme_tu104_asm(&b, inst) {
      /* ALU 0 takes LOAD1 and ALU 1 takes LOAD0, i.e. in swapped order. */
      inst.alu[0].dst = mme_value_as_reg(from_load1);
      inst.alu[0].src[0] = MME_TU104_REG_LOAD1;
      inst.alu[1].dst = mme_value_as_reg(from_load0);
      inst.alu[1].src[0] = MME_TU104_REG_LOAD0;

      /* Output 0 emits LOAD0 to scratch 12, output 1 emits LOAD1 to
       * scratch 35.  NOTE(review): bit 12 of the method immediate is
       * presumably the increment field -- confirm against the ISA docs.
       */
      inst.imm[0] = (1<<12) | (NVC597_SET_MME_SHADOW_SCRATCH(12) >> 2);
      inst.out[0].mthd = MME_TU104_OUT_OP_IMM0;
      inst.out[0].emit = MME_TU104_OUT_OP_LOAD0;
      inst.imm[1] = (1<<12) | (NVC597_SET_MME_SHADOW_SCRATCH(35) >> 2);
      inst.out[1].mthd = MME_TU104_OUT_OP_IMM1;
      inst.out[1].emit = MME_TU104_OUT_OP_LOAD1;
   }

   /* Read back what the two emits wrote. */
   mme_value scratch12 = mme_state(&b, NVC597_SET_MME_SHADOW_SCRATCH(12));
   mme_value scratch35 = mme_state(&b, NVC597_SET_MME_SHADOW_SCRATCH(35));

   mme_store_imm_addr(&b, data_addr + 0, from_load1);
   mme_store_imm_addr(&b, data_addr + 4, from_load0);
   mme_store_imm_addr(&b, data_addr + 8, scratch12);
   mme_store_imm_addr(&b, data_addr + 12, scratch35);

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = { 2581, 3048 };
   test_macro(&b, macro, params);
}
132
/* Mixes parameter loads with a predicated instruction that also reads
 * LOAD0, and runs the macro for four combinations of the first two
 * parameters.
 */
TEST_F(mme_tu104_sim_test, pred_param)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value first = mme_load(&b);
   mme_value maybe_loaded = mme_mov(&b, mme_imm(240));

   /* Only pred_mode is set; the predicate register is left at whatever
    * the builder's default instruction uses.
    */
   mme_tu104_asm(&b, inst) {
      inst.pred_mode = MME_TU104_PRED_TTTT;
      inst.alu[0].dst = mme_value_as_reg(maybe_loaded);
      inst.alu[0].src[0] = MME_TU104_REG_LOAD0;
   }

   mme_value next = mme_load(&b);

   mme_store_imm_addr(&b, data_addr + 0, first);
   mme_store_imm_addr(&b, data_addr + 4, maybe_loaded);
   mme_store_imm_addr(&b, data_addr + 8, next);

   const auto macro = mme_builder_finish_vec(&b);

   /* Bit 0 of `combo` selects zero/non-zero for parameter 0, bit 1 for
    * parameter 1.
    */
   for (uint32_t combo = 0; combo < 4; combo++) {
      reset_push();

      const std::vector<uint32_t> params = {
         (combo & 1) * 2043,
         (combo & 2) * 523,
         2581,
         3048,
      };

      test_macro(&b, macro, params);
   }
}
167
/* Emits imm[0] through output 0 using MME_TU104_OUT_OP_IMM0 as the third
 * dword of two SET_REPORT_SEMAPHORE_A sequences.
 */
TEST_F(mme_tu104_sim_test, out_imm0)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   /* Starts a SET_REPORT_SEMAPHORE_A sequence and emits the address
    * data_addr + offset as two dwords, high half first.
    */
   auto begin_report = [&](uint32_t offset) {
      mme_mthd(&b, NVC597_SET_REPORT_SEMAPHORE_A);
      mme_emit(&b, mme_imm(high32(data_addr + offset)));
      mme_emit(&b, mme_imm(low32(data_addr + offset)));
   };

   begin_report(0);
   mme_tu104_asm(&b, inst) {
      inst.imm[0] = 0x1234;
      inst.out[0].emit = MME_TU104_OUT_OP_IMM0;
   }
   /* NOTE(review): 0x10000000 is presumably the final semaphore control
    * dword that triggers the write -- confirm against the class header.
    */
   mme_emit(&b, mme_imm(0x10000000));

   begin_report(4);
   mme_tu104_asm(&b, inst) {
      inst.imm[0] = 0x8765;
      inst.out[0].emit = MME_TU104_OUT_OP_IMM0;
   }
   mme_emit(&b, mme_imm(0x10000000));

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> no_params{};
   test_macro(&b, macro, no_params);
}
196
/* Emits imm[1] through output 0 using MME_TU104_OUT_OP_IMM1 as the third
 * dword of two SET_REPORT_SEMAPHORE_A sequences.
 */
TEST_F(mme_tu104_sim_test, out_imm1)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   /* Starts a SET_REPORT_SEMAPHORE_A sequence and emits the address
    * data_addr + offset as two dwords, high half first.
    */
   auto begin_report = [&](uint32_t offset) {
      mme_mthd(&b, NVC597_SET_REPORT_SEMAPHORE_A);
      mme_emit(&b, mme_imm(high32(data_addr + offset)));
      mme_emit(&b, mme_imm(low32(data_addr + offset)));
   };

   begin_report(0);
   mme_tu104_asm(&b, inst) {
      inst.imm[1] = 0x1234;
      inst.out[0].emit = MME_TU104_OUT_OP_IMM1;
   }
   /* NOTE(review): 0x10000000 is presumably the final semaphore control
    * dword that triggers the write -- confirm against the class header.
    */
   mme_emit(&b, mme_imm(0x10000000));

   begin_report(4);
   mme_tu104_asm(&b, inst) {
      inst.imm[1] = 0x8765;
      inst.out[0].emit = MME_TU104_OUT_OP_IMM1;
   }
   mme_emit(&b, mme_imm(0x10000000));

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> no_params{};
   test_macro(&b, macro, no_params);
}
225
/* Emits with MME_TU104_OUT_OP_IMMHIGH0, once through output 0 and once
 * through output 1, as the third dword of two SET_REPORT_SEMAPHORE_A
 * sequences.
 */
TEST_F(mme_tu104_sim_test, out_immhigh0)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   /* Starts a SET_REPORT_SEMAPHORE_A sequence and emits the address
    * data_addr + offset as two dwords, high half first.
    */
   auto begin_report = [&](uint32_t offset) {
      mme_mthd(&b, NVC597_SET_REPORT_SEMAPHORE_A);
      mme_emit(&b, mme_imm(high32(data_addr + offset)));
      mme_emit(&b, mme_imm(low32(data_addr + offset)));
   };

   begin_report(0);
   mme_tu104_asm(&b, inst) {
      inst.imm[0] = 0x1234;
      inst.out[0].emit = MME_TU104_OUT_OP_IMMHIGH0;
   }
   /* NOTE(review): 0x10000000 is presumably the final semaphore control
    * dword that triggers the write -- confirm against the class header.
    */
   mme_emit(&b, mme_imm(0x10000000));

   begin_report(4);
   mme_tu104_asm(&b, inst) {
      inst.imm[0] = 0x8765;
      /* Second round goes through output 1 instead of output 0. */
      inst.out[1].emit = MME_TU104_OUT_OP_IMMHIGH0;
   }
   mme_emit(&b, mme_imm(0x10000000));

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> no_params{};
   test_macro(&b, macro, no_params);
}
254
/* Emits with MME_TU104_OUT_OP_IMMHIGH1, once through output 0 and once
 * through output 1, as the third dword of two SET_REPORT_SEMAPHORE_A
 * sequences.
 */
TEST_F(mme_tu104_sim_test, out_immhigh1)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   /* Starts a SET_REPORT_SEMAPHORE_A sequence and emits the address
    * data_addr + offset as two dwords, high half first.
    */
   auto begin_report = [&](uint32_t offset) {
      mme_mthd(&b, NVC597_SET_REPORT_SEMAPHORE_A);
      mme_emit(&b, mme_imm(high32(data_addr + offset)));
      mme_emit(&b, mme_imm(low32(data_addr + offset)));
   };

   begin_report(0);
   mme_tu104_asm(&b, inst) {
      inst.imm[1] = 0x1234;
      inst.out[0].emit = MME_TU104_OUT_OP_IMMHIGH1;
   }
   /* NOTE(review): 0x10000000 is presumably the final semaphore control
    * dword that triggers the write -- confirm against the class header.
    */
   mme_emit(&b, mme_imm(0x10000000));

   begin_report(4);
   mme_tu104_asm(&b, inst) {
      inst.imm[1] = 0x8765;
      /* Second round goes through output 1 instead of output 0. */
      inst.out[1].emit = MME_TU104_OUT_OP_IMMHIGH1;
   }
   mme_emit(&b, mme_imm(0x10000000));

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> no_params{};
   test_macro(&b, macro, no_params);
}
283
/* Emits with MME_TU104_OUT_OP_IMM32 while both imm[0] and imm[1] are set,
 * once through output 0 and once through output 1.
 */
TEST_F(mme_tu104_sim_test, out_imm32)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   /* Starts a SET_REPORT_SEMAPHORE_A sequence and emits the address
    * data_addr + offset as two dwords, high half first.
    */
   auto begin_report = [&](uint32_t offset) {
      mme_mthd(&b, NVC597_SET_REPORT_SEMAPHORE_A);
      mme_emit(&b, mme_imm(high32(data_addr + offset)));
      mme_emit(&b, mme_imm(low32(data_addr + offset)));
   };

   begin_report(0);
   mme_tu104_asm(&b, inst) {
      inst.imm[0] = 0x1234;
      inst.imm[1] = 0x7654;
      inst.out[0].emit = MME_TU104_OUT_OP_IMM32;
   }
   /* NOTE(review): 0x10000000 is presumably the final semaphore control
    * dword that triggers the write -- confirm against the class header.
    */
   mme_emit(&b, mme_imm(0x10000000));

   begin_report(4);
   mme_tu104_asm(&b, inst) {
      inst.imm[0] = 0x1234;
      inst.imm[1] = 0x7654;
      /* Second round goes through output 1 instead of output 0. */
      inst.out[1].emit = MME_TU104_OUT_OP_IMM32;
   }
   mme_emit(&b, mme_imm(0x10000000));

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> no_params{};
   test_macro(&b, macro, no_params);
}
314
/* Uses MME_TU104_REG_IMM32 as an ALU source, with the 32-bit canary split
 * across imm[0] (low 16 bits) and imm[1] (high 16 bits), and stores the
 * result at data_addr.
 */
TEST_F(mme_tu104_sim_test, reg_imm32)
{
   const uint32_t canary = 0xc0ffee01;

   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value v = mme_alloc_reg(&b);

   mme_tu104_asm(&b, i) {
      i.alu[0].dst = mme_value_as_reg(v);
      i.alu[0].op = MME_TU104_ALU_OP_ADD;
      /* Terminated with ';' -- the original ',' chained this assignment
       * to the next one through the comma operator.
       */
      i.alu[0].src[0] = MME_TU104_REG_IMM32;
      i.imm[0] = (uint16_t)canary;
      i.imm[1] = (uint16_t)(canary >> 16);
   }

   mme_store_imm_addr(&b, data_addr, v);

   auto macro = mme_builder_finish_vec(&b);

   /* The macro reads no parameters. */
   std::vector<uint32_t> params;
   test_macro(&b, macro, params);
}
339
/* For every predicate mode in the table, builds a macro whose predicated
 * instruction may overwrite two pre-initialized registers, and runs it
 * with a zero and a non-zero predicate parameter.
 */
TEST_F(mme_tu104_sim_test, pred_alu)
{
   static const mme_tu104_pred pred_modes[] = {
      MME_TU104_PRED_UUUU,
      MME_TU104_PRED_TTTT,
      MME_TU104_PRED_FFFF,
      MME_TU104_PRED_TTUU,
      MME_TU104_PRED_FFUU,
      MME_TU104_PRED_TFUU,
      MME_TU104_PRED_TUUU,
      MME_TU104_PRED_FUUU,
      MME_TU104_PRED_UUTT,
      MME_TU104_PRED_UUTF,
      MME_TU104_PRED_UUTU,
      MME_TU104_PRED_UUFT,
      MME_TU104_PRED_UUFF,
      MME_TU104_PRED_UUFU,
      MME_TU104_PRED_UUUT,
      MME_TU104_PRED_UUUF,
   };

   for (uint32_t m = 0; m < ARRAY_SIZE(pred_modes); m++) {
      mme_builder b;
      mme_builder_init(&b, devinfo);

      /* Values are offset by 100 per mode so each mode is distinguishable
       * in the data buffer.
       */
      const uint32_t base = m * 100;

      mme_value pred = mme_load(&b);
      mme_value v0 = mme_mov(&b, mme_imm(base + 13));
      mme_value v1 = mme_mov(&b, mme_imm(base + 62));

      mme_tu104_asm(&b, inst) {
         inst.pred = mme_value_as_reg(pred);
         inst.pred_mode = pred_modes[m];
         inst.alu[0].dst = mme_value_as_reg(v0);
         inst.alu[0].src[0] = MME_TU104_REG_IMM;
         inst.imm[0] = base + 25;
         inst.alu[1].dst = mme_value_as_reg(v1);
         inst.alu[1].src[0] = MME_TU104_REG_IMM;
         inst.imm[1] = base + 73;
      }

      /* Each mode owns an 8-byte slot in the data buffer. */
      const auto slot_addr = data_addr + m * 8;
      mme_store_imm_addr(&b, slot_addr + 0, v0);
      mme_store_imm_addr(&b, slot_addr + 4, v1);

      const auto macro = mme_builder_finish_vec(&b);

      /* Run once with predicate value 0 and once with 25894. */
      for (uint32_t run = 0; run < 2; run++) {
         reset_push();

         const std::vector<uint32_t> params = { run * 25894 };
         test_macro(&b, macro, params);
      }
   }
}
395
/* For every predicate mode in the table, seeds two shadow-scratch slots,
 * then issues a predicated instruction with two outputs that target those
 * same slots, and compares the resulting scratch contents.
 */
TEST_F(mme_tu104_sim_test, pred_out)
{
   static const mme_tu104_pred pred_modes[] = {
      MME_TU104_PRED_UUUU,
      MME_TU104_PRED_TTTT,
      MME_TU104_PRED_FFFF,
      MME_TU104_PRED_TTUU,
      MME_TU104_PRED_FFUU,
      MME_TU104_PRED_TFUU,
      MME_TU104_PRED_TUUU,
      MME_TU104_PRED_FUUU,
      MME_TU104_PRED_UUTT,
      MME_TU104_PRED_UUTF,
      MME_TU104_PRED_UUTU,
      MME_TU104_PRED_UUFT,
      MME_TU104_PRED_UUFF,
      MME_TU104_PRED_UUFU,
      MME_TU104_PRED_UUUT,
      MME_TU104_PRED_UUUF,
   };

   for (uint32_t m = 0; m < ARRAY_SIZE(pred_modes); m++) {
      mme_builder b;
      mme_builder_init(&b, devinfo);

      mme_value pred = mme_load(&b);

      /* Seed scratch slot 2m with m*100 + 25. */
      mme_tu104_asm(&b, inst) {
         inst.imm[0] = (1<<12) | (NVC597_SET_MME_SHADOW_SCRATCH(m*2 + 0) >> 2);
         inst.imm[1] = m * 100 + 25;
         inst.out[0].mthd = MME_TU104_OUT_OP_IMM0;
         inst.out[0].emit = MME_TU104_OUT_OP_IMM1;
      }

      /* Seed scratch slot 2m+1 with m*100 + 75. */
      mme_tu104_asm(&b, inst) {
         inst.imm[0] = (1<<12) | (NVC597_SET_MME_SHADOW_SCRATCH(m*2 + 1) >> 2);
         inst.imm[1] = m * 100 + 75;
         inst.out[0].mthd = MME_TU104_OUT_OP_IMM0;
         inst.out[0].emit = MME_TU104_OUT_OP_IMM1;
      }

      /* The predicated instruction: output 0 uses imm[0] as method and
       * imm[1] as data, output 1 uses them the other way round.
       */
      mme_tu104_asm(&b, inst) {
         inst.pred = mme_value_as_reg(pred);
         inst.pred_mode = pred_modes[m];
         inst.imm[0] = (1<<12) | (NVC597_SET_MME_SHADOW_SCRATCH(m*2 + 0) >> 2);
         inst.imm[1] = (1<<12) | (NVC597_SET_MME_SHADOW_SCRATCH(m*2 + 1) >> 2);
         inst.out[0].mthd = MME_TU104_OUT_OP_IMM0;
         inst.out[0].emit = MME_TU104_OUT_OP_IMM1;
         inst.out[1].mthd = MME_TU104_OUT_OP_IMM1;
         inst.out[1].emit = MME_TU104_OUT_OP_IMM0;
      }

      /* Read both scratch slots back and store them in this mode's
       * 8-byte slot of the data buffer.
       */
      mme_value scratch_even = mme_state(&b, NVC597_SET_MME_SHADOW_SCRATCH(m*2 + 0));
      mme_value scratch_odd = mme_state(&b, NVC597_SET_MME_SHADOW_SCRATCH(m*2 + 1));

      const auto slot_addr = data_addr + m * 8;
      mme_store_imm_addr(&b, slot_addr + 0, scratch_even);
      mme_store_imm_addr(&b, slot_addr + 4, scratch_odd);

      const auto macro = mme_builder_finish_vec(&b);

      /* Run once with predicate value 0 and once with 25894. */
      for (uint32_t run = 0; run < 2; run++) {
         reset_push();

         const std::vector<uint32_t> params = { run * 25894 };
         test_macro(&b, macro, params);
      }
   }
}
466
/* Adds two parameters with mme_add() and stores the result at data_addr. */
TEST_F(mme_tu104_sim_test, add)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value lhs = mme_load(&b);
   mme_value rhs = mme_load(&b);
   mme_value total = mme_add(&b, lhs, rhs);
   mme_store_imm_addr(&b, data_addr, total);

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = { 25, 138 };
   test_macro(&b, macro, params);
}
485
/* Exercises mme_add() with immediates in either operand position (and
 * against mme_zero()), using immediates 1, 0xffffffff and 0xffff8000,
 * for several parameter values.
 */
TEST_F(mme_tu104_sim_test, add_imm)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value x = mme_load(&b);

   /* Nine sums, built in the same order as they are stored below. */
   const mme_value sums[] = {
      /* register + immediate */
      mme_add(&b, x, mme_imm(0x00000001)),
      mme_add(&b, x, mme_imm(0xffffffff)),
      mme_add(&b, x, mme_imm(0xffff8000)),
      /* immediate + register */
      mme_add(&b, mme_imm(0x00000001), x),
      mme_add(&b, mme_imm(0xffffffff), x),
      mme_add(&b, mme_imm(0xffff8000), x),
      /* zero + immediate */
      mme_add(&b, mme_zero(), mme_imm(0x00000001)),
      mme_add(&b, mme_zero(), mme_imm(0xffffffff)),
      mme_add(&b, mme_zero(), mme_imm(0xffff8000)),
   };

   /* One dword per sum, packed from data_addr upwards. */
   for (uint32_t s = 0; s < ARRAY_SIZE(sums); s++)
      mme_store_imm_addr(&b, data_addr + s * 4, sums[s]);

   const auto macro = mme_builder_finish_vec(&b);

   const uint32_t inputs[] = {
      0x0000ffff,
      0x00008000,
      0x0001ffff,
      0xffffffff,
   };

   for (const uint32_t input : inputs) {
      reset_push();

      const std::vector<uint32_t> params = { input };
      test_macro(&b, macro, params);
   }
}
531
/* 64-bit addition through mme_add64(): four parameters are loaded as two
 * (lo, hi) pairs and both halves of the sum are stored.
 */
TEST_F(mme_tu104_sim_test, addc)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   /* Braced initialization loads the first listed member first. */
   struct mme_value64 lhs = { mme_load(&b), mme_load(&b) };
   struct mme_value64 rhs = { mme_load(&b), mme_load(&b) };

   struct mme_value64 total = mme_add64(&b, lhs, rhs);

   mme_store_imm_addr(&b, data_addr + 0, total.lo);
   mme_store_imm_addr(&b, data_addr + 4, total.hi);

   const auto macro = mme_builder_finish_vec(&b);

   /* Parameters in load order: first pair, then second pair. */
   const std::vector<uint32_t> params = {
      0x80008650, 0x596,
      0x8000a8f6, 0x836,
   };
   test_macro(&b, macro, params);
}
555
/* Hand-assembled ADD/ADDC pairs with immediate second operands: ALU 0 adds
 * imm[0] to the low parameter word and ALU 1 add-with-carries imm[1] to
 * the high word, for four immediate combinations and eight 64-bit inputs.
 */
TEST_F(mme_tu104_sim_test, addc_imm)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value x_lo = mme_load(&b);
   mme_value x_hi = mme_load(&b);

   /* Allocates a (lo, hi) register pair and emits one instruction that
    * computes lo = x_lo + lo_imm and hi = x_hi ADDC hi_imm.
    */
   auto add_imm_pair = [&](uint16_t lo_imm, uint16_t hi_imm) {
      struct mme_value64 sum = { mme_alloc_reg(&b), mme_alloc_reg(&b) };

      mme_tu104_asm(&b, inst) {
         inst.alu[0].dst = mme_value_as_reg(sum.lo);
         inst.alu[0].op = MME_TU104_ALU_OP_ADD;
         inst.alu[0].src[0] = mme_value_as_reg(x_lo);
         inst.alu[0].src[1] = MME_TU104_REG_IMM;
         inst.imm[0] = lo_imm;
         inst.alu[1].dst = mme_value_as_reg(sum.hi);
         inst.alu[1].op = MME_TU104_ALU_OP_ADDC;
         inst.alu[1].src[0] = mme_value_as_reg(x_hi);
         inst.alu[1].src[1] = MME_TU104_REG_IMM;
         inst.imm[1] = hi_imm;
      }

      return sum;
   };

   const struct mme_value64 v1 = add_imm_pair(0x0001, 0x0000);
   const struct mme_value64 v2 = add_imm_pair(0x0000, 0x0001);
   const struct mme_value64 v3 = add_imm_pair(0x0000, 0xffff);
   const struct mme_value64 v4 = add_imm_pair(0x0000, 0x8000);

   mme_store_imm_addr(&b, data_addr + 0,  v1.lo);
   mme_store_imm_addr(&b, data_addr + 4,  v1.hi);
   mme_store_imm_addr(&b, data_addr + 8,  v2.lo);
   mme_store_imm_addr(&b, data_addr + 12, v2.hi);
   mme_store_imm_addr(&b, data_addr + 16, v3.lo);
   mme_store_imm_addr(&b, data_addr + 20, v3.hi);
   mme_store_imm_addr(&b, data_addr + 24, v4.lo);
   mme_store_imm_addr(&b, data_addr + 28, v4.hi);

   const auto macro = mme_builder_finish_vec(&b);

   const uint64_t inputs[] = {
      0x0000ffffffffffffull,
      0x0000ffffffff8000ull,
      0x0000ffff00000000ull,
      0x0000800000000000ull,
      0x00008000ffffffffull,
      0x0001ffff00000000ull,
      0xffffffff00000000ull,
      0xffffffffffffffffull,
   };

   for (const uint64_t input : inputs) {
      reset_push();

      /* Low word first, matching the order of the two loads above. */
      std::vector<uint32_t> params;
      params.push_back(low32(input));
      params.push_back(high32(input));

      test_macro(&b, macro, params);
   }
}
656
/* Subtracts the second parameter from the first with mme_sub() and stores
 * the result at data_addr.
 */
TEST_F(mme_tu104_sim_test, sub)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value minuend = mme_load(&b);
   mme_value subtrahend = mme_load(&b);
   mme_value difference = mme_sub(&b, minuend, subtrahend);
   mme_store_imm_addr(&b, data_addr, difference);

   const auto macro = mme_builder_finish_vec(&b);

   /* 25 - 138: the first operand is the smaller one. */
   const std::vector<uint32_t> params = { 25, 138 };
   test_macro(&b, macro, params);
}
675
/* 64-bit subtraction through mme_sub64(): four parameters are loaded as
 * two (lo, hi) pairs and both halves of the difference are stored.
 */
TEST_F(mme_tu104_sim_test, subb)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   /* Braced initialization loads the first listed member first. */
   struct mme_value64 lhs = { mme_load(&b), mme_load(&b) };
   struct mme_value64 rhs = { mme_load(&b), mme_load(&b) };

   struct mme_value64 difference = mme_sub64(&b, lhs, rhs);

   mme_store_imm_addr(&b, data_addr + 0, difference.lo);
   mme_store_imm_addr(&b, data_addr + 4, difference.hi);

   const auto macro = mme_builder_finish_vec(&b);

   /* Parameters in load order: first pair, then second pair. */
   const std::vector<uint32_t> params = {
      0x80008650, 0x596,
      0x8000a8f6, 0x836,
   };
   test_macro(&b, macro, params);
}
699
/* Multiplies two parameters with mme_mul() and stores the result at
 * data_addr.
 */
TEST_F(mme_tu104_sim_test, mul)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value lhs = mme_load(&b);
   mme_value rhs = mme_load(&b);
   mme_value product = mme_mul(&b, lhs, rhs);
   mme_store_imm_addr(&b, data_addr, product);

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = { 25, 138 };
   test_macro(&b, macro, params);
}
718
/* Exercises mme_mul() with immediates 1, 0xffffffff and 0xffff8000 in
 * either operand position, for positive and negative parameter values.
 */
TEST_F(mme_tu104_sim_test, mul_imm)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value x = mme_load(&b);

   /* Six products, built in the same order as they are stored below. */
   const mme_value products[] = {
      /* register * immediate */
      mme_mul(&b, x, mme_imm(0x00000001)),
      mme_mul(&b, x, mme_imm(0xffffffff)),
      mme_mul(&b, x, mme_imm(0xffff8000)),
      /* immediate * register */
      mme_mul(&b, mme_imm(0x00000001), x),
      mme_mul(&b, mme_imm(0xffffffff), x),
      mme_mul(&b, mme_imm(0xffff8000), x),
   };

   /* One dword per product, packed from data_addr upwards. */
   for (uint32_t p = 0; p < ARRAY_SIZE(products); p++)
      mme_store_imm_addr(&b, data_addr + p * 4, products[p]);

   const auto macro = mme_builder_finish_vec(&b);

   const int32_t inputs[] = { 1, -5, -1, 5 };

   for (const int32_t input : inputs) {
      reset_push();

      /* Negative inputs are passed as their 32-bit two's-complement
       * pattern.
       */
      std::vector<uint32_t> params;
      params.push_back(input);

      test_macro(&b, macro, params);
   }
}
753
/* 32x32 -> 64-bit multiply through mme_imul_32x32_64(); both halves of
 * the product are stored.
 */
TEST_F(mme_tu104_sim_test, mul_mulh)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value lhs = mme_load(&b);
   mme_value rhs = mme_load(&b);

   struct mme_value64 wide = mme_imul_32x32_64(&b, lhs, rhs);

   mme_store_imm_addr(&b, data_addr + 0, wide.lo);
   mme_store_imm_addr(&b, data_addr + 4, wide.hi);

   const auto macro = mme_builder_finish_vec(&b);

   /* The first operand has its top bit set. */
   const std::vector<uint32_t> params = { 0x80008650, 0x596 };
   test_macro(&b, macro, params);
}
775
776 static inline struct mme_value
mme_mulu(struct mme_builder * b,struct mme_value x,struct mme_value y)777 mme_mulu(struct mme_builder *b, struct mme_value x, struct mme_value y)
778 {
779 return mme_alu(b, MME_ALU_OP_MULU, x, y);
780 }
781
/* Same immediate matrix as the mul_imm test, but using the MULU-based
 * mme_mulu() helper.
 */
TEST_F(mme_tu104_sim_test, mulu_imm)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value x = mme_load(&b);

   /* Six products, built in the same order as they are stored below. */
   const mme_value products[] = {
      /* register * immediate */
      mme_mulu(&b, x, mme_imm(0x00000001)),
      mme_mulu(&b, x, mme_imm(0xffffffff)),
      mme_mulu(&b, x, mme_imm(0xffff8000)),
      /* immediate * register */
      mme_mulu(&b, mme_imm(0x00000001), x),
      mme_mulu(&b, mme_imm(0xffffffff), x),
      mme_mulu(&b, mme_imm(0xffff8000), x),
   };

   /* One dword per product, packed from data_addr upwards. */
   for (uint32_t p = 0; p < ARRAY_SIZE(products); p++)
      mme_store_imm_addr(&b, data_addr + p * 4, products[p]);

   const auto macro = mme_builder_finish_vec(&b);

   const int32_t inputs[] = { 1, -5, -1, 5 };

   for (const int32_t input : inputs) {
      reset_push();

      /* Negative inputs are passed as their 32-bit two's-complement
       * pattern.
       */
      std::vector<uint32_t> params;
      params.push_back(input);

      test_macro(&b, macro, params);
   }
}
816
/* 32x32 -> 64-bit multiply through mme_umul_32x32_64(); both halves of
 * the product are stored.
 */
TEST_F(mme_tu104_sim_test, mulu_mulh)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value lhs = mme_load(&b);
   mme_value rhs = mme_load(&b);

   struct mme_value64 wide = mme_umul_32x32_64(&b, lhs, rhs);

   mme_store_imm_addr(&b, data_addr + 0, wide.lo);
   mme_store_imm_addr(&b, data_addr + 4, wide.hi);

   const auto macro = mme_builder_finish_vec(&b);

   /* The first operand has its top bit set. */
   const std::vector<uint32_t> params = { 0x80008650, 0x596 };
   test_macro(&b, macro, params);
}
838
/* Applies mme_clz() to a single parameter and stores the result at
 * data_addr.
 */
TEST_F(mme_tu104_sim_test, clz)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value input = mme_load(&b);
   mme_value leading = mme_clz(&b, input);
   mme_store_imm_addr(&b, data_addr, leading);

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = { 0x00406fe0 };
   test_macro(&b, macro, params);
}
854
855 #define SHIFT_TEST(op) \
856 TEST_F(mme_tu104_sim_test, op) \
857 { \
858 mme_builder b; \
859 mme_builder_init(&b, devinfo); \
860 \
861 mme_value val = mme_load(&b); \
862 mme_value shift1 = mme_load(&b); \
863 mme_value shift2 = mme_load(&b); \
864 mme_store_imm_addr(&b, data_addr + 0, mme_##op(&b, val, shift1)); \
865 mme_store_imm_addr(&b, data_addr + 4, mme_##op(&b, val, shift2)); \
866 \
867 auto macro = mme_builder_finish_vec(&b); \
868 \
869 std::vector<uint32_t> params; \
870 params.push_back(0x0c406fe0); \
871 params.push_back(5); \
872 params.push_back(51); \
873 \
874 test_macro(&b, macro, params); \
875 }
876
877 SHIFT_TEST(sll)
SHIFT_TEST(srl)878 SHIFT_TEST(srl)
879 SHIFT_TEST(sra)
880
881 #undef SHIFT_TEST
882
883 TEST_F(mme_tu104_sim_test, bfe)
884 {
885 const uint32_t canary = 0xc0ffee01;
886
887 mme_builder b;
888 mme_builder_init(&b, devinfo);
889
890 mme_value val = mme_load(&b);
891 mme_value pos = mme_load(&b);
892
893 mme_store_imm_addr(&b, data_addr + 0, mme_bfe(&b, val, pos, 1), true);
894 mme_store_imm_addr(&b, data_addr + 4, mme_bfe(&b, val, pos, 2), true);
895 mme_store_imm_addr(&b, data_addr + 8, mme_bfe(&b, val, pos, 5), true);
896
897 auto macro = mme_builder_finish_vec(&b);
898
899 for (unsigned i = 0; i < 31; i++) {
900 std::vector<uint32_t> params;
901 params.push_back(canary);
902 params.push_back(i);
903
904 test_macro(&b, macro, params);
905
906 ASSERT_EQ(data[0], (canary >> i) & 0x1);
907 ASSERT_EQ(data[1], (canary >> i) & 0x3);
908 ASSERT_EQ(data[2], (canary >> i) & 0x1f);
909 }
910 }
911
/* Applies mme_not() to a single parameter and stores the result at
 * data_addr.
 */
TEST_F(mme_tu104_sim_test, not)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value input = mme_load(&b);
   mme_value inverted = mme_not(&b, input);
   mme_store_imm_addr(&b, data_addr + 0, inverted);

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = { 0x0c406fe0 };
   test_macro(&b, macro, params);
}
928
929 #define BITOP_TEST(op) \
930 TEST_F(mme_tu104_sim_test, op) \
931 { \
932 mme_builder b; \
933 mme_builder_init(&b, devinfo); \
934 \
935 mme_value x = mme_load(&b); \
936 mme_value y = mme_load(&b); \
937 mme_value v1 = mme_##op(&b, x, y); \
938 mme_value v2 = mme_##op(&b, x, mme_imm(0xffff8000)); \
939 mme_value v3 = mme_##op(&b, x, mme_imm(0xffffffff)); \
940 mme_store_imm_addr(&b, data_addr + 0, v1); \
941 mme_store_imm_addr(&b, data_addr + 4, v2); \
942 mme_store_imm_addr(&b, data_addr + 8, v3); \
943 \
944 auto macro = mme_builder_finish_vec(&b); \
945 \
946 std::vector<uint32_t> params; \
947 params.push_back(0x0c406fe0); \
948 params.push_back(0x00fff0c0); \
949 \
950 test_macro(&b, macro, params); \
951 }
952
953 BITOP_TEST(and)
BITOP_TEST(and_not)954 BITOP_TEST(and_not)
955 BITOP_TEST(nand)
956 BITOP_TEST(or)
957 BITOP_TEST(xor)
958
959 #undef BITOP_TEST
960
961 TEST_F(mme_tu104_sim_test, merge)
962 {
963 mme_builder b;
964 mme_builder_init(&b, devinfo);
965
966 mme_value x = mme_load(&b);
967 mme_value y = mme_load(&b);
968
969 mme_value m1 = mme_merge(&b, x, y, 12, 12, 20);
970 mme_value m2 = mme_merge(&b, x, y, 12, 8, 20);
971 mme_value m3 = mme_merge(&b, x, y, 8, 12, 20);
972 mme_value m4 = mme_merge(&b, x, y, 12, 16, 8);
973 mme_value m5 = mme_merge(&b, x, y, 24, 12, 8);
974
975 mme_store_imm_addr(&b, data_addr + 0, m1);
976 mme_store_imm_addr(&b, data_addr + 4, m2);
977 mme_store_imm_addr(&b, data_addr + 8, m3);
978 mme_store_imm_addr(&b, data_addr + 12, m4);
979 mme_store_imm_addr(&b, data_addr + 16, m5);
980
981 auto macro = mme_builder_finish_vec(&b);
982
983 std::vector<uint32_t> params;
984 params.push_back(0x0c406fe0);
985 params.push_back(0x76543210u);
986
987 test_macro(&b, macro, params);
988 }
989
990 #define COMPARISON_TEST(op) \
991 TEST_F(mme_tu104_sim_test, op) \
992 { \
993 mme_builder b; \
994 mme_builder_init(&b, devinfo); \
995 \
996 mme_value x = mme_load(&b); \
997 mme_value y = mme_load(&b); \
998 mme_value z = mme_load(&b); \
999 mme_value w = mme_load(&b); \
1000 \
1001 mme_value v1 = mme_##op(&b, x, y); \
1002 mme_value v2 = mme_##op(&b, y, x); \
1003 mme_value v3 = mme_##op(&b, y, z); \
1004 mme_value v4 = mme_##op(&b, z, y); \
1005 mme_value v5 = mme_##op(&b, w, z); \
1006 mme_value v6 = mme_##op(&b, z, w); \
1007 mme_value v7 = mme_##op(&b, w, w); \
1008 \
1009 mme_store_imm_addr(&b, data_addr + 0, v1); \
1010 mme_store_imm_addr(&b, data_addr + 4, v2); \
1011 mme_store_imm_addr(&b, data_addr + 8, v3); \
1012 mme_store_imm_addr(&b, data_addr + 12, v4); \
1013 mme_store_imm_addr(&b, data_addr + 16, v5); \
1014 mme_store_imm_addr(&b, data_addr + 20, v6); \
1015 mme_store_imm_addr(&b, data_addr + 24, v7); \
1016 \
1017 auto macro = mme_builder_finish_vec(&b); \
1018 \
1019 std::vector<uint32_t> params; \
1020 params.push_back(-5); \
1021 params.push_back(-10); \
1022 params.push_back(5); \
1023 params.push_back(10); \
1024 \
1025 test_macro(&b, macro, params); \
1026 }
1027
1028 COMPARISON_TEST(slt)
COMPARISON_TEST(sltu)1029 COMPARISON_TEST(sltu)
1030 COMPARISON_TEST(sle)
1031 COMPARISON_TEST(sleu)
1032 COMPARISON_TEST(seq)
1033
1034 #undef COMPARISON_TEST
1035
1036 static inline void
1037 mme_inc_whole_inst(mme_builder *b, mme_value val)
1038 {
1039 mme_tu104_asm(b, i) {
1040 i.alu[0].dst = mme_value_as_reg(val);
1041 i.alu[0].op = MME_TU104_ALU_OP_ADD;
1042 i.alu[0].src[0] = mme_value_as_reg(val);
1043 i.alu[0].src[1] = MME_TU104_REG_IMM;
1044 i.imm[0] = 1;
1045 }
1046 }
1047
/* Runs an mme_loop() `count` times, adding `count` to an accumulator on
 * each iteration, then increments a second register once after the loop.
 * Expected results: accumulator == count * count, second register == 1.
 */
TEST_F(mme_tu104_sim_test, loop)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value count = mme_load(&b);

   mme_value accum = mme_mov(&b, mme_zero());
   mme_value after = mme_mov(&b, mme_zero());

   mme_loop(&b, count) {
      mme_tu104_asm(&b, inst) { } /* noop */
      mme_add_to(&b, accum, accum, count);
   }
   mme_add_to(&b, after, after, mme_imm(1));

   /* Three empty instructions follow the post-loop increment. */
   for (int pad = 0; pad < 3; pad++) {
      mme_tu104_asm(&b, inst) { } /* noop */
   }

   mme_store_imm_addr(&b, data_addr + 0, count);
   mme_store_imm_addr(&b, data_addr + 4, accum);
   mme_store_imm_addr(&b, data_addr + 8, after);

   const auto macro = mme_builder_finish_vec(&b);

   /* Includes a zero trip count. */
   uint32_t counts[] = {0, 1, 5, 9};

   for (uint32_t i = 0; i < ARRAY_SIZE(counts); i++) {
      reset_push();

      const std::vector<uint32_t> params = { counts[i] };
      test_macro(&b, macro, params);

      ASSERT_EQ(data[0], counts[i]);
      ASSERT_EQ(data[1], counts[i] * counts[i]);
      ASSERT_EQ(data[2], 1);
   }
}
1087
/* Issues a JAL with immediate (1 << 15) | 6 in front of ten single-
 * instruction increments of x and expects x to end up as 5, i.e. only
 * half of the increments execute.
 */
TEST_F(mme_tu104_sim_test, jal)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value x = mme_mov(&b, mme_zero());
   mme_value y = mme_mov(&b, mme_zero());

   /* NOTE(review): the low bits (6) are presumably the jump offset in
    * instructions and bit 15 a control flag -- confirm against the ISA.
    */
   mme_tu104_asm(&b, inst) {
      inst.alu[0].op = MME_TU104_ALU_OP_JAL;
      inst.imm[0] = (1 << 15) | 6;
   }

   for (uint32_t n = 0; n < 10; n++)
      mme_inc_whole_inst(&b, x);

   /* Disabled second half of the test, kept as found; with it disabled,
    * y is stored below without ever being incremented.
    */
//   mme_tu104_asm(&b, i) {
//      i.alu[0].op = MME_TU104_ALU_OP_JAL;
//      i.imm[0] = 6;
//   }
//
//   for (uint32_t j = 0; j < 10; j++)
//      mme_inc_whole_inst(&b, y);

   mme_store_imm_addr(&b, data_addr + 0, x);
   mme_store_imm_addr(&b, data_addr + 4, y);

   const auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> no_params{};
   test_macro(&b, macro, no_params);
   ASSERT_EQ(data[0], 5);
}
1121
/*
 * Hand-assembled forward conditional branch (BEQ, offset 6) placed in
 * front of ten single-instruction increments, one per register.
 *
 * There are no explicit expectations here: the outcome is only checked
 * by test_macro(), which compares the GPU result with the simulator.
 * NOTE(review): the BEQ sources are left at the assembler defaults;
 * presumably that makes the comparison true so the branch is taken --
 * confirm.
 */
TEST_F(mme_tu104_sim_test, bxx_fwd)
{
   static const uint32_t num_vals = 10;

   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value vals[num_vals];
   for (uint32_t n = 0; n < num_vals; n++)
      vals[n] = mme_mov(&b, mme_zero());

   mme_tu104_asm(&b, i) {
      i.alu[0].op = MME_TU104_ALU_OP_BEQ;
      i.imm[0] = (1 << 15) | 6;
   }

   for (uint32_t n = 0; n < num_vals; n++)
      mme_inc_whole_inst(&b, vals[n]);

   /* Dump every register to consecutive dwords of the data buffer. */
   for (uint32_t n = 0; n < num_vals; n++)
      mme_store_imm_addr(&b, data_addr + n * 4, vals[n]);

   auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params;
   test_macro(&b, macro, params);
}
1147
/*
 * Hand-assembled backward branch.
 *
 * Instruction layout after the register setup:
 *   JAL +12            jumps over the first increment run to the BEQ
 *   10 x increment     vals[0..9]
 *   JAL +2             hops over the BEQ once the run has finished
 *   BEQ -8             branches back into the first increment run
 *   5 x increment      vals[10..14]
 *
 * The assertions expect vals[0..2] to stay 0 and vals[3..14] to be 1,
 * i.e. the backward branch lands on the increment of vals[3].
 */
TEST_F(mme_tu104_sim_test, bxx_bwd)
{
   static const uint32_t num_vals = 15;
   static const uint32_t first_run = 10;

   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value vals[num_vals];
   for (uint32_t n = 0; n < num_vals; n++)
      vals[n] = mme_mov(&b, mme_zero());

   mme_tu104_asm(&b, i) {
      i.alu[0].op = MME_TU104_ALU_OP_JAL;
      i.imm[0] = (1 << 15) | 12;
   }

   for (uint32_t n = 0; n < first_run; n++)
      mme_inc_whole_inst(&b, vals[n]);

   mme_tu104_asm(&b, i) {
      i.alu[0].op = MME_TU104_ALU_OP_JAL;
      i.imm[0] = (1 << 15) | 2;
   }

   mme_tu104_asm(&b, i) {
      i.alu[0].op = MME_TU104_ALU_OP_BEQ;
      /* negative offset, masked to the low 13 bits of the immediate */
      i.imm[0] = (1 << 15) | ((-8) & 0x1fff);
   }

   for (uint32_t n = first_run; n < num_vals; n++)
      mme_inc_whole_inst(&b, vals[n]);

   for (uint32_t n = 0; n < num_vals; n++)
      mme_store_imm_addr(&b, data_addr + n * 4, vals[n]);

   auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params;
   test_macro(&b, macro, params);

   /* The first three increments are never reached; all others run once. */
   for (uint32_t n = 0; n < num_vals; n++)
      ASSERT_EQ(data[n], n < 3 ? 0 : 1);
}
1190
/*
 * A hand-assembled BEQ whose immediate is (1 << 15) | 0x1000 is expected
 * to end the macro: the data buffer is zeroed first, and the increments
 * and stores emitted after the branch must not become visible.
 */
TEST_F(mme_tu104_sim_test, bxx_exit)
{
   static const uint32_t num_vals = 10;

   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value vals[num_vals];
   for (uint32_t n = 0; n < num_vals; n++)
      vals[n] = mme_mov(&b, mme_zero());

   /* Known starting state: zero the dwords checked at the end. */
   for (uint32_t n = 0; n < num_vals; n++)
      mme_store_imm_addr(&b, data_addr + n * 4, mme_imm(0));

   mme_tu104_asm(&b, i) {
      i.alu[0].op = MME_TU104_ALU_OP_BEQ;
      i.imm[0] = (1 << 15) | 0x1000;
   }

   /* Nothing emitted from here on should have a visible effect. */
   for (uint32_t n = 0; n < num_vals; n++)
      mme_inc_whole_inst(&b, vals[n]);

   for (uint32_t n = 0; n < num_vals; n++)
      mme_store_imm_addr(&b, data_addr + n * 4, vals[n]);

   const std::vector<uint32_t> params;

   auto macro = mme_builder_finish_vec(&b);
   test_macro(&b, macro, params);

   for (uint32_t n = 0; n < num_vals; n++)
      ASSERT_EQ(data[n], 0);
}
1224
/*
 * Unconditional mme_exit(): the data buffer is zeroed, the macro exits,
 * and the register writes and stores emitted after the exit are expected
 * to leave the buffer untouched.
 */
TEST_F(mme_tu104_sim_test, mme_exit)
{
   static const uint32_t num_vals = 10;

   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value vals[num_vals];
   for (uint32_t n = 0; n < num_vals; n++)
      vals[n] = mme_mov(&b, mme_zero());

   for (uint32_t n = 0; n < num_vals; n++)
      mme_store_imm_addr(&b, data_addr + n * 4, mme_imm(0));

   /* Stop the macro here. */
   mme_exit(&b);

   /* Everything below the exit must stay invisible. */
   for (uint32_t n = 0; n < num_vals; n++)
      vals[n] = mme_mov(&b, mme_imm(n));

   for (uint32_t n = 0; n < num_vals; n++)
      mme_store_imm_addr(&b, data_addr + n * 4, vals[n]);

   const std::vector<uint32_t> params;

   auto macro = mme_builder_finish_vec(&b);
   test_macro(&b, macro, params);

   for (uint32_t n = 0; n < num_vals; n++)
      ASSERT_EQ(data[n], 0);
}
1257
/*
 * Conditional exit.
 *
 * A first mme_exit_if() compares 0 == 1 and must not fire.  Registers
 * are then loaded with 0..9 and stored one by one, each store preceded
 * by an exit-if "5 <= value".  The expected buffer is therefore
 * 0, 1, 2, 3, 4 followed by the zeros written at the start.
 */
TEST_F(mme_tu104_sim_test, mme_exit_if)
{
   static const uint32_t num_vals = 10;

   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value vals[num_vals];
   for (uint32_t n = 0; n < num_vals; n++)
      vals[n] = mme_mov(&b, mme_zero());

   for (uint32_t n = 0; n < num_vals; n++)
      mme_store_imm_addr(&b, data_addr + n * 4, mme_imm(0));

   /* Condition is false, so this is expected to fall through. */
   mme_exit_if(&b, ieq, mme_zero(), mme_imm(1));

   for (uint32_t n = 0; n < num_vals; n++)
      vals[n] = mme_mov(&b, mme_imm(n));

   for (uint32_t n = 0; n < num_vals; n++) {
      /* Leave the macro as soon as the value reaches 5. */
      mme_exit_if(&b, ile, mme_imm(5), vals[n]);
      mme_store_imm_addr(&b, data_addr + n * 4, vals[n]);
   }

   const std::vector<uint32_t> params;

   auto macro = mme_builder_finish_vec(&b);
   test_macro(&b, macro, params);

   for (uint32_t n = 0; n < num_vals; n++)
      ASSERT_EQ(data[n], n < 5 ? n : 0);
}
1291
/*
 * Host-side reference implementations of the comparison operators, used
 * by the IF_TEST and WHILE_TEST macros to compute expected results via
 * c_##op.  The "i" variants compare their operands as signed 32-bit
 * integers, the "u" variants as unsigned 32-bit integers, so callers may
 * pass the same raw uint32_t values to either flavour.
 */
static bool c_ilt(int32_t x, int32_t y) { return x < y; }
static bool c_ult(uint32_t x, uint32_t y) { return x < y; }
static bool c_ile(int32_t x, int32_t y) { return x <= y; }
static bool c_ule(uint32_t x, uint32_t y) { return x <= y; }
static bool c_ieq(int32_t x, int32_t y) { return x == y; }
static bool c_ige(int32_t x, int32_t y) { return x >= y; }
static bool c_uge(uint32_t x, uint32_t y) { return x >= y; }
static bool c_igt(int32_t x, int32_t y) { return x > y; }
static bool c_ugt(uint32_t x, uint32_t y) { return x > y; }
static bool c_ine(int32_t x, int32_t y) { return x != y; }
1302
1303 #define IF_TEST(op) \
1304 TEST_F(mme_tu104_sim_test, if_##op) \
1305 { \
1306 mme_builder b; \
1307 mme_builder_init(&b, devinfo); \
1308 \
1309 mme_value x = mme_load(&b); \
1310 mme_value y = mme_load(&b); \
1311 mme_value i = mme_mov(&b, mme_zero()); \
1312 \
1313 mme_start_if_##op(&b, x, y); \
1314 { \
1315 mme_add_to(&b, i, i, mme_imm(1)); \
1316 mme_add_to(&b, i, i, mme_imm(1)); \
1317 } \
1318 mme_end_if(&b); \
1319 mme_add_to(&b, i, i, mme_imm(1)); \
1320 mme_add_to(&b, i, i, mme_imm(1)); \
1321 mme_add_to(&b, i, i, mme_imm(1)); \
1322 \
1323 mme_store_imm_addr(&b, data_addr + 0, i); \
1324 \
1325 auto macro = mme_builder_finish_vec(&b); \
1326 \
1327 uint32_t vals[] = {23, 56, (uint32_t)-5, (uint32_t)-10, 56, 14}; \
1328 \
1329 for (uint32_t i = 0; i < ARRAY_SIZE(vals) - 1; i++) { \
1330 reset_push(); \
1331 \
1332 std::vector<uint32_t> params; \
1333 params.push_back(vals[i + 0]); \
1334 params.push_back(vals[i + 1]); \
1335 \
1336 test_macro(&b, macro, params); \
1337 \
1338 ASSERT_EQ(data[0], c_##op(params[0], params[1]) ? 5 : 3); \
1339 } \
1340 }
1341
1342 IF_TEST(ilt)
IF_TEST(ult)1343 IF_TEST(ult)
1344 IF_TEST(ile)
1345 IF_TEST(ule)
1346 IF_TEST(ieq)
1347 IF_TEST(ige)
1348 IF_TEST(uge)
1349 IF_TEST(igt)
1350 IF_TEST(ugt)
1351 IF_TEST(ine)
1352
1353 #undef IF_TEST
1354
1355 #define WHILE_TEST(op, start, step, bound) \
1356 TEST_F(mme_tu104_sim_test, while_##op) \
1357 { \
1358 mme_builder b; \
1359 mme_builder_init(&b, devinfo); \
1360 \
1361 mme_value x = mme_mov(&b, mme_zero()); \
1362 mme_value y = mme_mov(&b, mme_zero()); \
1363 mme_value z = mme_mov(&b, mme_imm(start)); \
1364 mme_value w = mme_mov(&b, mme_zero()); \
1365 mme_value v = mme_mov(&b, mme_zero()); \
1366 \
1367 for (uint32_t j = 0; j < 5; j++) \
1368 mme_inc_whole_inst(&b, x); \
1369 \
1370 mme_while(&b, op, z, mme_imm(bound)) { \
1371 for (uint32_t j = 0; j < 5; j++) \
1372 mme_inc_whole_inst(&b, y); \
1373 \
1374 mme_add_to(&b, z, z, mme_imm(step)); \
1375 \
1376 for (uint32_t j = 0; j < 5; j++) \
1377 mme_inc_whole_inst(&b, w); \
1378 } \
1379 \
1380 for (uint32_t j = 0; j < 5; j++) \
1381 mme_inc_whole_inst(&b, v); \
1382 \
1383 mme_store_imm_addr(&b, data_addr + 0, x); \
1384 mme_store_imm_addr(&b, data_addr + 4, y); \
1385 mme_store_imm_addr(&b, data_addr + 8, z); \
1386 mme_store_imm_addr(&b, data_addr + 12, w); \
1387 mme_store_imm_addr(&b, data_addr + 16, v); \
1388 \
1389 auto macro = mme_builder_finish_vec(&b); \
1390 \
1391 uint32_t end = (uint32_t)(start), count = 0; \
1392 while (c_##op(end, (bound))) { \
1393 end += (uint32_t)(step); \
1394 count++; \
1395 } \
1396 \
1397 std::vector<uint32_t> params; \
1398 test_macro(&b, macro, params); \
1399 ASSERT_EQ(data[0], 5); \
1400 ASSERT_EQ(data[1], 5 * count); \
1401 ASSERT_EQ(data[2], end); \
1402 ASSERT_EQ(data[3], 5 * count); \
1403 ASSERT_EQ(data[4], 5); \
1404 }
1405
1406 WHILE_TEST(ilt, 0, 1, 7)
1407 WHILE_TEST(ult, 0, 1, 7)
1408 WHILE_TEST(ile, -10, 2, 0)
1409 WHILE_TEST(ule, 0, 1, 7)
1410 WHILE_TEST(ieq, 0, 5, 0)
1411 WHILE_TEST(ige, 5, -1, -5)
1412 WHILE_TEST(uge, 15, -2, 2)
1413 WHILE_TEST(igt, 7, -3, -10)
1414 WHILE_TEST(ugt, 1604, -30, 1000)
1415 WHILE_TEST(ine, 0, 1, 7)
1416
1417 #undef WHILE_TEST
1418
1419 TEST_F(mme_tu104_sim_test, nested_while)
1420 {
1421 mme_builder b;
1422 mme_builder_init(&b, devinfo);
1423
1424 mme_value n = mme_load(&b);
1425 mme_value m = mme_load(&b);
1426
1427 mme_value count = mme_mov(&b, mme_zero());
1428
1429 mme_value i = mme_mov(&b, mme_zero());
1430 mme_value j = mme_mov(&b, mme_imm(0xffff));
1431 mme_while(&b, ine, i, n) {
1432 mme_mov_to(&b, j, mme_zero());
1433 mme_while(&b, ine, j, m) {
1434 mme_add_to(&b, count, count, mme_imm(1));
1435 mme_add_to(&b, j, j, mme_imm(1));
1436 }
1437
1438 mme_add_to(&b, i, i, mme_imm(1));
1439 }
1440
1441 mme_store_imm_addr(&b, data_addr + 0, i);
1442 mme_store_imm_addr(&b, data_addr + 4, j);
1443 mme_store_imm_addr(&b, data_addr + 8, count);
1444
1445 auto macro = mme_builder_finish_vec(&b);
1446
1447 std::vector<uint32_t> params;
1448 params.push_back(3);
1449 params.push_back(5);
1450
1451 test_macro(&b, macro, params);
1452 ASSERT_EQ(data[0], 3);
1453 ASSERT_EQ(data[1], 5);
1454 ASSERT_EQ(data[2], 15);
1455 }
1456
/*
 * Disabled test for a hand-written do/while style loop built around a
 * backward BLE branch.
 *
 * NOTE(review): this block does not match the rest of the file as it
 * stands.  It calls test_macro() with two arguments while the fixture's
 * test_macro() takes (builder, macro, params), and it relies on
 * register-name based helpers (mme_alu, mme_alu_imm, mme_branch,
 * mme_end) that none of the enabled tests around it use -- presumably
 * left over from an older builder API.  It needs porting before it can
 * be re-enabled.
 */
#if 0
TEST_F(mme_tu104_sim_test, do_ble)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   /* R5 = loop bound taken from the parameter, R6 and R7 start at zero */
   mme_alu(&b, R5, ADD, LOAD0, ZERO);
   mme_alu(&b, R6, ADD, ZERO, ZERO);
   mme_alu(&b, R7, ADD, ZERO, ZERO);

   mme_alu_imm(&b, R7, ADD, R7, IMM, 1);
   mme_alu_imm(&b, R7, ADD, R7, IMM, 1);
   mme_alu_imm(&b, R7, ADD, R7, IMM, 1);
   mme_alu_imm(&b, R7, ADD, R7, IMM, 1);
   mme_alu_imm(&b, R6, ADD, R6, IMM, 1);
   /* backward branch taken while R6 <= R5 */
   mme_branch(&b, BLE, R6, R5, -3, 2);
   mme_alu_imm(&b, R7, ADD, R7, IMM, 1);
   mme_alu_imm(&b, R7, ADD, R7, IMM, 1);

   mme_store_imm_addr(&b, data_addr + 0, MME_TU104_REG_R7);

   mme_end(&b);

   uint32_t counts[] = {0, 1, 5, 9};

   for (uint32_t i = 0; i < ARRAY_SIZE(counts); i++) {
      reset_push();

      std::vector<uint32_t> params;
      params.push_back(counts[i]);

      test_macro(&b, params);
   }
}
#endif
1492
/*
 * Round trip through MME data RAM: the two parameters are written to
 * data-RAM indices 5 and 8, read back in the opposite order and stored
 * to the data buffer.  Correctness is established by test_macro(), which
 * compares the GPU result with the simulator.
 */
TEST_F(mme_tu104_sim_test, dread_dwrite)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value in5 = mme_load(&b);
   mme_value in8 = mme_load(&b);

   mme_dwrite(&b, mme_imm(5), in5);
   mme_dwrite(&b, mme_imm(8), in8);

   /* Read index 8 first, then index 5. */
   mme_value out8 = mme_dread(&b, mme_imm(8));
   mme_value out5 = mme_dread(&b, mme_imm(5));

   mme_store_imm_addr(&b, data_addr + 0, out8);
   mme_store_imm_addr(&b, data_addr + 4, out5);

   auto macro = mme_builder_finish_vec(&b);

   const std::vector<uint32_t> params = { (uint32_t)-10, 5 };

   test_macro(&b, macro, params);
}
1518
/*
 * Verifies that values written with mme_dwrite() can be copied out of
 * MME data RAM with MME_DMA_WRITE.
 *
 * The macro writes two canaries to data-RAM indices 5 and 8.  The push
 * buffer then DMAs 20 dwords to the data buffer starting at data-RAM
 * index 3, so the canaries are expected at buffer dwords 2 and 5 and
 * zero everywhere else in the copied range.
 * NOTE(review): the zero expectation relies on the rest of data RAM
 * reading as zero at this point -- confirm against the fixture set-up.
 */
TEST_F(mme_tu104_sim_test, dwrite_dma)
{
   const uint32_t canary5 = 0xc0ffee01;
   const uint32_t canary8 = canary5 & 0x00ffff00;

   /* First data-RAM index and number of dwords covered by the DMA. */
   const uint32_t ram_start = 3;
   const uint32_t dma_dwords = 20;

   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value first = mme_load(&b);
   mme_value second = mme_load(&b);

   mme_dwrite(&b, mme_imm(5), first);
   mme_dwrite(&b, mme_imm(8), second);

   auto macro = mme_builder_finish_vec(&b);

   push_macro(0, macro);

   P_1INC(p, NVC597, CALL_MME_MACRO(0));
   P_INLINE_DATA(p, canary5);
   P_INLINE_DATA(p, canary8);

   P_MTHD(p, NVC597, SET_MME_MEM_ADDRESS_A);
   P_NVC597_SET_MME_MEM_ADDRESS_A(p, high32(data_addr));
   P_NVC597_SET_MME_MEM_ADDRESS_B(p, low32(data_addr));
   /* Start 3 dwords into MME RAM */
   P_NVC597_SET_MME_DATA_RAM_ADDRESS(p, 3);
   P_IMMD(p, NVC597, MME_DMA_WRITE, 20);

   submit_push();

   for (uint32_t n = 0; n < dma_dwords; n++) {
      /* Buffer dword n mirrors data-RAM index n + ram_start. */
      const uint32_t ram_idx = n + ram_start;

      if (ram_idx == 5) {
         ASSERT_EQ(data[n], canary5);
      } else if (ram_idx == 8) {
         ASSERT_EQ(data[n], canary8);
      } else {
         ASSERT_EQ(data[n], 0);
      }
   }
}
1560
/*
 * Sweeps the MME data RAM in chunks of 32 entries up to
 * MME_TU104_DRAM_COUNT.
 *
 * The macro takes (start, count): it writes each index into its own
 * data-RAM slot, then reads the same range back and stores it to
 * consecutive dwords of the data buffer.  After every submission the
 * host expects the buffer to hold start, start + 1, ...
 */
TEST_F(mme_tu104_sim_test, dram_limit)
{
   static const uint32_t chunk_size = 32;

   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value start = mme_load(&b);
   mme_value count = mme_load(&b);

   /* Pass 1: data RAM[idx] = idx for the requested range. */
   mme_value wr_idx = mme_mov(&b, start);
   mme_loop(&b, count) {
      mme_dwrite(&b, wr_idx, wr_idx);
      mme_add_to(&b, wr_idx, wr_idx, mme_imm(1));
   }

   /* Pass 2: copy the range out to the data buffer, one dword at a time. */
   mme_value rd_idx = mme_mov(&b, start);
   struct mme_value64 dst = mme_mov64(&b, mme_imm64(data_addr));

   mme_loop(&b, count) {
      mme_value val = mme_dread(&b, rd_idx);
      mme_store(&b, dst, val);
      mme_add_to(&b, rd_idx, rd_idx, mme_imm(1));
      mme_add64_to(&b, dst, dst, mme_imm64(4));
   }

   auto macro = mme_builder_finish_vec(&b);

   for (uint32_t base = 0; base < MME_TU104_DRAM_COUNT; base += chunk_size) {
      reset_push();

      push_macro(0, macro);

      P_1INC(p, NVC597, CALL_MME_MACRO(0));
      P_INLINE_DATA(p, base);
      P_INLINE_DATA(p, chunk_size);

      submit_push();

      for (uint32_t k = 0; k < chunk_size; k++)
         ASSERT_EQ(data[k], base + k);
   }
}
1604
/*
 * Exercises MME_DMA_READ_FIFOED: the macro points the DMA engine at the
 * start of the data buffer, requests a FIFOed read of 2, issues a load
 * barrier and then performs two loads whose results are stored 256
 * bytes into the data buffer.
 *
 * The host seeds the first 64 dwords of the buffer with 1000 + index
 * and passes a single parameter (7).  No explicit expectations are
 * stated; the result is validated by test_macro() against the
 * simulator.
 */
TEST_F(mme_tu104_sim_test, dma_read_fifoed)
{
   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_mthd(&b, NVC597_SET_MME_DATA_RAM_ADDRESS);
   mme_emit(&b, mme_zero());

   /* ADDRESS_A is followed by a second emit carrying the low half. */
   mme_mthd(&b, NVC597_SET_MME_MEM_ADDRESS_A);
   mme_emit(&b, mme_imm(high32(data_addr)));
   mme_emit(&b, mme_imm(low32(data_addr)));

   mme_mthd(&b, NVC597_MME_DMA_READ_FIFOED);
   mme_emit(&b, mme_imm(2));

   mme_tu104_load_barrier(&b);

   mme_value first = mme_load(&b);
   mme_value second = mme_load(&b);

   mme_store_imm_addr(&b, data_addr + 256 + 0, first);
   mme_store_imm_addr(&b, data_addr + 256 + 4, second);

   auto macro = mme_builder_finish_vec(&b);

   P_IMMD(p, NVC597, SET_MME_DATA_FIFO_CONFIG, FIFO_SIZE_SIZE_4KB);

   /* Recognisable pattern for the DMA read to pick up. */
   for (uint32_t n = 0; n < 64; n++)
      data[n] = 1000 + n;

   const std::vector<uint32_t> params(1, 7);

   test_macro(&b, macro, params);
}
1640
/*
 * Sweeps the MME shadow scratch registers in chunks of 32 up to
 * MME_TU104_SCRATCH_COUNT.
 *
 * The macro takes (start, count): it writes each index to the scratch
 * register with that index, then reads the same range back through the
 * shadow state and stores it to consecutive dwords of the data buffer.
 * After every submission the host expects start, start + 1, ...
 */
TEST_F(mme_tu104_sim_test, scratch_limit)
{
   static const uint32_t chunk_size = 32;

   mme_builder b;
   mme_builder_init(&b, devinfo);

   mme_value start = mme_load(&b);
   mme_value count = mme_load(&b);

   /* Pass 1: scratch[idx] = idx for the requested range. */
   mme_value wr_idx = mme_mov(&b, start);
   mme_loop(&b, count) {
      mme_mthd_arr(&b, NVC597_SET_MME_SHADOW_SCRATCH(0), wr_idx);
      mme_emit(&b, wr_idx);
      mme_add_to(&b, wr_idx, wr_idx, mme_imm(1));
   }

   /* Pass 2: read the range back and copy it to the data buffer. */
   mme_value rd_idx = mme_mov(&b, start);
   struct mme_value64 dst = mme_mov64(&b, mme_imm64(data_addr));

   mme_loop(&b, count) {
      mme_value val = mme_state_arr(&b, NVC597_SET_MME_SHADOW_SCRATCH(0),
                                    rd_idx);
      mme_store(&b, dst, val);
      mme_add_to(&b, rd_idx, rd_idx, mme_imm(1));
      mme_add64_to(&b, dst, dst, mme_imm64(4));
   }

   auto macro = mme_builder_finish_vec(&b);

   for (uint32_t base = 0; base < MME_TU104_SCRATCH_COUNT;
        base += chunk_size) {
      reset_push();

      push_macro(0, macro);

      P_1INC(p, NVC597, CALL_MME_MACRO(0));
      P_INLINE_DATA(p, base);
      P_INLINE_DATA(p, chunk_size);

      submit_push();

      for (uint32_t k = 0; k < chunk_size; k++)
         ASSERT_EQ(data[k], base + k);
   }
}
1685