// Copyright © 2022 Collabora, Ltd.
// SPDX-License-Identifier: MIT

use crate::api::{GetDebugFlags, ShaderBin, DEBUG};
use crate::hw_runner::{Runner, CB0};
use crate::ir::*;
use crate::sm50::ShaderModel50;
use crate::sm70::ShaderModel70;

use acorn::Acorn;
use compiler::cfg::CFGBuilder;
use nak_bindings::*;
use std::str::FromStr;
use std::sync::OnceLock;

// from https://internals.rust-lang.org/t/discussion-on-offset-of/7440/2
macro_rules! offset_of {
    ($Struct:path, $field:ident) => {{
        // Using a separate function to minimize unhygienic hazards
        // (e.g. unsafety of #[repr(packed)] field borrows).
        // Uncomment `const` when `const fn`s can juggle pointers.

        // const
        fn offset() -> usize {
            let u = std::mem::MaybeUninit::<$Struct>::uninit();
            // Use pattern-matching to avoid accidentally going through Deref.
            let &$Struct { $field: ref f, .. } = unsafe { &*u.as_ptr() };
            let o =
                (f as *const _ as usize).wrapping_sub(&u as *const _ as usize);
            // Triple check that we are within `u` still.
            assert!((0..=std::mem::size_of_val(&u)).contains(&o));
            o
        }
        offset()
    }};
}

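/// A lazily-initialized GPU runner plus the matching shader model, shared by
/// every test in this file.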
struct RunSingleton {
    sm: Box<dyn ShaderModel + Send + Sync>,
    run: Runner,
}

static RUN_SINGLETON: OnceLock<RunSingleton> = OnceLock::new();

impl RunSingleton {
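    /// Returns the shared singleton, creating the `Runner` on first use.
    ///
    /// Set the `NAK_TEST_DEVICE` environment variable to an index to pick
    /// which device the tests run on; otherwise `Runner::new` picks one.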
    pub fn get() -> &'static RunSingleton {
        RUN_SINGLETON.get_or_init(|| {
            let dev_id = match std::env::var("NAK_TEST_DEVICE") {
                Ok(s) => Some(usize::from_str(&s).unwrap()),
                Err(_) => None,
            };

            let run = Runner::new(dev_id);
            let sm_nr = run.dev_info().sm;
            let sm: Box<dyn ShaderModel + Send + Sync> = if sm_nr >= 70 {
                Box::new(ShaderModel70::new(sm_nr))
            } else if sm_nr >= 50 {
                Box::new(ShaderModel50::new(sm_nr))
            } else {
                panic!("Unsupported shader model");
            };
            RunSingleton { sm, run }
        })
    }
}

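/// Each workgroup is a single warp; the shader prologue computes the global
/// invocation index as `cta * LOCAL_SIZE_X + lane`.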
const LOCAL_SIZE_X: u16 = 32;

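/// Builds a tiny compute shader for exercising individual instructions on
/// real hardware.
///
/// `new()` emits a prologue that computes this invocation's address into the
/// test data buffer (described by `CB0`) and exits early when the invocation
/// is out of bounds.  Tests then append instructions through the `Builder`
/// and `SSABuilder` impls, load inputs with `ld_test_data()`, store results
/// with `st_test_data()`, and finish with `compile()`.
///
/// A sketch of typical usage (mirroring the tests below):
///
/// ```ignore
/// let run = RunSingleton::get();
/// let mut b = TestShaderBuilder::new(run.sm.as_ref());
/// let x = b.ld_test_data(0, MemType::B32);
/// b.st_test_data(4, MemType::B32, x);
/// let bin = b.compile();
/// ```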
pub struct TestShaderBuilder<'a> {
    sm: &'a dyn ShaderModel,
    alloc: SSAValueAllocator,
    b: InstrBuilder<'a>,
    start_block: BasicBlock,
    label: Label,
    data_addr: SSARef,
}

impl<'a> TestShaderBuilder<'a> {
    pub fn new(sm: &'a dyn ShaderModel) -> TestShaderBuilder {
        let mut alloc = SSAValueAllocator::new();
        let mut label_alloc = LabelAllocator::new();
        let mut b = SSAInstrBuilder::new(sm, &mut alloc);

        // Fill out the start block
        let lane = b.alloc_ssa(RegFile::GPR, 1);
        b.push_op(OpS2R {
            dst: lane.into(),
            idx: NAK_SV_LANE_ID,
        });

        let cta = b.alloc_ssa(RegFile::GPR, 1);
        b.push_op(OpS2R {
            dst: cta.into(),
            idx: NAK_SV_CTAID,
        });

        let invoc_id = b.alloc_ssa(RegFile::GPR, 1);
        b.push_op(OpIMad {
            dst: invoc_id.into(),
            srcs: [cta.into(), u32::from(LOCAL_SIZE_X).into(), lane.into()],
            signed: false,
        });

        let data_addr_lo = CBufRef {
            buf: CBuf::Binding(0),
            offset: offset_of!(CB0, data_addr_lo).try_into().unwrap(),
        };
        let data_addr_hi = CBufRef {
            buf: CBuf::Binding(0),
            offset: offset_of!(CB0, data_addr_hi).try_into().unwrap(),
        };
        let data_addr = b.alloc_ssa(RegFile::GPR, 2);
        b.copy_to(data_addr[0].into(), data_addr_lo.into());
        b.copy_to(data_addr[1].into(), data_addr_hi.into());

        let data_stride = CBufRef {
            buf: CBuf::Binding(0),
            offset: offset_of!(CB0, data_stride).try_into().unwrap(),
        };
        let invocations = CBufRef {
            buf: CBuf::Binding(0),
            offset: offset_of!(CB0, invocations).try_into().unwrap(),
        };

        let data_offset = SSARef::from([
            b.imul(invoc_id.into(), data_stride.into())[0],
            b.copy(0.into())[0],
        ]);
        let data_addr =
            b.iadd64(data_addr.into(), data_offset.into(), 0.into());

        // Finally, exit if we're OOB
        let oob = b.isetp(
            IntCmpType::U32,
            IntCmpOp::Ge,
            invoc_id.into(),
            invocations.into(),
        );
        b.predicate(oob[0].into()).push_op(OpExit {});

        let start_block = BasicBlock {
            label: label_alloc.alloc(),
            uniform: true,
            instrs: b.as_vec(),
        };

        TestShaderBuilder {
            sm,
            alloc,
            b: InstrBuilder::new(sm),
            start_block,
            label: label_alloc.alloc(),
            data_addr,
        }
    }

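    /// Loads a `mem_type`-sized value from this invocation's slice of the
    /// test data buffer at byte `offset`, returning one 32-bit component per
    /// 32 bits of `mem_type`.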
    pub fn ld_test_data(&mut self, offset: u16, mem_type: MemType) -> SSARef {
        let access = MemAccess {
            mem_type,
            space: MemSpace::Global(MemAddrType::A64),
            order: MemOrder::Strong(MemScope::System),
            eviction_priority: MemEvictionPriority::Normal,
        };
        let comps: u8 = mem_type.bits().div_ceil(32).try_into().unwrap();
        let dst = self.alloc_ssa(RegFile::GPR, comps);
        self.push_op(OpLd {
            dst: dst.into(),
            addr: self.data_addr.into(),
            offset: offset.into(),
            access,
        });
        dst
    }

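    /// Stores `data` to this invocation's slice of the test data buffer at
    /// byte `offset`.  `data` must have one 32-bit component per 32 bits of
    /// `mem_type`.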
    pub fn st_test_data(
        &mut self,
        offset: u16,
        mem_type: MemType,
        data: SSARef,
    ) {
        let access = MemAccess {
            mem_type,
            space: MemSpace::Global(MemAddrType::A64),
            order: MemOrder::Strong(MemScope::System),
            eviction_priority: MemEvictionPriority::Normal,
        };
        let comps: u8 = mem_type.bits().div_ceil(32).try_into().unwrap();
        assert!(data.comps() == comps);
        self.push_op(OpSt {
            addr: self.data_addr.into(),
            data: data.into(),
            offset: offset.into(),
            access,
        });
    }

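    /// Appends an `OpExit`, assembles the two-block CFG (prologue plus test
    /// body), runs just enough passes to register-allocate and legalize the
    /// shader, and encodes it for the target shader model.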
    pub fn compile(mut self) -> Box<ShaderBin> {
        self.b.push_op(OpExit {});
        let block = BasicBlock {
            label: self.label,
            uniform: true,
            instrs: self.b.as_vec(),
        };

        let mut cfg = CFGBuilder::new();
        cfg.add_node(0, self.start_block);
        cfg.add_node(1, block);
        cfg.add_edge(0, 1);

        let f = Function {
            ssa_alloc: self.alloc,
            phi_alloc: PhiAllocator::new(),
            blocks: cfg.as_cfg(),
        };

        let cs_info = ComputeShaderInfo {
            local_size: [32, 1, 1],
            smem_size: 0,
        };
        let info = ShaderInfo {
            num_gprs: 0,
            num_control_barriers: 0,
            num_instrs: 0,
            slm_size: 0,
            max_crs_depth: 0,
            uses_global_mem: true,
            writes_global_mem: true,
            uses_fp64: false,
            stage: ShaderStageInfo::Compute(cs_info),
            io: ShaderIoInfo::None,
        };
        let mut s = Shader {
            sm: self.sm,
            info,
            functions: vec![f],
        };

        // Run just enough passes to get a legal, encodable shader
        s.opt_copy_prop();
        s.opt_dce();
        s.legalize();

        s.assign_regs();
        s.lower_par_copies();
        s.lower_copy_swap();
        s.calc_instr_deps();

        if DEBUG.print() {
            eprintln!("NAK shader: {s}");
        }

        s.gather_info();
        s.remove_annotations();

        let code = self.sm.encode_shader(&s);
        Box::new(ShaderBin::new(self.sm, &s.info, None, code, ""))
    }
}

impl Builder for TestShaderBuilder<'_> {
    fn push_instr(&mut self, instr: Box<Instr>) -> &mut Instr {
        self.b.push_instr(instr)
    }

    fn sm(&self) -> u8 {
        self.b.sm()
    }
}

impl SSABuilder for TestShaderBuilder<'_> {
    fn alloc_ssa(&mut self, file: RegFile, comps: u8) -> SSARef {
        self.alloc.alloc_vec(file, comps)
    }
}

#[test]
fn test_sanity() {
    let run = RunSingleton::get();
    let b = TestShaderBuilder::new(run.sm.as_ref());
    let bin = b.compile();
    unsafe {
        run.run
            .run_raw(&bin, LOCAL_SIZE_X.into(), 0, std::ptr::null_mut(), 0)
            .unwrap();
    }
}

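/// Approximate f32 equality: NaN compares equal to NaN, and everything else
/// is compared with an absolute tolerance of 1e-6.  (`f64_eq` below is the
/// f64 analogue.)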
fn f32_eq(a: f32, b: f32) -> bool {
    if a.is_nan() && b.is_nan() {
        true
    } else if a.is_nan() || b.is_nan() {
        // If one is NaN but not the other, fail
        false
    } else {
        (a - b).abs() < 0.000001
    }
}

fn f64_eq(a: f64, b: f64) -> bool {
    if a.is_nan() && b.is_nan() {
        true
    } else if a.is_nan() || b.is_nan() {
        // If one is NaN but not the other, fail
        false
    } else {
        (a - b).abs() < 0.000001
    }
}

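/// Tests an op's constant folding against real hardware.
///
/// Builds a shader that runs `op` with sources loaded from the test data
/// buffer, feeds it data produced by `rand_u32` (which is passed the source
/// index, so callers can shape each source's distribution), and checks the
/// hardware results against `Foldable::fold()` evaluated on the CPU.
///
/// A minimal sketch, with uniformly random sources for some `Foldable` op:
///
/// ```ignore
/// let mut a = Acorn::new();
/// test_foldable_op_with(op, &mut |_| a.get_u32());
/// ```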
pub fn test_foldable_op_with(
    mut op: impl Foldable + Clone + Into<Op>,
    mut rand_u32: impl FnMut(usize) -> u32,
) {
    let run = RunSingleton::get();
    let mut b = TestShaderBuilder::new(run.sm.as_ref());

    let mut comps = 0_u16;
    let mut fold_src = Vec::new();
    let src_types = op.src_types();
    for (i, src) in op.srcs_as_mut_slice().iter_mut().enumerate() {
        match src_types[i] {
            SrcType::GPR
            | SrcType::ALU
            | SrcType::F16
            | SrcType::F16v2
            | SrcType::F32
            | SrcType::I32
            | SrcType::B32 => {
                let data = b.ld_test_data(comps * 4, MemType::B32);
                comps += 1;

                src.src_ref = data.into();
                fold_src.push(FoldData::U32(0));
            }
            SrcType::F64 => {
                todo!("Double ops aren't tested yet");
            }
            SrcType::Pred => {
                let data = b.ld_test_data(comps * 4, MemType::B32);
                comps += 1;

                let bit = b.lop2(LogicOp2::And, data.into(), 1.into());
                let pred = b.isetp(
                    IntCmpType::U32,
                    IntCmpOp::Ne,
                    bit.into(),
                    0.into(),
                );
                src.src_ref = pred.into();
                fold_src.push(FoldData::Pred(false));
            }
            SrcType::Carry => {
                let data = b.ld_test_data(comps * 4, MemType::B32);
                comps += 1;

                let bit = b.lop2(LogicOp2::And, data.into(), 1.into());
                let dst = b.alloc_ssa(RegFile::GPR, 1);
                let carry = b.alloc_ssa(RegFile::Carry, 1);
                b.push_op(OpIAdd2 {
                    dst: dst.into(),
                    carry_out: carry.into(),
                    srcs: [u32::MAX.into(), bit.into()],
                });
                src.src_ref = carry.into();
                fold_src.push(FoldData::Carry(false));
            }
            typ => panic!("Can't auto-generate {typ:?} data"),
        }
    }
    let src_comps = usize::from(comps);

    let mut fold_dst = Vec::new();
    let dst_types = op.dst_types();
    for (i, dst) in op.dsts_as_mut_slice().iter_mut().enumerate() {
        match dst_types[i] {
            DstType::Pred => {
                *dst = b.alloc_ssa(RegFile::Pred, 1).into();
                fold_dst.push(FoldData::Pred(false));
            }
            DstType::GPR | DstType::F32 => {
                *dst = b.alloc_ssa(RegFile::GPR, 1).into();
                fold_dst.push(FoldData::U32(0));
            }
            DstType::F64 => {
                *dst = b.alloc_ssa(RegFile::GPR, 2).into();
                fold_dst.push(FoldData::Vec2([0, 0]));
            }
            DstType::Carry => {
                *dst = b.alloc_ssa(RegFile::Carry, 1).into();
                fold_dst.push(FoldData::Carry(false));
            }
            typ => panic!("Can't auto-test {typ:?} data"),
        }
    }

    b.push_op(op.clone());
    let op = op; // Drop mutability

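    // Store each destination back to the test data buffer, materializing
    // predicate and carry destinations as 0/1 GPR values first.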
    for dst in op.dsts_as_slice() {
        let Dst::SSA(vec) = dst else {
            panic!("Should be an ssa value");
        };

        for ssa in &vec[..] {
            let u = match ssa.file() {
                RegFile::Pred => b.sel((*ssa).into(), 1.into(), 0.into()),
                RegFile::GPR => (*ssa).into(),
                RegFile::Carry => {
                    let gpr = b.alloc_ssa(RegFile::GPR, 1);
                    b.push_op(OpIAdd2X {
                        dst: gpr.into(),
                        carry_out: Dst::None,
                        srcs: [0.into(), 0.into()],
                        carry_in: (*ssa).into(),
                    });
                    gpr.into()
                }
                file => panic!("Can't auto-test {file:?} data"),
            };
            b.st_test_data(comps * 4, MemType::B32, u);
            comps += 1;
        }
    }
    let comps = usize::from(comps); // Drop mutability
    let dst_comps = comps - src_comps;

    let bin = b.compile();

    // We're throwing random data at the op, so the number of test cases we
    // need for good coverage scales with the square of the number of source
    // components.  For a big op like IAdd3X, this gives us 2500 iterations.
    let invocations = src_comps * src_comps * 100;

    let mut data = Vec::new();
    for _ in 0..invocations {
        for (i, src) in op.srcs_as_slice().iter().enumerate() {
            let SrcRef::SSA(vec) = &src.src_ref else {
                panic!("Should be an ssa value");
            };

            for _ in 0..vec.comps() {
                data.push(rand_u32(i));
            }
        }
        for _ in 0..dst_comps {
            data.push(0_u32);
        }
    }
    debug_assert!(data.len() == invocations * comps);

    unsafe {
        run.run
            .run_raw(
                &bin,
                invocations.try_into().unwrap(),
                (comps * 4).try_into().unwrap(),
                data.as_mut_ptr().cast(),
                data.len() * 4,
            )
            .unwrap();
    }

    // Now, check the results
    for invoc_id in 0..invocations {
        let data = &data[(invoc_id * comps)..((invoc_id + 1) * comps)];

        let mut c = 0_usize;
        for src in &mut fold_src {
            match src {
                FoldData::Pred(b) | FoldData::Carry(b) => {
                    let u = data[c];
                    *b = (u & 1) != 0;
                    c += 1;
                }
                FoldData::U32(u) => {
                    *u = data[c];
                    c += 1;
                }
                FoldData::Vec2(v) => {
                    *v = [data[c + 0], data[c + 1]];
                    c += 2;
                }
            }
        }
        debug_assert!(c == src_comps);

        let mut fold = OpFoldData {
            srcs: &fold_src,
            dsts: &mut fold_dst,
        };
        op.fold(&*run.sm, &mut fold);

        debug_assert!(fold_dst.len() == op.dsts_as_slice().len());
        for (i, dst) in fold_dst.iter().enumerate() {
            match dst {
                FoldData::Pred(b) | FoldData::Carry(b) => {
                    let d = data[c];
                    c += 1;
                    assert_eq!(*b, (d & 1) != 0);
                }
                FoldData::U32(u) => {
                    let d = data[c];
                    c += 1;

                    match dst_types[i] {
                        DstType::GPR => {
                            assert_eq!(*u, d);
                        }
                        DstType::F32 => {
                            assert!(f32_eq(
                                f32::from_bits(*u),
                                f32::from_bits(d)
                            ));
                        }
                        typ => panic!("Can't auto-test {typ:?} data"),
                    }
                }
                FoldData::Vec2(v) => {
                    let d = [data[c + 0], data[c + 1]];
                    c += 2;

                    match dst_types[i] {
                        DstType::F64 => {
                            let v_f64 = f64::from_bits(
                                u64::from(v[0]) | (u64::from(v[1]) << 32),
                            );
                            let d_f64 = f64::from_bits(
                                u64::from(d[0]) | (u64::from(d[1]) << 32),
                            );
                            assert!(f64_eq(v_f64, d_f64));
                        }
                        typ => panic!("Can't auto-test {typ:?} data"),
                    }
                }
            }
        }
        debug_assert!(c == comps);
    }
}

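/// Like `test_foldable_op_with()` but with uniformly random source data.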
pub fn test_foldable_op(op: impl Foldable + Clone + Into<Op>) {
    let mut a = Acorn::new();
    test_foldable_op_with(op, &mut |_| a.get_u32());
}

#[test]
fn test_op_flo() {
    for i in 0..4 {
        let op = OpFlo {
            dst: Dst::None,
            src: 0.into(),
            signed: i & 0x1 != 0,
            return_shift_amount: i & 0x2 != 0,
        };

        let mut a = Acorn::new();
        test_foldable_op_with(op, &mut |_| {
            let x = a.get_uint(36);
            let signed = x & (1 << 32) != 0;
            let shift = x >> 33;
            if signed {
                ((x as i32) >> shift) as u32
            } else {
                (x as u32) >> shift
            }
        });
    }
}

#[test]
fn test_op_iabs() {
    if RunSingleton::get().sm.sm() >= 70 {
        let op = OpIAbs {
            dst: Dst::None,
            src: 0.into(),
        };
        test_foldable_op(op);
    }
}

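/// Returns a random u32 biased toward interesting integer-add edge cases:
/// 0, 1, the `i32::MIN`/`i32::MAX` bit patterns, and values near `u32::MAX`.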
fn get_iadd_int(a: &mut Acorn) -> u32 {
    let x = a.get_uint(36);
    match x >> 32 {
        0 => 0,
        1 => 1,
        2 => 1 << 31,
        3 => (1 << 31) - 1,
        4 => u32::MAX,
        5 => u32::MAX - 1,
        _ => x as u32,
    }
}

#[test]
fn test_op_iadd2() {
    if RunSingleton::get().sm.sm() < 70 {
        for i in 0..3 {
            let mut op = OpIAdd2 {
                dst: Dst::None,
                carry_out: Dst::None,
                srcs: [0.into(), 0.into()],
            };
            if i & 0x1 != 0 {
                op.srcs[0].src_mod = SrcMod::INeg;
            }
            if i & 0x2 != 0 {
                op.srcs[1].src_mod = SrcMod::INeg;
            }

            let mut a = Acorn::new();
            test_foldable_op_with(op, |_| get_iadd_int(&mut a));
        }
    }
}

#[test]
fn test_op_iadd2x() {
    if RunSingleton::get().sm.sm() < 70 {
        for i in 0..3 {
            let mut op = OpIAdd2X {
                dst: Dst::None,
                carry_out: Dst::None,
                srcs: [0.into(), 0.into()],
                carry_in: 0.into(),
            };
            if i & 0x1 != 0 {
                op.srcs[0].src_mod = SrcMod::BNot;
            }
            if i & 0x2 != 0 {
                op.srcs[1].src_mod = SrcMod::BNot;
            }

            let mut a = Acorn::new();
            test_foldable_op_with(op, |_| get_iadd_int(&mut a));
        }
    }
}

#[test]
fn test_op_iadd3() {
    if RunSingleton::get().sm.sm() >= 70 {
        for i in 0..6 {
            let mut op = OpIAdd3 {
                dst: Dst::None,
                overflow: [Dst::None, Dst::None],
                srcs: [0.into(), 0.into(), 0.into()],
            };
            if i % 3 == 1 {
                op.srcs[0].src_mod = SrcMod::INeg;
            } else if i % 3 == 2 {
                op.srcs[1].src_mod = SrcMod::INeg;
            }
            if i / 3 == 1 {
                op.srcs[2].src_mod = SrcMod::INeg;
            }

            let mut a = Acorn::new();
            test_foldable_op_with(op, |_| get_iadd_int(&mut a));
        }
    }
}

#[test]
fn test_op_iadd3x() {
    if RunSingleton::get().sm.sm() >= 70 {
        for i in 0..6 {
            let mut op = OpIAdd3X {
                dst: Dst::None,
                overflow: [Dst::None, Dst::None],
                srcs: [0.into(), 0.into(), 0.into()],
                carry: [false.into(), false.into()],
            };
            if i % 3 == 1 {
                op.srcs[0].src_mod = SrcMod::BNot;
            } else if i % 3 == 2 {
                op.srcs[1].src_mod = SrcMod::BNot;
            }
            if i / 3 == 1 {
                op.srcs[2].src_mod = SrcMod::BNot;
            }

            let mut a = Acorn::new();
            test_foldable_op_with(op, |_| get_iadd_int(&mut a));
        }
    }
}

#[test]
fn test_op_isetp() {
    let set_ops = [PredSetOp::And, PredSetOp::Or, PredSetOp::Xor];
    let cmp_ops = [
        IntCmpOp::Eq,
        IntCmpOp::Ne,
        IntCmpOp::Lt,
        IntCmpOp::Le,
        IntCmpOp::Gt,
        IntCmpOp::Ge,
    ];
    let cmp_types = [IntCmpType::U32, IntCmpType::I32];

    for mut i in 0..(set_ops.len() * cmp_ops.len() * cmp_types.len() * 2) {
        let set_op = set_ops[i % set_ops.len()];
        i /= set_ops.len();

        let cmp_op = cmp_ops[i % cmp_ops.len()];
        i /= cmp_ops.len();

        let cmp_type = cmp_types[i % cmp_types.len()];
        i /= cmp_types.len();

        let ex = i != 0;

        if ex && RunSingleton::get().sm.sm() < 70 {
            continue;
        }

        let op = OpISetP {
            dst: Dst::None,
            set_op,
            cmp_op,
            cmp_type,
            ex,
            srcs: [0.into(), 0.into()],
            accum: 0.into(),
            low_cmp: 0.into(),
        };

        let src0_idx = op.src_idx(&op.srcs[0]);
        let mut a = Acorn::new();
        let mut src0 = 0_u32;
        test_foldable_op_with(op, &mut |i| {
            let x = a.get_u32();
            if i == src0_idx {
                src0 = x;
            }

            // Half the time, make src1 the same as src0 so the equal cases
            // get exercised
            if i == src0_idx + 1 && a.get_bool() {
                src0
            } else {
                x
            }
        });
    }
}

#[test]
fn test_op_lop2() {
    if RunSingleton::get().sm.sm() < 70 {
        let logic_ops =
            [LogicOp2::And, LogicOp2::Or, LogicOp2::Xor, LogicOp2::PassB];

        let src_mods = [
            (SrcMod::None, SrcMod::None),
            (SrcMod::BNot, SrcMod::None),
            (SrcMod::None, SrcMod::BNot),
            (SrcMod::BNot, SrcMod::BNot),
        ];

        for logic_op in logic_ops {
            for (x_mod, y_mod) in src_mods {
                let mut op = OpLop2 {
                    dst: Dst::None,
                    srcs: [0.into(), 0.into()],
                    op: logic_op,
                };
                op.srcs[0].src_mod = x_mod;
                op.srcs[1].src_mod = y_mod;

                test_foldable_op(op);
            }
        }
    }
}

#[test]
fn test_op_lop3() {
    if RunSingleton::get().sm.sm() >= 70 {
        // Test all 256 possible LUTs
        for lut in 0..=255 {
            let op = OpLop3 {
                dst: Dst::None,
                srcs: [0.into(), 0.into(), 0.into()],
                op: LogicOp3 { lut },
            };
            test_foldable_op(op);
        }
    }
}

#[test]
fn test_op_popc() {
    let src_mods = [SrcMod::None, SrcMod::BNot];
    for src_mod in src_mods {
        let mut op = OpPopC {
            dst: Dst::None,
            src: 0.into(),
        };
        op.src.src_mod = src_mod;
        test_foldable_op(op);
    }
}

#[test]
fn test_op_shf() {
    let sm = &RunSingleton::get().sm;

    let types = [IntType::U32, IntType::I32, IntType::U64, IntType::I64];

    for i in 0..32 {
        let op = OpShf {
            dst: Dst::None,
            low: 0.into(),
            high: 0.into(),
            shift: 0.into(),
            data_type: types[i & 0x3],
            right: i & 0x4 != 0,
            wrap: i & 0x8 != 0,
            dst_high: i & 0x10 != 0,
        };

        if sm.sm() < 70 && !(op.dst_high || op.right) {
            continue;
        }

        let shift_idx = op.src_idx(&op.shift);
        let mut a = Acorn::new();
        test_foldable_op_with(op, &mut |i| {
            if i == shift_idx {
                a.get_uint(6) as u32
            } else {
                a.get_u32()
            }
        });
    }
}

#[test]
fn test_op_prmt() {
    let op = OpPrmt {
        dst: Dst::None,
        srcs: [0.into(), 0.into()],
        sel: 0.into(),
        mode: PrmtMode::Index,
    };
    test_foldable_op(op);
}

#[test]
fn test_op_psetp() {
    if RunSingleton::get().sm.sm() < 70 {
        let set_ops = [PredSetOp::And, PredSetOp::Or, PredSetOp::Xor];
        let src_mods = [SrcMod::None, SrcMod::BNot];
        for mut i in 0..(3 * 3 * 2 * 2 * 2) {
            let op1 = set_ops[i % 3];
            i /= 3;
            let op2 = set_ops[i % 3];
            i /= 3;
            let mut op = OpPSetP {
                dsts: [Dst::None, Dst::None],
                ops: [op1, op2],
                srcs: [true.into(), true.into(), true.into()],
            };
            op.srcs[0].src_mod = src_mods[(i >> 0) & 1];
            op.srcs[1].src_mod = src_mods[(i >> 1) & 1];
            op.srcs[2].src_mod = src_mods[(i >> 2) & 1];

            test_foldable_op(op);
        }
    }
}

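// The 64-bit helpers below aren't Foldable ops.  Instead, build a small
// shader around each builder helper and check the hardware results against
// the equivalent u64 arithmetic on the CPU.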
#[test]
fn test_iadd64() {
    let run = RunSingleton::get();
    let invocations = 100;

    let cases = [
        (SrcMod::None, SrcMod::None),
        (SrcMod::INeg, SrcMod::None),
        (SrcMod::None, SrcMod::INeg),
    ];

    for (x_mod, y_mod) in cases {
        let mut b = TestShaderBuilder::new(run.sm.as_ref());

        let mut x = Src::from([
            b.ld_test_data(0, MemType::B32)[0],
            b.ld_test_data(4, MemType::B32)[0],
        ]);
        x.src_mod = x_mod;

        let mut y = Src::from([
            b.ld_test_data(8, MemType::B32)[0],
            b.ld_test_data(12, MemType::B32)[0],
        ]);
        y.src_mod = y_mod;

        let dst = b.iadd64(x, y, 0.into());
        b.st_test_data(16, MemType::B32, dst[0].into());
        b.st_test_data(20, MemType::B32, dst[1].into());

        let bin = b.compile();

        let mut a = Acorn::new();
        let mut data = Vec::new();
        for _ in 0..invocations {
            data.push([
                get_iadd_int(&mut a),
                get_iadd_int(&mut a),
                get_iadd_int(&mut a),
                get_iadd_int(&mut a),
                0,
                0,
            ]);
        }

        run.run.run(&bin, &mut data).unwrap();

        for d in &data {
            let mut x = u64::from(d[0]) | (u64::from(d[1]) << 32);
            let mut y = u64::from(d[2]) | (u64::from(d[3]) << 32);
            if x_mod.is_ineg() {
                x = -(x as i64) as u64;
            }
            if y_mod.is_ineg() {
                y = -(y as i64) as u64;
            }
            let dst = x.wrapping_add(y);
            assert_eq!(d[4], dst as u32);
            assert_eq!(d[5], (dst >> 32) as u32);
        }
    }
}

#[test]
fn test_ineg64() {
    let run = RunSingleton::get();
    let invocations = 100;

    let mut b = TestShaderBuilder::new(run.sm.as_ref());

    let x = SSARef::from([
        b.ld_test_data(0, MemType::B32)[0],
        b.ld_test_data(4, MemType::B32)[0],
    ]);
    let dst = b.ineg64(x.into());
    b.st_test_data(8, MemType::B32, dst[0].into());
    b.st_test_data(12, MemType::B32, dst[1].into());

    let bin = b.compile();

    let mut a = Acorn::new();
    let mut data = Vec::new();
    for _ in 0..invocations {
        data.push([a.get_u32(), a.get_u32(), 0, 0]);
    }

    run.run.run(&bin, &mut data).unwrap();

    for d in &data {
        let x = u64::from(d[0]) | (u64::from(d[1]) << 32);
        let dst = -(x as i64) as u64;
        assert_eq!(d[2], dst as u32);
        assert_eq!(d[3], (dst >> 32) as u32);
    }
}

#[test]
fn test_isetp64() {
    let run = RunSingleton::get();
    let invocations = 100;

    let types = [IntCmpType::U32, IntCmpType::I32];
    let ops = [
        IntCmpOp::Eq,
        IntCmpOp::Ne,
        IntCmpOp::Lt,
        IntCmpOp::Le,
        IntCmpOp::Gt,
        IntCmpOp::Ge,
    ];

    for i in 0..(ops.len() * 2) {
        let mut b = TestShaderBuilder::new(run.sm.as_ref());

        let cmp_type = types[i % 2];
        let cmp_op = ops[i / 2];

        let x = SSARef::from([
            b.ld_test_data(0, MemType::B32)[0],
            b.ld_test_data(4, MemType::B32)[0],
        ]);
        let y = SSARef::from([
            b.ld_test_data(8, MemType::B32)[0],
            b.ld_test_data(12, MemType::B32)[0],
        ]);
        let p = b.isetp64(cmp_type, cmp_op, x.into(), y.into());
        let dst = b.sel(p.into(), 1.into(), 0.into());
        b.st_test_data(16, MemType::B32, dst.into());

        let bin = b.compile();

        let mut a = Acorn::new();
        let mut data = Vec::new();
        for _ in 0..invocations {
            match a.get_u32() % 4 {
                0 => {
                    // Equal
                    let high = a.get_u32();
                    let low = a.get_u32();
                    data.push([low, high, low, high, 0]);
                }
                1 => {
                    // High bits are equal
                    let high = a.get_u32();
                    data.push([a.get_u32(), high, a.get_u32(), high, 0]);
                }
                _ => {
                    data.push([
                        a.get_u32(),
                        a.get_u32(),
                        a.get_u32(),
                        a.get_u32(),
                        0,
                    ]);
                }
            }
        }

        run.run.run(&bin, &mut data).unwrap();

        for d in &data {
            let x = u64::from(d[0]) | (u64::from(d[1]) << 32);
            let y = u64::from(d[2]) | (u64::from(d[3]) << 32);
            let p = if cmp_type.is_signed() {
                let x = x as i64;
                let y = y as i64;
                match cmp_op {
                    IntCmpOp::Eq => x == y,
                    IntCmpOp::Ne => x != y,
                    IntCmpOp::Lt => x < y,
                    IntCmpOp::Le => x <= y,
                    IntCmpOp::Gt => x > y,
                    IntCmpOp::Ge => x >= y,
                }
            } else {
                match cmp_op {
                    IntCmpOp::Eq => x == y,
                    IntCmpOp::Ne => x != y,
                    IntCmpOp::Lt => x < y,
                    IntCmpOp::Le => x <= y,
                    IntCmpOp::Gt => x > y,
                    IntCmpOp::Ge => x >= y,
                }
            };
            let dst = p as u32;
            assert_eq!(d[4], dst);
        }
    }
}

#[test]
fn test_shl64() {
    let run = RunSingleton::get();
    let invocations = 100;

    let mut b = TestShaderBuilder::new(run.sm.as_ref());

    let srcs = SSARef::from([
        b.ld_test_data(0, MemType::B32)[0],
        b.ld_test_data(4, MemType::B32)[0],
    ]);
    let shift = b.ld_test_data(8, MemType::B32);
    let dst = b.shl64(srcs.into(), shift.into());
    b.st_test_data(12, MemType::B32, dst[0].into());
    b.st_test_data(16, MemType::B32, dst[1].into());

    let bin = b.compile();

    let mut a = Acorn::new();
    let mut data = Vec::new();
    for _ in 0..invocations {
        data.push([a.get_u32(), a.get_u32(), a.get_uint(7) as u32, 0, 0]);
    }

    run.run.run(&bin, &mut data).unwrap();

    for d in &data {
        let src = u64::from(d[0]) | (u64::from(d[1]) << 32);
        let dst = src << (d[2] & 0x3f);
        assert_eq!(d[3], dst as u32);
        assert_eq!(d[4], (dst >> 32) as u32);
    }
}

#[test]
fn test_shr64() {
    let run = RunSingleton::get();
    let invocations = 100;

    let cases = [true, false];

    for signed in cases {
        let mut b = TestShaderBuilder::new(run.sm.as_ref());

        let srcs = SSARef::from([
            b.ld_test_data(0, MemType::B32)[0],
            b.ld_test_data(4, MemType::B32)[0],
        ]);
        let shift = b.ld_test_data(8, MemType::B32);
        let dst = b.shr64(srcs.into(), shift.into(), signed);
        b.st_test_data(12, MemType::B32, dst[0].into());
        b.st_test_data(16, MemType::B32, dst[1].into());

        let bin = b.compile();

        let mut a = Acorn::new();
        let mut data = Vec::new();
        for _ in 0..invocations {
            data.push([a.get_u32(), a.get_u32(), a.get_uint(7) as u32, 0, 0]);
        }

        run.run.run(&bin, &mut data).unwrap();

        for d in &data {
            let src = u64::from(d[0]) | (u64::from(d[1]) << 32);
            let dst = if signed {
                ((src as i64) >> (d[2] & 0x3f)) as u64
            } else {
                src >> (d[2] & 0x3f)
            };
            assert_eq!(d[3], dst as u32);
            assert_eq!(d[4], (dst >> 32) as u32);
        }
    }
}

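// OpF2FP packs two f32 sources into a pair of f16 values in one 32-bit
// destination; the expected bit patterns below have srcs[0] in the high half
// and srcs[1] in the low half.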
#[test]
fn test_f2fp_pack_ab() {
    let run = RunSingleton::get();
    let mut b = TestShaderBuilder::new(run.sm.as_ref());

    let srcs = SSARef::from([
        b.ld_test_data(0, MemType::B32)[0],
        b.ld_test_data(4, MemType::B32)[0],
    ]);

    let dst = b.alloc_ssa(RegFile::GPR, 1);
    b.push_op(OpF2FP {
        dst: dst.into(),
        srcs: [srcs[0].into(), srcs[1].into()],
        rnd_mode: FRndMode::NearestEven,
    });
    b.st_test_data(8, MemType::B32, dst[0].into());

    let dst = b.alloc_ssa(RegFile::GPR, 1);
    b.push_op(OpF2FP {
        dst: dst.into(),
        srcs: [srcs[0].into(), 2.0.into()],
        rnd_mode: FRndMode::Zero,
    });
    b.st_test_data(12, MemType::B32, dst[0].into());

    let bin = b.compile();

    fn f32_to_u32(val: f32) -> u32 {
        u32::from_le_bytes(val.to_le_bytes())
    }

    let zero = f32_to_u32(0.0);
    let one = f32_to_u32(1.0);
    let two = f32_to_u32(2.0);
    let complex = f32_to_u32(1.4556);

    let mut data = Vec::new();
    data.push([one, two, 0, 0]);
    data.push([one, zero, 0, 0]);
    data.push([complex, zero, 0, 0]);
    run.run.run(&bin, &mut data).unwrap();

    // { 1.0fp16, 2.0fp16 }
    assert_eq!(data[0][2], 0x3c004000);
    // { 1.0fp16, 2.0fp16 }
    assert_eq!(data[0][3], 0x3c004000);
    // { 1.0fp16, 0.0fp16 }
    assert_eq!(data[1][2], 0x3c000000);
    // { 1.0fp16, 2.0fp16 }
    assert_eq!(data[1][3], 0x3c004000);
    // { 1.456fp16, 0.0fp16 }
    assert_eq!(data[2][2], 0x3dd30000);
    // { 1.455fp16, 2.0fp16 }
    assert_eq!(data[2][3], 0x3dd24000);
}