xref: /aosp_15_r20/external/mesa3d/src/nouveau/compiler/nak/ir.rs (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3 
4 extern crate bitview;
5 extern crate nak_ir_proc;
6 
7 use bitview::{BitMutView, BitView};
8 use nak_bindings::*;
9 
10 pub use crate::builder::{Builder, InstrBuilder, SSABuilder, SSAInstrBuilder};
11 use crate::legalize::LegalizeBuilder;
12 use crate::sph::{OutputTopology, PixelImap};
13 use compiler::as_slice::*;
14 use compiler::cfg::CFG;
15 use nak_ir_proc::*;
16 use std::cmp::{max, min};
17 use std::fmt;
18 use std::fmt::Write;
19 use std::iter::Zip;
20 use std::ops::{BitAnd, BitOr, Deref, DerefMut, Index, IndexMut, Not, Range};
21 use std::slice;
22 
/// A label, identified by a 32-bit index
///
/// Two labels compare equal exactly when their indices are equal.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct Label {
    /// The numeric index of this label
    idx: u32,
}
27 
28 impl fmt::Display for Label {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result29     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
30         write!(f, "L{}", self.idx)
31     }
32 }
33 
/// Hands out `Label`s with consecutive indices
pub struct LabelAllocator {
    /// The index which the next allocated label will receive
    count: u32,
}
37 
38 impl LabelAllocator {
new() -> LabelAllocator39     pub fn new() -> LabelAllocator {
40         LabelAllocator { count: 0 }
41     }
42 
alloc(&mut self) -> Label43     pub fn alloc(&mut self) -> Label {
44         let idx = self.count;
45         self.count += 1;
46         Label { idx: idx }
47     }
48 }
49 
/// Represents a register file
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum RegFile {
    /// The general-purpose register file
    ///
    /// General-purpose registers are 32 bits per SIMT channel.
    GPR = 0,

    /// The general-purpose uniform register file
    ///
    /// General-purpose uniform registers are 32 bits each and uniform across a
    /// wave.
    UGPR = 1,

    /// The predicate register file
    ///
    /// Predicate registers are 1 bit per SIMT channel.
    Pred = 2,

    /// The uniform predicate register file
    ///
    /// Uniform predicate registers are 1 bit and uniform across a wave.
    UPred = 3,

    /// The carry flag register file
    ///
    /// Only one carry flag register exists in hardware, but representing it as
    /// a reg file simplifies dependency tracking.
    ///
    /// This is used only on SM50.
    Carry = 4,

    /// The barrier register file
    ///
    /// This is a lane mask used for wave re-convergence instructions.
    Bar = 5,

    /// The memory register file
    ///
    /// This is a virtual register file for things which will get spilled to
    /// local memory.  Each memory location is 32 bits per SIMT channel.
    Mem = 6,
}
94 
/// The number of register files, i.e. the number of `RegFile` variants
///
/// This sizes the array in `PerRegFile`, which is indexed by the `RegFile`
/// discriminant, so it must stay in sync with the enum.
const NUM_REG_FILES: usize = 7;
96 
97 impl RegFile {
98     /// Returns true if the register file is uniform across a wave
is_uniform(&self) -> bool99     pub fn is_uniform(&self) -> bool {
100         match self {
101             RegFile::GPR
102             | RegFile::Pred
103             | RegFile::Carry
104             | RegFile::Bar
105             | RegFile::Mem => false,
106             RegFile::UGPR | RegFile::UPred => true,
107         }
108     }
109 
to_uniform(&self) -> Option<RegFile>110     pub fn to_uniform(&self) -> Option<RegFile> {
111         match self {
112             RegFile::GPR | RegFile::UGPR => Some(RegFile::UGPR),
113             RegFile::Pred | RegFile::UPred => Some(RegFile::UPred),
114             RegFile::Carry | RegFile::Bar | RegFile::Mem => None,
115         }
116     }
117 
to_warp(&self) -> RegFile118     pub fn to_warp(&self) -> RegFile {
119         match self {
120             RegFile::GPR | RegFile::UGPR => RegFile::GPR,
121             RegFile::Pred | RegFile::UPred => RegFile::Pred,
122             RegFile::Carry | RegFile::Bar | RegFile::Mem => *self,
123         }
124     }
125 
126     /// Returns true if the register file is general-purpose
is_gpr(&self) -> bool127     pub fn is_gpr(&self) -> bool {
128         match self {
129             RegFile::GPR | RegFile::UGPR => true,
130             RegFile::Pred
131             | RegFile::UPred
132             | RegFile::Carry
133             | RegFile::Bar
134             | RegFile::Mem => false,
135         }
136     }
137 
138     /// Returns true if the register file is a predicate register file
is_predicate(&self) -> bool139     pub fn is_predicate(&self) -> bool {
140         match self {
141             RegFile::GPR
142             | RegFile::UGPR
143             | RegFile::Carry
144             | RegFile::Bar
145             | RegFile::Mem => false,
146             RegFile::Pred | RegFile::UPred => true,
147         }
148     }
149 
fmt_prefix(&self) -> &'static str150     fn fmt_prefix(&self) -> &'static str {
151         match self {
152             RegFile::GPR => "r",
153             RegFile::UGPR => "ur",
154             RegFile::Pred => "p",
155             RegFile::UPred => "up",
156             RegFile::Carry => "c",
157             RegFile::Bar => "b",
158             RegFile::Mem => "m",
159         }
160     }
161 }
162 
163 impl fmt::Display for RegFile {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result164     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165         match self {
166             RegFile::GPR => write!(f, "GPR"),
167             RegFile::UGPR => write!(f, "UGPR"),
168             RegFile::Pred => write!(f, "Pred"),
169             RegFile::UPred => write!(f, "UPred"),
170             RegFile::Carry => write!(f, "Carry"),
171             RegFile::Bar => write!(f, "Bar"),
172             RegFile::Mem => write!(f, "Mem"),
173         }
174     }
175 }
176 
177 impl From<RegFile> for u8 {
from(value: RegFile) -> u8178     fn from(value: RegFile) -> u8 {
179         value as u8
180     }
181 }
182 
183 impl TryFrom<u32> for RegFile {
184     type Error = &'static str;
185 
try_from(value: u32) -> Result<Self, Self::Error>186     fn try_from(value: u32) -> Result<Self, Self::Error> {
187         match value {
188             0 => Ok(RegFile::GPR),
189             1 => Ok(RegFile::UGPR),
190             2 => Ok(RegFile::Pred),
191             3 => Ok(RegFile::UPred),
192             4 => Ok(RegFile::Carry),
193             5 => Ok(RegFile::Bar),
194             6 => Ok(RegFile::Mem),
195             _ => Err("Invalid register file number"),
196         }
197     }
198 }
199 
200 impl TryFrom<u16> for RegFile {
201     type Error = &'static str;
202 
try_from(value: u16) -> Result<Self, Self::Error>203     fn try_from(value: u16) -> Result<Self, Self::Error> {
204         RegFile::try_from(u32::from(value))
205     }
206 }
207 
208 impl TryFrom<u8> for RegFile {
209     type Error = &'static str;
210 
try_from(value: u8) -> Result<Self, Self::Error>211     fn try_from(value: u8) -> Result<Self, Self::Error> {
212         RegFile::try_from(u32::from(value))
213     }
214 }
215 
216 /// A trait for things which have an associated register file
217 pub trait HasRegFile {
file(&self) -> RegFile218     fn file(&self) -> RegFile;
219 
is_uniform(&self) -> bool220     fn is_uniform(&self) -> bool {
221         self.file().is_uniform()
222     }
223 
is_gpr(&self) -> bool224     fn is_gpr(&self) -> bool {
225         self.file().is_gpr()
226     }
227 
is_predicate(&self) -> bool228     fn is_predicate(&self) -> bool {
229         self.file().is_predicate()
230     }
231 }
232 
/// A set of register files
///
/// Membership is stored as a bitmask: bit `n` is set when the register file
/// whose discriminant is `n` is in the set.
#[derive(Clone)]
pub struct RegFileSet {
    /// One bit per register file, indexed by the `RegFile` discriminant
    bits: u8,
}
237 
238 impl RegFileSet {
new() -> RegFileSet239     pub fn new() -> RegFileSet {
240         RegFileSet { bits: 0 }
241     }
242 
len(&self) -> usize243     pub fn len(&self) -> usize {
244         self.bits.count_ones() as usize
245     }
246 
contains(&self, file: RegFile) -> bool247     pub fn contains(&self, file: RegFile) -> bool {
248         self.bits & (1 << (file as u8)) != 0
249     }
250 
insert(&mut self, file: RegFile) -> bool251     pub fn insert(&mut self, file: RegFile) -> bool {
252         let has_file = self.contains(file);
253         self.bits |= 1 << (file as u8);
254         !has_file
255     }
256 
is_empty(&self) -> bool257     pub fn is_empty(&self) -> bool {
258         self.bits == 0
259     }
260 
261     #[allow(dead_code)]
iter(&self) -> RegFileSet262     pub fn iter(&self) -> RegFileSet {
263         self.clone()
264     }
265 
remove(&mut self, file: RegFile) -> bool266     pub fn remove(&mut self, file: RegFile) -> bool {
267         let has_file = self.contains(file);
268         self.bits &= !(1 << (file as u8));
269         has_file
270     }
271 }
272 
273 impl FromIterator<RegFile> for RegFileSet {
from_iter<T: IntoIterator<Item = RegFile>>(iter: T) -> Self274     fn from_iter<T: IntoIterator<Item = RegFile>>(iter: T) -> Self {
275         let mut set = RegFileSet::new();
276         for file in iter {
277             set.insert(file);
278         }
279         set
280     }
281 }
282 
283 impl Iterator for RegFileSet {
284     type Item = RegFile;
285 
next(&mut self) -> Option<RegFile>286     fn next(&mut self) -> Option<RegFile> {
287         if self.is_empty() {
288             None
289         } else {
290             let file = self.bits.trailing_zeros().try_into().unwrap();
291             self.remove(file);
292             Some(file)
293         }
294     }
295 
size_hint(&self) -> (usize, Option<usize>)296     fn size_hint(&self) -> (usize, Option<usize>) {
297         let len = self.len();
298         (len, Some(len))
299     }
300 }
301 
/// A container holding one `T` for each register file
///
/// Values are indexed by `RegFile` through the `Index` and `IndexMut` impls.
#[derive(Clone, Copy)]
pub struct PerRegFile<T> {
    /// One entry per register file, positioned by the `RegFile` discriminant
    per_file: [T; NUM_REG_FILES],
}
306 
307 impl<T> PerRegFile<T> {
new_with<F: Fn(RegFile) -> T>(f: F) -> Self308     pub fn new_with<F: Fn(RegFile) -> T>(f: F) -> Self {
309         PerRegFile {
310             per_file: [
311                 f(RegFile::GPR),
312                 f(RegFile::UGPR),
313                 f(RegFile::Pred),
314                 f(RegFile::UPred),
315                 f(RegFile::Carry),
316                 f(RegFile::Bar),
317                 f(RegFile::Mem),
318             ],
319         }
320     }
321 
values(&self) -> slice::Iter<T>322     pub fn values(&self) -> slice::Iter<T> {
323         self.per_file.iter()
324     }
325 
values_mut(&mut self) -> slice::IterMut<T>326     pub fn values_mut(&mut self) -> slice::IterMut<T> {
327         self.per_file.iter_mut()
328     }
329 }
330 
331 impl<T: Default> Default for PerRegFile<T> {
default() -> Self332     fn default() -> Self {
333         PerRegFile {
334             per_file: Default::default(),
335         }
336     }
337 }
338 
339 impl<T> Index<RegFile> for PerRegFile<T> {
340     type Output = T;
341 
index(&self, idx: RegFile) -> &T342     fn index(&self, idx: RegFile) -> &T {
343         &self.per_file[idx as u8 as usize]
344     }
345 }
346 
347 impl<T> IndexMut<RegFile> for PerRegFile<T> {
index_mut(&mut self, idx: RegFile) -> &mut T348     fn index_mut(&mut self, idx: RegFile) -> &mut T {
349         &mut self.per_file[idx as u8 as usize]
350     }
351 }
352 
/// An SSA value
///
/// Each SSA in NAK represents a single 32-bit or 1-bit (if a predicate) value
/// which must either be spilled to memory or allocated space in the specified
/// register file.  Whenever more data is required such as a 64-bit memory
/// address, double-precision float, or a vec4 texture result, multiple SSA
/// values are used.
///
/// Each SSA value logically contains two things: an index and a register file.
/// It is required that each index refers to a unique SSA value, regardless of
/// register file.  This way the index can be used to index tightly-packed data
/// structures such as bitsets without having to determine separate ranges for
/// each register file.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct SSAValue {
    /// The register file number in the top 3 bits and the index in the low
    /// 29 bits.  A value of zero is reserved for `SSAValue::NONE`.
    packed: u32,
}
370 
371 impl SSAValue {
372     /// A special SSA value which is always invalid
373     pub const NONE: Self = SSAValue { packed: 0 };
374 
375     /// Returns an SSA value with the given register file and index
new(file: RegFile, idx: u32) -> SSAValue376     pub fn new(file: RegFile, idx: u32) -> SSAValue {
377         assert!(idx > 0 && idx < (1 << 29) - 2);
378         let mut packed = idx;
379         assert!(u8::from(file) < 8);
380         packed |= u32::from(u8::from(file)) << 29;
381         SSAValue { packed: packed }
382     }
383 
384     /// Returns the index of this SSA value
idx(&self) -> u32385     pub fn idx(&self) -> u32 {
386         self.packed & 0x1fffffff
387     }
388 
389     /// Returns true if this SSA value is equal to SSAValue::NONE
is_none(&self) -> bool390     pub fn is_none(&self) -> bool {
391         self.packed == 0
392     }
393 
fmt_plain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result394     fn fmt_plain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
395         write!(f, "{}{}", self.file().fmt_prefix(), self.idx())
396     }
397 }
398 
399 impl HasRegFile for SSAValue {
400     /// Returns the register file of this SSA value
file(&self) -> RegFile401     fn file(&self) -> RegFile {
402         RegFile::try_from(self.packed >> 29).unwrap()
403     }
404 }
405 
406 impl fmt::Display for SSAValue {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result407     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
408         write!(f, "%")?;
409         self.fmt_plain(f)
410     }
411 }
412 
/// A reference to one or more SSA values
///
/// Because each SSA value represents a single 1 or 32-bit scalar, we need a way
/// to reference multiple SSA values for instructions which read or write
/// multiple registers in the same source.  When the register allocator runs,
/// all the SSA values in a given SSA ref will be placed in consecutive
/// registers, with the base register aligned to the number of values, rounded
/// up to the next power of two.
///
/// An SSA reference can reference between 1 and 4 SSA values.  It dereferences
/// to a slice for easy access to individual SSA values.  The structure is
/// designed so that it is always 16B, regardless of how many SSA values are
/// referenced so it's easy and fairly cheap to copy around and embed in other
/// structures.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct SSARef {
    /// The referenced SSA values, stored from slot 0 upward.  When fewer than
    /// four values are referenced, the packed bits of the last slot hold the
    /// negated component count instead of a real SSA value.
    v: [SSAValue; 4],
}
431 
432 impl SSARef {
433     /// Returns a new SSA reference
434     #[inline]
new(comps: &[SSAValue]) -> SSARef435     fn new(comps: &[SSAValue]) -> SSARef {
436         assert!(comps.len() > 0 && comps.len() <= 4);
437         let mut r = SSARef {
438             v: [SSAValue::NONE; 4],
439         };
440         for i in 0..comps.len() {
441             r.v[i] = comps[i];
442         }
443         if comps.len() < 4 {
444             r.v[3].packed = (comps.len() as u32).wrapping_neg();
445         }
446         r
447     }
448 
449     /// Returns the number of components in this SSA reference
comps(&self) -> u8450     pub fn comps(&self) -> u8 {
451         if self.v[3].packed >= u32::MAX - 2 {
452             self.v[3].packed.wrapping_neg() as u8
453         } else {
454             4
455         }
456     }
457 
file(&self) -> Option<RegFile>458     pub fn file(&self) -> Option<RegFile> {
459         let comps = usize::from(self.comps());
460         let file = self.v[0].file();
461         for i in 1..comps {
462             if self.v[i].file() != file {
463                 return None;
464             }
465         }
466         Some(file)
467     }
468 
is_uniform(&self) -> bool469     pub fn is_uniform(&self) -> bool {
470         for ssa in &self[..] {
471             if !ssa.is_uniform() {
472                 return false;
473             }
474         }
475         true
476     }
477 
is_gpr(&self) -> bool478     pub fn is_gpr(&self) -> bool {
479         for ssa in &self[..] {
480             if !ssa.is_gpr() {
481                 return false;
482             }
483         }
484         true
485     }
486 
is_predicate(&self) -> bool487     pub fn is_predicate(&self) -> bool {
488         if self.v[0].is_predicate() {
489             true
490         } else {
491             for ssa in &self[..] {
492                 debug_assert!(!ssa.is_predicate());
493             }
494             false
495         }
496     }
497 }
498 
499 impl Deref for SSARef {
500     type Target = [SSAValue];
501 
deref(&self) -> &[SSAValue]502     fn deref(&self) -> &[SSAValue] {
503         let comps = usize::from(self.comps());
504         &self.v[..comps]
505     }
506 }
507 
508 impl DerefMut for SSARef {
deref_mut(&mut self) -> &mut [SSAValue]509     fn deref_mut(&mut self) -> &mut [SSAValue] {
510         let comps = usize::from(self.comps());
511         &mut self.v[..comps]
512     }
513 }
514 
515 impl TryFrom<&[SSAValue]> for SSARef {
516     type Error = &'static str;
517 
try_from(comps: &[SSAValue]) -> Result<Self, Self::Error>518     fn try_from(comps: &[SSAValue]) -> Result<Self, Self::Error> {
519         if comps.len() == 0 {
520             Err("Empty vector")
521         } else if comps.len() > 4 {
522             Err("Too many vector components")
523         } else {
524             Ok(SSARef::new(comps))
525         }
526     }
527 }
528 
529 impl TryFrom<Vec<SSAValue>> for SSARef {
530     type Error = &'static str;
531 
try_from(comps: Vec<SSAValue>) -> Result<Self, Self::Error>532     fn try_from(comps: Vec<SSAValue>) -> Result<Self, Self::Error> {
533         SSARef::try_from(&comps[..])
534     }
535 }
536 
537 macro_rules! impl_ssa_ref_from_arr {
538     ($n: expr) => {
539         impl From<[SSAValue; $n]> for SSARef {
540             fn from(comps: [SSAValue; $n]) -> Self {
541                 SSARef::new(&comps[..])
542             }
543         }
544     };
545 }
546 impl_ssa_ref_from_arr!(1);
547 impl_ssa_ref_from_arr!(2);
548 impl_ssa_ref_from_arr!(3);
549 impl_ssa_ref_from_arr!(4);
550 
551 impl From<SSAValue> for SSARef {
from(val: SSAValue) -> Self552     fn from(val: SSAValue) -> Self {
553         [val].into()
554     }
555 }
556 
557 impl fmt::Display for SSARef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result558     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
559         if self.comps() == 1 {
560             write!(f, "{}", self[0])
561         } else {
562             write!(f, "{{")?;
563             for (i, v) in self.iter().enumerate() {
564                 if i != 0 {
565                     write!(f, " ")?;
566                 }
567                 write!(f, "{}", v)?;
568             }
569             write!(f, "}}")
570         }
571     }
572 }
573 
/// Hands out SSA values with consecutive, unique indices
pub struct SSAValueAllocator {
    /// The index of the most recently allocated SSA value, or 0 if nothing
    /// has been allocated yet
    count: u32,
}
577 
578 impl SSAValueAllocator {
new() -> SSAValueAllocator579     pub fn new() -> SSAValueAllocator {
580         SSAValueAllocator { count: 0 }
581     }
582 
max_idx(&self) -> u32583     pub fn max_idx(&self) -> u32 {
584         self.count
585     }
586 
alloc(&mut self, file: RegFile) -> SSAValue587     pub fn alloc(&mut self, file: RegFile) -> SSAValue {
588         self.count += 1;
589         SSAValue::new(file, self.count)
590     }
591 
alloc_vec(&mut self, file: RegFile, comps: u8) -> SSARef592     pub fn alloc_vec(&mut self, file: RegFile, comps: u8) -> SSARef {
593         assert!(comps >= 1 && comps <= 4);
594         let mut vec = [SSAValue::NONE; 4];
595         for c in 0..comps {
596             vec[usize::from(c)] = self.alloc(file);
597         }
598         vec[0..usize::from(comps)].try_into().unwrap()
599     }
600 }
601 
/// A reference to one or more consecutive registers in a register file
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct RegRef {
    /// The base register index in the low 26 bits, the component count minus
    /// one in the next 3 bits, and the register file number in the top 3 bits
    packed: u32,
}
606 
607 impl RegRef {
608     pub const MAX_IDX: u32 = (1 << 26) - 1;
609 
zero_idx(file: RegFile) -> u32610     fn zero_idx(file: RegFile) -> u32 {
611         match file {
612             RegFile::GPR => 255,
613             RegFile::UGPR => 63,
614             RegFile::Pred => 7,
615             RegFile::UPred => 7,
616             RegFile::Carry => panic!("Carry has no zero index"),
617             RegFile::Bar => panic!("Bar has no zero index"),
618             RegFile::Mem => panic!("Mem has no zero index"),
619         }
620     }
621 
new(file: RegFile, base_idx: u32, comps: u8) -> RegRef622     pub fn new(file: RegFile, base_idx: u32, comps: u8) -> RegRef {
623         assert!(base_idx <= Self::MAX_IDX);
624         let mut packed = base_idx;
625         assert!(comps > 0 && comps <= 8);
626         packed |= u32::from(comps - 1) << 26;
627         assert!(u8::from(file) < 8);
628         packed |= u32::from(u8::from(file)) << 29;
629         RegRef { packed: packed }
630     }
631 
zero(file: RegFile, comps: u8) -> RegRef632     pub fn zero(file: RegFile, comps: u8) -> RegRef {
633         RegRef::new(file, RegRef::zero_idx(file), comps)
634     }
635 
base_idx(&self) -> u32636     pub fn base_idx(&self) -> u32 {
637         self.packed & 0x03ffffff
638     }
639 
idx_range(&self) -> Range<u32>640     pub fn idx_range(&self) -> Range<u32> {
641         let start = self.base_idx();
642         let end = start + u32::from(self.comps());
643         start..end
644     }
645 
comps(&self) -> u8646     pub fn comps(&self) -> u8 {
647         (((self.packed >> 26) & 0x7) + 1).try_into().unwrap()
648     }
649 
comp(&self, c: u8) -> RegRef650     pub fn comp(&self, c: u8) -> RegRef {
651         assert!(c < self.comps());
652         RegRef::new(self.file(), self.base_idx() + u32::from(c), 1)
653     }
654 }
655 
656 impl HasRegFile for RegRef {
file(&self) -> RegFile657     fn file(&self) -> RegFile {
658         ((self.packed >> 29) & 0x7).try_into().unwrap()
659     }
660 }
661 
662 impl fmt::Display for RegRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result663     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
664         write!(f, "{}{}", self.file().fmt_prefix(), self.base_idx())?;
665         if self.comps() > 1 {
666             write!(f, "..{}", self.idx_range().end)?;
667         }
668         Ok(())
669     }
670 }
671 
/// An instruction destination
#[derive(Clone, Copy)]
pub enum Dst {
    /// No destination; printed as `null`
    None,
    /// A destination made of one or more SSA values
    SSA(SSARef),
    /// A destination made of one or more registers
    Reg(RegRef),
}
678 
679 impl Dst {
is_none(&self) -> bool680     pub fn is_none(&self) -> bool {
681         matches!(self, Dst::None)
682     }
683 
as_reg(&self) -> Option<&RegRef>684     pub fn as_reg(&self) -> Option<&RegRef> {
685         match self {
686             Dst::Reg(r) => Some(r),
687             _ => None,
688         }
689     }
690 
as_ssa(&self) -> Option<&SSARef>691     pub fn as_ssa(&self) -> Option<&SSARef> {
692         match self {
693             Dst::SSA(r) => Some(r),
694             _ => None,
695         }
696     }
697 
iter_ssa(&self) -> slice::Iter<'_, SSAValue>698     pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
699         match self {
700             Dst::None | Dst::Reg(_) => &[],
701             Dst::SSA(ssa) => ssa.deref(),
702         }
703         .iter()
704     }
705 
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>706     pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
707         match self {
708             Dst::None | Dst::Reg(_) => &mut [],
709             Dst::SSA(ssa) => ssa.deref_mut(),
710         }
711         .iter_mut()
712     }
713 }
714 
715 impl From<RegRef> for Dst {
from(reg: RegRef) -> Dst716     fn from(reg: RegRef) -> Dst {
717         Dst::Reg(reg)
718     }
719 }
720 
721 impl<T: Into<SSARef>> From<T> for Dst {
from(ssa: T) -> Dst722     fn from(ssa: T) -> Dst {
723         Dst::SSA(ssa.into())
724     }
725 }
726 
727 impl fmt::Display for Dst {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result728     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
729         match self {
730             Dst::None => write!(f, "null")?,
731             Dst::SSA(v) => v.fmt(f)?,
732             Dst::Reg(r) => r.fmt(f)?,
733         }
734         Ok(())
735     }
736 }
737 
/// Identifies a constant buffer
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum CBuf {
    /// A constant buffer named by an 8-bit number; printed as `c[<number>]`
    Binding(u8),

    /// A constant buffer named by SSA values; printed as `cx[<ssa>]`
    // NOTE(review): presumably a bindless handle held in SSA values, going
    // by the variant name -- confirm against the users of this variant.
    #[allow(dead_code)]
    BindlessSSA(SSARef),

    /// A constant buffer named by registers; printed as `cx[<reg>]`
    // NOTE(review): the name suggests the registers are uniform GPRs, but
    // nothing in this type enforces that -- confirm against the users.
    #[allow(dead_code)]
    BindlessUGPR(RegRef),
}
748 
749 impl fmt::Display for CBuf {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result750     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
751         match self {
752             CBuf::Binding(idx) => write!(f, "c[{:#x}]", idx),
753             CBuf::BindlessSSA(v) => write!(f, "cx[{}]", v),
754             CBuf::BindlessUGPR(r) => write!(f, "cx[{}]", r),
755         }
756     }
757 }
758 
/// A reference to a location in a constant buffer
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct CBufRef {
    /// The constant buffer being referenced
    pub buf: CBuf,
    /// The offset into the constant buffer
    // NOTE(review): presumably in bytes -- confirm against the encoders.
    pub offset: u16,
}
764 
765 impl CBufRef {
offset(self, offset: u16) -> CBufRef766     pub fn offset(self, offset: u16) -> CBufRef {
767         CBufRef {
768             buf: self.buf,
769             offset: self.offset + offset,
770         }
771     }
772 }
773 
774 impl fmt::Display for CBufRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result775     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
776         write!(f, "{}[{:#x}]", self.buf, self.offset)
777     }
778 }
779 
/// The thing an instruction source refers to
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum SrcRef {
    /// The integer value zero; printed as `rZ`
    Zero,
    /// The predicate value true; printed as `pT`
    True,
    /// The predicate value false; printed as `pF`
    False,
    /// A 32-bit immediate; printed in hex
    Imm32(u32),
    /// A location in a constant buffer
    CBuf(CBufRef),
    /// One or more SSA values
    SSA(SSARef),
    /// One or more registers
    Reg(RegRef),
}
790 
791 impl SrcRef {
792     #[allow(dead_code)]
is_alu(&self) -> bool793     pub fn is_alu(&self) -> bool {
794         match self {
795             SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => true,
796             SrcRef::SSA(ssa) => ssa.is_gpr(),
797             SrcRef::Reg(reg) => reg.is_gpr(),
798             SrcRef::True | SrcRef::False => false,
799         }
800     }
801 
is_predicate(&self) -> bool802     pub fn is_predicate(&self) -> bool {
803         match self {
804             SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => false,
805             SrcRef::True | SrcRef::False => true,
806             SrcRef::SSA(ssa) => ssa.is_predicate(),
807             SrcRef::Reg(reg) => reg.is_predicate(),
808         }
809     }
810 
is_carry(&self) -> bool811     pub fn is_carry(&self) -> bool {
812         match self {
813             SrcRef::SSA(ssa) => ssa.file() == Some(RegFile::Carry),
814             SrcRef::Reg(reg) => reg.file() == RegFile::Carry,
815             _ => false,
816         }
817     }
818 
819     #[allow(dead_code)]
is_barrier(&self) -> bool820     pub fn is_barrier(&self) -> bool {
821         match self {
822             SrcRef::SSA(ssa) => ssa.file() == Some(RegFile::Bar),
823             SrcRef::Reg(reg) => reg.file() == RegFile::Bar,
824             _ => false,
825         }
826     }
827 
as_reg(&self) -> Option<&RegRef>828     pub fn as_reg(&self) -> Option<&RegRef> {
829         match self {
830             SrcRef::Reg(r) => Some(r),
831             _ => None,
832         }
833     }
834 
as_ssa(&self) -> Option<&SSARef>835     pub fn as_ssa(&self) -> Option<&SSARef> {
836         match self {
837             SrcRef::SSA(r) => Some(r),
838             _ => None,
839         }
840     }
841 
as_u32(&self) -> Option<u32>842     pub fn as_u32(&self) -> Option<u32> {
843         match self {
844             SrcRef::Zero => Some(0),
845             SrcRef::Imm32(u) => Some(*u),
846             SrcRef::CBuf(_) | SrcRef::SSA(_) | SrcRef::Reg(_) => None,
847             _ => panic!("Invalid integer source"),
848         }
849     }
850 
get_reg(&self) -> Option<&RegRef>851     pub fn get_reg(&self) -> Option<&RegRef> {
852         match self {
853             SrcRef::Zero
854             | SrcRef::True
855             | SrcRef::False
856             | SrcRef::Imm32(_)
857             | SrcRef::SSA(_) => None,
858             SrcRef::CBuf(cb) => match &cb.buf {
859                 CBuf::Binding(_) | CBuf::BindlessSSA(_) => None,
860                 CBuf::BindlessUGPR(reg) => Some(reg),
861             },
862             SrcRef::Reg(reg) => Some(reg),
863         }
864     }
865 
iter_ssa(&self) -> slice::Iter<'_, SSAValue>866     pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
867         match self {
868             SrcRef::Zero
869             | SrcRef::True
870             | SrcRef::False
871             | SrcRef::Imm32(_)
872             | SrcRef::Reg(_) => &[],
873             SrcRef::CBuf(cb) => match &cb.buf {
874                 CBuf::Binding(_) | CBuf::BindlessUGPR(_) => &[],
875                 CBuf::BindlessSSA(ssa) => ssa.deref(),
876             },
877             SrcRef::SSA(ssa) => ssa.deref(),
878         }
879         .iter()
880     }
881 
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>882     pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
883         match self {
884             SrcRef::Zero
885             | SrcRef::True
886             | SrcRef::False
887             | SrcRef::Imm32(_)
888             | SrcRef::Reg(_) => &mut [],
889             SrcRef::CBuf(cb) => match &mut cb.buf {
890                 CBuf::Binding(_) | CBuf::BindlessUGPR(_) => &mut [],
891                 CBuf::BindlessSSA(ssa) => ssa.deref_mut(),
892             },
893             SrcRef::SSA(ssa) => ssa.deref_mut(),
894         }
895         .iter_mut()
896     }
897 }
898 
899 impl From<bool> for SrcRef {
from(b: bool) -> SrcRef900     fn from(b: bool) -> SrcRef {
901         if b {
902             SrcRef::True
903         } else {
904             SrcRef::False
905         }
906     }
907 }
908 
909 impl From<u32> for SrcRef {
from(u: u32) -> SrcRef910     fn from(u: u32) -> SrcRef {
911         if u == 0 {
912             SrcRef::Zero
913         } else {
914             SrcRef::Imm32(u)
915         }
916     }
917 }
918 
919 impl From<f32> for SrcRef {
from(f: f32) -> SrcRef920     fn from(f: f32) -> SrcRef {
921         f.to_bits().into()
922     }
923 }
924 
925 impl From<PrmtSel> for SrcRef {
from(sel: PrmtSel) -> SrcRef926     fn from(sel: PrmtSel) -> SrcRef {
927         u32::from(sel.0).into()
928     }
929 }
930 
931 impl From<CBufRef> for SrcRef {
from(cb: CBufRef) -> SrcRef932     fn from(cb: CBufRef) -> SrcRef {
933         SrcRef::CBuf(cb)
934     }
935 }
936 
937 impl From<RegRef> for SrcRef {
from(reg: RegRef) -> SrcRef938     fn from(reg: RegRef) -> SrcRef {
939         SrcRef::Reg(reg)
940     }
941 }
942 
943 impl<T: Into<SSARef>> From<T> for SrcRef {
from(ssa: T) -> SrcRef944     fn from(ssa: T) -> SrcRef {
945         SrcRef::SSA(ssa.into())
946     }
947 }
948 
949 impl fmt::Display for SrcRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result950     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
951         match self {
952             SrcRef::Zero => write!(f, "rZ"),
953             SrcRef::True => write!(f, "pT"),
954             SrcRef::False => write!(f, "pF"),
955             SrcRef::Imm32(u) => write!(f, "{:#x}", u),
956             SrcRef::CBuf(c) => c.fmt(f),
957             SrcRef::SSA(v) => v.fmt(f),
958             SrcRef::Reg(r) => r.fmt(f),
959         }
960     }
961 }
962 
/// A modifier applied to an instruction source
///
/// The float, integer and bitwise modifiers are mutually exclusive: the
/// query methods on this type panic when asked about a modifier from a
/// different family.
#[derive(Clone, Copy, PartialEq)]
pub enum SrcMod {
    /// No modifier
    None,
    /// Floating-point absolute value
    FAbs,
    /// Floating-point negation
    FNeg,
    /// Both floating-point negation and absolute value
    // NOTE(review): presumably the negation applies to the absolute value,
    // i.e. -|x| -- confirm against the instruction encoders.
    FNegAbs,
    /// Integer negation
    INeg,
    /// Bitwise not
    BNot,
}
972 
973 impl SrcMod {
is_none(&self) -> bool974     pub fn is_none(&self) -> bool {
975         matches!(self, SrcMod::None)
976     }
977 
has_fabs(&self) -> bool978     pub fn has_fabs(&self) -> bool {
979         match self {
980             SrcMod::None | SrcMod::FNeg => false,
981             SrcMod::FAbs | SrcMod::FNegAbs => true,
982             _ => panic!("Not a float modifier"),
983         }
984     }
985 
has_fneg(&self) -> bool986     pub fn has_fneg(&self) -> bool {
987         match self {
988             SrcMod::None | SrcMod::FAbs => false,
989             SrcMod::FNeg | SrcMod::FNegAbs => true,
990             _ => panic!("Not a float modifier"),
991         }
992     }
993 
is_ineg(&self) -> bool994     pub fn is_ineg(&self) -> bool {
995         match self {
996             SrcMod::None => false,
997             SrcMod::INeg => true,
998             _ => panic!("Not an integer modifier"),
999         }
1000     }
1001 
is_bnot(&self) -> bool1002     pub fn is_bnot(&self) -> bool {
1003         match self {
1004             SrcMod::None => false,
1005             SrcMod::BNot => true,
1006             _ => panic!("Not a bitwise modifier"),
1007         }
1008     }
1009 
fabs(self) -> SrcMod1010     pub fn fabs(self) -> SrcMod {
1011         match self {
1012             SrcMod::None | SrcMod::FAbs | SrcMod::FNeg | SrcMod::FNegAbs => {
1013                 SrcMod::FAbs
1014             }
1015             _ => panic!("Not a float source modifier"),
1016         }
1017     }
1018 
fneg(self) -> SrcMod1019     pub fn fneg(self) -> SrcMod {
1020         match self {
1021             SrcMod::None => SrcMod::FNeg,
1022             SrcMod::FAbs => SrcMod::FNegAbs,
1023             SrcMod::FNeg => SrcMod::None,
1024             SrcMod::FNegAbs => SrcMod::FAbs,
1025             _ => panic!("Not a float source modifier"),
1026         }
1027     }
1028 
ineg(self) -> SrcMod1029     pub fn ineg(self) -> SrcMod {
1030         match self {
1031             SrcMod::None => SrcMod::INeg,
1032             SrcMod::INeg => SrcMod::None,
1033             _ => panic!("Not an integer source modifier"),
1034         }
1035     }
1036 
bnot(self) -> SrcMod1037     pub fn bnot(self) -> SrcMod {
1038         match self {
1039             SrcMod::None => SrcMod::BNot,
1040             SrcMod::BNot => SrcMod::None,
1041             _ => panic!("Not a boolean source modifier"),
1042         }
1043     }
1044 
modify(self, other: SrcMod) -> SrcMod1045     pub fn modify(self, other: SrcMod) -> SrcMod {
1046         match other {
1047             SrcMod::None => self,
1048             SrcMod::FAbs => self.fabs(),
1049             SrcMod::FNeg => self.fneg(),
1050             SrcMod::FNegAbs => self.fabs().fneg(),
1051             SrcMod::INeg => self.ineg(),
1052             SrcMod::BNot => self.bnot(),
1053         }
1054     }
1055 }
1056 
/// Component selection applied to a source that packs two 16-bit values.
#[derive(Clone, Copy, PartialEq)]
#[allow(dead_code)]
pub enum SrcSwizzle {
    /// Both components are used as they are
    None,
    /// The low component is used for both halves
    Xx,
    /// The high component is used for both halves
    Yy,
}

impl SrcSwizzle {
    /// Returns true if no swizzle is applied.
    pub fn is_none(&self) -> bool {
        *self == SrcSwizzle::None
    }
}

/// Prints the swizzle as a suffix (`.xx` / `.yy`); `None` prints nothing.
impl fmt::Display for SrcSwizzle {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            SrcSwizzle::None => return Ok(()),
            SrcSwizzle::Xx => ".xx",
            SrcSwizzle::Yy => ".yy",
        };
        f.write_str(suffix)
    }
}
1080 
1081 #[derive(Clone, Copy, PartialEq)]
1082 pub struct Src {
1083     pub src_ref: SrcRef,
1084     pub src_mod: SrcMod,
1085     pub src_swizzle: SrcSwizzle,
1086 }
1087 
1088 impl Src {
new_zero() -> Src1089     pub fn new_zero() -> Src {
1090         SrcRef::Zero.into()
1091     }
1092 
new_imm_u32(u: u32) -> Src1093     pub fn new_imm_u32(u: u32) -> Src {
1094         u.into()
1095     }
1096 
new_imm_bool(b: bool) -> Src1097     pub fn new_imm_bool(b: bool) -> Src {
1098         b.into()
1099     }
1100 
fabs(&self) -> Src1101     pub fn fabs(&self) -> Src {
1102         Src {
1103             src_ref: self.src_ref,
1104             src_mod: self.src_mod.fabs(),
1105             src_swizzle: self.src_swizzle,
1106         }
1107     }
1108 
fneg(&self) -> Src1109     pub fn fneg(&self) -> Src {
1110         Src {
1111             src_ref: self.src_ref,
1112             src_mod: self.src_mod.fneg(),
1113             src_swizzle: self.src_swizzle,
1114         }
1115     }
1116 
ineg(&self) -> Src1117     pub fn ineg(&self) -> Src {
1118         Src {
1119             src_ref: self.src_ref,
1120             src_mod: self.src_mod.ineg(),
1121             src_swizzle: self.src_swizzle,
1122         }
1123     }
1124 
bnot(&self) -> Src1125     pub fn bnot(&self) -> Src {
1126         Src {
1127             src_ref: self.src_ref,
1128             src_mod: self.src_mod.bnot(),
1129             src_swizzle: self.src_swizzle,
1130         }
1131     }
1132 
fold_imm(&self, src_type: SrcType) -> Src1133     pub fn fold_imm(&self, src_type: SrcType) -> Src {
1134         let SrcRef::Imm32(mut u) = self.src_ref else {
1135             return *self;
1136         };
1137 
1138         if self.src_mod.is_none() && self.src_swizzle.is_none() {
1139             return *self;
1140         }
1141 
1142         assert!(src_type == SrcType::F16v2 || self.src_swizzle.is_none());
1143 
1144         // INeg affects more than just the 32 bits of input data so it can't be
1145         // trivially folded.  In fact, -imm may not be representable as a 32-bit
1146         // immediate at all.
1147         if src_type == SrcType::I32 {
1148             return *self;
1149         }
1150 
1151         u = match src_type {
1152             SrcType::F16 => {
1153                 let low = u & 0xFFFF;
1154 
1155                 match self.src_mod {
1156                     SrcMod::None => low,
1157                     SrcMod::FAbs => low & !(1_u32 << 15),
1158                     SrcMod::FNeg => low ^ (1_u32 << 15),
1159                     SrcMod::FNegAbs => low | (1_u32 << 15),
1160                     _ => panic!("Not a float source modifier"),
1161                 }
1162             }
1163             SrcType::F16v2 => {
1164                 let u = match self.src_swizzle {
1165                     SrcSwizzle::None => u,
1166                     SrcSwizzle::Xx => (u << 16) | (u & 0xffff),
1167                     SrcSwizzle::Yy => (u & 0xffff0000) | (u >> 16),
1168                 };
1169 
1170                 match self.src_mod {
1171                     SrcMod::None => u,
1172                     SrcMod::FAbs => u & 0x7FFF7FFF,
1173                     SrcMod::FNeg => u ^ 0x80008000,
1174                     SrcMod::FNegAbs => u | 0x80008000,
1175                     _ => panic!("Not a float source modifier"),
1176                 }
1177             }
1178             SrcType::F32 | SrcType::F64 => match self.src_mod {
1179                 SrcMod::None => u,
1180                 SrcMod::FAbs => u & !(1_u32 << 31),
1181                 SrcMod::FNeg => u ^ (1_u32 << 31),
1182                 SrcMod::FNegAbs => u | (1_u32 << 31),
1183                 _ => panic!("Not a float source modifier"),
1184             },
1185             SrcType::I32 => match self.src_mod {
1186                 SrcMod::None => u,
1187                 SrcMod::INeg => -(u as i32) as u32,
1188                 _ => panic!("Not an integer source modifier"),
1189             },
1190             SrcType::B32 => match self.src_mod {
1191                 SrcMod::None => u,
1192                 SrcMod::BNot => !u,
1193                 _ => panic!("Not a bitwise source modifier"),
1194             },
1195             _ => {
1196                 assert!(self.src_mod.is_none());
1197                 u
1198             }
1199         };
1200 
1201         Src {
1202             src_mod: SrcMod::None,
1203             src_ref: u.into(),
1204             src_swizzle: SrcSwizzle::None,
1205         }
1206     }
1207 
as_ssa(&self) -> Option<&SSARef>1208     pub fn as_ssa(&self) -> Option<&SSARef> {
1209         if self.src_mod.is_none() {
1210             self.src_ref.as_ssa()
1211         } else {
1212             None
1213         }
1214     }
1215 
as_bool(&self) -> Option<bool>1216     pub fn as_bool(&self) -> Option<bool> {
1217         match self.src_ref {
1218             SrcRef::True => Some(!self.src_mod.is_bnot()),
1219             SrcRef::False => Some(self.src_mod.is_bnot()),
1220             SrcRef::SSA(vec) => {
1221                 assert!(vec.is_predicate() && vec.comps() == 1);
1222                 None
1223             }
1224             SrcRef::Reg(reg) => {
1225                 assert!(reg.is_predicate() && reg.comps() == 1);
1226                 None
1227             }
1228             _ => panic!("Not a boolean source"),
1229         }
1230     }
1231 
as_u32(&self) -> Option<u32>1232     pub fn as_u32(&self) -> Option<u32> {
1233         if self.src_mod.is_none() {
1234             self.src_ref.as_u32()
1235         } else {
1236             None
1237         }
1238     }
1239 
as_imm_not_i20(&self) -> Option<u32>1240     pub fn as_imm_not_i20(&self) -> Option<u32> {
1241         match self.src_ref {
1242             SrcRef::Imm32(i) => {
1243                 assert!(self.src_mod.is_none());
1244                 let top = i & 0xfff80000;
1245                 if top == 0 || top == 0xfff80000 {
1246                     None
1247                 } else {
1248                     Some(i)
1249                 }
1250             }
1251             _ => None,
1252         }
1253     }
1254 
as_imm_not_f20(&self) -> Option<u32>1255     pub fn as_imm_not_f20(&self) -> Option<u32> {
1256         match self.src_ref {
1257             SrcRef::Imm32(i) => {
1258                 assert!(self.src_mod.is_none());
1259                 if (i & 0xfff) == 0 {
1260                     None
1261                 } else {
1262                     Some(i)
1263                 }
1264             }
1265             _ => None,
1266         }
1267     }
1268 
iter_ssa(&self) -> slice::Iter<'_, SSAValue>1269     pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
1270         self.src_ref.iter_ssa()
1271     }
1272 
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>1273     pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
1274         self.src_ref.iter_ssa_mut()
1275     }
1276 
is_uniform(&self) -> bool1277     pub fn is_uniform(&self) -> bool {
1278         match self.src_ref {
1279             SrcRef::Zero
1280             | SrcRef::True
1281             | SrcRef::False
1282             | SrcRef::Imm32(_)
1283             | SrcRef::CBuf(_) => true,
1284             SrcRef::SSA(ssa) => ssa.is_uniform(),
1285             SrcRef::Reg(reg) => reg.is_uniform(),
1286         }
1287     }
1288 
is_predicate(&self) -> bool1289     pub fn is_predicate(&self) -> bool {
1290         self.src_ref.is_predicate()
1291     }
1292 
is_zero(&self) -> bool1293     pub fn is_zero(&self) -> bool {
1294         match self.src_ref {
1295             SrcRef::Zero | SrcRef::Imm32(0) => match self.src_mod {
1296                 SrcMod::None | SrcMod::FAbs => true,
1297                 SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::BNot => false,
1298                 // INeg affects more than just the 32 bits of input data so -0
1299                 // may not be equivalent to 0.
1300                 SrcMod::INeg => false,
1301             },
1302             _ => false,
1303         }
1304     }
1305 
is_fneg_zero(&self, src_type: SrcType) -> bool1306     pub fn is_fneg_zero(&self, src_type: SrcType) -> bool {
1307         match self.fold_imm(src_type).src_ref {
1308             SrcRef::Imm32(0x00008000) => src_type == SrcType::F16,
1309             SrcRef::Imm32(0x80000000) => src_type == SrcType::F32,
1310             SrcRef::Imm32(0x80008000) => src_type == SrcType::F16v2,
1311             _ => false,
1312         }
1313     }
1314 
1315     #[allow(dead_code)]
supports_type(&self, src_type: &SrcType) -> bool1316     pub fn supports_type(&self, src_type: &SrcType) -> bool {
1317         match src_type {
1318             SrcType::SSA => {
1319                 if !self.src_mod.is_none() {
1320                     return false;
1321                 }
1322 
1323                 matches!(self.src_ref, SrcRef::SSA(_) | SrcRef::Reg(_))
1324             }
1325             SrcType::GPR => {
1326                 if !self.src_mod.is_none() {
1327                     return false;
1328                 }
1329 
1330                 matches!(
1331                     self.src_ref,
1332                     SrcRef::Zero | SrcRef::SSA(_) | SrcRef::Reg(_)
1333                 )
1334             }
1335             SrcType::ALU => self.src_mod.is_none() && self.src_ref.is_alu(),
1336             SrcType::F16 | SrcType::F32 | SrcType::F64 | SrcType::F16v2 => {
1337                 match self.src_mod {
1338                     SrcMod::None
1339                     | SrcMod::FAbs
1340                     | SrcMod::FNeg
1341                     | SrcMod::FNegAbs => (),
1342                     _ => return false,
1343                 }
1344 
1345                 self.src_ref.is_alu()
1346             }
1347             SrcType::I32 => {
1348                 match self.src_mod {
1349                     SrcMod::None | SrcMod::INeg => (),
1350                     _ => return false,
1351                 }
1352 
1353                 self.src_ref.is_alu()
1354             }
1355             SrcType::B32 => {
1356                 match self.src_mod {
1357                     SrcMod::None | SrcMod::BNot => (),
1358                     _ => return false,
1359                 }
1360 
1361                 self.src_ref.is_alu()
1362             }
1363             SrcType::Pred => {
1364                 match self.src_mod {
1365                     SrcMod::None | SrcMod::BNot => (),
1366                     _ => return false,
1367                 }
1368 
1369                 self.src_ref.is_predicate()
1370             }
1371             SrcType::Carry => self.src_mod.is_none() && self.src_ref.is_carry(),
1372             SrcType::Bar => self.src_mod.is_none() && self.src_ref.is_barrier(),
1373         }
1374     }
1375 }
1376 
1377 impl<T: Into<SrcRef>> From<T> for Src {
from(value: T) -> Src1378     fn from(value: T) -> Src {
1379         Src {
1380             src_ref: value.into(),
1381             src_mod: SrcMod::None,
1382             src_swizzle: SrcSwizzle::None,
1383         }
1384     }
1385 }
1386 
1387 impl fmt::Display for Src {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1388     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1389         match self.src_mod {
1390             SrcMod::None => write!(f, "{}{}", self.src_ref, self.src_swizzle),
1391             SrcMod::FAbs => write!(f, "|{}{}|", self.src_ref, self.src_swizzle),
1392             SrcMod::FNeg => write!(f, "-{}{}", self.src_ref, self.src_swizzle),
1393             SrcMod::FNegAbs => {
1394                 write!(f, "-|{}{}|", self.src_ref, self.src_swizzle)
1395             }
1396             SrcMod::INeg => write!(f, "-{}{}", self.src_ref, self.src_swizzle),
1397             SrcMod::BNot => write!(f, "!{}{}", self.src_ref, self.src_swizzle),
1398         }
1399     }
1400 }
1401 
/// The kind of value an instruction expects in a given source slot.
///
/// The type determines which references and which modifiers a source may
/// use in that slot; see `Src::supports_type()`.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum SrcType {
    /// An SSA value or register, without modifiers
    SSA,
    /// Zero, an SSA value or a register, without modifiers
    GPR,
    /// Any ALU source, without modifiers
    ALU,
    /// ALU source which accepts float modifiers
    F16,
    /// ALU source which accepts float modifiers and, for immediates, a
    /// swizzle
    F16v2,
    /// ALU source which accepts float modifiers
    F32,
    /// ALU source which accepts float modifiers
    F64,
    /// ALU source which accepts integer negation
    I32,
    /// ALU source which accepts bitwise not
    B32,
    /// Predicate source which accepts bitwise not
    Pred,
    /// Carry source, without modifiers
    Carry,
    /// Barrier source, without modifiers
    Bar,
}

impl SrcType {
    /// The source type used when none is given explicitly.
    const DEFAULT: Self = Self::GPR;
}
1422 
1423 pub type SrcTypeList = AttrList<SrcType>;
1424 
1425 pub trait SrcsAsSlice: AsSlice<Src, Attr = SrcType> {
srcs_as_slice(&self) -> &[Src]1426     fn srcs_as_slice(&self) -> &[Src] {
1427         self.as_slice()
1428     }
1429 
srcs_as_mut_slice(&mut self) -> &mut [Src]1430     fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
1431         self.as_mut_slice()
1432     }
1433 
src_types(&self) -> SrcTypeList1434     fn src_types(&self) -> SrcTypeList {
1435         self.attrs()
1436     }
1437 
src_idx(&self, src: &Src) -> usize1438     fn src_idx(&self, src: &Src) -> usize {
1439         let r = self.srcs_as_slice().as_ptr_range();
1440         assert!(r.contains(&(src as *const Src)));
1441         unsafe { (src as *const Src).offset_from(r.start) as usize }
1442     }
1443 }
1444 
1445 impl<T: AsSlice<Src, Attr = SrcType>> SrcsAsSlice for T {}
1446 
all_dsts_uniform(dsts: &[Dst]) -> bool1447 fn all_dsts_uniform(dsts: &[Dst]) -> bool {
1448     let mut uniform = None;
1449     for dst in dsts {
1450         let dst_uniform = match dst {
1451             Dst::None => continue,
1452             Dst::Reg(r) => r.is_uniform(),
1453             Dst::SSA(r) => r.file().unwrap().is_uniform(),
1454         };
1455         assert!(uniform == None || uniform == Some(dst_uniform));
1456         uniform = Some(dst_uniform);
1457     }
1458     uniform == Some(true)
1459 }
1460 
/// The kind of value an instruction produces in a given destination slot.
///
/// This is the per-destination attribute reported by
/// `DstsAsSlice::dst_types()`.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum DstType {
    Pred,
    GPR,
    F16,
    F16v2,
    F32,
    F64,
    Carry,
    Bar,
    Vec,
}

impl DstType {
    /// The destination type used when none is given explicitly.
    const DEFAULT: Self = Self::Vec;
}
1478 
1479 pub type DstTypeList = AttrList<DstType>;
1480 
1481 pub trait DstsAsSlice: AsSlice<Dst, Attr = DstType> {
dsts_as_slice(&self) -> &[Dst]1482     fn dsts_as_slice(&self) -> &[Dst] {
1483         self.as_slice()
1484     }
1485 
dsts_as_mut_slice(&mut self) -> &mut [Dst]1486     fn dsts_as_mut_slice(&mut self) -> &mut [Dst] {
1487         self.as_mut_slice()
1488     }
1489 
dst_types(&self) -> DstTypeList1490     fn dst_types(&self) -> DstTypeList {
1491         self.attrs()
1492     }
1493 
dst_idx(&self, dst: &Dst) -> usize1494     fn dst_idx(&self, dst: &Dst) -> usize {
1495         let r = self.dsts_as_slice().as_ptr_range();
1496         assert!(r.contains(&(dst as *const Dst)));
1497         unsafe { (dst as *const Dst).offset_from(r.start) as usize }
1498     }
1499 }
1500 
1501 impl<T: AsSlice<Dst, Attr = DstType>> DstsAsSlice for T {}
1502 
1503 pub trait IsUniform {
is_uniform(&self) -> bool1504     fn is_uniform(&self) -> bool;
1505 }
1506 
1507 impl<T: DstsAsSlice> IsUniform for T {
is_uniform(&self) -> bool1508     fn is_uniform(&self) -> bool {
1509         all_dsts_uniform(self.dsts_as_slice())
1510     }
1511 }
1512 
fmt_dst_slice(f: &mut fmt::Formatter<'_>, dsts: &[Dst]) -> fmt::Result1513 fn fmt_dst_slice(f: &mut fmt::Formatter<'_>, dsts: &[Dst]) -> fmt::Result {
1514     if dsts.is_empty() {
1515         return Ok(());
1516     }
1517 
1518     // Figure out the last non-null dst
1519     //
1520     // Note: By making the top inclusive and starting at 0, we ensure that
1521     // at least one dst always gets printed.
1522     let mut last_dst = 0;
1523     for (i, dst) in dsts.iter().enumerate() {
1524         if !dst.is_none() {
1525             last_dst = i;
1526         }
1527     }
1528 
1529     for i in 0..(last_dst + 1) {
1530         if i != 0 {
1531             write!(f, " ")?;
1532         }
1533         write!(f, "{}", &dsts[i])?;
1534     }
1535     Ok(())
1536 }
1537 
/// A constant value handed to, or produced by, constant folding.
#[allow(dead_code)]
#[derive(Clone, Copy)]
pub enum FoldData {
    /// A predicate value
    Pred(bool),
    /// A carry bit
    Carry(bool),
    /// A single 32-bit value
    U32(u32),
    /// A pair of 32-bit values; 64-bit floats are stored with the low word
    /// first
    Vec2([u32; 2]),
}
1546 
1547 pub struct OpFoldData<'a> {
1548     pub dsts: &'a mut [FoldData],
1549     pub srcs: &'a [FoldData],
1550 }
1551 
1552 impl OpFoldData<'_> {
get_pred_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool1553     pub fn get_pred_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool {
1554         let i = op.src_idx(src);
1555         let b = match src.src_ref {
1556             SrcRef::Zero | SrcRef::Imm32(_) => panic!("Expected a predicate"),
1557             SrcRef::True => true,
1558             SrcRef::False => false,
1559             _ => {
1560                 if let FoldData::Pred(b) = self.srcs[i] {
1561                     b
1562                 } else {
1563                     panic!("FoldData is not a predicate");
1564                 }
1565             }
1566         };
1567         b ^ src.src_mod.is_bnot()
1568     }
1569 
get_u32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u321570     pub fn get_u32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u32 {
1571         let i = op.src_idx(src);
1572         match src.src_ref {
1573             SrcRef::Zero => 0,
1574             SrcRef::Imm32(imm) => imm,
1575             SrcRef::True | SrcRef::False => panic!("Unexpected predicate"),
1576             _ => {
1577                 if let FoldData::U32(u) = self.srcs[i] {
1578                     u
1579                 } else {
1580                     panic!("FoldData is not a U32");
1581                 }
1582             }
1583         }
1584     }
1585 
get_u32_bnot_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u321586     pub fn get_u32_bnot_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u32 {
1587         let x = self.get_u32_src(op, src);
1588         if src.src_mod.is_bnot() {
1589             !x
1590         } else {
1591             x
1592         }
1593     }
1594 
get_carry_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool1595     pub fn get_carry_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool {
1596         assert!(src.src_ref.as_ssa().is_some());
1597         let i = op.src_idx(src);
1598         if let FoldData::Carry(b) = self.srcs[i] {
1599             b
1600         } else {
1601             panic!("FoldData is not a predicate");
1602         }
1603     }
1604 
1605     #[allow(dead_code)]
get_f32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f321606     pub fn get_f32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f32 {
1607         f32::from_bits(self.get_u32_src(op, src))
1608     }
1609 
1610     #[allow(dead_code)]
get_f64_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f641611     pub fn get_f64_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f64 {
1612         let i = op.src_idx(src);
1613         match src.src_ref {
1614             SrcRef::Zero => 0.0,
1615             SrcRef::Imm32(imm) => f64::from_bits(u64::from(imm) << 32),
1616             SrcRef::True | SrcRef::False => panic!("Unexpected predicate"),
1617             _ => {
1618                 if let FoldData::Vec2(v) = self.srcs[i] {
1619                     let u = u64::from(v[0]) | (u64::from(v[1]) << 32);
1620                     f64::from_bits(u)
1621                 } else {
1622                     panic!("FoldData is not a U32");
1623                 }
1624             }
1625         }
1626     }
1627 
set_pred_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool)1628     pub fn set_pred_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool) {
1629         self.dsts[op.dst_idx(dst)] = FoldData::Pred(b);
1630     }
1631 
set_carry_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool)1632     pub fn set_carry_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool) {
1633         self.dsts[op.dst_idx(dst)] = FoldData::Carry(b);
1634     }
1635 
set_u32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, u: u32)1636     pub fn set_u32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, u: u32) {
1637         self.dsts[op.dst_idx(dst)] = FoldData::U32(u);
1638     }
1639 
1640     #[allow(dead_code)]
set_f32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f32)1641     pub fn set_f32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f32) {
1642         self.set_u32_dst(op, dst, f.to_bits());
1643     }
1644 
1645     #[allow(dead_code)]
set_f64_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f64)1646     pub fn set_f64_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f64) {
1647         let u = f.to_bits();
1648         let v = [u as u32, (u >> 32) as u32];
1649         self.dsts[op.dst_idx(dst)] = FoldData::Vec2(v);
1650     }
1651 }
1652 
1653 pub trait Foldable: SrcsAsSlice + DstsAsSlice {
fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)1654     fn fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>);
1655 }
1656 
1657 pub trait DisplayOp: DstsAsSlice {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1658     fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1659         fmt_dst_slice(f, self.dsts_as_slice())
1660     }
1661 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1662     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result;
1663 }
1664 
// Adapter which turns a formatting closure into something implementing
// Display, so that the closure's output can be captured or embedded in
// another format string.  Shamelessly stolen from
// https://users.rust-lang.org/t/reusing-an-fmt-formatter/8531/4
pub struct Fmt<F>(pub F)
where
    F: Fn(&mut fmt::Formatter) -> fmt::Result;

impl<F> fmt::Display for Fmt<F>
where
    F: Fn(&mut fmt::Formatter) -> fmt::Result,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Displaying the adapter simply runs the wrapped closure.
        let Fmt(callback) = self;
        callback(f)
    }
}
1679 
// Implements Display for an op type in terms of its `fmt_dsts()` and
// `fmt_op()` methods.  The output is `<dsts> = <op>`, or just `<op>` when
// the destinations print as an empty string.
macro_rules! impl_display_for_op {
    ($op: ident) => {
        impl fmt::Display for $op {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // Render the destinations into a scratch string first so we
                // can tell whether there is anything to put before the " = ".
                let mut dsts = String::new();
                write!(dsts, "{}", Fmt(|df| self.fmt_dsts(df)))?;
                if dsts.is_empty() {
                    return self.fmt_op(f);
                }
                write!(f, "{} = ", dsts)?;
                self.fmt_op(f)
            }
        }
    };
}
1694 
1695 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
1696 pub enum PredSetOp {
1697     And,
1698     Or,
1699     Xor,
1700 }
1701 
1702 impl PredSetOp {
eval(&self, a: bool, b: bool) -> bool1703     pub fn eval(&self, a: bool, b: bool) -> bool {
1704         match self {
1705             PredSetOp::And => a & b,
1706             PredSetOp::Or => a | b,
1707             PredSetOp::Xor => a ^ b,
1708         }
1709     }
1710 
is_trivial(&self, accum: &Src) -> bool1711     pub fn is_trivial(&self, accum: &Src) -> bool {
1712         if let Some(b) = accum.as_bool() {
1713             match self {
1714                 PredSetOp::And => b,
1715                 PredSetOp::Or => !b,
1716                 PredSetOp::Xor => !b,
1717             }
1718         } else {
1719             false
1720         }
1721     }
1722 }
1723 
1724 impl fmt::Display for PredSetOp {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1725     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1726         match self {
1727             PredSetOp::And => write!(f, ".and"),
1728             PredSetOp::Or => write!(f, ".or"),
1729             PredSetOp::Xor => write!(f, ".xor"),
1730         }
1731     }
1732 }
1733 
/// Floating-point comparison operation.
///
/// Each relation exists in an ordered (`Ord*`) and an unordered (`Unord*`)
/// flavor; `IsNum` and `IsNan` are unary tests.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FloatCmpOp {
    OrdEq,
    OrdNe,
    OrdLt,
    OrdLe,
    OrdGt,
    OrdGe,
    UnordEq,
    UnordNe,
    UnordLt,
    UnordLe,
    UnordGt,
    UnordGe,
    IsNum,
    IsNan,
}

impl FloatCmpOp {
    /// Returns the comparison which gives the same result once the two
    /// operands are swapped.
    ///
    /// Panics for the unary tests `IsNum` and `IsNan`.
    pub fn flip(self) -> FloatCmpOp {
        match self {
            // Symmetric relations are unaffected by swapping
            FloatCmpOp::OrdEq
            | FloatCmpOp::OrdNe
            | FloatCmpOp::UnordEq
            | FloatCmpOp::UnordNe => self,
            // Ordered relations
            FloatCmpOp::OrdLt => FloatCmpOp::OrdGt,
            FloatCmpOp::OrdGt => FloatCmpOp::OrdLt,
            FloatCmpOp::OrdLe => FloatCmpOp::OrdGe,
            FloatCmpOp::OrdGe => FloatCmpOp::OrdLe,
            // Unordered relations
            FloatCmpOp::UnordLt => FloatCmpOp::UnordGt,
            FloatCmpOp::UnordGt => FloatCmpOp::UnordLt,
            FloatCmpOp::UnordLe => FloatCmpOp::UnordGe,
            FloatCmpOp::UnordGe => FloatCmpOp::UnordLe,
            FloatCmpOp::IsNum | FloatCmpOp::IsNan => panic!("Cannot flip unop"),
        }
    }
}

/// Prints the comparison as an instruction suffix; unordered comparisons
/// get a trailing `u`.
impl fmt::Display for FloatCmpOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            FloatCmpOp::OrdEq => ".eq",
            FloatCmpOp::OrdNe => ".ne",
            FloatCmpOp::OrdLt => ".lt",
            FloatCmpOp::OrdLe => ".le",
            FloatCmpOp::OrdGt => ".gt",
            FloatCmpOp::OrdGe => ".ge",
            FloatCmpOp::UnordEq => ".equ",
            FloatCmpOp::UnordNe => ".neu",
            FloatCmpOp::UnordLt => ".ltu",
            FloatCmpOp::UnordLe => ".leu",
            FloatCmpOp::UnordGt => ".gtu",
            FloatCmpOp::UnordGe => ".geu",
            FloatCmpOp::IsNum => ".num",
            FloatCmpOp::IsNan => ".nan",
        };
        f.write_str(suffix)
    }
}
1791 
/// Integer comparison operation.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum IntCmpOp {
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,
}

impl IntCmpOp {
    /// Returns the comparison which gives the same result once the two
    /// operands are swapped.
    pub fn flip(self) -> IntCmpOp {
        match self {
            IntCmpOp::Lt => IntCmpOp::Gt,
            IntCmpOp::Gt => IntCmpOp::Lt,
            IntCmpOp::Le => IntCmpOp::Ge,
            IntCmpOp::Ge => IntCmpOp::Le,
            // Symmetric relations are unaffected by swapping
            IntCmpOp::Eq | IntCmpOp::Ne => self,
        }
    }
}

/// Prints the comparison as an instruction suffix.
impl fmt::Display for IntCmpOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            IntCmpOp::Eq => ".eq",
            IntCmpOp::Ne => ".ne",
            IntCmpOp::Lt => ".lt",
            IntCmpOp::Le => ".le",
            IntCmpOp::Gt => ".gt",
            IntCmpOp::Ge => ".ge",
        };
        f.write_str(suffix)
    }
}
1826 
/// Signedness of the operands of an integer comparison.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum IntCmpType {
    /// Unsigned 32-bit operands
    U32,
    /// Signed 32-bit operands
    I32,
}

impl IntCmpType {
    /// Returns true for the signed comparison type.
    #[allow(dead_code)]
    pub fn is_signed(&self) -> bool {
        matches!(self, IntCmpType::I32)
    }
}

/// Prints the type as an instruction suffix.
impl fmt::Display for IntCmpType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            IntCmpType::U32 => ".u32",
            IntCmpType::I32 => ".i32",
        };
        f.write_str(suffix)
    }
}
1851 
/// Two-input bitwise logic operation.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum LogicOp2 {
    /// `x & y`
    And,
    /// `x | y`
    Or,
    /// `x ^ y`
    Xor,
    /// Passes the second input through unchanged
    PassB,
}

/// Prints the lower-case name of the operation, without a leading dot.
impl fmt::Display for LogicOp2 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            LogicOp2::And => "and",
            LogicOp2::Or => "or",
            LogicOp2::Xor => "xor",
            LogicOp2::PassB => "pass_b",
        };
        f.write_str(name)
    }
}
1870 
1871 impl LogicOp2 {
to_lut(self) -> LogicOp31872     pub fn to_lut(self) -> LogicOp3 {
1873         match self {
1874             LogicOp2::And => LogicOp3::new_lut(&|x, y, _| x & y),
1875             LogicOp2::Or => LogicOp3::new_lut(&|x, y, _| x | y),
1876             LogicOp2::Xor => LogicOp3::new_lut(&|x, y, _| x ^ y),
1877             LogicOp2::PassB => LogicOp3::new_lut(&|_, b, _| b),
1878         }
1879     }
1880 }
1881 
/// Three-input bitwise logic operation described by a truth table.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct LogicOp3 {
    /// The truth table: one result bit for each of the eight combinations
    /// of the three input bits
    pub lut: u8,
}
1886 
1887 impl LogicOp3 {
1888     pub const SRC_MASKS: [u8; 3] = [0xf0, 0xcc, 0xaa];
1889 
1890     #[inline]
new_lut<F: Fn(u8, u8, u8) -> u8>(f: &F) -> LogicOp31891     pub fn new_lut<F: Fn(u8, u8, u8) -> u8>(f: &F) -> LogicOp3 {
1892         LogicOp3 {
1893             lut: f(
1894                 LogicOp3::SRC_MASKS[0],
1895                 LogicOp3::SRC_MASKS[1],
1896                 LogicOp3::SRC_MASKS[2],
1897             ),
1898         }
1899     }
1900 
new_const(val: bool) -> LogicOp31901     pub fn new_const(val: bool) -> LogicOp3 {
1902         LogicOp3 {
1903             lut: if val { !0 } else { 0 },
1904         }
1905     }
1906 
src_used(&self, src_idx: usize) -> bool1907     pub fn src_used(&self, src_idx: usize) -> bool {
1908         let mask = LogicOp3::SRC_MASKS[src_idx];
1909         let shift = LogicOp3::SRC_MASKS[src_idx].trailing_zeros();
1910         self.lut & !mask != (self.lut >> shift) & !mask
1911     }
1912 
fix_src(&mut self, src_idx: usize, val: bool)1913     pub fn fix_src(&mut self, src_idx: usize, val: bool) {
1914         let mask = LogicOp3::SRC_MASKS[src_idx];
1915         let shift = LogicOp3::SRC_MASKS[src_idx].trailing_zeros();
1916         if val {
1917             let t_bits = self.lut & mask;
1918             self.lut = t_bits | (t_bits >> shift)
1919         } else {
1920             let f_bits = self.lut & !mask;
1921             self.lut = (f_bits << shift) | f_bits
1922         };
1923     }
1924 
invert_src(&mut self, src_idx: usize)1925     pub fn invert_src(&mut self, src_idx: usize) {
1926         let mask = LogicOp3::SRC_MASKS[src_idx];
1927         let shift = LogicOp3::SRC_MASKS[src_idx].trailing_zeros();
1928         let t_bits = self.lut & mask;
1929         let f_bits = self.lut & !mask;
1930         self.lut = (f_bits << shift) | (t_bits >> shift);
1931     }
1932 
eval< T: BitAnd<Output = T> + BitOr<Output = T> + Copy + Not<Output = T>, >( &self, x: T, y: T, z: T, ) -> T1933     pub fn eval<
1934         T: BitAnd<Output = T> + BitOr<Output = T> + Copy + Not<Output = T>,
1935     >(
1936         &self,
1937         x: T,
1938         y: T,
1939         z: T,
1940     ) -> T {
1941         let mut res = x & !x; // zero
1942         if (self.lut & (1 << 0)) != 0 {
1943             res = res | (!x & !y & !z);
1944         }
1945         if (self.lut & (1 << 1)) != 0 {
1946             res = res | (!x & !y & z);
1947         }
1948         if (self.lut & (1 << 2)) != 0 {
1949             res = res | (!x & y & !z);
1950         }
1951         if (self.lut & (1 << 3)) != 0 {
1952             res = res | (!x & y & z);
1953         }
1954         if (self.lut & (1 << 4)) != 0 {
1955             res = res | (x & !y & !z);
1956         }
1957         if (self.lut & (1 << 5)) != 0 {
1958             res = res | (x & !y & z);
1959         }
1960         if (self.lut & (1 << 6)) != 0 {
1961             res = res | (x & y & !z);
1962         }
1963         if (self.lut & (1 << 7)) != 0 {
1964             res = res | (x & y & z);
1965         }
1966         res
1967     }
1968 }
1969 
1970 impl fmt::Display for LogicOp3 {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1971     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1972         write!(f, "LUT[{:#x}]", self.lut)
1973     }
1974 }
1975 
/// Floating-point type, identified by its width in bits.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FloatType {
    F16,
    F32,
    F64,
}

impl FloatType {
    /// Returns the float type that is `bytes` bits wide.
    ///
    /// Despite its name, `bytes` is a width in bits: 16, 32 or 64.
    ///
    /// # Panics
    ///
    /// Panics for any other width.
    pub fn from_bits(bytes: usize) -> FloatType {
        [FloatType::F16, FloatType::F32, FloatType::F64]
            .iter()
            .copied()
            .find(|ty| ty.bits() == bytes)
            .unwrap_or_else(|| panic!("Invalid float type size"))
    }

    /// Returns the width of the type in bits.
    pub fn bits(&self) -> usize {
        match *self {
            FloatType::F16 => 16,
            FloatType::F32 => 32,
            FloatType::F64 => 64,
        }
    }
}

impl fmt::Display for FloatType {
    /// Writes the dot-prefixed type suffix (`.f16`, `.f32`, `.f64`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            FloatType::F16 => ".f16",
            FloatType::F32 => ".f32",
            FloatType::F64 => ".f64",
        };
        f.write_str(suffix)
    }
}
2011 
/// Floating-point rounding mode.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FRndMode {
    /// Printed as `.re`.
    NearestEven,
    /// Printed as `.rm`.
    NegInf,
    /// Printed as `.rp`.
    PosInf,
    /// Printed as `.rz`.
    Zero,
}

impl fmt::Display for FRndMode {
    /// Writes the dot-prefixed rounding-mode suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            FRndMode::NearestEven => ".re",
            FRndMode::NegInf => ".rm",
            FRndMode::PosInf => ".rp",
            FRndMode::Zero => ".rz",
        };
        f.write_str(suffix)
    }
}
2030 
/// Texture dimensionality.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexDim {
    _1D,
    Array1D,
    _2D,
    Array2D,
    _3D,
    Cube,
    ArrayCube,
}

impl fmt::Display for TexDim {
    /// Writes the dot-prefixed dimension suffix, e.g. `.2d` or `.acube`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            TexDim::_1D => ".1d",
            TexDim::Array1D => ".a1d",
            TexDim::_2D => ".2d",
            TexDim::Array2D => ".a2d",
            TexDim::_3D => ".3d",
            TexDim::Cube => ".cube",
            TexDim::ArrayCube => ".acube",
        };
        f.write_str(suffix)
    }
}
2055 
/// Texture level-of-detail mode.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexLodMode {
    Auto,
    Zero,
    Bias,
    Lod,
    Clamp,
    BiasClamp,
}

impl fmt::Display for TexLodMode {
    /// Writes the mode mnemonic; unlike most suffixes in this file it has no
    /// leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            TexLodMode::Auto => "la",
            TexLodMode::Zero => "lz",
            TexLodMode::Bias => "lb",
            TexLodMode::Lod => "ll",
            TexLodMode::Clamp => "lc",
            TexLodMode::BiasClamp => "lb.lc",
        };
        f.write_str(name)
    }
}
2078 
/// Offset mode of a `tld4` texture gather.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum Tld4OffsetMode {
    /// Printed as `no_off`.
    None,
    /// Printed as `aoffi`.
    AddOffI,
    /// Printed as `ptp`.
    PerPx,
}

impl fmt::Display for Tld4OffsetMode {
    /// Writes the mode mnemonic without a leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Tld4OffsetMode::None => "no_off",
            Tld4OffsetMode::AddOffI => "aoffi",
            Tld4OffsetMode::PerPx => "ptp",
        };
        f.write_str(name)
    }
}
2095 
/// Kind of information requested by a texture query.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexQuery {
    Dimension,
    TextureType,
    SamplerPos,
}

impl fmt::Display for TexQuery {
    /// Writes the query name in `snake_case`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            TexQuery::Dimension => "dimension",
            TexQuery::TextureType => "texture_type",
            TexQuery::SamplerPos => "sampler_pos",
        };
        f.write_str(name)
    }
}
2113 
/// Image dimensionality.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum ImageDim {
    _1D,
    _1DBuffer,
    _1DArray,
    _2D,
    _2DArray,
    _3D,
}

impl ImageDim {
    /// Returns the number of coordinate components for this dimensionality.
    pub fn coord_comps(&self) -> u8 {
        match *self {
            ImageDim::_1D | ImageDim::_1DBuffer => 1,
            ImageDim::_1DArray | ImageDim::_2D => 2,
            ImageDim::_2DArray | ImageDim::_3D => 3,
        }
    }
}

impl fmt::Display for ImageDim {
    /// Writes the dot-prefixed dimension suffix, e.g. `.buf` or `.a2d`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            ImageDim::_1D => ".1d",
            ImageDim::_1DBuffer => ".buf",
            ImageDim::_1DArray => ".a1d",
            ImageDim::_2D => ".2d",
            ImageDim::_2DArray => ".a2d",
            ImageDim::_3D => ".3d",
        };
        f.write_str(suffix)
    }
}
2149 
/// Integer type, identified by signedness and width in bits.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum IntType {
    U8,
    I8,
    U16,
    I16,
    U32,
    I32,
    U64,
    I64,
}

impl IntType {
    /// Returns the integer type with the given width and signedness.
    ///
    /// # Panics
    ///
    /// Panics unless `bits` is 8, 16, 32 or 64.
    pub fn from_bits(bits: usize, is_signed: bool) -> IntType {
        match (bits, is_signed) {
            (8, false) => IntType::U8,
            (8, true) => IntType::I8,
            (16, false) => IntType::U16,
            (16, true) => IntType::I16,
            (32, false) => IntType::U32,
            (32, true) => IntType::I32,
            (64, false) => IntType::U64,
            (64, true) => IntType::I64,
            _ => panic!("Invalid integer type size"),
        }
    }

    /// Returns `true` for the `I*` variants and `false` for the `U*` ones.
    pub fn is_signed(&self) -> bool {
        match *self {
            IntType::I8 | IntType::I16 | IntType::I32 | IntType::I64 => true,
            IntType::U8 | IntType::U16 | IntType::U32 | IntType::U64 => false,
        }
    }

    /// Returns the width of the type in bits.
    pub fn bits(&self) -> usize {
        match *self {
            IntType::U8 | IntType::I8 => 8,
            IntType::U16 | IntType::I16 => 16,
            IntType::U32 | IntType::I32 => 32,
            IntType::U64 | IntType::I64 => 64,
        }
    }
}

impl fmt::Display for IntType {
    /// Writes the dot-prefixed type suffix, e.g. `.u8` or `.i64`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            IntType::U8 => ".u8",
            IntType::I8 => ".i8",
            IntType::U16 => ".u16",
            IntType::I16 => ".i16",
            IntType::U32 => ".u32",
            IntType::I32 => ".i32",
            IntType::U64 => ".u64",
            IntType::I64 => ".i64",
        };
        f.write_str(suffix)
    }
}
2228 
/// Width class of a memory address.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemAddrType {
    /// Printed as `.a32`.
    A32,
    /// Printed as `.a64`.
    A64,
}

impl fmt::Display for MemAddrType {
    /// Writes the dot-prefixed address-type suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            MemAddrType::A32 => ".a32",
            MemAddrType::A64 => ".a64",
        };
        f.write_str(suffix)
    }
}
2243 
/// Data type of a memory load or store.
///
/// Only the 8- and 16-bit variants carry a signedness; wider accesses are
/// plain bit containers.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemType {
    U8,
    I8,
    U16,
    I16,
    B32,
    B64,
    B128,
}

impl MemType {
    /// Returns the memory type for an access of `size` bytes.
    ///
    /// `is_signed` is only consulted for sizes 1 and 2.
    ///
    /// # Panics
    ///
    /// Panics unless `size` is 1, 2, 4, 8 or 16.
    pub fn from_size(size: u8, is_signed: bool) -> MemType {
        match (size, is_signed) {
            (1, false) => MemType::U8,
            (1, true) => MemType::I8,
            (2, false) => MemType::U16,
            (2, true) => MemType::I16,
            (4, _) => MemType::B32,
            (8, _) => MemType::B64,
            (16, _) => MemType::B128,
            _ => panic!("Invalid memory load/store size"),
        }
    }

    /// Returns the width of the type in bits.
    #[allow(dead_code)]
    pub fn bits(&self) -> usize {
        match *self {
            MemType::U8 | MemType::I8 => 8,
            MemType::U16 | MemType::I16 => 16,
            MemType::B32 => 32,
            MemType::B64 => 64,
            MemType::B128 => 128,
        }
    }
}

impl fmt::Display for MemType {
    /// Writes the dot-prefixed type suffix, e.g. `.u8` or `.b128`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            MemType::U8 => ".u8",
            MemType::I8 => ".i8",
            MemType::U16 => ".u16",
            MemType::I16 => ".i16",
            MemType::B32 => ".b32",
            MemType::B64 => ".b64",
            MemType::B128 => ".b128",
        };
        f.write_str(suffix)
    }
}
2304 
2305 #[allow(dead_code)]
2306 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
2307 pub enum MemOrder {
2308     Constant,
2309     Weak,
2310     Strong(MemScope),
2311 }
2312 
2313 impl fmt::Display for MemOrder {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2314     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2315         match self {
2316             MemOrder::Constant => write!(f, ".constant"),
2317             MemOrder::Weak => write!(f, ".weak"),
2318             MemOrder::Strong(scope) => write!(f, ".strong{}", scope),
2319         }
2320     }
2321 }
2322 
/// Scope of a strongly-ordered memory access.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemScope {
    /// Printed as `.cta`.
    CTA,
    /// Printed as `.gpu`.
    GPU,
    /// Printed as `.sys`.
    System,
}

impl fmt::Display for MemScope {
    /// Writes the dot-prefixed scope suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            MemScope::CTA => ".cta",
            MemScope::GPU => ".gpu",
            MemScope::System => ".sys",
        };
        f.write_str(suffix)
    }
}
2339 
2340 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
2341 pub enum MemSpace {
2342     Global(MemAddrType),
2343     Local,
2344     Shared,
2345 }
2346 
2347 impl MemSpace {
addr_type(&self) -> MemAddrType2348     pub fn addr_type(&self) -> MemAddrType {
2349         match self {
2350             MemSpace::Global(t) => *t,
2351             MemSpace::Local => MemAddrType::A32,
2352             MemSpace::Shared => MemAddrType::A32,
2353         }
2354     }
2355 }
2356 
2357 impl fmt::Display for MemSpace {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2358     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2359         match self {
2360             MemSpace::Global(t) => write!(f, ".global{t}"),
2361             MemSpace::Local => write!(f, ".local"),
2362             MemSpace::Shared => write!(f, ".shared"),
2363         }
2364     }
2365 }
2366 
/// Eviction priority attached to a memory access.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemEvictionPriority {
    /// Printed as `.ef`.
    First,
    /// Prints nothing.
    Normal,
    /// Printed as `.el`.
    Last,
    /// Printed as `.lu`.
    Unchanged,
}

impl fmt::Display for MemEvictionPriority {
    /// Writes the dot-prefixed priority suffix; `Normal` writes nothing.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            MemEvictionPriority::Normal => return Ok(()),
            MemEvictionPriority::First => ".ef",
            MemEvictionPriority::Last => ".el",
            MemEvictionPriority::Unchanged => ".lu",
        };
        f.write_str(suffix)
    }
}
2386 
2387 #[derive(Clone)]
2388 pub struct MemAccess {
2389     pub mem_type: MemType,
2390     pub space: MemSpace,
2391     pub order: MemOrder,
2392     pub eviction_priority: MemEvictionPriority,
2393 }
2394 
2395 impl fmt::Display for MemAccess {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2396     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2397         write!(
2398             f,
2399             "{}{}{}{}",
2400             self.space, self.order, self.eviction_priority, self.mem_type,
2401         )
2402     }
2403 }
2404 
/// Data type of an atomic operation.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomType {
    F16x2,
    U32,
    I32,
    F32,
    U64,
    I64,
    F64,
}

impl AtomType {
    /// Returns the float atomic type that is `bits` wide.
    ///
    /// # Panics
    ///
    /// Panics unless `bits` is 32 or 64; 16 is rejected with a dedicated
    /// "not yet supported" message.
    pub fn F(bits: u8) -> AtomType {
        match bits {
            32 => AtomType::F32,
            64 => AtomType::F64,
            16 => panic!("16-bit float atomics not yet supported"),
            _ => panic!("Invalid float atomic type"),
        }
    }

    /// Returns the unsigned integer atomic type that is `bits` wide.
    ///
    /// # Panics
    ///
    /// Panics unless `bits` is 32 or 64.
    pub fn U(bits: u8) -> AtomType {
        match bits {
            32 => AtomType::U32,
            64 => AtomType::U64,
            _ => panic!("Invalid uint atomic type"),
        }
    }

    /// Returns the signed integer atomic type that is `bits` wide.
    ///
    /// # Panics
    ///
    /// Panics unless `bits` is 32 or 64.
    pub fn I(bits: u8) -> AtomType {
        match bits {
            32 => AtomType::I32,
            64 => AtomType::I64,
            _ => panic!("Invalid int atomic type"),
        }
    }

    /// Returns the total width of the type in bits (`F16x2` counts as 32).
    pub fn bits(&self) -> usize {
        match *self {
            AtomType::F16x2
            | AtomType::U32
            | AtomType::I32
            | AtomType::F32 => 32,
            AtomType::U64 | AtomType::I64 | AtomType::F64 => 64,
        }
    }
}

impl fmt::Display for AtomType {
    /// Writes the dot-prefixed type suffix, e.g. `.f16x2` or `.u64`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            AtomType::F16x2 => ".f16x2",
            AtomType::U32 => ".u32",
            AtomType::I32 => ".i32",
            AtomType::F32 => ".f32",
            AtomType::U64 => ".u64",
            AtomType::I64 => ".i64",
            AtomType::F64 => ".f64",
        };
        f.write_str(suffix)
    }
}
2465 
/// How the compare value of a compare-and-exchange atomic is supplied.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomCmpSrc {
    /// The compare value is its own, separate source.
    Separate,
    /// The compare value shares a source with the data, with the compare
    /// value coming first.
    Packed,
}
2473 
2474 #[allow(dead_code)]
2475 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
2476 pub enum AtomOp {
2477     Add,
2478     Min,
2479     Max,
2480     Inc,
2481     Dec,
2482     And,
2483     Or,
2484     Xor,
2485     Exch,
2486     CmpExch(AtomCmpSrc),
2487 }
2488 
2489 impl AtomOp {
is_reduction(&self) -> bool2490     pub fn is_reduction(&self) -> bool {
2491         match self {
2492             AtomOp::Add
2493             | AtomOp::Min
2494             | AtomOp::Max
2495             | AtomOp::Inc
2496             | AtomOp::Dec
2497             | AtomOp::And
2498             | AtomOp::Or
2499             | AtomOp::Xor => true,
2500             AtomOp::Exch | AtomOp::CmpExch(_) => false,
2501         }
2502     }
2503 }
2504 
2505 impl fmt::Display for AtomOp {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2506     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2507         match self {
2508             AtomOp::Add => write!(f, ".add"),
2509             AtomOp::Min => write!(f, ".min"),
2510             AtomOp::Max => write!(f, ".max"),
2511             AtomOp::Inc => write!(f, ".inc"),
2512             AtomOp::Dec => write!(f, ".dec"),
2513             AtomOp::And => write!(f, ".and"),
2514             AtomOp::Or => write!(f, ".or"),
2515             AtomOp::Xor => write!(f, ".xor"),
2516             AtomOp::Exch => write!(f, ".exch"),
2517             AtomOp::CmpExch(AtomCmpSrc::Separate) => write!(f, ".cmpexch"),
2518             AtomOp::CmpExch(AtomCmpSrc::Packed) => write!(f, ".cmpexch.packed"),
2519         }
2520     }
2521 }
2522 
/// Interpolation frequency.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum InterpFreq {
    /// Printed as `.pass`.
    Pass,
    /// Printed as `.pass_mul_w`.
    PassMulW,
    /// Printed as `.constant`.
    Constant,
    /// Printed as `.state`.
    State,
}

impl fmt::Display for InterpFreq {
    /// Writes the dot-prefixed frequency suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            InterpFreq::Pass => ".pass",
            InterpFreq::PassMulW => ".pass_mul_w",
            InterpFreq::Constant => ".constant",
            InterpFreq::State => ".state",
        };
        f.write_str(suffix)
    }
}
/// Interpolation location.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum InterpLoc {
    /// Prints nothing.
    Default,
    /// Printed as `.centroid`.
    Centroid,
    /// Printed as `.offset`.
    Offset,
}

impl fmt::Display for InterpLoc {
    /// Writes the dot-prefixed location suffix; `Default` writes nothing.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            InterpLoc::Default => return Ok(()),
            InterpLoc::Centroid => ".centroid",
            InterpLoc::Offset => ".offset",
        };
        f.write_str(suffix)
    }
}
2557 
/// Parameters of an attribute access.
///
/// NOTE(review): the field meanings below are inferred from their names;
/// confirm against the instructions that consume this struct.
pub struct AttrAccess {
    /// Attribute address (presumably a byte offset).
    pub addr: u16,
    /// Number of components accessed.
    pub comps: u8,
    /// Presumably selects per-patch attributes.
    pub patch: bool,
    /// Presumably selects output rather than input attributes.
    pub output: bool,
    /// Presumably marks `addr` as a physical address.
    pub phys: bool,
}
2565 
2566 #[repr(C)]
2567 #[derive(SrcsAsSlice, DstsAsSlice)]
2568 pub struct OpFAdd {
2569     #[dst_type(F32)]
2570     pub dst: Dst,
2571 
2572     #[src_type(F32)]
2573     pub srcs: [Src; 2],
2574 
2575     pub saturate: bool,
2576     pub rnd_mode: FRndMode,
2577     pub ftz: bool,
2578 }
2579 
2580 impl DisplayOp for OpFAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2581     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2582         let sat = if self.saturate { ".sat" } else { "" };
2583         write!(f, "fadd{sat}")?;
2584         if self.rnd_mode != FRndMode::NearestEven {
2585             write!(f, "{}", self.rnd_mode)?;
2586         }
2587         if self.ftz {
2588             write!(f, ".ftz")?;
2589         }
2590         write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2591     }
2592 }
2593 impl_display_for_op!(OpFAdd);
2594 
2595 #[repr(C)]
2596 #[derive(SrcsAsSlice, DstsAsSlice)]
2597 pub struct OpFFma {
2598     #[dst_type(F32)]
2599     pub dst: Dst,
2600 
2601     #[src_type(F32)]
2602     pub srcs: [Src; 3],
2603 
2604     pub saturate: bool,
2605     pub rnd_mode: FRndMode,
2606     pub ftz: bool,
2607     pub dnz: bool,
2608 }
2609 
2610 impl DisplayOp for OpFFma {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2611     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2612         let sat = if self.saturate { ".sat" } else { "" };
2613         write!(f, "ffma{sat}")?;
2614         if self.rnd_mode != FRndMode::NearestEven {
2615             write!(f, "{}", self.rnd_mode)?;
2616         }
2617         if self.dnz {
2618             write!(f, ".dnz")?;
2619         } else if self.ftz {
2620             write!(f, ".ftz")?;
2621         }
2622         write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
2623     }
2624 }
2625 impl_display_for_op!(OpFFma);
2626 
2627 #[repr(C)]
2628 #[derive(SrcsAsSlice, DstsAsSlice)]
2629 pub struct OpFMnMx {
2630     #[dst_type(F32)]
2631     pub dst: Dst,
2632 
2633     #[src_type(F32)]
2634     pub srcs: [Src; 2],
2635 
2636     #[src_type(Pred)]
2637     pub min: Src,
2638 
2639     pub ftz: bool,
2640 }
2641 
2642 impl DisplayOp for OpFMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2643     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2644         let ftz = if self.ftz { ".ftz" } else { "" };
2645         write!(
2646             f,
2647             "fmnmx{ftz} {} {} {}",
2648             self.srcs[0], self.srcs[1], self.min
2649         )
2650     }
2651 }
2652 impl_display_for_op!(OpFMnMx);
2653 
2654 #[repr(C)]
2655 #[derive(SrcsAsSlice, DstsAsSlice)]
2656 pub struct OpFMul {
2657     #[dst_type(F32)]
2658     pub dst: Dst,
2659 
2660     #[src_type(F32)]
2661     pub srcs: [Src; 2],
2662 
2663     pub saturate: bool,
2664     pub rnd_mode: FRndMode,
2665     pub ftz: bool,
2666     pub dnz: bool,
2667 }
2668 
2669 impl DisplayOp for OpFMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2670     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2671         let sat = if self.saturate { ".sat" } else { "" };
2672         write!(f, "fmul{sat}")?;
2673         if self.rnd_mode != FRndMode::NearestEven {
2674             write!(f, "{}", self.rnd_mode)?;
2675         }
2676         if self.dnz {
2677             write!(f, ".dnz")?;
2678         } else if self.ftz {
2679             write!(f, ".ftz")?;
2680         }
2681         write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2682     }
2683 }
2684 impl_display_for_op!(OpFMul);
2685 
2686 #[repr(C)]
2687 #[derive(SrcsAsSlice, DstsAsSlice)]
2688 pub struct OpFSet {
2689     #[dst_type(F32)]
2690     pub dst: Dst,
2691 
2692     pub cmp_op: FloatCmpOp,
2693 
2694     #[src_type(F32)]
2695     pub srcs: [Src; 2],
2696 
2697     pub ftz: bool,
2698 }
2699 
2700 impl DisplayOp for OpFSet {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2701     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2702         let ftz = if self.ftz { ".ftz" } else { "" };
2703         write!(
2704             f,
2705             "fset{}{ftz} {} {}",
2706             self.cmp_op, self.srcs[0], self.srcs[1]
2707         )
2708     }
2709 }
2710 impl_display_for_op!(OpFSet);
2711 
2712 #[repr(C)]
2713 #[derive(SrcsAsSlice, DstsAsSlice)]
2714 pub struct OpFSetP {
2715     #[dst_type(Pred)]
2716     pub dst: Dst,
2717 
2718     pub set_op: PredSetOp,
2719     pub cmp_op: FloatCmpOp,
2720 
2721     #[src_type(F32)]
2722     pub srcs: [Src; 2],
2723 
2724     #[src_type(Pred)]
2725     pub accum: Src,
2726 
2727     pub ftz: bool,
2728 }
2729 
2730 impl DisplayOp for OpFSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2731     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2732         let ftz = if self.ftz { ".ftz" } else { "" };
2733         write!(f, "fsetp{}{ftz}", self.cmp_op)?;
2734         if !self.set_op.is_trivial(&self.accum) {
2735             write!(f, "{}", self.set_op)?;
2736         }
2737         write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
2738         if !self.set_op.is_trivial(&self.accum) {
2739             write!(f, " {}", self.accum)?;
2740         }
2741         Ok(())
2742     }
2743 }
2744 impl_display_for_op!(OpFSetP);
2745 
/// One of the four per-entry operations of [`OpFSwzAdd`].
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum FSwzAddOp {
    /// Printed as `add`.
    Add,
    /// Printed as `subr`.
    SubRight,
    /// Printed as `sub`.
    SubLeft,
    /// Printed as `mov2`.
    MoveLeft,
}

impl fmt::Display for FSwzAddOp {
    /// Writes the operation mnemonic without a leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            FSwzAddOp::Add => "add",
            FSwzAddOp::SubRight => "subr",
            FSwzAddOp::SubLeft => "sub",
            FSwzAddOp::MoveLeft => "mov2",
        };
        f.write_str(name)
    }
}
2765 
2766 #[repr(C)]
2767 #[derive(SrcsAsSlice, DstsAsSlice)]
2768 pub struct OpFSwzAdd {
2769     #[dst_type(F32)]
2770     pub dst: Dst,
2771 
2772     #[src_type(GPR)]
2773     pub srcs: [Src; 2],
2774 
2775     pub rnd_mode: FRndMode,
2776     pub ftz: bool,
2777 
2778     pub ops: [FSwzAddOp; 4],
2779 }
2780 
2781 impl DisplayOp for OpFSwzAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2782     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2783         write!(f, "fswzadd",)?;
2784         if self.rnd_mode != FRndMode::NearestEven {
2785             write!(f, "{}", self.rnd_mode)?;
2786         }
2787         if self.ftz {
2788             write!(f, ".ftz")?;
2789         }
2790         write!(
2791             f,
2792             " {} {} [{}, {}, {}, {}]",
2793             self.srcs[0],
2794             self.srcs[1],
2795             self.ops[0],
2796             self.ops[1],
2797             self.ops[2],
2798             self.ops[3],
2799         )
2800     }
2801 }
2802 impl_display_for_op!(OpFSwzAdd);
2803 
/// Operation selector of [`OpRro`].
pub enum RroOp {
    /// Printed as `.sincos`.
    SinCos,
    /// Printed as `.exp2`.
    Exp2,
}

impl fmt::Display for RroOp {
    /// Writes the dot-prefixed operation suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            RroOp::SinCos => ".sincos",
            RroOp::Exp2 => ".exp2",
        };
        f.write_str(suffix)
    }
}
2817 
2818 /// MuFu range reduction operator
2819 ///
2820 /// Not available on SM70+
2821 #[repr(C)]
2822 #[derive(SrcsAsSlice, DstsAsSlice)]
2823 pub struct OpRro {
2824     #[dst_type(F32)]
2825     pub dst: Dst,
2826 
2827     pub op: RroOp,
2828 
2829     #[src_type(F32)]
2830     pub src: Src,
2831 }
2832 
2833 impl DisplayOp for OpRro {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2834     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2835         write!(f, "rro{} {}", self.op, self.src)
2836     }
2837 }
2838 impl_display_for_op!(OpRro);
2839 
/// Function selector of [`OpMuFu`].
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum MuFuOp {
    Cos,
    Sin,
    Exp2,
    Log2,
    Rcp,
    Rsq,
    Rcp64H,
    Rsq64H,
    Sqrt,
    Tanh,
}

impl fmt::Display for MuFuOp {
    /// Writes the lower-case function name without a leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            MuFuOp::Cos => "cos",
            MuFuOp::Sin => "sin",
            MuFuOp::Exp2 => "exp2",
            MuFuOp::Log2 => "log2",
            MuFuOp::Rcp => "rcp",
            MuFuOp::Rsq => "rsq",
            MuFuOp::Rcp64H => "rcp64h",
            MuFuOp::Rsq64H => "rsq64h",
            MuFuOp::Sqrt => "sqrt",
            MuFuOp::Tanh => "tanh",
        };
        f.write_str(name)
    }
}
2871 
2872 #[repr(C)]
2873 #[derive(SrcsAsSlice, DstsAsSlice)]
2874 pub struct OpMuFu {
2875     #[dst_type(F32)]
2876     pub dst: Dst,
2877 
2878     pub op: MuFuOp,
2879 
2880     #[src_type(F32)]
2881     pub src: Src,
2882 }
2883 
2884 impl DisplayOp for OpMuFu {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2885     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2886         write!(f, "mufu.{} {}", self.op, self.src)
2887     }
2888 }
2889 impl_display_for_op!(OpMuFu);
2890 
2891 #[repr(C)]
2892 #[derive(SrcsAsSlice, DstsAsSlice)]
2893 pub struct OpDAdd {
2894     #[dst_type(F64)]
2895     pub dst: Dst,
2896 
2897     #[src_type(F64)]
2898     pub srcs: [Src; 2],
2899 
2900     pub rnd_mode: FRndMode,
2901 }
2902 
2903 impl DisplayOp for OpDAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2904     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2905         write!(f, "dadd")?;
2906         if self.rnd_mode != FRndMode::NearestEven {
2907             write!(f, "{}", self.rnd_mode)?;
2908         }
2909         write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2910     }
2911 }
2912 impl_display_for_op!(OpDAdd);
2913 
2914 #[repr(C)]
2915 #[derive(SrcsAsSlice, DstsAsSlice)]
2916 pub struct OpDMul {
2917     #[dst_type(F64)]
2918     pub dst: Dst,
2919 
2920     #[src_type(F64)]
2921     pub srcs: [Src; 2],
2922 
2923     pub rnd_mode: FRndMode,
2924 }
2925 
2926 impl DisplayOp for OpDMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2927     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2928         write!(f, "dmul")?;
2929         if self.rnd_mode != FRndMode::NearestEven {
2930             write!(f, "{}", self.rnd_mode)?;
2931         }
2932         write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2933     }
2934 }
2935 impl_display_for_op!(OpDMul);
2936 
2937 #[repr(C)]
2938 #[derive(SrcsAsSlice, DstsAsSlice)]
2939 pub struct OpDFma {
2940     #[dst_type(F64)]
2941     pub dst: Dst,
2942 
2943     #[src_type(F64)]
2944     pub srcs: [Src; 3],
2945 
2946     pub rnd_mode: FRndMode,
2947 }
2948 
2949 impl DisplayOp for OpDFma {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2950     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2951         write!(f, "dfma")?;
2952         if self.rnd_mode != FRndMode::NearestEven {
2953             write!(f, "{}", self.rnd_mode)?;
2954         }
2955         write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
2956     }
2957 }
2958 impl_display_for_op!(OpDFma);
2959 
2960 #[repr(C)]
2961 #[derive(SrcsAsSlice, DstsAsSlice)]
2962 pub struct OpDMnMx {
2963     #[dst_type(F64)]
2964     pub dst: Dst,
2965 
2966     #[src_type(F64)]
2967     pub srcs: [Src; 2],
2968 
2969     #[src_type(Pred)]
2970     pub min: Src,
2971 }
2972 
2973 impl DisplayOp for OpDMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2974     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2975         write!(f, "dmnmx {} {} {}", self.srcs[0], self.srcs[1], self.min)
2976     }
2977 }
2978 impl_display_for_op!(OpDMnMx);
2979 
2980 #[repr(C)]
2981 #[derive(SrcsAsSlice, DstsAsSlice)]
2982 pub struct OpDSetP {
2983     #[dst_type(Pred)]
2984     pub dst: Dst,
2985 
2986     pub set_op: PredSetOp,
2987     pub cmp_op: FloatCmpOp,
2988 
2989     #[src_type(F64)]
2990     pub srcs: [Src; 2],
2991 
2992     #[src_type(Pred)]
2993     pub accum: Src,
2994 }
2995 
2996 impl DisplayOp for OpDSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2997     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2998         write!(f, "dsetp{}", self.cmp_op)?;
2999         if !self.set_op.is_trivial(&self.accum) {
3000             write!(f, "{}", self.set_op)?;
3001         }
3002         write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3003         if !self.set_op.is_trivial(&self.accum) {
3004             write!(f, " {}", self.accum)?;
3005         }
3006         Ok(())
3007     }
3008 }
3009 impl_display_for_op!(OpDSetP);
3010 
3011 #[repr(C)]
3012 #[derive(SrcsAsSlice, DstsAsSlice)]
3013 pub struct OpHAdd2 {
3014     #[dst_type(F16v2)]
3015     pub dst: Dst,
3016 
3017     #[src_type(F16v2)]
3018     pub srcs: [Src; 2],
3019 
3020     pub saturate: bool,
3021     pub ftz: bool,
3022     pub f32: bool,
3023 }
3024 
3025 impl DisplayOp for OpHAdd2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3026     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3027         let sat = if self.saturate { ".sat" } else { "" };
3028         let f32 = if self.f32 { ".f32" } else { "" };
3029         write!(f, "hadd2{sat}{f32}")?;
3030         if self.ftz {
3031             write!(f, ".ftz")?;
3032         }
3033         write!(f, " {} {}", self.srcs[0], self.srcs[1])
3034     }
3035 }
3036 impl_display_for_op!(OpHAdd2);
3037 
3038 #[repr(C)]
3039 #[derive(SrcsAsSlice, DstsAsSlice)]
3040 pub struct OpHSet2 {
3041     #[dst_type(F16v2)]
3042     pub dst: Dst,
3043 
3044     pub set_op: PredSetOp,
3045     pub cmp_op: FloatCmpOp,
3046 
3047     #[src_type(F16v2)]
3048     pub srcs: [Src; 2],
3049 
3050     #[src_type(Pred)]
3051     pub accum: Src,
3052 
3053     pub ftz: bool,
3054 }
3055 
3056 impl DisplayOp for OpHSet2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3057     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3058         let ftz = if self.ftz { ".ftz" } else { "" };
3059         write!(f, "hset2{}{ftz}", self.cmp_op)?;
3060         if !self.set_op.is_trivial(&self.accum) {
3061             write!(f, "{}", self.set_op)?;
3062         }
3063         write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3064         if !self.set_op.is_trivial(&self.accum) {
3065             write!(f, " {}", self.accum)?;
3066         }
3067         Ok(())
3068     }
3069 }
3070 impl_display_for_op!(OpHSet2);
3071 
3072 #[repr(C)]
3073 #[derive(SrcsAsSlice, DstsAsSlice)]
3074 pub struct OpHSetP2 {
3075     #[dst_type(Pred)]
3076     pub dsts: [Dst; 2],
3077 
3078     pub set_op: PredSetOp,
3079     pub cmp_op: FloatCmpOp,
3080 
3081     #[src_type(F16v2)]
3082     pub srcs: [Src; 2],
3083 
3084     #[src_type(Pred)]
3085     pub accum: Src,
3086 
3087     pub ftz: bool,
3088 
3089     // When not set, each dsts get the result of each lanes.
3090     // When set, the first dst gets the result of both lanes (res0 && res1)
3091     // and the second dst gets the negation !(res0 && res1)
3092     // before applying the accumulator.
3093     pub horizontal: bool,
3094 }
3095 
3096 impl DisplayOp for OpHSetP2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3097     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3098         let ftz = if self.ftz { ".ftz" } else { "" };
3099         write!(f, "hsetp2{}{ftz}", self.cmp_op)?;
3100         if !self.set_op.is_trivial(&self.accum) {
3101             write!(f, "{}", self.set_op)?;
3102         }
3103         write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3104         if !self.set_op.is_trivial(&self.accum) {
3105             write!(f, " {}", self.accum)?;
3106         }
3107         Ok(())
3108     }
3109 }
3110 impl_display_for_op!(OpHSetP2);
3111 
3112 #[repr(C)]
3113 #[derive(SrcsAsSlice, DstsAsSlice)]
3114 pub struct OpHMul2 {
3115     #[dst_type(F16v2)]
3116     pub dst: Dst,
3117 
3118     #[src_type(F16v2)]
3119     pub srcs: [Src; 2],
3120 
3121     pub saturate: bool,
3122     pub ftz: bool,
3123     pub dnz: bool,
3124 }
3125 
3126 impl DisplayOp for OpHMul2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3127     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3128         let sat = if self.saturate { ".sat" } else { "" };
3129         write!(f, "hmul2{sat}")?;
3130         if self.dnz {
3131             write!(f, ".dnz")?;
3132         } else if self.ftz {
3133             write!(f, ".ftz")?;
3134         }
3135         write!(f, " {} {}", self.srcs[0], self.srcs[1])
3136     }
3137 }
3138 impl_display_for_op!(OpHMul2);
3139 
3140 #[repr(C)]
3141 #[derive(SrcsAsSlice, DstsAsSlice)]
3142 pub struct OpHFma2 {
3143     #[dst_type(F16v2)]
3144     pub dst: Dst,
3145 
3146     #[src_type(F16v2)]
3147     pub srcs: [Src; 3],
3148 
3149     pub saturate: bool,
3150     pub ftz: bool,
3151     pub dnz: bool,
3152     pub f32: bool,
3153 }
3154 
3155 impl DisplayOp for OpHFma2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3156     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3157         let sat = if self.saturate { ".sat" } else { "" };
3158         let f32 = if self.f32 { ".f32" } else { "" };
3159         write!(f, "hfma2{sat}{f32}")?;
3160         if self.dnz {
3161             write!(f, ".dnz")?;
3162         } else if self.ftz {
3163             write!(f, ".ftz")?;
3164         }
3165         write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
3166     }
3167 }
3168 impl_display_for_op!(OpHFma2);
3169 
3170 #[repr(C)]
3171 #[derive(SrcsAsSlice, DstsAsSlice)]
3172 pub struct OpHMnMx2 {
3173     #[dst_type(F16v2)]
3174     pub dst: Dst,
3175 
3176     #[src_type(F16v2)]
3177     pub srcs: [Src; 2],
3178 
3179     #[src_type(Pred)]
3180     pub min: Src,
3181 
3182     pub ftz: bool,
3183 }
3184 
3185 impl DisplayOp for OpHMnMx2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3186     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3187         let ftz = if self.ftz { ".ftz" } else { "" };
3188         write!(
3189             f,
3190             "hmnmx2{ftz} {} {} {}",
3191             self.srcs[0], self.srcs[1], self.min
3192         )
3193     }
3194 }
3195 impl_display_for_op!(OpHMnMx2);
3196 
3197 #[repr(C)]
3198 #[derive(SrcsAsSlice, DstsAsSlice)]
3199 pub struct OpBMsk {
3200     #[dst_type(GPR)]
3201     pub dst: Dst,
3202 
3203     #[src_type(ALU)]
3204     pub pos: Src,
3205 
3206     #[src_type(ALU)]
3207     pub width: Src,
3208 
3209     pub wrap: bool,
3210 }
3211 
3212 impl DisplayOp for OpBMsk {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3213     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3214         let wrap = if self.wrap { ".wrap" } else { ".clamp" };
3215         write!(f, "bmsk{} {} {}", wrap, self.pos, self.width)
3216     }
3217 }
3218 impl_display_for_op!(OpBMsk);
3219 
3220 #[repr(C)]
3221 #[derive(SrcsAsSlice, DstsAsSlice)]
3222 pub struct OpBRev {
3223     #[dst_type(GPR)]
3224     pub dst: Dst,
3225 
3226     #[src_type(ALU)]
3227     pub src: Src,
3228 }
3229 
3230 impl DisplayOp for OpBRev {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3231     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3232         write!(f, "brev {}", self.src)
3233     }
3234 }
3235 impl_display_for_op!(OpBRev);
3236 
3237 /// Bitfield extract. Extracts all bits from `base` starting at `offset` into
3238 /// `dst`.
3239 #[repr(C)]
3240 #[derive(SrcsAsSlice, DstsAsSlice)]
3241 pub struct OpBfe {
3242     /// Where to insert the bits.
3243     #[dst_type(GPR)]
3244     pub dst: Dst,
3245 
3246     /// The source of bits to extract.
3247     #[src_type(ALU)]
3248     pub base: Src,
3249 
3250     /// The range of bits to extract. This source is interpreted as four
3251     /// separate bytes, [b0, b1, b2, b3].
3252     ///
3253     /// b0 and b1: unused
3254     /// b2: the number of bits to extract.
3255     /// b3: the offset of the first bit to extract.
3256     ///
3257     /// This matches the way the hardware works.
3258     #[src_type(ALU)]
3259     pub range: Src,
3260 
3261     /// Whether the output is signed
3262     pub signed: bool,
3263 
3264     /// Whether to reverse the bits before inserting them into `dst`.
3265     pub reverse: bool,
3266 }
3267 
3268 impl DisplayOp for OpBfe {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3269     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3270         write!(f, "bfe")?;
3271         if self.signed {
3272             write!(f, ".s")?;
3273         }
3274         if self.reverse {
3275             write!(f, ".rev")?;
3276         }
3277         write!(f, " {} {}", self.base, self.range,)
3278     }
3279 }
3280 impl_display_for_op!(OpBfe);
3281 
3282 #[repr(C)]
3283 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3284 pub struct OpFlo {
3285     #[dst_type(GPR)]
3286     pub dst: Dst,
3287 
3288     #[src_type(ALU)]
3289     pub src: Src,
3290 
3291     pub signed: bool,
3292     pub return_shift_amount: bool,
3293 }
3294 
3295 impl Foldable for OpFlo {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3296     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3297         let src = f.get_u32_src(self, &self.src);
3298         let leading = if self.signed && (src & 0x80000000) != 0 {
3299             (!src).leading_zeros()
3300         } else {
3301             src.leading_zeros()
3302         };
3303         let dst = if self.return_shift_amount {
3304             leading
3305         } else {
3306             31 - leading
3307         };
3308         f.set_u32_dst(self, &self.dst, dst);
3309     }
3310 }
3311 
3312 impl DisplayOp for OpFlo {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3313     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3314         write!(f, "flo")?;
3315         if self.return_shift_amount {
3316             write!(f, ".samt")?;
3317         }
3318         write!(f, " {}", self.src)
3319     }
3320 }
3321 impl_display_for_op!(OpFlo);
3322 
3323 #[repr(C)]
3324 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3325 pub struct OpIAbs {
3326     #[dst_type(GPR)]
3327     pub dst: Dst,
3328 
3329     #[src_type(ALU)]
3330     pub src: Src,
3331 }
3332 
3333 impl Foldable for OpIAbs {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3334     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3335         let src = f.get_u32_src(self, &self.src);
3336         let dst = (src as i32).abs() as u32;
3337         f.set_u32_dst(self, &self.dst, dst);
3338     }
3339 }
3340 
3341 impl DisplayOp for OpIAbs {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3342     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3343         write!(f, "iabs {}", self.src)
3344     }
3345 }
3346 impl_display_for_op!(OpIAbs);
3347 
3348 /// Only used on SM50
3349 #[repr(C)]
3350 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3351 pub struct OpIAdd2 {
3352     #[dst_type(GPR)]
3353     pub dst: Dst,
3354     #[dst_type(Carry)]
3355     pub carry_out: Dst,
3356 
3357     #[src_type(I32)]
3358     pub srcs: [Src; 2],
3359 }
3360 
3361 impl Foldable for OpIAdd2 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3362     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3363         let srcs = [
3364             f.get_u32_src(self, &self.srcs[0]),
3365             f.get_u32_src(self, &self.srcs[1]),
3366         ];
3367 
3368         let mut sum = 0_u64;
3369         for i in 0..2 {
3370             if self.srcs[i].src_mod.is_ineg() {
3371                 // This is a very literal interpretation of 2's compliment.
3372                 // This is not -u64::from(src) or u64::from(-src).
3373                 sum += u64::from(!srcs[i]) + 1;
3374             } else {
3375                 sum += u64::from(srcs[i]);
3376             }
3377         }
3378 
3379         f.set_u32_dst(self, &self.dst, sum as u32);
3380         f.set_carry_dst(self, &self.carry_out, sum >= (1 << 32));
3381     }
3382 }
3383 
3384 impl DisplayOp for OpIAdd2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3385     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3386         write!(f, "iadd2 {} {}", self.srcs[0], self.srcs[1])
3387     }
3388 }
3389 
3390 /// Only used on SM50
3391 #[repr(C)]
3392 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3393 pub struct OpIAdd2X {
3394     #[dst_type(GPR)]
3395     pub dst: Dst,
3396     #[dst_type(Carry)]
3397     pub carry_out: Dst,
3398 
3399     #[src_type(B32)]
3400     pub srcs: [Src; 2],
3401     #[src_type(Carry)]
3402     pub carry_in: Src,
3403 }
3404 
3405 impl Foldable for OpIAdd2X {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3406     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3407         let srcs = [
3408             f.get_u32_bnot_src(self, &self.srcs[0]),
3409             f.get_u32_bnot_src(self, &self.srcs[1]),
3410         ];
3411         let carry_in = f.get_carry_src(self, &self.carry_in);
3412 
3413         let sum = u64::from(srcs[0]) + u64::from(srcs[1]) + u64::from(carry_in);
3414 
3415         f.set_u32_dst(self, &self.dst, sum as u32);
3416         f.set_carry_dst(self, &self.carry_out, sum >= (1 << 32));
3417     }
3418 }
3419 
3420 impl DisplayOp for OpIAdd2X {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3421     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3422         write!(f, "iadd2.x {} {}", self.srcs[0], self.srcs[1])?;
3423         if !self.carry_in.is_zero() {
3424             write!(f, " {}", self.carry_in)?;
3425         }
3426         Ok(())
3427     }
3428 }
3429 
3430 #[repr(C)]
3431 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3432 pub struct OpIAdd3 {
3433     #[dst_type(GPR)]
3434     pub dst: Dst,
3435 
3436     #[dst_type(Pred)]
3437     pub overflow: [Dst; 2],
3438 
3439     #[src_type(I32)]
3440     pub srcs: [Src; 3],
3441 }
3442 
3443 impl Foldable for OpIAdd3 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3444     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3445         let srcs = [
3446             f.get_u32_src(self, &self.srcs[0]),
3447             f.get_u32_src(self, &self.srcs[1]),
3448             f.get_u32_src(self, &self.srcs[2]),
3449         ];
3450 
3451         let mut sum = 0_u64;
3452         for i in 0..3 {
3453             if self.srcs[i].src_mod.is_ineg() {
3454                 // This is a very literal interpretation of 2's compliment.
3455                 // This is not -u64::from(src) or u64::from(-src).
3456                 sum += u64::from(!srcs[i]) + 1;
3457             } else {
3458                 sum += u64::from(srcs[i]);
3459             }
3460         }
3461 
3462         f.set_u32_dst(self, &self.dst, sum as u32);
3463         f.set_pred_dst(self, &self.overflow[0], sum >= 1_u64 << 32);
3464         f.set_pred_dst(self, &self.overflow[1], sum >= 2_u64 << 32);
3465     }
3466 }
3467 
3468 impl DisplayOp for OpIAdd3 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3469     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3470         write!(
3471             f,
3472             "iadd3 {} {} {}",
3473             self.srcs[0], self.srcs[1], self.srcs[2],
3474         )
3475     }
3476 }
3477 impl_display_for_op!(OpIAdd3);
3478 
3479 #[repr(C)]
3480 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3481 pub struct OpIAdd3X {
3482     #[dst_type(GPR)]
3483     pub dst: Dst,
3484 
3485     #[dst_type(Pred)]
3486     pub overflow: [Dst; 2],
3487 
3488     #[src_type(B32)]
3489     pub srcs: [Src; 3],
3490 
3491     #[src_type(Pred)]
3492     pub carry: [Src; 2],
3493 }
3494 
3495 impl Foldable for OpIAdd3X {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3496     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3497         let srcs = [
3498             f.get_u32_bnot_src(self, &self.srcs[0]),
3499             f.get_u32_bnot_src(self, &self.srcs[1]),
3500             f.get_u32_bnot_src(self, &self.srcs[2]),
3501         ];
3502         let carry = [
3503             f.get_pred_src(self, &self.carry[0]),
3504             f.get_pred_src(self, &self.carry[1]),
3505         ];
3506 
3507         let mut sum = 0_u64;
3508         for i in 0..3 {
3509             sum += u64::from(srcs[i]);
3510         }
3511 
3512         for i in 0..2 {
3513             sum += u64::from(carry[i]);
3514         }
3515 
3516         f.set_u32_dst(self, &self.dst, sum as u32);
3517         f.set_pred_dst(self, &self.overflow[0], sum >= 1_u64 << 32);
3518         f.set_pred_dst(self, &self.overflow[1], sum >= 2_u64 << 32);
3519     }
3520 }
3521 
3522 impl DisplayOp for OpIAdd3X {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3523     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3524         write!(
3525             f,
3526             "iadd3.x {} {} {} {} {}",
3527             self.srcs[0],
3528             self.srcs[1],
3529             self.srcs[2],
3530             self.carry[0],
3531             self.carry[1]
3532         )
3533     }
3534 }
3535 impl_display_for_op!(OpIAdd3X);
3536 
3537 #[repr(C)]
3538 #[derive(SrcsAsSlice, DstsAsSlice)]
3539 pub struct OpIDp4 {
3540     #[dst_type(GPR)]
3541     pub dst: Dst,
3542 
3543     pub src_types: [IntType; 2],
3544 
3545     #[src_type(I32)]
3546     pub srcs: [Src; 3],
3547 }
3548 
3549 impl DisplayOp for OpIDp4 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3550     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3551         write!(
3552             f,
3553             "idp4{}{} {} {} {}",
3554             self.src_types[0],
3555             self.src_types[1],
3556             self.srcs[0],
3557             self.srcs[1],
3558             self.srcs[2],
3559         )
3560     }
3561 }
3562 impl_display_for_op!(OpIDp4);
3563 
3564 #[repr(C)]
3565 #[derive(SrcsAsSlice, DstsAsSlice)]
3566 pub struct OpIMad {
3567     #[dst_type(GPR)]
3568     pub dst: Dst,
3569 
3570     #[src_type(ALU)]
3571     pub srcs: [Src; 3],
3572 
3573     pub signed: bool,
3574 }
3575 
3576 impl DisplayOp for OpIMad {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3577     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3578         write!(f, "imad {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2],)
3579     }
3580 }
3581 impl_display_for_op!(OpIMad);
3582 
3583 /// Only used on SM50
3584 #[repr(C)]
3585 #[derive(SrcsAsSlice, DstsAsSlice)]
3586 pub struct OpIMul {
3587     #[dst_type(GPR)]
3588     pub dst: Dst,
3589 
3590     #[src_type(ALU)]
3591     pub srcs: [Src; 2],
3592 
3593     pub signed: [bool; 2],
3594     pub high: bool,
3595 }
3596 
3597 impl DisplayOp for OpIMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3598     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3599         write!(f, "imul")?;
3600         if self.high {
3601             write!(f, ".hi")?;
3602         }
3603         let src_type = |signed| if signed { ".s32" } else { ".u32" };
3604         write!(
3605             f,
3606             "{}{}",
3607             src_type(self.signed[0]),
3608             src_type(self.signed[1])
3609         )?;
3610         write!(f, " {} {}", self.srcs[0], self.srcs[1])
3611     }
3612 }
3613 
3614 #[repr(C)]
3615 #[derive(SrcsAsSlice, DstsAsSlice)]
3616 pub struct OpIMad64 {
3617     #[dst_type(Vec)]
3618     pub dst: Dst,
3619 
3620     #[src_type(ALU)]
3621     pub srcs: [Src; 3],
3622 
3623     pub signed: bool,
3624 }
3625 
3626 impl DisplayOp for OpIMad64 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3627     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3628         write!(
3629             f,
3630             "imad64 {} {} {}",
3631             self.srcs[0], self.srcs[1], self.srcs[2],
3632         )
3633     }
3634 }
3635 impl_display_for_op!(OpIMad64);
3636 
3637 #[repr(C)]
3638 #[derive(SrcsAsSlice, DstsAsSlice)]
3639 pub struct OpIMnMx {
3640     #[dst_type(GPR)]
3641     pub dst: Dst,
3642 
3643     pub cmp_type: IntCmpType,
3644 
3645     #[src_type(ALU)]
3646     pub srcs: [Src; 2],
3647 
3648     #[src_type(Pred)]
3649     pub min: Src,
3650 }
3651 
3652 impl DisplayOp for OpIMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3653     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3654         write!(
3655             f,
3656             "imnmx{} {} {} {}",
3657             self.cmp_type, self.srcs[0], self.srcs[1], self.min
3658         )
3659     }
3660 }
3661 impl_display_for_op!(OpIMnMx);
3662 
3663 #[repr(C)]
3664 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3665 pub struct OpISetP {
3666     #[dst_type(Pred)]
3667     pub dst: Dst,
3668 
3669     pub set_op: PredSetOp,
3670     pub cmp_op: IntCmpOp,
3671     pub cmp_type: IntCmpType,
3672     pub ex: bool,
3673 
3674     #[src_type(ALU)]
3675     pub srcs: [Src; 2],
3676 
3677     #[src_type(Pred)]
3678     pub accum: Src,
3679 
3680     #[src_type(Pred)]
3681     pub low_cmp: Src,
3682 }
3683 
3684 impl Foldable for OpISetP {
fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3685     fn fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3686         let x = f.get_u32_src(self, &self.srcs[0]);
3687         let y = f.get_u32_src(self, &self.srcs[1]);
3688         let accum = f.get_pred_src(self, &self.accum);
3689         let low_cmp = f.get_pred_src(self, &self.low_cmp);
3690 
3691         let cmp = if self.cmp_type.is_signed() {
3692             let x = x as i32;
3693             let y = y as i32;
3694             match &self.cmp_op {
3695                 IntCmpOp::Eq => x == y,
3696                 IntCmpOp::Ne => x != y,
3697                 IntCmpOp::Lt => x < y,
3698                 IntCmpOp::Le => x <= y,
3699                 IntCmpOp::Gt => x > y,
3700                 IntCmpOp::Ge => x >= y,
3701             }
3702         } else {
3703             match &self.cmp_op {
3704                 IntCmpOp::Eq => x == y,
3705                 IntCmpOp::Ne => x != y,
3706                 IntCmpOp::Lt => x < y,
3707                 IntCmpOp::Le => x <= y,
3708                 IntCmpOp::Gt => x > y,
3709                 IntCmpOp::Ge => x >= y,
3710             }
3711         };
3712 
3713         let cmp = if self.ex && x == y {
3714             // Pre-Volta, isetp.x takes the accumulator into account.  If we
3715             // want to support this, we need to take an an accumulator into
3716             // account.  Disallow it for now.
3717             assert!(sm.sm() >= 70);
3718             low_cmp
3719         } else {
3720             cmp
3721         };
3722 
3723         let dst = self.set_op.eval(cmp, accum);
3724 
3725         f.set_pred_dst(self, &self.dst, dst);
3726     }
3727 }
3728 
3729 impl DisplayOp for OpISetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3730     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3731         write!(f, "isetp{}{}", self.cmp_op, self.cmp_type)?;
3732         if !self.set_op.is_trivial(&self.accum) {
3733             write!(f, "{}", self.set_op)?;
3734         }
3735         if self.ex {
3736             write!(f, ".ex")?;
3737         }
3738         write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3739         if !self.set_op.is_trivial(&self.accum) {
3740             write!(f, " {}", self.accum)?;
3741         }
3742         if self.ex {
3743             write!(f, " {}", self.low_cmp)?;
3744         }
3745         Ok(())
3746     }
3747 }
3748 impl_display_for_op!(OpISetP);
3749 
3750 #[repr(C)]
3751 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3752 pub struct OpLop2 {
3753     #[dst_type(GPR)]
3754     pub dst: Dst,
3755 
3756     #[src_type(B32)]
3757     pub srcs: [Src; 2],
3758 
3759     pub op: LogicOp2,
3760 }
3761 
3762 impl DisplayOp for OpLop2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3763     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3764         write!(f, "lop2.{} {} {}", self.op, self.srcs[0], self.srcs[1],)
3765     }
3766 }
3767 
3768 impl Foldable for OpLop2 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3769     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3770         let srcs = [
3771             f.get_u32_bnot_src(self, &self.srcs[0]),
3772             f.get_u32_bnot_src(self, &self.srcs[1]),
3773         ];
3774         let dst = match self.op {
3775             LogicOp2::And => srcs[0] & srcs[1],
3776             LogicOp2::Or => srcs[0] | srcs[1],
3777             LogicOp2::Xor => srcs[0] ^ srcs[1],
3778             LogicOp2::PassB => srcs[1],
3779         };
3780         f.set_u32_dst(self, &self.dst, dst);
3781     }
3782 }
3783 
3784 #[repr(C)]
3785 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3786 pub struct OpLop3 {
3787     #[dst_type(GPR)]
3788     pub dst: Dst,
3789 
3790     #[src_type(ALU)]
3791     pub srcs: [Src; 3],
3792 
3793     pub op: LogicOp3,
3794 }
3795 
3796 impl Foldable for OpLop3 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3797     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3798         let srcs = [
3799             f.get_u32_bnot_src(self, &self.srcs[0]),
3800             f.get_u32_bnot_src(self, &self.srcs[1]),
3801             f.get_u32_bnot_src(self, &self.srcs[2]),
3802         ];
3803         let dst = self.op.eval(srcs[0], srcs[1], srcs[2]);
3804         f.set_u32_dst(self, &self.dst, dst);
3805     }
3806 }
3807 
3808 impl DisplayOp for OpLop3 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3809     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3810         write!(
3811             f,
3812             "lop3.{} {} {} {}",
3813             self.op, self.srcs[0], self.srcs[1], self.srcs[2],
3814         )
3815     }
3816 }
3817 impl_display_for_op!(OpLop3);
3818 
3819 #[derive(Clone, Copy, Eq, PartialEq)]
3820 pub enum ShflOp {
3821     Idx,
3822     Up,
3823     Down,
3824     Bfly,
3825 }
3826 
3827 impl fmt::Display for ShflOp {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3828     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3829         match self {
3830             ShflOp::Idx => write!(f, "idx"),
3831             ShflOp::Up => write!(f, "up"),
3832             ShflOp::Down => write!(f, "down"),
3833             ShflOp::Bfly => write!(f, "bfly"),
3834         }
3835     }
3836 }
3837 
3838 #[repr(C)]
3839 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3840 pub struct OpShf {
3841     #[dst_type(GPR)]
3842     pub dst: Dst,
3843 
3844     #[src_type(GPR)]
3845     pub low: Src,
3846 
3847     #[src_type(ALU)]
3848     pub high: Src,
3849 
3850     #[src_type(ALU)]
3851     pub shift: Src,
3852 
3853     pub right: bool,
3854     pub wrap: bool,
3855     pub data_type: IntType,
3856     pub dst_high: bool,
3857 }
3858 
3859 impl Foldable for OpShf {
fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3860     fn fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3861         let low = f.get_u32_src(self, &self.low);
3862         let high = f.get_u32_src(self, &self.high);
3863         let shift = f.get_u32_src(self, &self.shift);
3864 
3865         let bits: u32 = self.data_type.bits().try_into().unwrap();
3866         let shift = if self.wrap {
3867             shift & (bits - 1)
3868         } else {
3869             min(shift, bits)
3870         };
3871 
3872         let x = u64::from(low) | (u64::from(high) << 32);
3873         let shifted = if sm.sm() < 70
3874             && self.dst_high
3875             && self.data_type != IntType::I64
3876         {
3877             if self.right {
3878                 x.checked_shr(shift).unwrap_or(0) as u64
3879             } else {
3880                 x.checked_shl(shift).unwrap_or(0) as u64
3881             }
3882         } else if self.data_type.is_signed() {
3883             if self.right {
3884                 (x as i64).checked_shr(shift).unwrap_or(0) as u64
3885             } else {
3886                 (x as i64).checked_shl(shift).unwrap_or(0) as u64
3887             }
3888         } else {
3889             if self.right {
3890                 x.checked_shr(shift).unwrap_or(0) as u64
3891             } else {
3892                 x.checked_shl(shift).unwrap_or(0) as u64
3893             }
3894         };
3895 
3896         let dst = if (sm.sm() < 70 && !self.right) || self.dst_high {
3897             (shifted >> 32) as u32
3898         } else {
3899             shifted as u32
3900         };
3901 
3902         f.set_u32_dst(self, &self.dst, dst);
3903     }
3904 }
3905 
3906 impl DisplayOp for OpShf {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3907     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3908         write!(f, "shf")?;
3909         if self.right {
3910             write!(f, ".r")?;
3911         } else {
3912             write!(f, ".l")?;
3913         }
3914         if self.wrap {
3915             write!(f, ".w")?;
3916         }
3917         write!(f, "{}", self.data_type)?;
3918         if self.dst_high {
3919             write!(f, ".hi")?;
3920         }
3921         write!(f, " {} {} {}", self.low, self.high, self.shift)
3922     }
3923 }
3924 impl_display_for_op!(OpShf);
3925 
3926 /// Only used on SM50
3927 #[repr(C)]
3928 #[derive(SrcsAsSlice, DstsAsSlice)]
3929 pub struct OpShl {
3930     #[dst_type(GPR)]
3931     pub dst: Dst,
3932 
3933     #[src_type(GPR)]
3934     pub src: Src,
3935 
3936     #[src_type(ALU)]
3937     pub shift: Src,
3938 
3939     pub wrap: bool,
3940 }
3941 
3942 impl DisplayOp for OpShl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3943     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3944         write!(f, "shl")?;
3945         if self.wrap {
3946             write!(f, ".w")?;
3947         }
3948         write!(f, " {} {}", self.src, self.shift)
3949     }
3950 }
3951 
/// Only used on SM50
///
/// Printed by its `DisplayOp` impl as `shr[.w][.u32] <src> <shift>`.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpShr {
    // Destination, typed as a GPR.
    #[dst_type(GPR)]
    pub dst: Dst,

    // First printed operand; presumably the value being shifted.
    #[src_type(GPR)]
    pub src: Src,

    // Second printed operand; presumably the shift amount.
    #[src_type(ALU)]
    pub shift: Src,

    // Printed as the `.w` modifier when set.
    // NOTE(review): presumably selects wrapping of the shift amount; confirm
    // against the encoder.
    pub wrap: bool,

    // When false, the `.u32` modifier is printed.
    pub signed: bool,
}
3968 
3969 impl DisplayOp for OpShr {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3970     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3971         write!(f, "shr")?;
3972         if self.wrap {
3973             write!(f, ".w")?;
3974         }
3975         if !self.signed {
3976             write!(f, ".u32")?;
3977         }
3978         write!(f, " {} {}", self.src, self.shift)
3979     }
3980 }
3981 
/// Float-to-float conversion, printed as `f2f`.
///
/// The source and destination slice attributes are derived from `src_type`
/// and `dst_type` by the hand-written `AsSlice` impls rather than by a
/// derive.
#[repr(C)]
pub struct OpF2F {
    /// Destination of the conversion.
    pub dst: Dst,
    /// Value to convert.
    pub src: Src,

    /// Floating-point type of `src`.
    pub src_type: FloatType,
    /// Floating-point type of `dst`.
    pub dst_type: FloatType,
    /// Rounding mode; printed after the two types.
    pub rnd_mode: FRndMode,
    /// Printed as the `.ftz` modifier when set.
    pub ftz: bool,
    /// For 16-bit up-conversions, take the high 16 bits of the source register.
    /// For 16-bit down-conversions, place the result into the upper 16 bits of
    /// the destination register
    pub high: bool,
    /// Round to the nearest integer rather than nearest float
    ///
    /// Not available on SM70+
    pub integer_rnd: bool,
}
4000 
4001 impl AsSlice<Src> for OpF2F {
4002     type Attr = SrcType;
4003 
as_slice(&self) -> &[Src]4004     fn as_slice(&self) -> &[Src] {
4005         std::slice::from_ref(&self.src)
4006     }
4007 
as_mut_slice(&mut self) -> &mut [Src]4008     fn as_mut_slice(&mut self) -> &mut [Src] {
4009         std::slice::from_mut(&mut self.src)
4010     }
4011 
attrs(&self) -> SrcTypeList4012     fn attrs(&self) -> SrcTypeList {
4013         let src_type = match self.src_type {
4014             FloatType::F16 => SrcType::F16,
4015             FloatType::F32 => SrcType::F32,
4016             FloatType::F64 => SrcType::F64,
4017         };
4018         SrcTypeList::Uniform(src_type)
4019     }
4020 }
4021 
4022 impl AsSlice<Dst> for OpF2F {
4023     type Attr = DstType;
4024 
as_slice(&self) -> &[Dst]4025     fn as_slice(&self) -> &[Dst] {
4026         std::slice::from_ref(&self.dst)
4027     }
4028 
as_mut_slice(&mut self) -> &mut [Dst]4029     fn as_mut_slice(&mut self) -> &mut [Dst] {
4030         std::slice::from_mut(&mut self.dst)
4031     }
4032 
attrs(&self) -> DstTypeList4033     fn attrs(&self) -> DstTypeList {
4034         let dst_type = match self.dst_type {
4035             FloatType::F16 => DstType::F16,
4036             FloatType::F32 => DstType::F32,
4037             FloatType::F64 => DstType::F64,
4038         };
4039         DstTypeList::Uniform(dst_type)
4040     }
4041 }
4042 
4043 impl DisplayOp for OpF2F {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4044     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4045         write!(f, "f2f")?;
4046         if self.ftz {
4047             write!(f, ".ftz")?;
4048         }
4049         if self.integer_rnd {
4050             write!(f, ".int")?;
4051         }
4052         write!(
4053             f,
4054             "{}{}{} {}",
4055             self.dst_type, self.src_type, self.rnd_mode, self.src,
4056         )
4057     }
4058 }
4059 impl_display_for_op!(OpF2F);
4060 
/// Two-source float conversion, printed as `f2fp.pack_ab`.
///
/// NOTE(review): the mnemonic suggests both sources are converted and packed
/// into one destination register; confirm against the encoder.
#[repr(C)]
#[derive(DstsAsSlice, SrcsAsSlice)]
pub struct OpF2FP {
    // Destination, typed as a GPR.
    #[dst_type(GPR)]
    pub dst: Dst,

    // The two input operands, printed in index order separated by ", ".
    #[src_type(ALU)]
    pub srcs: [Src; 2],

    // Rounding mode; only printed when it is not `FRndMode::NearestEven`.
    pub rnd_mode: FRndMode,
}
4072 
4073 impl DisplayOp for OpF2FP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4074     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4075         write!(f, "f2fp.pack_ab")?;
4076         if self.rnd_mode != FRndMode::NearestEven {
4077             write!(f, "{}", self.rnd_mode)?;
4078         }
4079         write!(f, " {}, {}", self.srcs[0], self.srcs[1],)
4080     }
4081 }
4082 impl_display_for_op!(OpF2FP);
4083 
/// Float-to-integer conversion, printed as `f2i`.
///
/// Only the destination slice is derived; the source slice attributes come
/// from the hand-written `AsSlice<Src>` impl, which follows `src_type`.
#[repr(C)]
#[derive(DstsAsSlice)]
pub struct OpF2I {
    // Destination, typed as a GPR.
    #[dst_type(GPR)]
    pub dst: Dst,

    // Value to convert.
    pub src: Src,

    // Floating-point type of `src`.
    pub src_type: FloatType,
    // Integer type of the result.
    pub dst_type: IntType,
    // Rounding mode; printed after the two types.
    pub rnd_mode: FRndMode,
    // Printed as the `.ftz` modifier when set.
    pub ftz: bool,
}
4097 
4098 impl AsSlice<Src> for OpF2I {
4099     type Attr = SrcType;
4100 
as_slice(&self) -> &[Src]4101     fn as_slice(&self) -> &[Src] {
4102         std::slice::from_ref(&self.src)
4103     }
4104 
as_mut_slice(&mut self) -> &mut [Src]4105     fn as_mut_slice(&mut self) -> &mut [Src] {
4106         std::slice::from_mut(&mut self.src)
4107     }
4108 
attrs(&self) -> SrcTypeList4109     fn attrs(&self) -> SrcTypeList {
4110         let src_type = match self.src_type {
4111             FloatType::F16 => SrcType::F16,
4112             FloatType::F32 => SrcType::F32,
4113             FloatType::F64 => SrcType::F64,
4114         };
4115         SrcTypeList::Uniform(src_type)
4116     }
4117 }
4118 
4119 impl DisplayOp for OpF2I {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4120     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4121         let ftz = if self.ftz { ".ftz" } else { "" };
4122         write!(
4123             f,
4124             "f2i{}{}{}{ftz} {}",
4125             self.dst_type, self.src_type, self.rnd_mode, self.src,
4126         )
4127     }
4128 }
4129 impl_display_for_op!(OpF2I);
4130 
/// Integer-to-float conversion, printed as `i2f`.
///
/// Both slice impls are hand-written: the source type depends on the bit
/// width of `src_type` and the destination type follows `dst_type`.
#[repr(C)]
pub struct OpI2F {
    /// Destination of the conversion.
    pub dst: Dst,
    /// Value to convert.
    pub src: Src,

    /// Floating-point type of the result.
    pub dst_type: FloatType,
    /// Integer type of `src`.
    pub src_type: IntType,
    /// Rounding mode; printed after the two types.
    pub rnd_mode: FRndMode,
}
4140 
4141 impl AsSlice<Src> for OpI2F {
4142     type Attr = SrcType;
4143 
as_slice(&self) -> &[Src]4144     fn as_slice(&self) -> &[Src] {
4145         std::slice::from_ref(&self.src)
4146     }
4147 
as_mut_slice(&mut self) -> &mut [Src]4148     fn as_mut_slice(&mut self) -> &mut [Src] {
4149         std::slice::from_mut(&mut self.src)
4150     }
4151 
attrs(&self) -> SrcTypeList4152     fn attrs(&self) -> SrcTypeList {
4153         if self.src_type.bits() <= 32 {
4154             SrcTypeList::Uniform(SrcType::ALU)
4155         } else {
4156             SrcTypeList::Uniform(SrcType::GPR)
4157         }
4158     }
4159 }
4160 
4161 impl AsSlice<Dst> for OpI2F {
4162     type Attr = DstType;
4163 
as_slice(&self) -> &[Dst]4164     fn as_slice(&self) -> &[Dst] {
4165         std::slice::from_ref(&self.dst)
4166     }
4167 
as_mut_slice(&mut self) -> &mut [Dst]4168     fn as_mut_slice(&mut self) -> &mut [Dst] {
4169         std::slice::from_mut(&mut self.dst)
4170     }
4171 
attrs(&self) -> DstTypeList4172     fn attrs(&self) -> DstTypeList {
4173         let dst_type = match self.dst_type {
4174             FloatType::F16 => DstType::F16,
4175             FloatType::F32 => DstType::F32,
4176             FloatType::F64 => DstType::F64,
4177         };
4178         DstTypeList::Uniform(dst_type)
4179     }
4180 }
4181 
4182 impl DisplayOp for OpI2F {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4183     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4184         write!(
4185             f,
4186             "i2f{}{}{} {}",
4187             self.dst_type, self.src_type, self.rnd_mode, self.src,
4188         )
4189     }
4190 }
4191 impl_display_for_op!(OpI2F);
4192 
/// Not used on SM70+
///
/// Integer-to-integer conversion, printed as `i2i`.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpI2I {
    // Destination, typed as a GPR.
    #[dst_type(GPR)]
    pub dst: Dst,

    // Value to convert.
    #[src_type(ALU)]
    pub src: Src,

    // Integer type of `src`.
    pub src_type: IntType,
    // Integer type of the result.
    pub dst_type: IntType,

    // Printed as the `.sat` modifier when set.
    pub saturate: bool,
    // Printed as `.abs` after the source when set.
    pub abs: bool,
    // Printed as `.neg` after the source when set.
    pub neg: bool,
}
4210 
4211 impl DisplayOp for OpI2I {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4212     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4213         write!(f, "i2i")?;
4214         if self.saturate {
4215             write!(f, ".sat ")?;
4216         }
4217         write!(f, "{}{} {}", self.dst_type, self.src_type, self.src,)?;
4218         if self.abs {
4219             write!(f, ".abs")?;
4220         }
4221         if self.neg {
4222             write!(f, ".neg")?;
4223         }
4224         Ok(())
4225     }
4226 }
4227 impl_display_for_op!(OpI2I);
4228 
/// Floating-point rounding operation, printed as `frnd`.
///
/// Only the destination slice is derived; the source slice attributes come
/// from the hand-written `AsSlice<Src>` impl, which follows `src_type`.
#[repr(C)]
#[derive(DstsAsSlice)]
pub struct OpFRnd {
    // Destination; always attributed as F32 regardless of `dst_type`.
    #[dst_type(F32)]
    pub dst: Dst,

    // Value to round.
    pub src: Src,

    // Floating-point type printed first after the mnemonic.
    pub dst_type: FloatType,
    // Floating-point type of `src`.
    pub src_type: FloatType,
    // Rounding mode; printed after the two types.
    pub rnd_mode: FRndMode,
    // Printed as the `.ftz` modifier when set.
    pub ftz: bool,
}
4242 
4243 impl AsSlice<Src> for OpFRnd {
4244     type Attr = SrcType;
4245 
as_slice(&self) -> &[Src]4246     fn as_slice(&self) -> &[Src] {
4247         std::slice::from_ref(&self.src)
4248     }
4249 
as_mut_slice(&mut self) -> &mut [Src]4250     fn as_mut_slice(&mut self) -> &mut [Src] {
4251         std::slice::from_mut(&mut self.src)
4252     }
4253 
attrs(&self) -> SrcTypeList4254     fn attrs(&self) -> SrcTypeList {
4255         let src_type = match self.src_type {
4256             FloatType::F16 => SrcType::F16,
4257             FloatType::F32 => SrcType::F32,
4258             FloatType::F64 => SrcType::F64,
4259         };
4260         SrcTypeList::Uniform(src_type)
4261     }
4262 }
4263 
4264 impl DisplayOp for OpFRnd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4265     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4266         let ftz = if self.ftz { ".ftz" } else { "" };
4267         write!(
4268             f,
4269             "frnd{}{}{}{ftz} {}",
4270             self.dst_type, self.src_type, self.rnd_mode, self.src,
4271         )
4272     }
4273 }
4274 impl_display_for_op!(OpFRnd);
4275 
/// Register move, printed as `mov` (or `mov[<quad_lanes>]` when the lane
/// mask is not `0xf`).
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpMov {
    // Destination, typed as a GPR.
    #[dst_type(GPR)]
    pub dst: Dst,

    // Value to move.
    #[src_type(ALU)]
    pub src: Src,

    // Lane mask; `0xf` is treated as the default and is not printed.
    // NOTE(review): presumably one bit per lane of a quad; confirm against
    // the encoder.
    pub quad_lanes: u8,
}
4287 
4288 impl DisplayOp for OpMov {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4289     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4290         if self.quad_lanes == 0xf {
4291             write!(f, "mov {}", self.src)
4292         } else {
4293             write!(f, "mov[{:#x}] {}", self.quad_lanes, self.src)
4294         }
4295     }
4296 }
4297 impl_display_for_op!(OpMov);
4298 
/// One 4-bit entry of a permute selector.
///
/// Bits 0-1 hold the byte index, bit 2 holds the source index and bit 3 is
/// the "msb" flag.
#[derive(Copy, Clone)]
pub struct PrmtSelByte(u8);

impl PrmtSelByte {
    /// Sentinel whose raw value does not fit in four bits.
    pub const INVALID: PrmtSelByte = PrmtSelByte(u8::MAX);

    /// Builds a selector entry from its parts.
    ///
    /// # Panics
    ///
    /// Panics if `src_idx >= 2` or `byte_idx >= 4`.
    pub fn new(src_idx: usize, byte_idx: usize, msb: bool) -> PrmtSelByte {
        assert!(src_idx < 2);
        assert!(byte_idx < 4);

        let msb_bit: u8 = if msb { 0x8 } else { 0x0 };
        PrmtSelByte(msb_bit | ((src_idx as u8) << 2) | (byte_idx as u8))
    }

    /// Index (0 or 1) of the source this entry reads from.
    pub fn src(&self) -> usize {
        usize::from((self.0 >> 2) & 0x1)
    }

    /// Index (0 to 3) of the byte taken from the selected source.
    pub fn byte(&self) -> usize {
        usize::from(self.0 & 0x3)
    }

    /// Whether the "msb" flag (bit 3) is set.
    pub fn msb(&self) -> bool {
        self.0 & 0x8 != 0
    }

    /// Applies this entry to the 32-bit source value `u`.
    ///
    /// Returns byte `self.byte()` of `u`. When the "msb" flag is set, the
    /// result is instead that byte's top bit replicated across all eight
    /// bits (`0xff` or `0x00`).
    pub fn fold_u32(&self, u: u32) -> u8 {
        let picked = (u >> (self.byte() * 8)) as u8;
        if !self.msb() {
            return picked;
        }
        if picked & 0x80 != 0 {
            0xff
        } else {
            0x00
        }
    }
}
4338 
4339 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
4340 pub struct PrmtSel(pub u16);
4341 
4342 impl PrmtSel {
new(bytes: [PrmtSelByte; 4]) -> PrmtSel4343     pub fn new(bytes: [PrmtSelByte; 4]) -> PrmtSel {
4344         let mut sel = 0;
4345         for i in 0..4 {
4346             assert!(bytes[i].0 <= 0xf);
4347             sel |= u16::from(bytes[i].0) << (i * 4);
4348         }
4349         PrmtSel(sel)
4350     }
4351 
get(&self, byte_idx: usize) -> PrmtSelByte4352     pub fn get(&self, byte_idx: usize) -> PrmtSelByte {
4353         assert!(byte_idx < 4);
4354         PrmtSelByte(((self.0 >> (byte_idx * 4)) & 0xf) as u8)
4355     }
4356 }
4357 
/// Mode selector of a permute operation.
///
/// `Index` is the default mode and is printed as an empty suffix; every other
/// mode is printed as a distinct dot-prefixed mnemonic suffix.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum PrmtMode {
    Index,
    Forward4Extract,
    Backward4Extract,
    Replicate8,
    EdgeClampLeft,
    EdgeClampRight,
    Replicate16,
}

impl fmt::Display for PrmtMode {
    /// Writes the mnemonic suffix for this mode (nothing for `Index`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            PrmtMode::Index => Ok(()),
            PrmtMode::Forward4Extract => write!(f, ".f4e"),
            PrmtMode::Backward4Extract => write!(f, ".b4e"),
            PrmtMode::Replicate8 => write!(f, ".rc8"),
            PrmtMode::EdgeClampLeft => write!(f, ".ecl"),
            // Must differ from EdgeClampLeft; otherwise the two modes are
            // indistinguishable in printed IR.
            PrmtMode::EdgeClampRight => write!(f, ".ecr"),
            PrmtMode::Replicate16 => write!(f, ".rc16"),
        }
    }
}
4383 
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
/// Permutes `srcs` into `dst` using `selection`.
///
/// In `PrmtMode::Index` mode, each of the four destination bytes is picked
/// from one byte of one of the two sources according to the low 16 bits of
/// `sel` (see the `Foldable` impl for this op).
pub struct OpPrmt {
    // Destination, typed as a GPR.
    #[dst_type(GPR)]
    pub dst: Dst,

    // The two 32-bit inputs that bytes are selected from.
    #[src_type(ALU)]
    pub srcs: [Src; 2],

    // The selector; only its low 16 bits are used in `Index` mode.
    #[src_type(ALU)]
    pub sel: Src,

    // How `sel` is interpreted. Folding asserts `PrmtMode::Index`.
    pub mode: PrmtMode,
}
4399 
4400 impl OpPrmt {
get_sel(&self) -> Option<PrmtSel>4401     pub fn get_sel(&self) -> Option<PrmtSel> {
4402         // TODO: We could construct a PrmtSel for the other modes but we don't
4403         // use them right now because they're kinda pointless.
4404         if self.mode != PrmtMode::Index {
4405             return None;
4406         }
4407 
4408         if let Some(sel) = self.sel.as_u32() {
4409             // The top 16 bits are ignored
4410             Some(PrmtSel(sel as u16))
4411         } else {
4412             None
4413         }
4414     }
4415 
as_u32(&self) -> Option<u32>4416     pub fn as_u32(&self) -> Option<u32> {
4417         let Some(sel) = self.get_sel() else {
4418             return None;
4419         };
4420 
4421         let mut imm = 0_u32;
4422         for b in 0..4 {
4423             let sel_byte = sel.get(b);
4424             let Some(src_u32) = self.srcs[sel_byte.src()].as_u32() else {
4425                 return None;
4426             };
4427 
4428             let sb = sel_byte.fold_u32(src_u32);
4429             imm |= u32::from(sb) << (b * 8);
4430         }
4431         Some(imm)
4432     }
4433 }
4434 
4435 impl Foldable for OpPrmt {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)4436     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
4437         let srcs = [
4438             f.get_u32_src(self, &self.srcs[0]),
4439             f.get_u32_src(self, &self.srcs[1]),
4440         ];
4441         let sel = f.get_u32_src(self, &self.sel);
4442 
4443         assert!(self.mode == PrmtMode::Index);
4444         let sel = PrmtSel(sel as u16);
4445 
4446         let mut dst = 0_u32;
4447         for b in 0..4 {
4448             let sel_byte = sel.get(b);
4449             let src = srcs[sel_byte.src()];
4450             let sb = sel_byte.fold_u32(src);
4451             dst |= u32::from(sb) << (b * 8);
4452         }
4453 
4454         f.set_u32_dst(self, &self.dst, dst);
4455     }
4456 }
4457 
4458 impl DisplayOp for OpPrmt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4459     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4460         write!(
4461             f,
4462             "prmt{} {} [{}] {}",
4463             self.mode, self.srcs[0], self.sel, self.srcs[1],
4464         )
4465     }
4466 }
4467 impl_display_for_op!(OpPrmt);
4468 
/// Select operation, printed as `sel <cond> <srcs[0]> <srcs[1]>`.
///
/// NOTE(review): presumably `dst` receives `srcs[0]` when `cond` is true and
/// `srcs[1]` otherwise; confirm against the folding/encoding code.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpSel {
    // Destination, typed as a GPR.
    #[dst_type(GPR)]
    pub dst: Dst,

    // Predicate-typed condition.
    #[src_type(Pred)]
    pub cond: Src,

    // The two candidate values.
    #[src_type(ALU)]
    pub srcs: [Src; 2],
}
4481 
4482 impl DisplayOp for OpSel {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4483     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4484         write!(f, "sel {} {} {}", self.cond, self.srcs[0], self.srcs[1],)
4485     }
4486 }
4487 impl_display_for_op!(OpSel);
4488 
/// Shuffle operation, printed as `shfl.<op> <src> <lane> <c>`.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpShfl {
    // Value destination, typed as a GPR.
    #[dst_type(GPR)]
    pub dst: Dst,

    // Predicate destination.
    // NOTE(review): presumably reports whether the selected lane was in
    // range; confirm.
    #[dst_type(Pred)]
    pub in_bounds: Dst,

    // Value being shuffled; must be an SSA-typed source.
    #[src_type(SSA)]
    pub src: Src,

    // Lane operand.
    #[src_type(ALU)]
    pub lane: Src,

    // Third operand, printed last.
    // NOTE(review): meaning is not visible here; confirm against the encoder.
    #[src_type(ALU)]
    pub c: Src,

    // Which shuffle variant to perform; printed as the mnemonic suffix.
    pub op: ShflOp,
}
4509 
4510 impl DisplayOp for OpShfl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4511     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4512         write!(f, "shfl.{} {} {} {}", self.op, self.src, self.lane, self.c)
4513     }
4514 }
4515 impl_display_for_op!(OpShfl);
4516 
/// Predicate three-input logic operation, printed as `plop3`.
///
/// NOTE(review): presumably `dsts[i]` receives `ops[i]` applied to the three
/// sources; confirm against the folding/encoding code.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpPLop3 {
    // The two predicate destinations.
    #[dst_type(Pred)]
    pub dsts: [Dst; 2],

    // The three predicate inputs.
    #[src_type(Pred)]
    pub srcs: [Src; 3],

    // One logic op per destination slot; printed after the sources.
    pub ops: [LogicOp3; 2],
}
4528 
4529 impl DisplayOp for OpPLop3 {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4530     fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4531         write!(f, "{} {}", self.dsts[0], self.dsts[1])
4532     }
4533 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4534     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4535         write!(
4536             f,
4537             "plop3 {} {} {} {} {}",
4538             self.srcs[0], self.srcs[1], self.srcs[2], self.ops[0], self.ops[1],
4539         )
4540     }
4541 }
4542 impl_display_for_op!(OpPLop3);
4543 
/// Predicate set-predicate operation, printed as `psetp`.
///
/// As implemented by its `Foldable` impl:
/// `dsts[0] = ops[1](srcs[2], ops[0](srcs[0], srcs[1]))` and `dsts[1]` is the
/// same expression with `srcs[0]` negated.
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
pub struct OpPSetP {
    // The two predicate destinations.
    #[dst_type(Pred)]
    pub dsts: [Dst; 2],

    // `ops[0]` combines `srcs[0]` and `srcs[1]`; `ops[1]` combines `srcs[2]`
    // with that intermediate result.
    pub ops: [PredSetOp; 2],

    // The three predicate inputs.
    #[src_type(Pred)]
    pub srcs: [Src; 3],
}
4555 
4556 impl Foldable for OpPSetP {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)4557     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
4558         let srcs = [
4559             f.get_pred_src(self, &self.srcs[0]),
4560             f.get_pred_src(self, &self.srcs[1]),
4561             f.get_pred_src(self, &self.srcs[2]),
4562         ];
4563 
4564         let tmp = self.ops[0].eval(srcs[0], srcs[1]);
4565         let dst0 = self.ops[1].eval(srcs[2], tmp);
4566 
4567         let tmp = self.ops[0].eval(!srcs[0], srcs[1]);
4568         let dst1 = self.ops[1].eval(srcs[2], tmp);
4569 
4570         f.set_pred_dst(self, &self.dsts[0], dst0);
4571         f.set_pred_dst(self, &self.dsts[1], dst1);
4572     }
4573 }
4574 
4575 impl DisplayOp for OpPSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4576     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4577         write!(
4578             f,
4579             "psetp{}{} {} {} {}",
4580             self.ops[0], self.ops[1], self.srcs[0], self.srcs[1], self.srcs[2],
4581         )
4582     }
4583 }
4584 
/// Population count, printed as `popc <src>`.
///
/// Its `Foldable` impl writes the number of set bits of the 32-bit source
/// value to `dst`.
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
pub struct OpPopC {
    // Destination, typed as a GPR.
    #[dst_type(GPR)]
    pub dst: Dst,

    // 32-bit input; folding reads it via `get_u32_bnot_src`.
    #[src_type(B32)]
    pub src: Src,
}
4594 
4595 impl Foldable for OpPopC {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)4596     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
4597         let src = f.get_u32_bnot_src(self, &self.src);
4598         let dst = src.count_ones();
4599         f.set_u32_dst(self, &self.dst, dst);
4600     }
4601 }
4602 
4603 impl DisplayOp for OpPopC {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4604     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4605         write!(f, "popc {}", self.src,)
4606     }
4607 }
4608 impl_display_for_op!(OpPopC);
4609 
/// Printed as `r2ur <src>`.
///
/// NOTE(review): the mnemonic suggests a move from a regular register to a
/// uniform register, but `dst` is attributed as `GPR` here; confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpR2UR {
    // Destination.
    #[dst_type(GPR)]
    pub dst: Dst,

    // Source register.
    #[src_type(GPR)]
    pub src: Src,
}
4619 
4620 impl DisplayOp for OpR2UR {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4621     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4622         write!(f, "r2ur {}", self.src)
4623     }
4624 }
4625 impl_display_for_op!(OpR2UR);
4626 
/// Texture operation, printed as `tex.b<dim>` plus optional modifiers.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpTex {
    // Result destinations.
    pub dsts: [Dst; 2],
    // Additional destination.
    // NOTE(review): presumably a fault/residency predicate; confirm.
    pub fault: Dst,

    // The two SSA-typed source operands.
    #[src_type(SSA)]
    pub srcs: [Src; 2],

    // Texture dimensionality; printed right after `tex.b`.
    pub dim: TexDim,
    // LOD mode; printed as `.<mode>` unless it is `TexLodMode::Auto`.
    pub lod_mode: TexLodMode,
    // Printed as the `.dc` modifier when set.
    pub z_cmpr: bool,
    // Printed as the `.aoffi` modifier when set.
    pub offset: bool,
    // Not printed by `fmt_op`.
    // NOTE(review): presumably a per-component write mask; confirm.
    pub mask: u8,
}
4642 
4643 impl DisplayOp for OpTex {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4644     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4645         write!(f, "tex.b{}", self.dim)?;
4646         if self.lod_mode != TexLodMode::Auto {
4647             write!(f, ".{}", self.lod_mode)?;
4648         }
4649         if self.offset {
4650             write!(f, ".aoffi")?;
4651         }
4652         if self.z_cmpr {
4653             write!(f, ".dc")?;
4654         }
4655         write!(f, " {} {}", self.srcs[0], self.srcs[1])
4656     }
4657 }
4658 impl_display_for_op!(OpTex);
4659 
/// Texture operation, printed as `tld.b<dim>` plus optional modifiers.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpTld {
    // Result destinations.
    pub dsts: [Dst; 2],
    // Additional destination.
    // NOTE(review): presumably a fault/residency predicate; confirm.
    pub fault: Dst,

    // The two SSA-typed source operands.
    #[src_type(SSA)]
    pub srcs: [Src; 2],

    // Texture dimensionality; printed right after `tld.b`.
    pub dim: TexDim,
    // Printed as the `.ms` modifier when set.
    pub is_ms: bool,
    // LOD mode; printed as `.<mode>` unless it is `TexLodMode::Auto`.
    pub lod_mode: TexLodMode,
    // Printed as the `.aoffi` modifier when set.
    pub offset: bool,
    // Not printed by `fmt_op`.
    // NOTE(review): presumably a per-component write mask; confirm.
    pub mask: u8,
}
4675 
4676 impl DisplayOp for OpTld {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4677     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4678         write!(f, "tld.b{}", self.dim)?;
4679         if self.lod_mode != TexLodMode::Auto {
4680             write!(f, ".{}", self.lod_mode)?;
4681         }
4682         if self.offset {
4683             write!(f, ".aoffi")?;
4684         }
4685         if self.is_ms {
4686             write!(f, ".ms")?;
4687         }
4688         write!(f, " {} {}", self.srcs[0], self.srcs[1])
4689     }
4690 }
4691 impl_display_for_op!(OpTld);
4692 
/// Texture operation, printed as `tld4.g.b<dim>` plus an optional offset
/// mode.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpTld4 {
    // Result destinations.
    pub dsts: [Dst; 2],
    // Additional destination.
    // NOTE(review): presumably a fault/residency predicate; confirm.
    pub fault: Dst,

    // The two SSA-typed source operands.
    #[src_type(SSA)]
    pub srcs: [Src; 2],

    // Texture dimensionality; printed right after `tld4.g.b`.
    pub dim: TexDim,
    // Not printed by `fmt_op`.
    // NOTE(review): presumably the component to gather; confirm.
    pub comp: u8,
    // Offset mode; printed as `.<mode>` unless it is `Tld4OffsetMode::None`.
    pub offset_mode: Tld4OffsetMode,
    // Not printed by `fmt_op`.
    pub z_cmpr: bool,
    // Not printed by `fmt_op`.
    // NOTE(review): presumably a per-component write mask; confirm.
    pub mask: u8,
}
4708 
4709 impl DisplayOp for OpTld4 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4710     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4711         write!(f, "tld4.g.b{}", self.dim)?;
4712         if self.offset_mode != Tld4OffsetMode::None {
4713             write!(f, ".{}", self.offset_mode)?;
4714         }
4715         write!(f, " {} {}", self.srcs[0], self.srcs[1])
4716     }
4717 }
4718 impl_display_for_op!(OpTld4);
4719 
/// Texture operation, printed as `tmml.b.lod<dim> <srcs[0]> <srcs[1]>`.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpTmml {
    // Result destinations.
    pub dsts: [Dst; 2],

    // The two SSA-typed source operands.
    #[src_type(SSA)]
    pub srcs: [Src; 2],

    // Texture dimensionality; printed right after `tmml.b.lod`.
    pub dim: TexDim,
    // Not printed by `fmt_op`.
    // NOTE(review): presumably a per-component write mask; confirm.
    pub mask: u8,
}
4731 
4732 impl DisplayOp for OpTmml {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4733     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4734         write!(
4735             f,
4736             "tmml.b.lod{} {} {}",
4737             self.dim, self.srcs[0], self.srcs[1]
4738         )
4739     }
4740 }
4741 impl_display_for_op!(OpTmml);
4742 
/// Texture operation, printed as `txd.b<dim>` plus an optional `.aoffi`.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpTxd {
    // Result destinations.
    pub dsts: [Dst; 2],
    // Additional destination.
    // NOTE(review): presumably a fault/residency predicate; confirm.
    pub fault: Dst,

    // The two SSA-typed source operands.
    #[src_type(SSA)]
    pub srcs: [Src; 2],

    // Texture dimensionality; printed right after `txd.b`.
    pub dim: TexDim,
    // Printed as the `.aoffi` modifier when set.
    pub offset: bool,
    // Not printed by `fmt_op`.
    // NOTE(review): presumably a per-component write mask; confirm.
    pub mask: u8,
}
4756 
4757 impl DisplayOp for OpTxd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4758     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4759         write!(f, "txd.b{}", self.dim)?;
4760         if self.offset {
4761             write!(f, ".aoffi")?;
4762         }
4763         write!(f, " {} {}", self.srcs[0], self.srcs[1])
4764     }
4765 }
4766 impl_display_for_op!(OpTxd);
4767 
/// Texture query, printed as `txq.b <src> <query>`.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpTxq {
    // Result destinations.
    pub dsts: [Dst; 2],

    // SSA-typed source operand.
    #[src_type(SSA)]
    pub src: Src,

    // Which property to query; printed after the source.
    pub query: TexQuery,
    // Not printed by `fmt_op`.
    // NOTE(review): presumably a per-component write mask; confirm.
    pub mask: u8,
}
4779 
4780 impl DisplayOp for OpTxq {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4781     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4782         write!(f, "txq.b {} {}", self.src, self.query)
4783     }
4784 }
4785 impl_display_for_op!(OpTxq);
4786 
/// Surface load, printed as
/// `suld.p<image_dim><mem_order><mem_eviction_priority> [<coord>] <handle>`.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpSuLd {
    // Loaded data destination.
    pub dst: Dst,
    // Additional destination.
    // NOTE(review): presumably a fault/residency predicate; confirm.
    pub fault: Dst,

    // Image dimensionality; printed right after `suld.p`.
    pub image_dim: ImageDim,
    // Memory ordering; printed as a mnemonic suffix.
    pub mem_order: MemOrder,
    // Eviction priority; printed as a mnemonic suffix.
    pub mem_eviction_priority: MemEvictionPriority,
    // Not printed by `fmt_op`.
    // NOTE(review): presumably a per-component mask; confirm.
    pub mask: u8,

    // Image handle; printed last.
    #[src_type(GPR)]
    pub handle: Src,

    // Coordinate operand; printed in square brackets.
    #[src_type(SSA)]
    pub coord: Src,
}
4804 
4805 impl DisplayOp for OpSuLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4806     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4807         write!(
4808             f,
4809             "suld.p{}{}{} [{}] {}",
4810             self.image_dim,
4811             self.mem_order,
4812             self.mem_eviction_priority,
4813             self.coord,
4814             self.handle,
4815         )
4816     }
4817 }
4818 impl_display_for_op!(OpSuLd);
4819 
/// Surface store, printed as
/// `sust.p<image_dim><mem_order><mem_eviction_priority> [<coord>] <data> <handle>`.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpSuSt {
    // Image dimensionality; printed right after `sust.p`.
    pub image_dim: ImageDim,
    // Memory ordering; printed as a mnemonic suffix.
    pub mem_order: MemOrder,
    // Eviction priority; printed as a mnemonic suffix.
    pub mem_eviction_priority: MemEvictionPriority,
    // Not printed by `fmt_op`.
    // NOTE(review): presumably a per-component mask; confirm.
    pub mask: u8,

    // Image handle; printed last.
    #[src_type(GPR)]
    pub handle: Src,

    // Coordinate operand; printed in square brackets.
    #[src_type(SSA)]
    pub coord: Src,

    // Value to store; printed between the coordinate and the handle.
    #[src_type(SSA)]
    pub data: Src,
}
4837 
4838 impl DisplayOp for OpSuSt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4839     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4840         write!(
4841             f,
4842             "sust.p{}{}{} [{}] {} {}",
4843             self.image_dim,
4844             self.mem_order,
4845             self.mem_eviction_priority,
4846             self.coord,
4847             self.data,
4848             self.handle,
4849         )
4850     }
4851 }
4852 impl_display_for_op!(OpSuSt);
4853 
/// Surface atomic, printed as `suatom.p` followed by the image dimension,
/// atomic op, atomic type, memory order and eviction priority, then
/// `[<coord>] <data> <handle>`.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpSuAtom {
    // Result destination.
    pub dst: Dst,
    // Additional destination.
    // NOTE(review): presumably a fault/residency predicate; confirm.
    pub fault: Dst,

    // Image dimensionality; printed right after `suatom.p`.
    pub image_dim: ImageDim,

    // Atomic operation to perform.
    pub atom_op: AtomOp,
    // Data type the atomic operates on.
    pub atom_type: AtomType,

    // Memory ordering; printed as a mnemonic suffix.
    pub mem_order: MemOrder,
    // Eviction priority; printed as a mnemonic suffix.
    pub mem_eviction_priority: MemEvictionPriority,

    // Image handle; printed last.
    #[src_type(GPR)]
    pub handle: Src,

    // Coordinate operand; printed in square brackets.
    #[src_type(SSA)]
    pub coord: Src,

    // Data operand; printed between the coordinate and the handle.
    #[src_type(SSA)]
    pub data: Src,
}
4877 
4878 impl DisplayOp for OpSuAtom {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4879     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4880         write!(
4881             f,
4882             "suatom.p{}{}{}{}{} [{}] {} {}",
4883             self.image_dim,
4884             self.atom_op,
4885             self.atom_type,
4886             self.mem_order,
4887             self.mem_eviction_priority,
4888             self.coord,
4889             self.data,
4890             self.handle,
4891         )
4892     }
4893 }
4894 impl_display_for_op!(OpSuAtom);
4895 
/// Memory load, printed as `ld<access> [<addr>+<offset>]`.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpLd {
    // Loaded data destination.
    pub dst: Dst,

    // Base address register.
    #[src_type(GPR)]
    pub addr: Src,

    // Signed immediate offset; printed inside the brackets after `addr`.
    pub offset: i32,
    // Access descriptor; printed as the mnemonic suffix.
    pub access: MemAccess,
}
4907 
4908 impl DisplayOp for OpLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4909     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4910         write!(f, "ld{} [{}", self.access, self.addr)?;
4911         if self.offset > 0 {
4912             write!(f, "+{:#x}", self.offset)?;
4913         }
4914         write!(f, "]")
4915     }
4916 }
4917 impl_display_for_op!(OpLd);
4918 
/// Addressing mode of a constant-buffer load.
///
/// `Indexed` is the default and prints as an empty suffix; the other modes
/// print as `.il`, `.is` and `.isl`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum LdcMode {
    Indexed,
    IndexedLinear,
    IndexedSegmented,
    IndexedSegmentedLinear,
}

impl fmt::Display for LdcMode {
    /// Writes the mnemonic suffix for this mode (nothing for `Indexed`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            LdcMode::Indexed => return Ok(()),
            LdcMode::IndexedLinear => ".il",
            LdcMode::IndexedSegmented => ".is",
            LdcMode::IndexedSegmentedLinear => ".isl",
        };
        f.write_str(suffix)
    }
}
4938 
/// Constant-buffer load, printed as `ldc<mode><mem_type> <buf>[...]`.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpLdc {
    // Loaded data destination.
    pub dst: Dst,

    // Constant-buffer reference. `fmt_op` panics unless its `src_ref` is
    // `SrcRef::CBuf`.
    #[src_type(ALU)]
    pub cb: Src,

    // Register offset combined with the immediate offset stored in `cb`.
    #[src_type(GPR)]
    pub offset: Src,

    // Addressing mode; printed as a mnemonic suffix.
    pub mode: LdcMode,
    // Memory type of the loaded value; printed as a mnemonic suffix.
    pub mem_type: MemType,
}
4953 
4954 impl DisplayOp for OpLdc {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4955     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4956         let SrcRef::CBuf(cb) = self.cb.src_ref else {
4957             panic!("Not a cbuf");
4958         };
4959         write!(f, "ldc{}{} {}[", self.mode, self.mem_type, cb.buf)?;
4960         if self.offset.is_zero() {
4961             write!(f, "+{:#x}", cb.offset)?;
4962         } else if cb.offset == 0 {
4963             write!(f, "{}", self.offset)?;
4964         } else {
4965             write!(f, "{}+{:#x}", self.offset, cb.offset)?;
4966         }
4967         write!(f, "]")
4968     }
4969 }
4970 impl_display_for_op!(OpLdc);
4971 
/// Memory store, printed as `st<access> [<addr>+<offset>] <data>`.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpSt {
    // Base address register.
    #[src_type(GPR)]
    pub addr: Src,

    // Value to store.
    #[src_type(SSA)]
    pub data: Src,

    // Signed immediate offset; printed inside the brackets after `addr`.
    pub offset: i32,
    // Access descriptor; printed as the mnemonic suffix.
    pub access: MemAccess,
}
4984 
4985 impl DisplayOp for OpSt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4986     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4987         write!(f, "st{} [{}", self.access, self.addr)?;
4988         if self.offset > 0 {
4989             write!(f, "+{:#x}", self.offset)?;
4990         }
4991         write!(f, "] {}", self.data)
4992     }
4993 }
4994 impl_display_for_op!(OpSt);
4995 
/// Memory atomic, printed as `atom` followed by the atomic op, atomic type,
/// memory space, memory order and eviction priority, then the bracketed
/// address and the operands.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpAtom {
    // Result destination.
    pub dst: Dst,

    // Base address register; omitted from the printed form when zero.
    #[src_type(GPR)]
    pub addr: Src,

    // Compare value; only printed when `atom_op` is
    // `AtomOp::CmpExch(AtomCmpSrc::Separate)`.
    #[src_type(GPR)]
    pub cmpr: Src,

    // Data operand; always printed last.
    #[src_type(SSA)]
    pub data: Src,

    // Atomic operation to perform.
    pub atom_op: AtomOp,
    // Data type the atomic operates on.
    pub atom_type: AtomType,

    // Signed immediate offset added to `addr`.
    pub addr_offset: i32,

    // Memory space; printed as a mnemonic suffix.
    pub mem_space: MemSpace,
    // Memory ordering; printed as a mnemonic suffix.
    pub mem_order: MemOrder,
    // Eviction priority; printed as a mnemonic suffix.
    pub mem_eviction_priority: MemEvictionPriority,
}
5019 
5020 impl DisplayOp for OpAtom {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5021     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5022         write!(
5023             f,
5024             "atom{}{}{}{}{}",
5025             self.atom_op,
5026             self.atom_type,
5027             self.mem_space,
5028             self.mem_order,
5029             self.mem_eviction_priority,
5030         )?;
5031         write!(f, " [")?;
5032         if !self.addr.is_zero() {
5033             write!(f, "{}", self.addr)?;
5034         }
5035         if self.addr_offset > 0 {
5036             if !self.addr.is_zero() {
5037                 write!(f, "+")?;
5038             }
5039             write!(f, "{:#x}", self.addr_offset)?;
5040         }
5041         write!(f, "]")?;
5042         if self.atom_op == AtomOp::CmpExch(AtomCmpSrc::Separate) {
5043             write!(f, " {}", self.cmpr)?;
5044         }
5045         write!(f, " {}", self.data)
5046     }
5047 }
5048 impl_display_for_op!(OpAtom);
5049 
5050 #[repr(C)]
5051 #[derive(SrcsAsSlice, DstsAsSlice)]
5052 pub struct OpAL2P {
5053     pub dst: Dst,
5054 
5055     #[src_type(GPR)]
5056     pub offset: Src,
5057 
5058     pub access: AttrAccess,
5059 }
5060 
5061 impl DisplayOp for OpAL2P {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5062     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5063         write!(f, "al2p")?;
5064         if self.access.output {
5065             write!(f, ".o")?;
5066         }
5067         if self.access.patch {
5068             write!(f, ".p")?;
5069         }
5070         write!(f, " a[{:#x}", self.access.addr)?;
5071         if !self.offset.is_zero() {
5072             write!(f, "+{}", self.offset)?;
5073         }
5074         write!(f, "]")
5075     }
5076 }
5077 impl_display_for_op!(OpAL2P);
5078 
5079 #[repr(C)]
5080 #[derive(SrcsAsSlice, DstsAsSlice)]
5081 pub struct OpALd {
5082     pub dst: Dst,
5083 
5084     #[src_type(GPR)]
5085     pub vtx: Src,
5086 
5087     #[src_type(GPR)]
5088     pub offset: Src,
5089 
5090     pub access: AttrAccess,
5091 }
5092 
5093 impl DisplayOp for OpALd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5094     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5095         write!(f, "ald")?;
5096         if self.access.output {
5097             write!(f, ".o")?;
5098         }
5099         if self.access.patch {
5100             write!(f, ".p")?;
5101         }
5102         if self.access.phys {
5103             write!(f, ".phys")?;
5104         }
5105         write!(f, " a")?;
5106         if !self.vtx.is_zero() {
5107             write!(f, "[{}]", self.vtx)?;
5108         }
5109         write!(f, "[{:#x}", self.access.addr)?;
5110         if !self.offset.is_zero() {
5111             write!(f, "+{}", self.offset)?;
5112         }
5113         write!(f, "]")
5114     }
5115 }
5116 impl_display_for_op!(OpALd);
5117 
5118 #[repr(C)]
5119 #[derive(SrcsAsSlice, DstsAsSlice)]
5120 pub struct OpASt {
5121     #[src_type(GPR)]
5122     pub vtx: Src,
5123 
5124     #[src_type(GPR)]
5125     pub offset: Src,
5126 
5127     #[src_type(SSA)]
5128     pub data: Src,
5129 
5130     pub access: AttrAccess,
5131 }
5132 
5133 impl DisplayOp for OpASt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5134     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5135         write!(f, "ast")?;
5136         if self.access.patch {
5137             write!(f, ".p")?;
5138         }
5139         if self.access.phys {
5140             write!(f, ".phys")?;
5141         }
5142         write!(f, " a")?;
5143         if !self.vtx.is_zero() {
5144             write!(f, "[{}]", self.vtx)?;
5145         }
5146         write!(f, "[{:#x}", self.access.addr)?;
5147         if !self.offset.is_zero() {
5148             write!(f, "+{}", self.offset)?;
5149         }
5150         write!(f, "] {}", self.data)
5151     }
5152 }
5153 impl_display_for_op!(OpASt);
5154 
5155 #[repr(C)]
5156 #[derive(SrcsAsSlice, DstsAsSlice)]
5157 pub struct OpIpa {
5158     pub dst: Dst,
5159     pub addr: u16,
5160     pub freq: InterpFreq,
5161     pub loc: InterpLoc,
5162     pub inv_w: Src,
5163     pub offset: Src,
5164 }
5165 
5166 impl DisplayOp for OpIpa {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5167     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5168         write!(
5169             f,
5170             "ipa{}{} a[{:#x}] {}",
5171             self.freq, self.loc, self.addr, self.inv_w
5172         )?;
5173         if self.loc == InterpLoc::Offset {
5174             write!(f, " {}", self.offset)?;
5175         }
5176         Ok(())
5177     }
5178 }
5179 impl_display_for_op!(OpIpa);
5180 
5181 #[repr(C)]
5182 #[derive(SrcsAsSlice, DstsAsSlice)]
5183 pub struct OpLdTram {
5184     pub dst: Dst,
5185     pub addr: u16,
5186     pub use_c: bool,
5187 }
5188 
5189 impl DisplayOp for OpLdTram {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5190     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5191         write!(f, "ldtram")?;
5192         if self.use_c {
5193             write!(f, ".c")?;
5194         } else {
5195             write!(f, ".ab")?;
5196         }
5197         write!(f, " a[{:#x}]", self.addr)?;
5198         Ok(())
5199     }
5200 }
5201 impl_display_for_op!(OpLdTram);
5202 
/// Operation selector stored in `OpCCtl::op`.
#[allow(dead_code)]
#[derive(Copy, Clone, Debug)]
pub enum CCtlOp {
    Qry1, // Only available pre-Volta
    PF1,
    PF1_5, // Only available pre-Volta
    PF2,
    WB,
    IV,
    IVAll,
    RS,
    RSLB,   // Only available pre-Volta
    IVAllP, // Only available on Volta+
    WBAll,  // Only available on Volta+
    WBAllP, // Only available on Volta+
}

impl CCtlOp {
    /// Returns `true` for the `*All` and `*AllP` variants and `false` for
    /// every other variant.
    ///
    /// The match stays exhaustive on purpose so that adding a variant forces
    /// a decision here.
    pub fn is_all(&self) -> bool {
        match self {
            CCtlOp::IVAll | CCtlOp::IVAllP | CCtlOp::WBAll | CCtlOp::WBAllP => {
                true
            }
            CCtlOp::Qry1
            | CCtlOp::PF1
            | CCtlOp::PF1_5
            | CCtlOp::PF2
            | CCtlOp::WB
            | CCtlOp::IV
            | CCtlOp::RS
            | CCtlOp::RSLB => false,
        }
    }
}

impl fmt::Display for CCtlOp {
    /// Writes the lower-case name of the operation, without a leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            CCtlOp::Qry1 => "qry1",
            CCtlOp::PF1 => "pf1",
            CCtlOp::PF1_5 => "pf1.5",
            CCtlOp::PF2 => "pf2",
            CCtlOp::WB => "wb",
            CCtlOp::IV => "iv",
            CCtlOp::IVAll => "ivall",
            CCtlOp::RS => "rs",
            CCtlOp::RSLB => "rslb",
            CCtlOp::IVAllP => "ivallp",
            CCtlOp::WBAll => "wball",
            CCtlOp::WBAllP => "wballp",
        };
        f.write_str(name)
    }
}
5256 
5257 #[repr(C)]
5258 #[derive(SrcsAsSlice, DstsAsSlice)]
5259 pub struct OpCCtl {
5260     pub op: CCtlOp,
5261 
5262     pub mem_space: MemSpace,
5263 
5264     #[src_type(GPR)]
5265     pub addr: Src,
5266 
5267     pub addr_offset: i32,
5268 }
5269 
5270 impl DisplayOp for OpCCtl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5271     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5272         write!(f, "cctl{}", self.mem_space)?;
5273         if !self.op.is_all() {
5274             write!(f, " [{}", self.addr)?;
5275             if self.addr_offset > 0 {
5276                 write!(f, "+{:#x}", self.addr_offset)?;
5277             }
5278             write!(f, "]")?;
5279         }
5280         Ok(())
5281     }
5282 }
5283 impl_display_for_op!(OpCCtl);
5284 
5285 #[repr(C)]
5286 #[derive(SrcsAsSlice, DstsAsSlice)]
5287 pub struct OpMemBar {
5288     pub scope: MemScope,
5289 }
5290 
5291 impl DisplayOp for OpMemBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5292     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5293         write!(f, "membar.sc.{}", self.scope)
5294     }
5295 }
5296 impl_display_for_op!(OpMemBar);
5297 
5298 #[repr(C)]
5299 #[derive(SrcsAsSlice, DstsAsSlice)]
5300 pub struct OpBClear {
5301     pub dst: Dst,
5302 }
5303 
5304 impl DisplayOp for OpBClear {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5305     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5306         write!(f, "bclear")
5307     }
5308 }
5309 impl_display_for_op!(OpBClear);
5310 
5311 #[repr(C)]
5312 #[derive(SrcsAsSlice, DstsAsSlice)]
5313 pub struct OpBMov {
5314     pub dst: Dst,
5315     pub src: Src,
5316     pub clear: bool,
5317 }
5318 
5319 impl DisplayOp for OpBMov {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5320     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5321         write!(f, "bmov.32")?;
5322         if self.clear {
5323             write!(f, ".clear")?;
5324         }
5325         write!(f, " {}", self.src)
5326     }
5327 }
5328 impl_display_for_op!(OpBMov);
5329 
5330 #[repr(C)]
5331 #[derive(SrcsAsSlice, DstsAsSlice)]
5332 pub struct OpBreak {
5333     #[dst_type(Bar)]
5334     pub bar_out: Dst,
5335 
5336     #[src_type(Bar)]
5337     pub bar_in: Src,
5338 
5339     #[src_type(Pred)]
5340     pub cond: Src,
5341 }
5342 
5343 impl DisplayOp for OpBreak {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5344     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5345         write!(f, "break {} {}", self.bar_in, self.cond)
5346     }
5347 }
5348 impl_display_for_op!(OpBreak);
5349 
5350 #[repr(C)]
5351 #[derive(SrcsAsSlice, DstsAsSlice)]
5352 pub struct OpBSSy {
5353     #[dst_type(Bar)]
5354     pub bar_out: Dst,
5355 
5356     #[src_type(Pred)]
5357     pub bar_in: Src,
5358 
5359     #[src_type(Pred)]
5360     pub cond: Src,
5361 
5362     pub target: Label,
5363 }
5364 
5365 impl DisplayOp for OpBSSy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5366     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5367         write!(f, "bssy {} {} {}", self.bar_in, self.cond, self.target)
5368     }
5369 }
5370 impl_display_for_op!(OpBSSy);
5371 
5372 #[repr(C)]
5373 #[derive(SrcsAsSlice, DstsAsSlice)]
5374 pub struct OpBSync {
5375     #[src_type(Bar)]
5376     pub bar: Src,
5377 
5378     #[src_type(Pred)]
5379     pub cond: Src,
5380 }
5381 
5382 impl DisplayOp for OpBSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5383     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5384         write!(f, "bsync {} {}", self.bar, self.cond)
5385     }
5386 }
5387 impl_display_for_op!(OpBSync);
5388 
5389 #[repr(C)]
5390 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
5391 pub struct OpBra {
5392     pub target: Label,
5393 }
5394 
5395 impl DisplayOp for OpBra {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5396     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5397         write!(f, "bra {}", self.target)
5398     }
5399 }
5400 impl_display_for_op!(OpBra);
5401 
5402 #[repr(C)]
5403 #[derive(SrcsAsSlice, DstsAsSlice)]
5404 pub struct OpSSy {
5405     pub target: Label,
5406 }
5407 
5408 impl DisplayOp for OpSSy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5409     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5410         write!(f, "ssy {}", self.target)
5411     }
5412 }
5413 impl_display_for_op!(OpSSy);
5414 
5415 #[repr(C)]
5416 #[derive(SrcsAsSlice, DstsAsSlice)]
5417 pub struct OpSync {
5418     pub target: Label,
5419 }
5420 
5421 impl DisplayOp for OpSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5422     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5423         write!(f, "sync {}", self.target)
5424     }
5425 }
5426 impl_display_for_op!(OpSync);
5427 
5428 #[repr(C)]
5429 #[derive(SrcsAsSlice, DstsAsSlice)]
5430 pub struct OpBrk {
5431     pub target: Label,
5432 }
5433 
5434 impl DisplayOp for OpBrk {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5435     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5436         write!(f, "brk {}", self.target)
5437     }
5438 }
5439 impl_display_for_op!(OpBrk);
5440 
5441 #[repr(C)]
5442 #[derive(SrcsAsSlice, DstsAsSlice)]
5443 pub struct OpPBk {
5444     pub target: Label,
5445 }
5446 
5447 impl DisplayOp for OpPBk {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5448     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5449         write!(f, "pbk {}", self.target)
5450     }
5451 }
5452 impl_display_for_op!(OpPBk);
5453 
5454 #[repr(C)]
5455 #[derive(SrcsAsSlice, DstsAsSlice)]
5456 pub struct OpCont {
5457     pub target: Label,
5458 }
5459 
5460 impl DisplayOp for OpCont {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5461     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5462         write!(f, "cont {}", self.target)
5463     }
5464 }
5465 impl_display_for_op!(OpCont);
5466 
5467 #[repr(C)]
5468 #[derive(SrcsAsSlice, DstsAsSlice)]
5469 pub struct OpPCnt {
5470     pub target: Label,
5471 }
5472 
5473 impl DisplayOp for OpPCnt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5474     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5475         write!(f, "pcnt {}", self.target)
5476     }
5477 }
5478 impl_display_for_op!(OpPCnt);
5479 
5480 #[repr(C)]
5481 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
5482 pub struct OpExit {}
5483 
5484 impl DisplayOp for OpExit {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5485     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5486         write!(f, "exit")
5487     }
5488 }
5489 impl_display_for_op!(OpExit);
5490 
5491 #[repr(C)]
5492 #[derive(SrcsAsSlice, DstsAsSlice)]
5493 pub struct OpWarpSync {
5494     pub mask: u32,
5495 }
5496 
5497 impl DisplayOp for OpWarpSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5498     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5499         write!(f, "warpsync 0x{:x}", self.mask)
5500     }
5501 }
5502 impl_display_for_op!(OpWarpSync);
5503 
5504 #[repr(C)]
5505 #[derive(SrcsAsSlice, DstsAsSlice)]
5506 pub struct OpBar {}
5507 
5508 impl DisplayOp for OpBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5509     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5510         write!(f, "bar.sync")
5511     }
5512 }
5513 impl_display_for_op!(OpBar);
5514 
5515 #[repr(C)]
5516 #[derive(SrcsAsSlice, DstsAsSlice)]
5517 pub struct OpCS2R {
5518     pub dst: Dst,
5519     pub idx: u8,
5520 }
5521 
5522 impl DisplayOp for OpCS2R {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5523     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5524         write!(f, "cs2r sr[{:#x}]", self.idx)
5525     }
5526 }
5527 impl_display_for_op!(OpCS2R);
5528 
5529 #[repr(C)]
5530 #[derive(SrcsAsSlice, DstsAsSlice)]
5531 pub struct OpIsberd {
5532     #[dst_type(GPR)]
5533     pub dst: Dst,
5534 
5535     #[src_type(SSA)]
5536     pub idx: Src,
5537 }
5538 
5539 impl DisplayOp for OpIsberd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5540     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5541         write!(f, "isberd [{}]", self.idx)
5542     }
5543 }
5544 impl_display_for_op!(OpIsberd);
5545 
5546 #[repr(C)]
5547 #[derive(SrcsAsSlice, DstsAsSlice)]
5548 pub struct OpKill {}
5549 
5550 impl DisplayOp for OpKill {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5551     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5552         write!(f, "kill")
5553     }
5554 }
5555 impl_display_for_op!(OpKill);
5556 
5557 #[repr(C)]
5558 #[derive(SrcsAsSlice, DstsAsSlice)]
5559 pub struct OpNop {
5560     pub label: Option<Label>,
5561 }
5562 
5563 impl DisplayOp for OpNop {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5564     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5565         write!(f, "nop")?;
5566         if let Some(label) = &self.label {
5567             write!(f, " {}", label)?;
5568         }
5569         Ok(())
5570     }
5571 }
5572 impl_display_for_op!(OpNop);
5573 
/// Value selector stored in `OpPixLd::val`.
#[allow(dead_code)]
pub enum PixVal {
    MsCount,
    CovMask,
    Covered,
    Offset,
    CentroidOffset,
    MyIndex,
    InnerCoverage,
}

impl fmt::Display for PixVal {
    /// Writes the selector as a dot-prefixed, lower-case suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            PixVal::MsCount => ".mscount",
            PixVal::CovMask => ".covmask",
            PixVal::Covered => ".covered",
            PixVal::Offset => ".offset",
            PixVal::CentroidOffset => ".centroid_offset",
            PixVal::MyIndex => ".my_index",
            PixVal::InnerCoverage => ".inner_coverage",
        };
        f.write_str(suffix)
    }
}
5598 
5599 #[repr(C)]
5600 #[derive(SrcsAsSlice, DstsAsSlice)]
5601 pub struct OpPixLd {
5602     pub dst: Dst,
5603     pub val: PixVal,
5604 }
5605 
5606 impl DisplayOp for OpPixLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5607     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5608         write!(f, "pixld{}", self.val)
5609     }
5610 }
5611 impl_display_for_op!(OpPixLd);
5612 
5613 #[repr(C)]
5614 #[derive(SrcsAsSlice, DstsAsSlice)]
5615 pub struct OpS2R {
5616     pub dst: Dst,
5617     pub idx: u8,
5618 }
5619 
5620 impl DisplayOp for OpS2R {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5621     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5622         write!(f, "s2r sr[{:#x}]", self.idx)
5623     }
5624 }
5625 impl_display_for_op!(OpS2R);
5626 
/// Operation selector stored in `OpVote::op`.
pub enum VoteOp {
    Any,
    All,
    Eq,
}

impl fmt::Display for VoteOp {
    /// Writes the lower-case name of the operation, without a leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            VoteOp::Any => "any",
            VoteOp::All => "all",
            VoteOp::Eq => "eq",
        };
        f.write_str(name)
    }
}
5642 
5643 #[repr(C)]
5644 #[derive(SrcsAsSlice, DstsAsSlice)]
5645 pub struct OpVote {
5646     pub op: VoteOp,
5647 
5648     #[dst_type(GPR)]
5649     pub ballot: Dst,
5650 
5651     #[dst_type(Pred)]
5652     pub vote: Dst,
5653 
5654     #[src_type(Pred)]
5655     pub pred: Src,
5656 }
5657 
5658 impl DisplayOp for OpVote {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5659     fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5660         if self.ballot.is_none() && self.vote.is_none() {
5661             write!(f, "none")
5662         } else {
5663             if !self.ballot.is_none() {
5664                 write!(f, "{}", self.ballot)?;
5665             }
5666             if !self.vote.is_none() {
5667                 write!(f, "{}", self.vote)?;
5668             }
5669             Ok(())
5670         }
5671     }
5672 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5673     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5674         write!(f, "vote.{} {}", self.op, self.pred)
5675     }
5676 }
5677 impl_display_for_op!(OpVote);
5678 
5679 #[repr(C)]
5680 #[derive(SrcsAsSlice, DstsAsSlice)]
5681 pub struct OpUndef {
5682     pub dst: Dst,
5683 }
5684 
5685 impl DisplayOp for OpUndef {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5686     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5687         write!(f, "undef {}", self.dst)
5688     }
5689 }
5690 impl_display_for_op!(OpUndef);
5691 
5692 #[repr(C)]
5693 #[derive(SrcsAsSlice, DstsAsSlice)]
5694 pub struct OpSrcBar {
5695     pub src: Src,
5696 }
5697 
5698 impl DisplayOp for OpSrcBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5699     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5700         write!(f, "src_bar {}", self.src)
5701     }
5702 }
5703 impl_display_for_op!(OpSrcBar);
5704 
/// A pair of vectors kept at the same length, so that element `i` of `a`
/// and element `i` of `b` together form one logical entry.
pub struct VecPair<A, B> {
    a: Vec<A>,
    b: Vec<B>,
}

impl<A, B> VecPair<A, B> {
    /// Creates an empty pair.
    pub fn new() -> Self {
        Self {
            a: Vec::new(),
            b: Vec::new(),
        }
    }

    /// Appends one `(a, b)` entry.
    pub fn push(&mut self, a: A, b: B) {
        debug_assert!(self.a.len() == self.b.len());
        self.a.push(a);
        self.b.push(b);
    }

    /// Moves every entry of `other` to the end of `self`, leaving `other`
    /// empty.
    pub fn append(&mut self, other: &mut VecPair<A, B>) {
        let VecPair { a, b } = other;
        self.a.append(a);
        self.b.append(b);
    }

    /// Returns the number of entries.
    pub fn len(&self) -> usize {
        debug_assert!(self.a.len() == self.b.len());
        self.a.len()
    }

    /// Returns `true` when there are no entries.
    pub fn is_empty(&self) -> bool {
        debug_assert!(self.a.len() == self.b.len());
        self.a.is_empty()
    }

    /// Iterates over the entries as `(&A, &B)` tuples.
    pub fn iter(&self) -> Zip<slice::Iter<'_, A>, slice::Iter<'_, B>> {
        debug_assert!(self.a.len() == self.b.len());
        let (left, right) = (self.a.iter(), self.b.iter());
        left.zip(right)
    }

    /// Iterates over the entries as `(&mut A, &mut B)` tuples.
    pub fn iter_mut(
        &mut self,
    ) -> Zip<slice::IterMut<'_, A>, slice::IterMut<'_, B>> {
        debug_assert!(self.a.len() == self.b.len());
        let (left, right) = (self.a.iter_mut(), self.b.iter_mut());
        left.zip(right)
    }
}

impl<A: Clone, B: Clone> VecPair<A, B> {
    /// Keeps only the entries for which `f` returns `true`, preserving
    /// their order. `f` is called exactly once per entry, front to back.
    pub fn retain(&mut self, mut f: impl FnMut(&A, &B) -> bool) {
        debug_assert!(self.a.len() == self.b.len());
        let len = self.a.len();

        // `kept` is both the number of surviving entries so far and the
        // slot the next survivor is compacted into.
        let mut kept = 0_usize;
        for idx in 0..len {
            if !f(&self.a[idx], &self.b[idx]) {
                continue;
            }
            // Nothing needs to move until the first entry has been dropped.
            // Cloning is good enough because everything stored here is
            // small and has a trivial Drop.
            if kept != idx {
                self.a[kept] = self.a[idx].clone();
                self.b[kept] = self.b[idx].clone();
            }
            kept += 1;
        }

        if kept < len {
            self.a.truncate(kept);
            self.b.truncate(kept);
        }
    }
}
5787 
/// Hands out consecutive `u32` ids, starting at zero.
pub struct PhiAllocator {
    count: u32,
}

impl PhiAllocator {
    /// Creates an allocator whose first id will be `0`.
    pub fn new() -> PhiAllocator {
        Self { count: 0 }
    }

    /// Returns the next unused id and advances the counter by one.
    pub fn alloc(&mut self) -> u32 {
        let next = self.count + 1;
        std::mem::replace(&mut self.count, next)
    }
}
5803 
5804 #[repr(C)]
5805 #[derive(DstsAsSlice)]
5806 pub struct OpPhiSrcs {
5807     pub srcs: VecPair<u32, Src>,
5808 }
5809 
5810 impl OpPhiSrcs {
new() -> OpPhiSrcs5811     pub fn new() -> OpPhiSrcs {
5812         OpPhiSrcs {
5813             srcs: VecPair::new(),
5814         }
5815     }
5816 }
5817 
5818 impl AsSlice<Src> for OpPhiSrcs {
5819     type Attr = SrcType;
5820 
as_slice(&self) -> &[Src]5821     fn as_slice(&self) -> &[Src] {
5822         &self.srcs.b
5823     }
5824 
as_mut_slice(&mut self) -> &mut [Src]5825     fn as_mut_slice(&mut self) -> &mut [Src] {
5826         &mut self.srcs.b
5827     }
5828 
attrs(&self) -> SrcTypeList5829     fn attrs(&self) -> SrcTypeList {
5830         SrcTypeList::Uniform(SrcType::GPR)
5831     }
5832 }
5833 
5834 impl DisplayOp for OpPhiSrcs {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result5835     fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
5836         Ok(())
5837     }
5838 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5839     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5840         write!(f, "phi_src ")?;
5841         for (i, (id, src)) in self.srcs.iter().enumerate() {
5842             if i > 0 {
5843                 write!(f, ", ")?;
5844             }
5845             write!(f, "φ{} = {}", id, src)?;
5846         }
5847         Ok(())
5848     }
5849 }
5850 impl_display_for_op!(OpPhiSrcs);
5851 
5852 #[repr(C)]
5853 #[derive(SrcsAsSlice)]
5854 pub struct OpPhiDsts {
5855     pub dsts: VecPair<u32, Dst>,
5856 }
5857 
5858 impl OpPhiDsts {
new() -> OpPhiDsts5859     pub fn new() -> OpPhiDsts {
5860         OpPhiDsts {
5861             dsts: VecPair::new(),
5862         }
5863     }
5864 }
5865 
5866 impl AsSlice<Dst> for OpPhiDsts {
5867     type Attr = DstType;
5868 
as_slice(&self) -> &[Dst]5869     fn as_slice(&self) -> &[Dst] {
5870         &self.dsts.b
5871     }
5872 
as_mut_slice(&mut self) -> &mut [Dst]5873     fn as_mut_slice(&mut self) -> &mut [Dst] {
5874         &mut self.dsts.b
5875     }
5876 
attrs(&self) -> DstTypeList5877     fn attrs(&self) -> DstTypeList {
5878         DstTypeList::Uniform(DstType::Vec)
5879     }
5880 }
5881 
5882 impl DisplayOp for OpPhiDsts {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result5883     fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
5884         Ok(())
5885     }
5886 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5887     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5888         write!(f, "phi_dst ")?;
5889         for (i, (id, dst)) in self.dsts.iter().enumerate() {
5890             if i > 0 {
5891                 write!(f, ", ")?;
5892             }
5893             write!(f, "{} = φ{}", dst, id)?;
5894         }
5895         Ok(())
5896     }
5897 }
5898 impl_display_for_op!(OpPhiDsts);
5899 
5900 #[repr(C)]
5901 #[derive(SrcsAsSlice, DstsAsSlice)]
5902 pub struct OpCopy {
5903     pub dst: Dst,
5904     pub src: Src,
5905 }
5906 
5907 impl DisplayOp for OpCopy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5908     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5909         write!(f, "copy {}", self.src)
5910     }
5911 }
5912 impl_display_for_op!(OpCopy);
5913 
5914 #[repr(C)]
5915 #[derive(SrcsAsSlice, DstsAsSlice)]
5916 /// Copies a value and pins its destination in the register file
5917 pub struct OpPin {
5918     pub dst: Dst,
5919     #[src_type(SSA)]
5920     pub src: Src,
5921 }
5922 
5923 impl DisplayOp for OpPin {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5924     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5925         write!(f, "pin {}", self.src)
5926     }
5927 }
5928 impl_display_for_op!(OpPin);
5929 
5930 #[repr(C)]
5931 #[derive(SrcsAsSlice, DstsAsSlice)]
5932 /// Copies a pinned value to an unpinned value
5933 pub struct OpUnpin {
5934     pub dst: Dst,
5935     #[src_type(SSA)]
5936     pub src: Src,
5937 }
5938 
5939 impl DisplayOp for OpUnpin {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5940     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5941         write!(f, "unpin {}", self.src)
5942     }
5943 }
5944 impl_display_for_op!(OpUnpin);
5945 
5946 #[repr(C)]
5947 #[derive(SrcsAsSlice, DstsAsSlice)]
5948 pub struct OpSwap {
5949     pub dsts: [Dst; 2],
5950     pub srcs: [Src; 2],
5951 }
5952 
5953 impl DisplayOp for OpSwap {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5954     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5955         write!(f, "swap {} {}", self.srcs[0], self.srcs[1])
5956     }
5957 }
5958 impl_display_for_op!(OpSwap);
5959 
5960 #[repr(C)]
5961 pub struct OpParCopy {
5962     pub dsts_srcs: VecPair<Dst, Src>,
5963     pub tmp: Option<RegRef>,
5964 }
5965 
5966 impl OpParCopy {
new() -> OpParCopy5967     pub fn new() -> OpParCopy {
5968         OpParCopy {
5969             dsts_srcs: VecPair::new(),
5970             tmp: None,
5971         }
5972     }
5973 
is_empty(&self) -> bool5974     pub fn is_empty(&self) -> bool {
5975         self.dsts_srcs.is_empty()
5976     }
5977 
push(&mut self, dst: Dst, src: Src)5978     pub fn push(&mut self, dst: Dst, src: Src) {
5979         self.dsts_srcs.push(dst, src);
5980     }
5981 }
5982 
5983 impl AsSlice<Src> for OpParCopy {
5984     type Attr = SrcType;
5985 
as_slice(&self) -> &[Src]5986     fn as_slice(&self) -> &[Src] {
5987         &self.dsts_srcs.b
5988     }
5989 
as_mut_slice(&mut self) -> &mut [Src]5990     fn as_mut_slice(&mut self) -> &mut [Src] {
5991         &mut self.dsts_srcs.b
5992     }
5993 
attrs(&self) -> SrcTypeList5994     fn attrs(&self) -> SrcTypeList {
5995         SrcTypeList::Uniform(SrcType::GPR)
5996     }
5997 }
5998 
5999 impl AsSlice<Dst> for OpParCopy {
6000     type Attr = DstType;
6001 
as_slice(&self) -> &[Dst]6002     fn as_slice(&self) -> &[Dst] {
6003         &self.dsts_srcs.a
6004     }
6005 
as_mut_slice(&mut self) -> &mut [Dst]6006     fn as_mut_slice(&mut self) -> &mut [Dst] {
6007         &mut self.dsts_srcs.a
6008     }
6009 
attrs(&self) -> DstTypeList6010     fn attrs(&self) -> DstTypeList {
6011         DstTypeList::Uniform(DstType::Vec)
6012     }
6013 }
6014 
6015 impl DisplayOp for OpParCopy {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result6016     fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
6017         Ok(())
6018     }
6019 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6020     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6021         write!(f, "par_copy")?;
6022         for (i, (dst, src)) in self.dsts_srcs.iter().enumerate() {
6023             if i > 0 {
6024                 write!(f, ",")?;
6025             }
6026             write!(f, " {} = {}", dst, src)?;
6027         }
6028         Ok(())
6029     }
6030 }
6031 impl_display_for_op!(OpParCopy);
6032 
6033 #[repr(C)]
6034 #[derive(DstsAsSlice)]
6035 pub struct OpRegOut {
6036     pub srcs: Vec<Src>,
6037 }
6038 
6039 impl AsSlice<Src> for OpRegOut {
6040     type Attr = SrcType;
6041 
as_slice(&self) -> &[Src]6042     fn as_slice(&self) -> &[Src] {
6043         &self.srcs
6044     }
6045 
as_mut_slice(&mut self) -> &mut [Src]6046     fn as_mut_slice(&mut self) -> &mut [Src] {
6047         &mut self.srcs
6048     }
6049 
attrs(&self) -> SrcTypeList6050     fn attrs(&self) -> SrcTypeList {
6051         SrcTypeList::Uniform(SrcType::GPR)
6052     }
6053 }
6054 
6055 impl DisplayOp for OpRegOut {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6056     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6057         write!(f, "reg_out {{")?;
6058         for (i, src) in self.srcs.iter().enumerate() {
6059             if i > 0 {
6060                 write!(f, ",")?;
6061             }
6062             write!(f, " {}", src)?;
6063         }
6064         write!(f, " }}")
6065     }
6066 }
6067 impl_display_for_op!(OpRegOut);
6068 
/// Kind selector stored in `OpOut::out_type`.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum OutType {
    Emit,
    Cut,
    EmitThenCut,
}

impl fmt::Display for OutType {
    /// Writes the snake-case name of the variant, without a leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            OutType::Emit => "emit",
            OutType::Cut => "cut",
            OutType::EmitThenCut => "emit_then_cut",
        };
        f.write_str(name)
    }
}
6085 
6086 #[repr(C)]
6087 #[derive(SrcsAsSlice, DstsAsSlice)]
6088 pub struct OpOut {
6089     pub dst: Dst,
6090 
6091     #[src_type(SSA)]
6092     pub handle: Src,
6093 
6094     #[src_type(ALU)]
6095     pub stream: Src,
6096 
6097     pub out_type: OutType,
6098 }
6099 
6100 impl DisplayOp for OpOut {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6101     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6102         write!(f, "out.{} {} {}", self.out_type, self.handle, self.stream)
6103     }
6104 }
6105 impl_display_for_op!(OpOut);
6106 
6107 #[repr(C)]
6108 #[derive(SrcsAsSlice, DstsAsSlice)]
6109 pub struct OpOutFinal {
6110     #[src_type(SSA)]
6111     pub handle: Src,
6112 }
6113 
6114 impl DisplayOp for OpOutFinal {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6115     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6116         write!(f, "out.final {{ {} }}", self.handle)
6117     }
6118 }
6119 impl_display_for_op!(OpOutFinal);
6120 
6121 /// Describes an annotation on an instruction.
6122 #[repr(C)]
6123 #[derive(SrcsAsSlice, DstsAsSlice)]
6124 pub struct OpAnnotate {
6125     /// The annotation
6126     pub annotation: String,
6127 }
6128 
6129 impl DisplayOp for OpAnnotate {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6130     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6131         write!(f, "// {}", self.annotation)
6132     }
6133 }
6134 
6135 impl fmt::Display for OpAnnotate {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6136     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6137         self.fmt_op(f)
6138     }
6139 }
6140 
6141 #[derive(DisplayOp, DstsAsSlice, SrcsAsSlice, FromVariants)]
6142 pub enum Op {
6143     FAdd(OpFAdd),
6144     FFma(OpFFma),
6145     FMnMx(OpFMnMx),
6146     FMul(OpFMul),
6147     Rro(OpRro),
6148     MuFu(OpMuFu),
6149     FSet(OpFSet),
6150     FSetP(OpFSetP),
6151     FSwzAdd(OpFSwzAdd),
6152     DAdd(OpDAdd),
6153     DFma(OpDFma),
6154     DMnMx(OpDMnMx),
6155     DMul(OpDMul),
6156     DSetP(OpDSetP),
6157     HAdd2(OpHAdd2),
6158     HFma2(OpHFma2),
6159     HMul2(OpHMul2),
6160     HSet2(OpHSet2),
6161     HSetP2(OpHSetP2),
6162     HMnMx2(OpHMnMx2),
6163     BMsk(OpBMsk),
6164     BRev(OpBRev),
6165     Bfe(OpBfe),
6166     Flo(OpFlo),
6167     IAbs(OpIAbs),
6168     IAdd2(OpIAdd2),
6169     IAdd2X(OpIAdd2X),
6170     IAdd3(OpIAdd3),
6171     IAdd3X(OpIAdd3X),
6172     IDp4(OpIDp4),
6173     IMad(OpIMad),
6174     IMad64(OpIMad64),
6175     IMul(OpIMul),
6176     IMnMx(OpIMnMx),
6177     ISetP(OpISetP),
6178     Lop2(OpLop2),
6179     Lop3(OpLop3),
6180     PopC(OpPopC),
6181     Shf(OpShf),
6182     Shl(OpShl),
6183     Shr(OpShr),
6184     F2F(OpF2F),
6185     F2FP(OpF2FP),
6186     F2I(OpF2I),
6187     I2F(OpI2F),
6188     I2I(OpI2I),
6189     FRnd(OpFRnd),
6190     Mov(OpMov),
6191     Prmt(OpPrmt),
6192     Sel(OpSel),
6193     Shfl(OpShfl),
6194     PLop3(OpPLop3),
6195     PSetP(OpPSetP),
6196     R2UR(OpR2UR),
6197     Tex(OpTex),
6198     Tld(OpTld),
6199     Tld4(OpTld4),
6200     Tmml(OpTmml),
6201     Txd(OpTxd),
6202     Txq(OpTxq),
6203     SuLd(OpSuLd),
6204     SuSt(OpSuSt),
6205     SuAtom(OpSuAtom),
6206     Ld(OpLd),
6207     Ldc(OpLdc),
6208     St(OpSt),
6209     Atom(OpAtom),
6210     AL2P(OpAL2P),
6211     ALd(OpALd),
6212     ASt(OpASt),
6213     Ipa(OpIpa),
6214     LdTram(OpLdTram),
6215     CCtl(OpCCtl),
6216     MemBar(OpMemBar),
6217     BClear(OpBClear),
6218     BMov(OpBMov),
6219     Break(OpBreak),
6220     BSSy(OpBSSy),
6221     BSync(OpBSync),
6222     Bra(OpBra),
6223     SSy(OpSSy),
6224     Sync(OpSync),
6225     Brk(OpBrk),
6226     PBk(OpPBk),
6227     Cont(OpCont),
6228     PCnt(OpPCnt),
6229     Exit(OpExit),
6230     WarpSync(OpWarpSync),
6231     Bar(OpBar),
6232     CS2R(OpCS2R),
6233     Isberd(OpIsberd),
6234     Kill(OpKill),
6235     Nop(OpNop),
6236     PixLd(OpPixLd),
6237     S2R(OpS2R),
6238     Vote(OpVote),
6239     Undef(OpUndef),
6240     SrcBar(OpSrcBar),
6241     PhiSrcs(OpPhiSrcs),
6242     PhiDsts(OpPhiDsts),
6243     Copy(OpCopy),
6244     Pin(OpPin),
6245     Unpin(OpUnpin),
6246     Swap(OpSwap),
6247     ParCopy(OpParCopy),
6248     RegOut(OpRegOut),
6249     Out(OpOut),
6250     OutFinal(OpOutFinal),
6251     Annotate(OpAnnotate),
6252 }
6253 impl_display_for_op!(Op);
6254 
6255 impl Op {
is_branch(&self) -> bool6256     pub fn is_branch(&self) -> bool {
6257         match self {
6258             Op::Bra(_)
6259             | Op::Sync(_)
6260             | Op::Brk(_)
6261             | Op::Cont(_)
6262             | Op::Exit(_) => true,
6263             _ => false,
6264         }
6265     }
6266 }
6267 
6268 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
6269 pub enum PredRef {
6270     None,
6271     SSA(SSAValue),
6272     Reg(RegRef),
6273 }
6274 
6275 impl PredRef {
6276     #[allow(dead_code)]
as_reg(&self) -> Option<&RegRef>6277     pub fn as_reg(&self) -> Option<&RegRef> {
6278         match self {
6279             PredRef::Reg(r) => Some(r),
6280             _ => None,
6281         }
6282     }
6283 
6284     #[allow(dead_code)]
as_ssa(&self) -> Option<&SSAValue>6285     pub fn as_ssa(&self) -> Option<&SSAValue> {
6286         match self {
6287             PredRef::SSA(r) => Some(r),
6288             _ => None,
6289         }
6290     }
6291 
is_none(&self) -> bool6292     pub fn is_none(&self) -> bool {
6293         matches!(self, PredRef::None)
6294     }
6295 
iter_ssa(&self) -> slice::Iter<'_, SSAValue>6296     pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
6297         match self {
6298             PredRef::None | PredRef::Reg(_) => &[],
6299             PredRef::SSA(ssa) => slice::from_ref(ssa),
6300         }
6301         .iter()
6302     }
6303 
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>6304     pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
6305         match self {
6306             PredRef::None | PredRef::Reg(_) => &mut [],
6307             PredRef::SSA(ssa) => slice::from_mut(ssa),
6308         }
6309         .iter_mut()
6310     }
6311 }
6312 
6313 impl From<RegRef> for PredRef {
from(reg: RegRef) -> PredRef6314     fn from(reg: RegRef) -> PredRef {
6315         PredRef::Reg(reg)
6316     }
6317 }
6318 
6319 impl From<SSAValue> for PredRef {
from(ssa: SSAValue) -> PredRef6320     fn from(ssa: SSAValue) -> PredRef {
6321         PredRef::SSA(ssa)
6322     }
6323 }
6324 
6325 impl fmt::Display for PredRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6326     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6327         match self {
6328             PredRef::None => write!(f, "pT"),
6329             PredRef::SSA(ssa) => ssa.fmt_plain(f),
6330             PredRef::Reg(reg) => reg.fmt(f),
6331         }
6332     }
6333 }
6334 
6335 #[derive(Clone, Copy)]
6336 pub struct Pred {
6337     pub pred_ref: PredRef,
6338     pub pred_inv: bool,
6339 }
6340 
6341 impl Pred {
is_true(&self) -> bool6342     pub fn is_true(&self) -> bool {
6343         self.pred_ref.is_none() && !self.pred_inv
6344     }
6345 
is_false(&self) -> bool6346     pub fn is_false(&self) -> bool {
6347         self.pred_ref.is_none() && self.pred_inv
6348     }
6349 
iter_ssa(&self) -> slice::Iter<'_, SSAValue>6350     pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
6351         self.pred_ref.iter_ssa()
6352     }
6353 
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>6354     pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
6355         self.pred_ref.iter_ssa_mut()
6356     }
6357 
bnot(self) -> Self6358     pub fn bnot(self) -> Self {
6359         Pred {
6360             pred_ref: self.pred_ref,
6361             pred_inv: !self.pred_inv,
6362         }
6363     }
6364 }
6365 
6366 impl From<bool> for Pred {
from(b: bool) -> Self6367     fn from(b: bool) -> Self {
6368         Pred {
6369             pred_ref: PredRef::None,
6370             pred_inv: !b,
6371         }
6372     }
6373 }
6374 
6375 impl<T: Into<PredRef>> From<T> for Pred {
from(p: T) -> Self6376     fn from(p: T) -> Self {
6377         Pred {
6378             pred_ref: p.into(),
6379             pred_inv: false,
6380         }
6381     }
6382 }
6383 
6384 impl fmt::Display for Pred {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6385     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6386         if self.pred_inv {
6387             write!(f, "!")?;
6388         }
6389         self.pred_ref.fmt(f)
6390     }
6391 }
6392 
/// Lower bound for an instruction delay.  Note that `InstrDeps::new()`
/// starts at 0 and `InstrDeps::set_delay()` does not check this bound.
pub const MIN_INSTR_DELAY: u8 = 1;
/// Largest delay accepted by `InstrDeps::set_delay()`
pub const MAX_INSTR_DELAY: u8 = 15;
6395 
/// Scheduling dependency information attached to an instruction
pub struct InstrDeps {
    /// The delay value; `set_delay()` caps it at `MAX_INSTR_DELAY`
    pub delay: u8,
    /// The yield flag
    pub yld: bool,
    /// Write barrier index, or a negative value when none is assigned
    wr_bar: i8,
    /// Read barrier index, or a negative value when none is assigned
    rd_bar: i8,
    /// Bit mask of barriers to wait on; the setters keep it below `1 << 6`
    pub wt_bar_mask: u8,
    /// Bit mask of reuse slots; `add_reuse()` only sets bits 0 through 5
    pub reuse_mask: u8,
}
6404 
6405 impl InstrDeps {
new() -> InstrDeps6406     pub fn new() -> InstrDeps {
6407         InstrDeps {
6408             delay: 0,
6409             yld: false,
6410             wr_bar: -1,
6411             rd_bar: -1,
6412             wt_bar_mask: 0,
6413             reuse_mask: 0,
6414         }
6415     }
6416 
rd_bar(&self) -> Option<u8>6417     pub fn rd_bar(&self) -> Option<u8> {
6418         if self.rd_bar < 0 {
6419             None
6420         } else {
6421             Some(self.rd_bar.try_into().unwrap())
6422         }
6423     }
6424 
wr_bar(&self) -> Option<u8>6425     pub fn wr_bar(&self) -> Option<u8> {
6426         if self.wr_bar < 0 {
6427             None
6428         } else {
6429             Some(self.wr_bar.try_into().unwrap())
6430         }
6431     }
6432 
set_delay(&mut self, delay: u8)6433     pub fn set_delay(&mut self, delay: u8) {
6434         assert!(delay <= MAX_INSTR_DELAY);
6435         self.delay = delay;
6436     }
6437 
set_yield(&mut self, yld: bool)6438     pub fn set_yield(&mut self, yld: bool) {
6439         self.yld = yld;
6440     }
6441 
set_rd_bar(&mut self, idx: u8)6442     pub fn set_rd_bar(&mut self, idx: u8) {
6443         assert!(idx < 6);
6444         self.rd_bar = idx.try_into().unwrap();
6445     }
6446 
set_wr_bar(&mut self, idx: u8)6447     pub fn set_wr_bar(&mut self, idx: u8) {
6448         assert!(idx < 6);
6449         self.wr_bar = idx.try_into().unwrap();
6450     }
6451 
add_wt_bar(&mut self, idx: u8)6452     pub fn add_wt_bar(&mut self, idx: u8) {
6453         self.add_wt_bar_mask(1 << idx);
6454     }
6455 
add_wt_bar_mask(&mut self, bar_mask: u8)6456     pub fn add_wt_bar_mask(&mut self, bar_mask: u8) {
6457         assert!(bar_mask < 1 << 6);
6458         self.wt_bar_mask |= bar_mask;
6459     }
6460 
6461     #[allow(dead_code)]
add_reuse(&mut self, idx: u8)6462     pub fn add_reuse(&mut self, idx: u8) {
6463         assert!(idx < 6);
6464         self.reuse_mask |= 1_u8 << idx;
6465     }
6466 }
6467 
6468 impl fmt::Display for InstrDeps {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6469     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6470         if self.delay > 0 {
6471             write!(f, " delay={}", self.delay)?;
6472         }
6473         if self.wt_bar_mask != 0 {
6474             write!(f, " wt={:06b}", self.wt_bar_mask)?;
6475         }
6476         if self.rd_bar >= 0 {
6477             write!(f, " rd:{}", self.rd_bar)?;
6478         }
6479         if self.wr_bar >= 0 {
6480             write!(f, " wr:{}", self.wr_bar)?;
6481         }
6482         if self.reuse_mask != 0 {
6483             write!(f, " reuse={:06b}", self.reuse_mask)?;
6484         }
6485         if self.yld {
6486             write!(f, " yld")?;
6487         }
6488         Ok(())
6489     }
6490 }
6491 
6492 pub struct Instr {
6493     pub pred: Pred,
6494     pub op: Op,
6495     pub deps: InstrDeps,
6496 }
6497 
6498 impl Instr {
new(op: impl Into<Op>) -> Instr6499     pub fn new(op: impl Into<Op>) -> Instr {
6500         Instr {
6501             op: op.into(),
6502             pred: true.into(),
6503             deps: InstrDeps::new(),
6504         }
6505     }
6506 
new_boxed(op: impl Into<Op>) -> Box<Self>6507     pub fn new_boxed(op: impl Into<Op>) -> Box<Self> {
6508         Box::new(Instr::new(op))
6509     }
6510 
dsts(&self) -> &[Dst]6511     pub fn dsts(&self) -> &[Dst] {
6512         self.op.dsts_as_slice()
6513     }
6514 
dsts_mut(&mut self) -> &mut [Dst]6515     pub fn dsts_mut(&mut self) -> &mut [Dst] {
6516         self.op.dsts_as_mut_slice()
6517     }
6518 
srcs(&self) -> &[Src]6519     pub fn srcs(&self) -> &[Src] {
6520         self.op.srcs_as_slice()
6521     }
6522 
srcs_mut(&mut self) -> &mut [Src]6523     pub fn srcs_mut(&mut self) -> &mut [Src] {
6524         self.op.srcs_as_mut_slice()
6525     }
6526 
src_types(&self) -> SrcTypeList6527     pub fn src_types(&self) -> SrcTypeList {
6528         self.op.src_types()
6529     }
6530 
for_each_ssa_use(&self, mut f: impl FnMut(&SSAValue))6531     pub fn for_each_ssa_use(&self, mut f: impl FnMut(&SSAValue)) {
6532         for ssa in self.pred.iter_ssa() {
6533             f(ssa);
6534         }
6535         for src in self.srcs() {
6536             for ssa in src.iter_ssa() {
6537                 f(ssa);
6538             }
6539         }
6540     }
6541 
for_each_ssa_use_mut(&mut self, mut f: impl FnMut(&mut SSAValue))6542     pub fn for_each_ssa_use_mut(&mut self, mut f: impl FnMut(&mut SSAValue)) {
6543         for ssa in self.pred.iter_ssa_mut() {
6544             f(ssa);
6545         }
6546         for src in self.srcs_mut() {
6547             for ssa in src.iter_ssa_mut() {
6548                 f(ssa);
6549             }
6550         }
6551     }
6552 
for_each_ssa_def(&self, mut f: impl FnMut(&SSAValue))6553     pub fn for_each_ssa_def(&self, mut f: impl FnMut(&SSAValue)) {
6554         for dst in self.dsts() {
6555             for ssa in dst.iter_ssa() {
6556                 f(ssa);
6557             }
6558         }
6559     }
6560 
for_each_ssa_def_mut(&mut self, mut f: impl FnMut(&mut SSAValue))6561     pub fn for_each_ssa_def_mut(&mut self, mut f: impl FnMut(&mut SSAValue)) {
6562         for dst in self.dsts_mut() {
6563             for ssa in dst.iter_ssa_mut() {
6564                 f(ssa);
6565             }
6566         }
6567     }
6568 
is_branch(&self) -> bool6569     pub fn is_branch(&self) -> bool {
6570         self.op.is_branch()
6571     }
6572 
uses_global_mem(&self) -> bool6573     pub fn uses_global_mem(&self) -> bool {
6574         match &self.op {
6575             Op::Atom(op) => op.mem_space != MemSpace::Local,
6576             Op::Ld(op) => op.access.space != MemSpace::Local,
6577             Op::St(op) => op.access.space != MemSpace::Local,
6578             Op::SuAtom(_) | Op::SuLd(_) | Op::SuSt(_) => true,
6579             _ => false,
6580         }
6581     }
6582 
writes_global_mem(&self) -> bool6583     pub fn writes_global_mem(&self) -> bool {
6584         match &self.op {
6585             Op::Atom(op) => matches!(op.mem_space, MemSpace::Global(_)),
6586             Op::St(op) => matches!(op.access.space, MemSpace::Global(_)),
6587             Op::SuAtom(_) | Op::SuSt(_) => true,
6588             _ => false,
6589         }
6590     }
6591 
can_eliminate(&self) -> bool6592     pub fn can_eliminate(&self) -> bool {
6593         match &self.op {
6594             Op::ASt(_)
6595             | Op::SuSt(_)
6596             | Op::SuAtom(_)
6597             | Op::St(_)
6598             | Op::Atom(_)
6599             | Op::CCtl(_)
6600             | Op::MemBar(_)
6601             | Op::Kill(_)
6602             | Op::Nop(_)
6603             | Op::BSync(_)
6604             | Op::Bra(_)
6605             | Op::SSy(_)
6606             | Op::Sync(_)
6607             | Op::Brk(_)
6608             | Op::PBk(_)
6609             | Op::Cont(_)
6610             | Op::PCnt(_)
6611             | Op::Exit(_)
6612             | Op::WarpSync(_)
6613             | Op::Bar(_)
6614             | Op::RegOut(_)
6615             | Op::Out(_)
6616             | Op::OutFinal(_)
6617             | Op::Annotate(_) => false,
6618             Op::BMov(op) => !op.clear,
6619             _ => true,
6620         }
6621     }
6622 
is_uniform(&self) -> bool6623     pub fn is_uniform(&self) -> bool {
6624         match &self.op {
6625             Op::PhiDsts(_) => false,
6626             op => op.is_uniform(),
6627         }
6628     }
6629 
has_fixed_latency(&self, sm: u8) -> bool6630     pub fn has_fixed_latency(&self, sm: u8) -> bool {
6631         match &self.op {
6632             // Float ALU
6633             Op::F2FP(_)
6634             | Op::FAdd(_)
6635             | Op::FFma(_)
6636             | Op::FMnMx(_)
6637             | Op::FMul(_)
6638             | Op::FSet(_)
6639             | Op::FSetP(_)
6640             | Op::HAdd2(_)
6641             | Op::HFma2(_)
6642             | Op::HMul2(_)
6643             | Op::HSet2(_)
6644             | Op::HSetP2(_)
6645             | Op::HMnMx2(_)
6646             | Op::FSwzAdd(_) => true,
6647 
6648             // Multi-function unit is variable latency
6649             Op::Rro(_) | Op::MuFu(_) => false,
6650 
6651             // Double-precision float ALU
6652             Op::DAdd(_)
6653             | Op::DFma(_)
6654             | Op::DMnMx(_)
6655             | Op::DMul(_)
6656             | Op::DSetP(_) => false,
6657 
6658             // Integer ALU
6659             Op::BRev(_) | Op::Flo(_) | Op::PopC(_) => false,
6660             Op::IMad(_) | Op::IMul(_) => sm >= 70,
6661             Op::BMsk(_)
6662             | Op::IAbs(_)
6663             | Op::IAdd2(_)
6664             | Op::IAdd2X(_)
6665             | Op::IAdd3(_)
6666             | Op::IAdd3X(_)
6667             | Op::IDp4(_)
6668             | Op::IMad64(_)
6669             | Op::IMnMx(_)
6670             | Op::ISetP(_)
6671             | Op::Lop2(_)
6672             | Op::Lop3(_)
6673             | Op::Shf(_)
6674             | Op::Shl(_)
6675             | Op::Shr(_)
6676             | Op::Bfe(_) => true,
6677 
6678             // Conversions are variable latency?!?
6679             Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::I2I(_) | Op::FRnd(_) => {
6680                 false
6681             }
6682 
6683             // Move ops
6684             Op::Mov(_) | Op::Prmt(_) | Op::Sel(_) => true,
6685             Op::Shfl(_) => false,
6686 
6687             // Predicate ops
6688             Op::PLop3(_) | Op::PSetP(_) => true,
6689 
6690             // Uniform ops
6691             Op::R2UR(_) => false,
6692 
6693             // Texture ops
6694             Op::Tex(_)
6695             | Op::Tld(_)
6696             | Op::Tld4(_)
6697             | Op::Tmml(_)
6698             | Op::Txd(_)
6699             | Op::Txq(_) => false,
6700 
6701             // Surface ops
6702             Op::SuLd(_) | Op::SuSt(_) | Op::SuAtom(_) => false,
6703 
6704             // Memory ops
6705             Op::Ld(_)
6706             | Op::Ldc(_)
6707             | Op::St(_)
6708             | Op::Atom(_)
6709             | Op::AL2P(_)
6710             | Op::ALd(_)
6711             | Op::ASt(_)
6712             | Op::Ipa(_)
6713             | Op::CCtl(_)
6714             | Op::LdTram(_)
6715             | Op::MemBar(_) => false,
6716 
6717             // Control-flow ops
6718             Op::BClear(_) | Op::Break(_) | Op::BSSy(_) | Op::BSync(_) => true,
6719             Op::SSy(_)
6720             | Op::Sync(_)
6721             | Op::Brk(_)
6722             | Op::PBk(_)
6723             | Op::Cont(_)
6724             | Op::PCnt(_) => true,
6725             Op::Bra(_) | Op::Exit(_) => true,
6726             Op::WarpSync(_) => false,
6727 
6728             // The barrier half is HW scoreboarded by the GPR isn't.  When
6729             // moving from a GPR to a barrier, we still need a token for WaR
6730             // hazards.
6731             Op::BMov(_) => false,
6732 
6733             // Geometry ops
6734             Op::Out(_) | Op::OutFinal(_) => false,
6735 
6736             // Miscellaneous ops
6737             Op::Bar(_)
6738             | Op::CS2R(_)
6739             | Op::Isberd(_)
6740             | Op::Kill(_)
6741             | Op::PixLd(_)
6742             | Op::S2R(_) => false,
6743             Op::Nop(_) | Op::Vote(_) => true,
6744 
6745             // Virtual ops
6746             Op::Undef(_)
6747             | Op::SrcBar(_)
6748             | Op::PhiSrcs(_)
6749             | Op::PhiDsts(_)
6750             | Op::Copy(_)
6751             | Op::Pin(_)
6752             | Op::Unpin(_)
6753             | Op::Swap(_)
6754             | Op::ParCopy(_)
6755             | Op::RegOut(_)
6756             | Op::Annotate(_) => {
6757                 panic!("Not a hardware opcode")
6758             }
6759         }
6760     }
6761 
needs_yield(&self) -> bool6762     pub fn needs_yield(&self) -> bool {
6763         matches!(&self.op, Op::Bar(_) | Op::BSync(_))
6764     }
6765 
fmt_pred(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6766     fn fmt_pred(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6767         if !self.pred.is_true() {
6768             write!(f, "@{} ", self.pred)?;
6769         }
6770         Ok(())
6771     }
6772 }
6773 
6774 impl fmt::Display for Instr {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6775     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6776         write!(f, "{} {}{}", Fmt(|f| self.fmt_pred(f)), self.op, self.deps)
6777     }
6778 }
6779 
6780 impl<T: Into<Op>> From<T> for Instr {
from(value: T) -> Self6781     fn from(value: T) -> Self {
6782         Self::new(value)
6783     }
6784 }
6785 
6786 /// The result of map() done on a Box<Instr>. A Vec is only allocated if the
6787 /// mapping results in multiple instructions. This helps to reduce the amount of
6788 /// Vec's allocated in the optimization passes.
6789 pub enum MappedInstrs {
6790     None,
6791     One(Box<Instr>),
6792     Many(Vec<Box<Instr>>),
6793 }
6794 
6795 impl MappedInstrs {
push(&mut self, i: Box<Instr>)6796     pub fn push(&mut self, i: Box<Instr>) {
6797         match self {
6798             MappedInstrs::None => {
6799                 *self = MappedInstrs::One(i);
6800             }
6801             MappedInstrs::One(_) => {
6802                 *self = match std::mem::replace(self, MappedInstrs::None) {
6803                     MappedInstrs::One(o) => MappedInstrs::Many(vec![o, i]),
6804                     _ => panic!("Not a One"),
6805                 };
6806             }
6807             MappedInstrs::Many(v) => {
6808                 v.push(i);
6809             }
6810         }
6811     }
6812 
last_mut(&mut self) -> Option<&mut Box<Instr>>6813     pub fn last_mut(&mut self) -> Option<&mut Box<Instr>> {
6814         match self {
6815             MappedInstrs::None => None,
6816             MappedInstrs::One(instr) => Some(instr),
6817             MappedInstrs::Many(v) => v.last_mut(),
6818         }
6819     }
6820 }
6821 
6822 pub struct BasicBlock {
6823     pub label: Label,
6824 
6825     /// Whether or not this block is uniform
6826     ///
6827     /// If true, then all non-exited lanes in a warp which execute this block
6828     /// are guaranteed to execute it together
6829     pub uniform: bool,
6830 
6831     pub instrs: Vec<Box<Instr>>,
6832 }
6833 
6834 impl BasicBlock {
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>) -> MappedInstrs, )6835     pub fn map_instrs(
6836         &mut self,
6837         mut map: impl FnMut(Box<Instr>) -> MappedInstrs,
6838     ) {
6839         let mut instrs = Vec::new();
6840         for i in self.instrs.drain(..) {
6841             match map(i) {
6842                 MappedInstrs::None => (),
6843                 MappedInstrs::One(i) => {
6844                     instrs.push(i);
6845                 }
6846                 MappedInstrs::Many(mut v) => {
6847                     instrs.append(&mut v);
6848                 }
6849             }
6850         }
6851         self.instrs = instrs;
6852     }
6853 
phi_dsts_ip(&self) -> Option<usize>6854     pub fn phi_dsts_ip(&self) -> Option<usize> {
6855         for (ip, instr) in self.instrs.iter().enumerate() {
6856             match &instr.op {
6857                 Op::Annotate(_) => (),
6858                 Op::PhiDsts(_) => return Some(ip),
6859                 _ => break,
6860             }
6861         }
6862         None
6863     }
6864 
phi_dsts(&self) -> Option<&OpPhiDsts>6865     pub fn phi_dsts(&self) -> Option<&OpPhiDsts> {
6866         self.phi_dsts_ip().map(|ip| match &self.instrs[ip].op {
6867             Op::PhiDsts(phi) => phi,
6868             _ => panic!("Expected to find the phi"),
6869         })
6870     }
6871 
6872     #[allow(dead_code)]
phi_dsts_mut(&mut self) -> Option<&mut OpPhiDsts>6873     pub fn phi_dsts_mut(&mut self) -> Option<&mut OpPhiDsts> {
6874         self.phi_dsts_ip().map(|ip| match &mut self.instrs[ip].op {
6875             Op::PhiDsts(phi) => phi,
6876             _ => panic!("Expected to find the phi"),
6877         })
6878     }
6879 
phi_srcs_ip(&self) -> Option<usize>6880     pub fn phi_srcs_ip(&self) -> Option<usize> {
6881         for (ip, instr) in self.instrs.iter().enumerate().rev() {
6882             match &instr.op {
6883                 Op::Annotate(_) => (),
6884                 Op::PhiSrcs(_) => return Some(ip),
6885                 _ if instr.is_branch() => (),
6886                 _ => break,
6887             }
6888         }
6889         None
6890     }
phi_srcs(&self) -> Option<&OpPhiSrcs>6891     pub fn phi_srcs(&self) -> Option<&OpPhiSrcs> {
6892         self.phi_srcs_ip().map(|ip| match &self.instrs[ip].op {
6893             Op::PhiSrcs(phi) => phi,
6894             _ => panic!("Expected to find the phi"),
6895         })
6896     }
6897 
phi_srcs_mut(&mut self) -> Option<&mut OpPhiSrcs>6898     pub fn phi_srcs_mut(&mut self) -> Option<&mut OpPhiSrcs> {
6899         self.phi_srcs_ip().map(|ip| match &mut self.instrs[ip].op {
6900             Op::PhiSrcs(phi) => phi,
6901             _ => panic!("Expected to find the phi"),
6902         })
6903     }
6904 
branch(&self) -> Option<&Instr>6905     pub fn branch(&self) -> Option<&Instr> {
6906         if let Some(i) = self.instrs.last() {
6907             if i.is_branch() {
6908                 Some(i)
6909             } else {
6910                 None
6911             }
6912         } else {
6913             None
6914         }
6915     }
6916 
branch_ip(&self) -> Option<usize>6917     pub fn branch_ip(&self) -> Option<usize> {
6918         if let Some(i) = self.instrs.last() {
6919             if i.is_branch() {
6920                 Some(self.instrs.len() - 1)
6921             } else {
6922                 None
6923             }
6924         } else {
6925             None
6926         }
6927     }
6928 
6929     #[allow(dead_code)]
branch_mut(&mut self) -> Option<&mut Instr>6930     pub fn branch_mut(&mut self) -> Option<&mut Instr> {
6931         if let Some(i) = self.instrs.last_mut() {
6932             if i.is_branch() {
6933                 Some(i)
6934             } else {
6935                 None
6936             }
6937         } else {
6938             None
6939         }
6940     }
6941 
falls_through(&self) -> bool6942     pub fn falls_through(&self) -> bool {
6943         if let Some(i) = self.branch() {
6944             !i.pred.is_true()
6945         } else {
6946             true
6947         }
6948     }
6949 }
6950 
6951 pub struct Function {
6952     pub ssa_alloc: SSAValueAllocator,
6953     pub phi_alloc: PhiAllocator,
6954     pub blocks: CFG<BasicBlock>,
6955 }
6956 
6957 impl Function {
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, )6958     pub fn map_instrs(
6959         &mut self,
6960         mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
6961     ) {
6962         let alloc = &mut self.ssa_alloc;
6963         for b in &mut self.blocks {
6964             b.map_instrs(|i| map(i, alloc));
6965         }
6966     }
6967 }
6968 
6969 impl fmt::Display for Function {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6970     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6971         let mut pred_width = 0;
6972         let mut dsts_width = 0;
6973         let mut op_width = 0;
6974 
6975         let mut blocks = Vec::new();
6976         for b in &self.blocks {
6977             let mut instrs = Vec::new();
6978             for i in &b.instrs {
6979                 let mut pred = String::new();
6980                 write!(pred, "{}", Fmt(|f| i.fmt_pred(f)))?;
6981                 let mut dsts = String::new();
6982                 write!(dsts, "{}", Fmt(|f| i.op.fmt_dsts(f)))?;
6983                 let mut op = String::new();
6984                 write!(op, "{}", Fmt(|f| i.op.fmt_op(f)))?;
6985                 let mut deps = String::new();
6986                 write!(deps, "{}", i.deps)?;
6987 
6988                 pred_width = max(pred_width, pred.len());
6989                 dsts_width = max(dsts_width, dsts.len());
6990                 op_width = max(op_width, op.len());
6991                 let is_annotation = matches!(i.op, Op::Annotate(_));
6992 
6993                 instrs.push((pred, dsts, op, deps, is_annotation));
6994             }
6995             blocks.push(instrs);
6996         }
6997 
6998         for (i, mut b) in blocks.drain(..).enumerate() {
6999             let u = if self.blocks[i].uniform { ".u" } else { "" };
7000             write!(f, "block{u} {} {} [", i, self.blocks[i].label)?;
7001             for (pi, p) in self.blocks.pred_indices(i).iter().enumerate() {
7002                 if pi > 0 {
7003                     write!(f, ", ")?;
7004                 }
7005                 write!(f, "{}", p)?;
7006             }
7007             write!(f, "] -> {{\n")?;
7008 
7009             for (pred, dsts, op, deps, is_annotation) in b.drain(..) {
7010                 let eq_sym = if dsts.is_empty() { " " } else { "=" };
7011                 if is_annotation {
7012                     write!(f, "\n{}\n", op)?;
7013                 } else if deps.is_empty() {
7014                     write!(
7015                         f,
7016                         "{:<pred_width$} {:<dsts_width$} {} {}\n",
7017                         pred, dsts, eq_sym, op,
7018                     )?;
7019                 } else {
7020                     write!(
7021                         f,
7022                         "{:<pred_width$} {:<dsts_width$} {} \
7023                          {:<op_width$} //{}\n",
7024                         pred, dsts, eq_sym, op, deps,
7025                     )?;
7026                 }
7027             }
7028 
7029             write!(f, "}} -> [")?;
7030             for (si, s) in self.blocks.succ_indices(i).iter().enumerate() {
7031                 if si > 0 {
7032                     write!(f, ", ")?;
7033                 }
7034                 write!(f, "{}", s)?;
7035             }
7036             write!(f, "]\n")?;
7037         }
7038         Ok(())
7039     }
7040 }
7041 
/// Stage-specific information for a compute shader
#[derive(Debug)]
pub struct ComputeShaderInfo {
    /// Local size, three components (presumably x, y, z -- confirm with
    /// the code that fills this in)
    pub local_size: [u16; 3],
    /// Shared memory size (units not visible here)
    pub smem_size: u16,
}
7047 
/// Stage-specific information for a fragment shader
///
/// Each field is a plain flag; the meanings are given by the field names.
#[derive(Debug)]
pub struct FragmentShaderInfo {
    /// Whether the shader uses kill
    pub uses_kill: bool,
    /// Whether the shader does interlock
    pub does_interlock: bool,
    /// Whether post-depth coverage is requested
    pub post_depth_coverage: bool,
    /// Whether early fragment tests are requested
    pub early_fragment_tests: bool,
    /// Whether the shader uses sample shading
    pub uses_sample_shading: bool,
}
7056 
7057 #[derive(Debug)]
7058 pub struct GeometryShaderInfo {
7059     pub passthrough_enable: bool,
7060     pub stream_out_mask: u8,
7061     pub threads_per_input_primitive: u8,
7062     pub output_topology: OutputTopology,
7063     pub max_output_vertex_count: u16,
7064 }
7065 
7066 impl Default for GeometryShaderInfo {
default() -> Self7067     fn default() -> Self {
7068         Self {
7069             passthrough_enable: false,
7070             stream_out_mask: 0,
7071             threads_per_input_primitive: 0,
7072             output_topology: OutputTopology::LineStrip,
7073             max_output_vertex_count: 0,
7074         }
7075     }
7076 }
7077 
/// Parameters carried in [`ShaderStageInfo::TessellationInit`].
///
/// NOTE(review): the code that fills these fields in is outside this
/// section; the field descriptions below are inferred from the names.
#[derive(Debug)]
pub struct TessellationInitShaderInfo {
    /// Number of per-patch attributes.
    pub per_patch_attribute_count: u8,

    /// Number of threads launched per patch.
    pub threads_per_patch: u8,
}
7083 
7084 #[repr(u8)]
7085 #[derive(Clone, Copy, Debug)]
7086 pub enum TessellationDomain {
7087     Isoline = NAK_TS_DOMAIN_ISOLINE,
7088     Triangle = NAK_TS_DOMAIN_TRIANGLE,
7089     Quad = NAK_TS_DOMAIN_QUAD,
7090 }
7091 
7092 #[repr(u8)]
7093 #[derive(Clone, Copy, Debug)]
7094 pub enum TessellationSpacing {
7095     Integer = NAK_TS_SPACING_INTEGER,
7096     FractionalOdd = NAK_TS_SPACING_FRACT_ODD,
7097     FractionalEven = NAK_TS_SPACING_FRACT_EVEN,
7098 }
7099 
7100 #[repr(u8)]
7101 #[derive(Clone, Copy, Debug)]
7102 pub enum TessellationPrimitives {
7103     Points = NAK_TS_PRIMS_POINTS,
7104     Lines = NAK_TS_PRIMS_LINES,
7105     TrianglesCW = NAK_TS_PRIMS_TRIANGLES_CW,
7106     TrianglesCCW = NAK_TS_PRIMS_TRIANGLES_CCW,
7107 }
7108 
7109 #[derive(Debug)]
7110 pub struct TessellationShaderInfo {
7111     pub domain: TessellationDomain,
7112     pub spacing: TessellationSpacing,
7113     pub primitives: TessellationPrimitives,
7114 }
7115 
7116 #[derive(Debug)]
7117 pub enum ShaderStageInfo {
7118     Compute(ComputeShaderInfo),
7119     Vertex,
7120     Fragment(FragmentShaderInfo),
7121     Geometry(GeometryShaderInfo),
7122     TessellationInit(TessellationInitShaderInfo),
7123     Tessellation(TessellationShaderInfo),
7124 }
7125 
/// Bitmasks of system-value I/O slots.
///
/// Each bit stands for one 4-byte slot.  The default value has every bit
/// cleared.
#[derive(Debug, Default)]
pub struct SysValInfo {
    /// One bit per slot for addresses `0x000..0x080` (bit = `addr / 4`).
    pub ab: u32,

    /// One bit per slot for addresses `0x2c0..0x300`
    /// (bit = `(addr - 0x2c0) / 4`).
    pub c: u16,
}
7131 
7132 #[derive(Debug)]
7133 pub struct VtgIoInfo {
7134     pub sysvals_in: SysValInfo,
7135     pub sysvals_in_d: u8,
7136     pub sysvals_out: SysValInfo,
7137     pub sysvals_out_d: u8,
7138     pub attr_in: [u32; 4],
7139     pub attr_out: [u32; 4],
7140     pub store_req_start: u8,
7141     pub store_req_end: u8,
7142     pub clip_enable: u8,
7143     pub cull_enable: u8,
7144     pub xfb: Option<Box<nak_xfb_info>>,
7145 }
7146 
7147 impl VtgIoInfo {
mark_attrs(&mut self, addrs: Range<u16>, written: bool)7148     fn mark_attrs(&mut self, addrs: Range<u16>, written: bool) {
7149         let sysvals = if written {
7150             &mut self.sysvals_out
7151         } else {
7152             &mut self.sysvals_in
7153         };
7154 
7155         let sysvals_d = if written {
7156             &mut self.sysvals_out_d
7157         } else {
7158             &mut self.sysvals_in_d
7159         };
7160 
7161         let mut attr = BitMutView::new(if written {
7162             &mut self.attr_out
7163         } else {
7164             &mut self.attr_in
7165         });
7166 
7167         let mut addrs = addrs;
7168         addrs.start &= !3;
7169         for addr in addrs.step_by(4) {
7170             if addr < 0x080 {
7171                 sysvals.ab |= 1 << (addr / 4);
7172             } else if addr < 0x280 {
7173                 let attr_idx = (addr - 0x080) as usize / 4;
7174                 attr.set_bit(attr_idx, true);
7175             } else if addr < 0x2c0 {
7176                 panic!("FF color I/O not supported");
7177             } else if addr < 0x300 {
7178                 sysvals.c |= 1 << ((addr - 0x2c0) / 4);
7179             } else if addr >= 0x3a0 && addr < 0x3c0 {
7180                 *sysvals_d |= 1 << ((addr - 0x3a0) / 4);
7181             }
7182         }
7183     }
7184 
mark_attrs_read(&mut self, addrs: Range<u16>)7185     pub fn mark_attrs_read(&mut self, addrs: Range<u16>) {
7186         self.mark_attrs(addrs, false);
7187     }
7188 
mark_attrs_written(&mut self, addrs: Range<u16>)7189     pub fn mark_attrs_written(&mut self, addrs: Range<u16>) {
7190         self.mark_attrs(addrs, true);
7191     }
7192 
attr_written(&self, addr: u16) -> bool7193     pub fn attr_written(&self, addr: u16) -> bool {
7194         if addr < 0x080 {
7195             self.sysvals_out.ab & (1 << (addr / 4)) != 0
7196         } else if addr < 0x280 {
7197             let attr_idx = (addr - 0x080) as usize / 4;
7198             BitView::new(&self.attr_out).get_bit(attr_idx)
7199         } else if addr < 0x2c0 {
7200             panic!("FF color I/O not supported");
7201         } else if addr < 0x300 {
7202             self.sysvals_out.c & (1 << ((addr - 0x2c0) / 4)) != 0
7203         } else if addr >= 0x3a0 && addr < 0x3c0 {
7204             self.sysvals_out_d & (1 << ((addr - 0x3a0) / 4)) != 0
7205         } else {
7206             panic!("Unknown I/O address");
7207         }
7208     }
7209 
mark_store_req(&mut self, addrs: Range<u16>)7210     pub fn mark_store_req(&mut self, addrs: Range<u16>) {
7211         let start = (addrs.start / 4).try_into().unwrap();
7212         let end = ((addrs.end - 1) / 4).try_into().unwrap();
7213         self.store_req_start = min(self.store_req_start, start);
7214         self.store_req_end = max(self.store_req_end, end);
7215     }
7216 }
7217 
7218 #[derive(Debug)]
7219 pub struct FragmentIoInfo {
7220     pub sysvals_in: SysValInfo,
7221     pub sysvals_in_d: [PixelImap; 8],
7222     pub attr_in: [PixelImap; 128],
7223     pub barycentric_attr_in: [u32; 4],
7224 
7225     pub reads_sample_mask: bool,
7226     pub writes_color: u32,
7227     pub writes_sample_mask: bool,
7228     pub writes_depth: bool,
7229 }
7230 
7231 impl FragmentIoInfo {
mark_attr_read(&mut self, addr: u16, interp: PixelImap)7232     pub fn mark_attr_read(&mut self, addr: u16, interp: PixelImap) {
7233         if addr < 0x080 {
7234             self.sysvals_in.ab |= 1 << (addr / 4);
7235         } else if addr < 0x280 {
7236             let attr_idx = (addr - 0x080) as usize / 4;
7237             self.attr_in[attr_idx] = interp;
7238         } else if addr < 0x2c0 {
7239             panic!("FF color I/O not supported");
7240         } else if addr < 0x300 {
7241             self.sysvals_in.c |= 1 << ((addr - 0x2c0) / 4);
7242         } else if addr >= 0x3a0 && addr < 0x3c0 {
7243             let attr_idx = (addr - 0x3a0) as usize / 4;
7244             self.sysvals_in_d[attr_idx] = interp;
7245         }
7246     }
7247 
mark_barycentric_attr_in(&mut self, addr: u16)7248     pub fn mark_barycentric_attr_in(&mut self, addr: u16) {
7249         assert!(addr >= 0x80 && addr < 0x280);
7250 
7251         let mut attr = BitMutView::new(&mut self.barycentric_attr_in);
7252 
7253         let attr_idx = (addr - 0x080) as usize / 4;
7254         attr.set_bit(attr_idx, true);
7255     }
7256 }
7257 
7258 #[derive(Debug)]
7259 pub enum ShaderIoInfo {
7260     None,
7261     Vtg(VtgIoInfo),
7262     Fragment(FragmentIoInfo),
7263 }
7264 
7265 #[derive(Debug)]
7266 pub struct ShaderInfo {
7267     pub num_gprs: u8,
7268     pub num_control_barriers: u8,
7269     pub num_instrs: u32,
7270     pub slm_size: u32,
7271     pub max_crs_depth: u32,
7272     pub uses_global_mem: bool,
7273     pub writes_global_mem: bool,
7274     pub uses_fp64: bool,
7275     pub stage: ShaderStageInfo,
7276     pub io: ShaderIoInfo,
7277 }
7278 
7279 pub trait ShaderModel {
sm(&self) -> u87280     fn sm(&self) -> u8;
num_regs(&self, file: RegFile) -> u327281     fn num_regs(&self, file: RegFile) -> u32;
crs_size(&self, max_crs_depth: u32) -> u327282     fn crs_size(&self, max_crs_depth: u32) -> u32;
7283 
op_can_be_uniform(&self, op: &Op) -> bool7284     fn op_can_be_uniform(&self, op: &Op) -> bool;
7285 
legalize_op(&self, b: &mut LegalizeBuilder, op: &mut Op)7286     fn legalize_op(&self, b: &mut LegalizeBuilder, op: &mut Op);
encode_shader(&self, s: &Shader<'_>) -> Vec<u32>7287     fn encode_shader(&self, s: &Shader<'_>) -> Vec<u32>;
7288 }
7289 
7290 pub struct Shader<'a> {
7291     pub sm: &'a dyn ShaderModel,
7292     pub info: ShaderInfo,
7293     pub functions: Vec<Function>,
7294 }
7295 
7296 impl Shader<'_> {
for_each_instr(&self, f: &mut impl FnMut(&Instr))7297     pub fn for_each_instr(&self, f: &mut impl FnMut(&Instr)) {
7298         for func in &self.functions {
7299             for b in &func.blocks {
7300                 for i in &b.instrs {
7301                     f(i);
7302                 }
7303             }
7304         }
7305     }
7306 
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, )7307     pub fn map_instrs(
7308         &mut self,
7309         mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
7310     ) {
7311         for f in &mut self.functions {
7312             f.map_instrs(&mut map);
7313         }
7314     }
7315 
7316     /// Remove all annotations, presumably before encoding the shader.
remove_annotations(&mut self)7317     pub fn remove_annotations(&mut self) {
7318         self.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs {
7319             if matches!(instr.op, Op::Annotate(_)) {
7320                 MappedInstrs::None
7321             } else {
7322                 MappedInstrs::One(instr)
7323             }
7324         })
7325     }
7326 
gather_info(&mut self)7327     pub fn gather_info(&mut self) {
7328         let mut num_instrs = 0;
7329         let mut uses_global_mem = false;
7330         let mut writes_global_mem = false;
7331 
7332         self.for_each_instr(&mut |instr| {
7333             num_instrs += 1;
7334 
7335             if !uses_global_mem {
7336                 uses_global_mem = instr.uses_global_mem();
7337             }
7338 
7339             if !writes_global_mem {
7340                 writes_global_mem = instr.writes_global_mem();
7341             }
7342         });
7343 
7344         self.info.num_instrs = num_instrs;
7345         self.info.uses_global_mem = uses_global_mem;
7346         self.info.writes_global_mem = writes_global_mem;
7347     }
7348 }
7349 
7350 impl fmt::Display for Shader<'_> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result7351     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
7352         for func in &self.functions {
7353             write!(f, "{}", func)?;
7354         }
7355         Ok(())
7356     }
7357 }
7358