1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3
4 extern crate bitview;
5 extern crate nak_ir_proc;
6
7 use bitview::{BitMutView, BitView};
8 use nak_bindings::*;
9
10 pub use crate::builder::{Builder, InstrBuilder, SSABuilder, SSAInstrBuilder};
11 use crate::legalize::LegalizeBuilder;
12 use crate::sph::{OutputTopology, PixelImap};
13 use compiler::as_slice::*;
14 use compiler::cfg::CFG;
15 use nak_ir_proc::*;
16 use std::cmp::{max, min};
17 use std::fmt;
18 use std::fmt::Write;
19 use std::iter::Zip;
20 use std::ops::{BitAnd, BitOr, Deref, DerefMut, Index, IndexMut, Not, Range};
21 use std::slice;
22
/// A label, identified by a numeric index
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct Label {
    idx: u32,
}

impl fmt::Display for Label {
    /// Writes the label as `L` followed by its index, e.g. `L3`
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Label { idx } = self;
        write!(f, "L{idx}")
    }
}
33
34 pub struct LabelAllocator {
35 count: u32,
36 }
37
38 impl LabelAllocator {
new() -> LabelAllocator39 pub fn new() -> LabelAllocator {
40 LabelAllocator { count: 0 }
41 }
42
alloc(&mut self) -> Label43 pub fn alloc(&mut self) -> Label {
44 let idx = self.count;
45 self.count += 1;
46 Label { idx: idx }
47 }
48 }
49
/// Represents a register file
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum RegFile {
    /// The general-purpose register file
    ///
    /// General-purpose registers are 32 bits per SIMT channel.
    GPR = 0,

    /// The general-purpose uniform register file
    ///
    /// General-purpose uniform registers are 32 bits each and uniform across a
    /// wave.
    UGPR = 1,

    /// The predicate register file
    ///
    /// Predicate registers are 1 bit per SIMT channel.
    Pred = 2,

    /// The uniform predicate register file
    ///
    /// Uniform predicate registers are 1 bit and uniform across a wave.
    UPred = 3,

    /// The carry flag register file
    ///
    /// Only one carry flag register exists in hardware, but representing it as
    /// a reg file simplifies dependency tracking.
    ///
    /// This is used only on SM50.
    Carry = 4,

    /// The barrier register file
    ///
    /// This is a lane mask used for wave re-convergence instructions.
    Bar = 5,

    /// The memory register file
    ///
    /// This is a virtual register file for things which will get spilled to
    /// local memory. Each memory location is 32 bits per SIMT channel.
    Mem = 6,
}

/// The number of variants in [`RegFile`]
const NUM_REG_FILES: usize = 7;

impl RegFile {
    /// Returns true if the register file is uniform across a wave
    pub fn is_uniform(&self) -> bool {
        match *self {
            Self::UGPR | Self::UPred => true,
            Self::GPR | Self::Pred | Self::Carry | Self::Bar | Self::Mem => {
                false
            }
        }
    }

    /// Returns the uniform counterpart of this register file
    ///
    /// Files which are already uniform map to themselves. `Carry`, `Bar` and
    /// `Mem` have no uniform counterpart and yield `None`.
    pub fn to_uniform(&self) -> Option<RegFile> {
        match *self {
            Self::Carry | Self::Bar | Self::Mem => None,
            Self::Pred | Self::UPred => Some(Self::UPred),
            Self::GPR | Self::UGPR => Some(Self::UGPR),
        }
    }

    /// Returns the non-uniform counterpart of this register file
    ///
    /// Files without a uniform variant map to themselves.
    pub fn to_warp(&self) -> RegFile {
        match *self {
            file @ (Self::Carry | Self::Bar | Self::Mem) => file,
            Self::Pred | Self::UPred => Self::Pred,
            Self::GPR | Self::UGPR => Self::GPR,
        }
    }

    /// Returns true if the register file is general-purpose
    pub fn is_gpr(&self) -> bool {
        match *self {
            Self::Pred | Self::UPred | Self::Carry | Self::Bar | Self::Mem => {
                false
            }
            Self::GPR | Self::UGPR => true,
        }
    }

    /// Returns true if the register file is a predicate register file
    pub fn is_predicate(&self) -> bool {
        match *self {
            Self::Pred | Self::UPred => true,
            Self::GPR | Self::UGPR | Self::Carry | Self::Bar | Self::Mem => {
                false
            }
        }
    }

    /// Returns the short prefix used when printing values in this file
    fn fmt_prefix(&self) -> &'static str {
        match *self {
            Self::Mem => "m",
            Self::Bar => "b",
            Self::Carry => "c",
            Self::UPred => "up",
            Self::Pred => "p",
            Self::UGPR => "ur",
            Self::GPR => "r",
        }
    }
}
162
163 impl fmt::Display for RegFile {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result164 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165 match self {
166 RegFile::GPR => write!(f, "GPR"),
167 RegFile::UGPR => write!(f, "UGPR"),
168 RegFile::Pred => write!(f, "Pred"),
169 RegFile::UPred => write!(f, "UPred"),
170 RegFile::Carry => write!(f, "Carry"),
171 RegFile::Bar => write!(f, "Bar"),
172 RegFile::Mem => write!(f, "Mem"),
173 }
174 }
175 }
176
177 impl From<RegFile> for u8 {
from(value: RegFile) -> u8178 fn from(value: RegFile) -> u8 {
179 value as u8
180 }
181 }
182
183 impl TryFrom<u32> for RegFile {
184 type Error = &'static str;
185
try_from(value: u32) -> Result<Self, Self::Error>186 fn try_from(value: u32) -> Result<Self, Self::Error> {
187 match value {
188 0 => Ok(RegFile::GPR),
189 1 => Ok(RegFile::UGPR),
190 2 => Ok(RegFile::Pred),
191 3 => Ok(RegFile::UPred),
192 4 => Ok(RegFile::Carry),
193 5 => Ok(RegFile::Bar),
194 6 => Ok(RegFile::Mem),
195 _ => Err("Invalid register file number"),
196 }
197 }
198 }
199
200 impl TryFrom<u16> for RegFile {
201 type Error = &'static str;
202
try_from(value: u16) -> Result<Self, Self::Error>203 fn try_from(value: u16) -> Result<Self, Self::Error> {
204 RegFile::try_from(u32::from(value))
205 }
206 }
207
208 impl TryFrom<u8> for RegFile {
209 type Error = &'static str;
210
try_from(value: u8) -> Result<Self, Self::Error>211 fn try_from(value: u8) -> Result<Self, Self::Error> {
212 RegFile::try_from(u32::from(value))
213 }
214 }
215
216 /// A trait for things which have an associated register file
217 pub trait HasRegFile {
file(&self) -> RegFile218 fn file(&self) -> RegFile;
219
is_uniform(&self) -> bool220 fn is_uniform(&self) -> bool {
221 self.file().is_uniform()
222 }
223
is_gpr(&self) -> bool224 fn is_gpr(&self) -> bool {
225 self.file().is_gpr()
226 }
227
is_predicate(&self) -> bool228 fn is_predicate(&self) -> bool {
229 self.file().is_predicate()
230 }
231 }
232
233 #[derive(Clone)]
234 pub struct RegFileSet {
235 bits: u8,
236 }
237
238 impl RegFileSet {
new() -> RegFileSet239 pub fn new() -> RegFileSet {
240 RegFileSet { bits: 0 }
241 }
242
len(&self) -> usize243 pub fn len(&self) -> usize {
244 self.bits.count_ones() as usize
245 }
246
contains(&self, file: RegFile) -> bool247 pub fn contains(&self, file: RegFile) -> bool {
248 self.bits & (1 << (file as u8)) != 0
249 }
250
insert(&mut self, file: RegFile) -> bool251 pub fn insert(&mut self, file: RegFile) -> bool {
252 let has_file = self.contains(file);
253 self.bits |= 1 << (file as u8);
254 !has_file
255 }
256
is_empty(&self) -> bool257 pub fn is_empty(&self) -> bool {
258 self.bits == 0
259 }
260
261 #[allow(dead_code)]
iter(&self) -> RegFileSet262 pub fn iter(&self) -> RegFileSet {
263 self.clone()
264 }
265
remove(&mut self, file: RegFile) -> bool266 pub fn remove(&mut self, file: RegFile) -> bool {
267 let has_file = self.contains(file);
268 self.bits &= !(1 << (file as u8));
269 has_file
270 }
271 }
272
273 impl FromIterator<RegFile> for RegFileSet {
from_iter<T: IntoIterator<Item = RegFile>>(iter: T) -> Self274 fn from_iter<T: IntoIterator<Item = RegFile>>(iter: T) -> Self {
275 let mut set = RegFileSet::new();
276 for file in iter {
277 set.insert(file);
278 }
279 set
280 }
281 }
282
283 impl Iterator for RegFileSet {
284 type Item = RegFile;
285
next(&mut self) -> Option<RegFile>286 fn next(&mut self) -> Option<RegFile> {
287 if self.is_empty() {
288 None
289 } else {
290 let file = self.bits.trailing_zeros().try_into().unwrap();
291 self.remove(file);
292 Some(file)
293 }
294 }
295
size_hint(&self) -> (usize, Option<usize>)296 fn size_hint(&self) -> (usize, Option<usize>) {
297 let len = self.len();
298 (len, Some(len))
299 }
300 }
301
302 #[derive(Clone, Copy)]
303 pub struct PerRegFile<T> {
304 per_file: [T; NUM_REG_FILES],
305 }
306
307 impl<T> PerRegFile<T> {
new_with<F: Fn(RegFile) -> T>(f: F) -> Self308 pub fn new_with<F: Fn(RegFile) -> T>(f: F) -> Self {
309 PerRegFile {
310 per_file: [
311 f(RegFile::GPR),
312 f(RegFile::UGPR),
313 f(RegFile::Pred),
314 f(RegFile::UPred),
315 f(RegFile::Carry),
316 f(RegFile::Bar),
317 f(RegFile::Mem),
318 ],
319 }
320 }
321
values(&self) -> slice::Iter<T>322 pub fn values(&self) -> slice::Iter<T> {
323 self.per_file.iter()
324 }
325
values_mut(&mut self) -> slice::IterMut<T>326 pub fn values_mut(&mut self) -> slice::IterMut<T> {
327 self.per_file.iter_mut()
328 }
329 }
330
331 impl<T: Default> Default for PerRegFile<T> {
default() -> Self332 fn default() -> Self {
333 PerRegFile {
334 per_file: Default::default(),
335 }
336 }
337 }
338
339 impl<T> Index<RegFile> for PerRegFile<T> {
340 type Output = T;
341
index(&self, idx: RegFile) -> &T342 fn index(&self, idx: RegFile) -> &T {
343 &self.per_file[idx as u8 as usize]
344 }
345 }
346
347 impl<T> IndexMut<RegFile> for PerRegFile<T> {
index_mut(&mut self, idx: RegFile) -> &mut T348 fn index_mut(&mut self, idx: RegFile) -> &mut T {
349 &mut self.per_file[idx as u8 as usize]
350 }
351 }
352
353 /// An SSA value
354 ///
355 /// Each SSA in NAK represents a single 32-bit or 1-bit (if a predicate) value
356 /// which must either be spilled to memory or allocated space in the specified
357 /// register file. Whenever more data is required such as a 64-bit memory
358 /// address, double-precision float, or a vec4 texture result, multiple SSA
359 /// values are used.
360 ///
361 /// Each SSA value logically contains two things: an index and a register file.
362 /// It is required that each index refers to a unique SSA value, regardless of
363 /// register file. This way the index can be used to index tightly-packed data
364 /// structures such as bitsets without having to determine separate ranges for
365 /// each register file.
366 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
367 pub struct SSAValue {
368 packed: u32,
369 }
370
371 impl SSAValue {
372 /// A special SSA value which is always invalid
373 pub const NONE: Self = SSAValue { packed: 0 };
374
375 /// Returns an SSA value with the given register file and index
new(file: RegFile, idx: u32) -> SSAValue376 pub fn new(file: RegFile, idx: u32) -> SSAValue {
377 assert!(idx > 0 && idx < (1 << 29) - 2);
378 let mut packed = idx;
379 assert!(u8::from(file) < 8);
380 packed |= u32::from(u8::from(file)) << 29;
381 SSAValue { packed: packed }
382 }
383
384 /// Returns the index of this SSA value
idx(&self) -> u32385 pub fn idx(&self) -> u32 {
386 self.packed & 0x1fffffff
387 }
388
389 /// Returns true if this SSA value is equal to SSAValue::NONE
is_none(&self) -> bool390 pub fn is_none(&self) -> bool {
391 self.packed == 0
392 }
393
fmt_plain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result394 fn fmt_plain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
395 write!(f, "{}{}", self.file().fmt_prefix(), self.idx())
396 }
397 }
398
399 impl HasRegFile for SSAValue {
400 /// Returns the register file of this SSA value
file(&self) -> RegFile401 fn file(&self) -> RegFile {
402 RegFile::try_from(self.packed >> 29).unwrap()
403 }
404 }
405
406 impl fmt::Display for SSAValue {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result407 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
408 write!(f, "%")?;
409 self.fmt_plain(f)
410 }
411 }
412
413 /// A reference to one or more SSA values
414 ///
415 /// Because each SSA value represents a single 1 or 32-bit scalar, we need a way
416 /// to reference multiple SSA values for instructions which read or write
417 /// multiple registers in the same source. When the register allocator runs,
418 /// all the SSA values in a given SSA ref will be placed in consecutive
419 /// registers, with the base register aligned to the number of values, aligned
420 /// to the next power of two.
421 ///
422 /// An SSA reference can reference between 1 and 4 SSA values. It dereferences
423 /// to a slice for easy access to individual SSA values. The structure is
424 /// designed so that is always 16B, regardless of how many SSA values are
425 /// referenced so it's easy and fairly cheap to copy around and embed in other
426 /// structures.
427 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
428 pub struct SSARef {
429 v: [SSAValue; 4],
430 }
431
432 impl SSARef {
433 /// Returns a new SSA reference
434 #[inline]
new(comps: &[SSAValue]) -> SSARef435 fn new(comps: &[SSAValue]) -> SSARef {
436 assert!(comps.len() > 0 && comps.len() <= 4);
437 let mut r = SSARef {
438 v: [SSAValue::NONE; 4],
439 };
440 for i in 0..comps.len() {
441 r.v[i] = comps[i];
442 }
443 if comps.len() < 4 {
444 r.v[3].packed = (comps.len() as u32).wrapping_neg();
445 }
446 r
447 }
448
449 /// Returns the number of components in this SSA reference
comps(&self) -> u8450 pub fn comps(&self) -> u8 {
451 if self.v[3].packed >= u32::MAX - 2 {
452 self.v[3].packed.wrapping_neg() as u8
453 } else {
454 4
455 }
456 }
457
file(&self) -> Option<RegFile>458 pub fn file(&self) -> Option<RegFile> {
459 let comps = usize::from(self.comps());
460 let file = self.v[0].file();
461 for i in 1..comps {
462 if self.v[i].file() != file {
463 return None;
464 }
465 }
466 Some(file)
467 }
468
is_uniform(&self) -> bool469 pub fn is_uniform(&self) -> bool {
470 for ssa in &self[..] {
471 if !ssa.is_uniform() {
472 return false;
473 }
474 }
475 true
476 }
477
is_gpr(&self) -> bool478 pub fn is_gpr(&self) -> bool {
479 for ssa in &self[..] {
480 if !ssa.is_gpr() {
481 return false;
482 }
483 }
484 true
485 }
486
is_predicate(&self) -> bool487 pub fn is_predicate(&self) -> bool {
488 if self.v[0].is_predicate() {
489 true
490 } else {
491 for ssa in &self[..] {
492 debug_assert!(!ssa.is_predicate());
493 }
494 false
495 }
496 }
497 }
498
499 impl Deref for SSARef {
500 type Target = [SSAValue];
501
deref(&self) -> &[SSAValue]502 fn deref(&self) -> &[SSAValue] {
503 let comps = usize::from(self.comps());
504 &self.v[..comps]
505 }
506 }
507
508 impl DerefMut for SSARef {
deref_mut(&mut self) -> &mut [SSAValue]509 fn deref_mut(&mut self) -> &mut [SSAValue] {
510 let comps = usize::from(self.comps());
511 &mut self.v[..comps]
512 }
513 }
514
515 impl TryFrom<&[SSAValue]> for SSARef {
516 type Error = &'static str;
517
try_from(comps: &[SSAValue]) -> Result<Self, Self::Error>518 fn try_from(comps: &[SSAValue]) -> Result<Self, Self::Error> {
519 if comps.len() == 0 {
520 Err("Empty vector")
521 } else if comps.len() > 4 {
522 Err("Too many vector components")
523 } else {
524 Ok(SSARef::new(comps))
525 }
526 }
527 }
528
529 impl TryFrom<Vec<SSAValue>> for SSARef {
530 type Error = &'static str;
531
try_from(comps: Vec<SSAValue>) -> Result<Self, Self::Error>532 fn try_from(comps: Vec<SSAValue>) -> Result<Self, Self::Error> {
533 SSARef::try_from(&comps[..])
534 }
535 }
536
537 macro_rules! impl_ssa_ref_from_arr {
538 ($n: expr) => {
539 impl From<[SSAValue; $n]> for SSARef {
540 fn from(comps: [SSAValue; $n]) -> Self {
541 SSARef::new(&comps[..])
542 }
543 }
544 };
545 }
546 impl_ssa_ref_from_arr!(1);
547 impl_ssa_ref_from_arr!(2);
548 impl_ssa_ref_from_arr!(3);
549 impl_ssa_ref_from_arr!(4);
550
551 impl From<SSAValue> for SSARef {
from(val: SSAValue) -> Self552 fn from(val: SSAValue) -> Self {
553 [val].into()
554 }
555 }
556
557 impl fmt::Display for SSARef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result558 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
559 if self.comps() == 1 {
560 write!(f, "{}", self[0])
561 } else {
562 write!(f, "{{")?;
563 for (i, v) in self.iter().enumerate() {
564 if i != 0 {
565 write!(f, " ")?;
566 }
567 write!(f, "{}", v)?;
568 }
569 write!(f, "}}")
570 }
571 }
572 }
573
574 pub struct SSAValueAllocator {
575 count: u32,
576 }
577
578 impl SSAValueAllocator {
new() -> SSAValueAllocator579 pub fn new() -> SSAValueAllocator {
580 SSAValueAllocator { count: 0 }
581 }
582
max_idx(&self) -> u32583 pub fn max_idx(&self) -> u32 {
584 self.count
585 }
586
alloc(&mut self, file: RegFile) -> SSAValue587 pub fn alloc(&mut self, file: RegFile) -> SSAValue {
588 self.count += 1;
589 SSAValue::new(file, self.count)
590 }
591
alloc_vec(&mut self, file: RegFile, comps: u8) -> SSARef592 pub fn alloc_vec(&mut self, file: RegFile, comps: u8) -> SSARef {
593 assert!(comps >= 1 && comps <= 4);
594 let mut vec = [SSAValue::NONE; 4];
595 for c in 0..comps {
596 vec[usize::from(c)] = self.alloc(file);
597 }
598 vec[0..usize::from(comps)].try_into().unwrap()
599 }
600 }
601
602 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
603 pub struct RegRef {
604 packed: u32,
605 }
606
607 impl RegRef {
608 pub const MAX_IDX: u32 = (1 << 26) - 1;
609
zero_idx(file: RegFile) -> u32610 fn zero_idx(file: RegFile) -> u32 {
611 match file {
612 RegFile::GPR => 255,
613 RegFile::UGPR => 63,
614 RegFile::Pred => 7,
615 RegFile::UPred => 7,
616 RegFile::Carry => panic!("Carry has no zero index"),
617 RegFile::Bar => panic!("Bar has no zero index"),
618 RegFile::Mem => panic!("Mem has no zero index"),
619 }
620 }
621
new(file: RegFile, base_idx: u32, comps: u8) -> RegRef622 pub fn new(file: RegFile, base_idx: u32, comps: u8) -> RegRef {
623 assert!(base_idx <= Self::MAX_IDX);
624 let mut packed = base_idx;
625 assert!(comps > 0 && comps <= 8);
626 packed |= u32::from(comps - 1) << 26;
627 assert!(u8::from(file) < 8);
628 packed |= u32::from(u8::from(file)) << 29;
629 RegRef { packed: packed }
630 }
631
zero(file: RegFile, comps: u8) -> RegRef632 pub fn zero(file: RegFile, comps: u8) -> RegRef {
633 RegRef::new(file, RegRef::zero_idx(file), comps)
634 }
635
base_idx(&self) -> u32636 pub fn base_idx(&self) -> u32 {
637 self.packed & 0x03ffffff
638 }
639
idx_range(&self) -> Range<u32>640 pub fn idx_range(&self) -> Range<u32> {
641 let start = self.base_idx();
642 let end = start + u32::from(self.comps());
643 start..end
644 }
645
comps(&self) -> u8646 pub fn comps(&self) -> u8 {
647 (((self.packed >> 26) & 0x7) + 1).try_into().unwrap()
648 }
649
comp(&self, c: u8) -> RegRef650 pub fn comp(&self, c: u8) -> RegRef {
651 assert!(c < self.comps());
652 RegRef::new(self.file(), self.base_idx() + u32::from(c), 1)
653 }
654 }
655
656 impl HasRegFile for RegRef {
file(&self) -> RegFile657 fn file(&self) -> RegFile {
658 ((self.packed >> 29) & 0x7).try_into().unwrap()
659 }
660 }
661
662 impl fmt::Display for RegRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result663 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
664 write!(f, "{}{}", self.file().fmt_prefix(), self.base_idx())?;
665 if self.comps() > 1 {
666 write!(f, "..{}", self.idx_range().end)?;
667 }
668 Ok(())
669 }
670 }
671
672 #[derive(Clone, Copy)]
673 pub enum Dst {
674 None,
675 SSA(SSARef),
676 Reg(RegRef),
677 }
678
679 impl Dst {
is_none(&self) -> bool680 pub fn is_none(&self) -> bool {
681 matches!(self, Dst::None)
682 }
683
as_reg(&self) -> Option<&RegRef>684 pub fn as_reg(&self) -> Option<&RegRef> {
685 match self {
686 Dst::Reg(r) => Some(r),
687 _ => None,
688 }
689 }
690
as_ssa(&self) -> Option<&SSARef>691 pub fn as_ssa(&self) -> Option<&SSARef> {
692 match self {
693 Dst::SSA(r) => Some(r),
694 _ => None,
695 }
696 }
697
iter_ssa(&self) -> slice::Iter<'_, SSAValue>698 pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
699 match self {
700 Dst::None | Dst::Reg(_) => &[],
701 Dst::SSA(ssa) => ssa.deref(),
702 }
703 .iter()
704 }
705
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>706 pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
707 match self {
708 Dst::None | Dst::Reg(_) => &mut [],
709 Dst::SSA(ssa) => ssa.deref_mut(),
710 }
711 .iter_mut()
712 }
713 }
714
715 impl From<RegRef> for Dst {
from(reg: RegRef) -> Dst716 fn from(reg: RegRef) -> Dst {
717 Dst::Reg(reg)
718 }
719 }
720
721 impl<T: Into<SSARef>> From<T> for Dst {
from(ssa: T) -> Dst722 fn from(ssa: T) -> Dst {
723 Dst::SSA(ssa.into())
724 }
725 }
726
727 impl fmt::Display for Dst {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result728 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
729 match self {
730 Dst::None => write!(f, "null")?,
731 Dst::SSA(v) => v.fmt(f)?,
732 Dst::Reg(r) => r.fmt(f)?,
733 }
734 Ok(())
735 }
736 }
737
738 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
739 pub enum CBuf {
740 Binding(u8),
741
742 #[allow(dead_code)]
743 BindlessSSA(SSARef),
744
745 #[allow(dead_code)]
746 BindlessUGPR(RegRef),
747 }
748
749 impl fmt::Display for CBuf {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result750 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
751 match self {
752 CBuf::Binding(idx) => write!(f, "c[{:#x}]", idx),
753 CBuf::BindlessSSA(v) => write!(f, "cx[{}]", v),
754 CBuf::BindlessUGPR(r) => write!(f, "cx[{}]", r),
755 }
756 }
757 }
758
759 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
760 pub struct CBufRef {
761 pub buf: CBuf,
762 pub offset: u16,
763 }
764
765 impl CBufRef {
offset(self, offset: u16) -> CBufRef766 pub fn offset(self, offset: u16) -> CBufRef {
767 CBufRef {
768 buf: self.buf,
769 offset: self.offset + offset,
770 }
771 }
772 }
773
774 impl fmt::Display for CBufRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result775 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
776 write!(f, "{}[{:#x}]", self.buf, self.offset)
777 }
778 }
779
780 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
781 pub enum SrcRef {
782 Zero,
783 True,
784 False,
785 Imm32(u32),
786 CBuf(CBufRef),
787 SSA(SSARef),
788 Reg(RegRef),
789 }
790
791 impl SrcRef {
792 #[allow(dead_code)]
is_alu(&self) -> bool793 pub fn is_alu(&self) -> bool {
794 match self {
795 SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => true,
796 SrcRef::SSA(ssa) => ssa.is_gpr(),
797 SrcRef::Reg(reg) => reg.is_gpr(),
798 SrcRef::True | SrcRef::False => false,
799 }
800 }
801
is_predicate(&self) -> bool802 pub fn is_predicate(&self) -> bool {
803 match self {
804 SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => false,
805 SrcRef::True | SrcRef::False => true,
806 SrcRef::SSA(ssa) => ssa.is_predicate(),
807 SrcRef::Reg(reg) => reg.is_predicate(),
808 }
809 }
810
is_carry(&self) -> bool811 pub fn is_carry(&self) -> bool {
812 match self {
813 SrcRef::SSA(ssa) => ssa.file() == Some(RegFile::Carry),
814 SrcRef::Reg(reg) => reg.file() == RegFile::Carry,
815 _ => false,
816 }
817 }
818
819 #[allow(dead_code)]
is_barrier(&self) -> bool820 pub fn is_barrier(&self) -> bool {
821 match self {
822 SrcRef::SSA(ssa) => ssa.file() == Some(RegFile::Bar),
823 SrcRef::Reg(reg) => reg.file() == RegFile::Bar,
824 _ => false,
825 }
826 }
827
as_reg(&self) -> Option<&RegRef>828 pub fn as_reg(&self) -> Option<&RegRef> {
829 match self {
830 SrcRef::Reg(r) => Some(r),
831 _ => None,
832 }
833 }
834
as_ssa(&self) -> Option<&SSARef>835 pub fn as_ssa(&self) -> Option<&SSARef> {
836 match self {
837 SrcRef::SSA(r) => Some(r),
838 _ => None,
839 }
840 }
841
as_u32(&self) -> Option<u32>842 pub fn as_u32(&self) -> Option<u32> {
843 match self {
844 SrcRef::Zero => Some(0),
845 SrcRef::Imm32(u) => Some(*u),
846 SrcRef::CBuf(_) | SrcRef::SSA(_) | SrcRef::Reg(_) => None,
847 _ => panic!("Invalid integer source"),
848 }
849 }
850
get_reg(&self) -> Option<&RegRef>851 pub fn get_reg(&self) -> Option<&RegRef> {
852 match self {
853 SrcRef::Zero
854 | SrcRef::True
855 | SrcRef::False
856 | SrcRef::Imm32(_)
857 | SrcRef::SSA(_) => None,
858 SrcRef::CBuf(cb) => match &cb.buf {
859 CBuf::Binding(_) | CBuf::BindlessSSA(_) => None,
860 CBuf::BindlessUGPR(reg) => Some(reg),
861 },
862 SrcRef::Reg(reg) => Some(reg),
863 }
864 }
865
iter_ssa(&self) -> slice::Iter<'_, SSAValue>866 pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
867 match self {
868 SrcRef::Zero
869 | SrcRef::True
870 | SrcRef::False
871 | SrcRef::Imm32(_)
872 | SrcRef::Reg(_) => &[],
873 SrcRef::CBuf(cb) => match &cb.buf {
874 CBuf::Binding(_) | CBuf::BindlessUGPR(_) => &[],
875 CBuf::BindlessSSA(ssa) => ssa.deref(),
876 },
877 SrcRef::SSA(ssa) => ssa.deref(),
878 }
879 .iter()
880 }
881
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>882 pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
883 match self {
884 SrcRef::Zero
885 | SrcRef::True
886 | SrcRef::False
887 | SrcRef::Imm32(_)
888 | SrcRef::Reg(_) => &mut [],
889 SrcRef::CBuf(cb) => match &mut cb.buf {
890 CBuf::Binding(_) | CBuf::BindlessUGPR(_) => &mut [],
891 CBuf::BindlessSSA(ssa) => ssa.deref_mut(),
892 },
893 SrcRef::SSA(ssa) => ssa.deref_mut(),
894 }
895 .iter_mut()
896 }
897 }
898
899 impl From<bool> for SrcRef {
from(b: bool) -> SrcRef900 fn from(b: bool) -> SrcRef {
901 if b {
902 SrcRef::True
903 } else {
904 SrcRef::False
905 }
906 }
907 }
908
909 impl From<u32> for SrcRef {
from(u: u32) -> SrcRef910 fn from(u: u32) -> SrcRef {
911 if u == 0 {
912 SrcRef::Zero
913 } else {
914 SrcRef::Imm32(u)
915 }
916 }
917 }
918
919 impl From<f32> for SrcRef {
from(f: f32) -> SrcRef920 fn from(f: f32) -> SrcRef {
921 f.to_bits().into()
922 }
923 }
924
925 impl From<PrmtSel> for SrcRef {
from(sel: PrmtSel) -> SrcRef926 fn from(sel: PrmtSel) -> SrcRef {
927 u32::from(sel.0).into()
928 }
929 }
930
931 impl From<CBufRef> for SrcRef {
from(cb: CBufRef) -> SrcRef932 fn from(cb: CBufRef) -> SrcRef {
933 SrcRef::CBuf(cb)
934 }
935 }
936
937 impl From<RegRef> for SrcRef {
from(reg: RegRef) -> SrcRef938 fn from(reg: RegRef) -> SrcRef {
939 SrcRef::Reg(reg)
940 }
941 }
942
943 impl<T: Into<SSARef>> From<T> for SrcRef {
from(ssa: T) -> SrcRef944 fn from(ssa: T) -> SrcRef {
945 SrcRef::SSA(ssa.into())
946 }
947 }
948
949 impl fmt::Display for SrcRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result950 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
951 match self {
952 SrcRef::Zero => write!(f, "rZ"),
953 SrcRef::True => write!(f, "pT"),
954 SrcRef::False => write!(f, "pF"),
955 SrcRef::Imm32(u) => write!(f, "{:#x}", u),
956 SrcRef::CBuf(c) => c.fmt(f),
957 SrcRef::SSA(v) => v.fmt(f),
958 SrcRef::Reg(r) => r.fmt(f),
959 }
960 }
961 }
962
/// A modifier applied to a source
///
/// `FAbs`, `FNeg` and `FNegAbs` are float modifiers, `INeg` is the integer
/// modifier and `BNot` is the bitwise/boolean modifier. `None` belongs to
/// all three families.
#[derive(Clone, Copy, PartialEq)]
pub enum SrcMod {
    None,
    FAbs,
    FNeg,
    FNegAbs,
    INeg,
    BNot,
}

impl SrcMod {
    /// Returns true if this is `SrcMod::None`
    pub fn is_none(&self) -> bool {
        *self == SrcMod::None
    }

    /// Returns true if this float modifier includes an absolute value
    ///
    /// # Panics
    ///
    /// Panics for `INeg` and `BNot`.
    pub fn has_fabs(&self) -> bool {
        match self {
            SrcMod::FAbs | SrcMod::FNegAbs => true,
            SrcMod::None | SrcMod::FNeg => false,
            SrcMod::INeg | SrcMod::BNot => panic!("Not a float modifier"),
        }
    }

    /// Returns true if this float modifier includes a negation
    ///
    /// # Panics
    ///
    /// Panics for `INeg` and `BNot`.
    pub fn has_fneg(&self) -> bool {
        match self {
            SrcMod::FNeg | SrcMod::FNegAbs => true,
            SrcMod::None | SrcMod::FAbs => false,
            SrcMod::INeg | SrcMod::BNot => panic!("Not a float modifier"),
        }
    }

    /// Returns true if this is `INeg` and false if it is `None`
    ///
    /// # Panics
    ///
    /// Panics for every other modifier.
    pub fn is_ineg(&self) -> bool {
        match self {
            SrcMod::INeg => true,
            SrcMod::None => false,
            SrcMod::FAbs | SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::BNot => {
                panic!("Not an integer modifier")
            }
        }
    }

    /// Returns true if this is `BNot` and false if it is `None`
    ///
    /// # Panics
    ///
    /// Panics for every other modifier.
    pub fn is_bnot(&self) -> bool {
        match self {
            SrcMod::BNot => true,
            SrcMod::None => false,
            SrcMod::FAbs | SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::INeg => {
                panic!("Not a bitwise modifier")
            }
        }
    }

    /// Returns the result of applying an absolute value on top of `self`
    ///
    /// An outer absolute value discards any earlier negation, so the result
    /// is always `FAbs`.
    ///
    /// # Panics
    ///
    /// Panics for `INeg` and `BNot`.
    pub fn fabs(self) -> SrcMod {
        match self {
            SrcMod::INeg | SrcMod::BNot => {
                panic!("Not a float source modifier")
            }
            SrcMod::None | SrcMod::FAbs | SrcMod::FNeg | SrcMod::FNegAbs => {
                SrcMod::FAbs
            }
        }
    }

    /// Returns the result of applying a float negation on top of `self`
    ///
    /// # Panics
    ///
    /// Panics for `INeg` and `BNot`.
    pub fn fneg(self) -> SrcMod {
        match self {
            SrcMod::FNeg => SrcMod::None,
            SrcMod::None => SrcMod::FNeg,
            SrcMod::FNegAbs => SrcMod::FAbs,
            SrcMod::FAbs => SrcMod::FNegAbs,
            SrcMod::INeg | SrcMod::BNot => {
                panic!("Not a float source modifier")
            }
        }
    }

    /// Returns the result of applying an integer negation on top of `self`
    ///
    /// # Panics
    ///
    /// Panics unless `self` is `None` or `INeg`.
    pub fn ineg(self) -> SrcMod {
        match self {
            SrcMod::INeg => SrcMod::None,
            SrcMod::None => SrcMod::INeg,
            SrcMod::FAbs | SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::BNot => {
                panic!("Not an integer source modifier")
            }
        }
    }

    /// Returns the result of applying a bitwise not on top of `self`
    ///
    /// # Panics
    ///
    /// Panics unless `self` is `None` or `BNot`.
    pub fn bnot(self) -> SrcMod {
        match self {
            SrcMod::BNot => SrcMod::None,
            SrcMod::None => SrcMod::BNot,
            SrcMod::FAbs | SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::INeg => {
                panic!("Not a boolean source modifier")
            }
        }
    }

    /// Returns the result of applying `other` on top of `self`
    pub fn modify(self, other: SrcMod) -> SrcMod {
        match other {
            SrcMod::None => self,
            SrcMod::BNot => self.bnot(),
            SrcMod::INeg => self.ineg(),
            SrcMod::FNeg => self.fneg(),
            SrcMod::FAbs => self.fabs(),
            SrcMod::FNegAbs => self.fabs().fneg(),
        }
    }
}
1056
/// A swizzle applied to a source: none, `.xx`, or `.yy`
#[derive(Clone, Copy, PartialEq)]
#[allow(dead_code)]
pub enum SrcSwizzle {
    None,
    Xx,
    Yy,
}

impl SrcSwizzle {
    /// Returns true if this is `SrcSwizzle::None`
    pub fn is_none(&self) -> bool {
        *self == SrcSwizzle::None
    }
}
1070
1071 impl fmt::Display for SrcSwizzle {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1072 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1073 match self {
1074 SrcSwizzle::None => Ok(()),
1075 SrcSwizzle::Xx => write!(f, ".xx"),
1076 SrcSwizzle::Yy => write!(f, ".yy"),
1077 }
1078 }
1079 }
1080
/// A source: what is referenced, plus a modifier and a swizzle applied to it
#[derive(Clone, Copy, PartialEq)]
pub struct Src {
    /// What this source refers to
    pub src_ref: SrcRef,
    /// The modifier applied to the referenced value
    pub src_mod: SrcMod,
    /// The swizzle applied to the referenced value
    pub src_swizzle: SrcSwizzle,
}
1087
1088 impl Src {
new_zero() -> Src1089 pub fn new_zero() -> Src {
1090 SrcRef::Zero.into()
1091 }
1092
new_imm_u32(u: u32) -> Src1093 pub fn new_imm_u32(u: u32) -> Src {
1094 u.into()
1095 }
1096
new_imm_bool(b: bool) -> Src1097 pub fn new_imm_bool(b: bool) -> Src {
1098 b.into()
1099 }
1100
fabs(&self) -> Src1101 pub fn fabs(&self) -> Src {
1102 Src {
1103 src_ref: self.src_ref,
1104 src_mod: self.src_mod.fabs(),
1105 src_swizzle: self.src_swizzle,
1106 }
1107 }
1108
fneg(&self) -> Src1109 pub fn fneg(&self) -> Src {
1110 Src {
1111 src_ref: self.src_ref,
1112 src_mod: self.src_mod.fneg(),
1113 src_swizzle: self.src_swizzle,
1114 }
1115 }
1116
ineg(&self) -> Src1117 pub fn ineg(&self) -> Src {
1118 Src {
1119 src_ref: self.src_ref,
1120 src_mod: self.src_mod.ineg(),
1121 src_swizzle: self.src_swizzle,
1122 }
1123 }
1124
bnot(&self) -> Src1125 pub fn bnot(&self) -> Src {
1126 Src {
1127 src_ref: self.src_ref,
1128 src_mod: self.src_mod.bnot(),
1129 src_swizzle: self.src_swizzle,
1130 }
1131 }
1132
fold_imm(&self, src_type: SrcType) -> Src1133 pub fn fold_imm(&self, src_type: SrcType) -> Src {
1134 let SrcRef::Imm32(mut u) = self.src_ref else {
1135 return *self;
1136 };
1137
1138 if self.src_mod.is_none() && self.src_swizzle.is_none() {
1139 return *self;
1140 }
1141
1142 assert!(src_type == SrcType::F16v2 || self.src_swizzle.is_none());
1143
1144 // INeg affects more than just the 32 bits of input data so it can't be
1145 // trivially folded. In fact, -imm may not be representable as a 32-bit
1146 // immediate at all.
1147 if src_type == SrcType::I32 {
1148 return *self;
1149 }
1150
1151 u = match src_type {
1152 SrcType::F16 => {
1153 let low = u & 0xFFFF;
1154
1155 match self.src_mod {
1156 SrcMod::None => low,
1157 SrcMod::FAbs => low & !(1_u32 << 15),
1158 SrcMod::FNeg => low ^ (1_u32 << 15),
1159 SrcMod::FNegAbs => low | (1_u32 << 15),
1160 _ => panic!("Not a float source modifier"),
1161 }
1162 }
1163 SrcType::F16v2 => {
1164 let u = match self.src_swizzle {
1165 SrcSwizzle::None => u,
1166 SrcSwizzle::Xx => (u << 16) | (u & 0xffff),
1167 SrcSwizzle::Yy => (u & 0xffff0000) | (u >> 16),
1168 };
1169
1170 match self.src_mod {
1171 SrcMod::None => u,
1172 SrcMod::FAbs => u & 0x7FFF7FFF,
1173 SrcMod::FNeg => u ^ 0x80008000,
1174 SrcMod::FNegAbs => u | 0x80008000,
1175 _ => panic!("Not a float source modifier"),
1176 }
1177 }
1178 SrcType::F32 | SrcType::F64 => match self.src_mod {
1179 SrcMod::None => u,
1180 SrcMod::FAbs => u & !(1_u32 << 31),
1181 SrcMod::FNeg => u ^ (1_u32 << 31),
1182 SrcMod::FNegAbs => u | (1_u32 << 31),
1183 _ => panic!("Not a float source modifier"),
1184 },
1185 SrcType::I32 => match self.src_mod {
1186 SrcMod::None => u,
1187 SrcMod::INeg => -(u as i32) as u32,
1188 _ => panic!("Not an integer source modifier"),
1189 },
1190 SrcType::B32 => match self.src_mod {
1191 SrcMod::None => u,
1192 SrcMod::BNot => !u,
1193 _ => panic!("Not a bitwise source modifier"),
1194 },
1195 _ => {
1196 assert!(self.src_mod.is_none());
1197 u
1198 }
1199 };
1200
1201 Src {
1202 src_mod: SrcMod::None,
1203 src_ref: u.into(),
1204 src_swizzle: SrcSwizzle::None,
1205 }
1206 }
1207
as_ssa(&self) -> Option<&SSARef>1208 pub fn as_ssa(&self) -> Option<&SSARef> {
1209 if self.src_mod.is_none() {
1210 self.src_ref.as_ssa()
1211 } else {
1212 None
1213 }
1214 }
1215
as_bool(&self) -> Option<bool>1216 pub fn as_bool(&self) -> Option<bool> {
1217 match self.src_ref {
1218 SrcRef::True => Some(!self.src_mod.is_bnot()),
1219 SrcRef::False => Some(self.src_mod.is_bnot()),
1220 SrcRef::SSA(vec) => {
1221 assert!(vec.is_predicate() && vec.comps() == 1);
1222 None
1223 }
1224 SrcRef::Reg(reg) => {
1225 assert!(reg.is_predicate() && reg.comps() == 1);
1226 None
1227 }
1228 _ => panic!("Not a boolean source"),
1229 }
1230 }
1231
as_u32(&self) -> Option<u32>1232 pub fn as_u32(&self) -> Option<u32> {
1233 if self.src_mod.is_none() {
1234 self.src_ref.as_u32()
1235 } else {
1236 None
1237 }
1238 }
1239
as_imm_not_i20(&self) -> Option<u32>1240 pub fn as_imm_not_i20(&self) -> Option<u32> {
1241 match self.src_ref {
1242 SrcRef::Imm32(i) => {
1243 assert!(self.src_mod.is_none());
1244 let top = i & 0xfff80000;
1245 if top == 0 || top == 0xfff80000 {
1246 None
1247 } else {
1248 Some(i)
1249 }
1250 }
1251 _ => None,
1252 }
1253 }
1254
as_imm_not_f20(&self) -> Option<u32>1255 pub fn as_imm_not_f20(&self) -> Option<u32> {
1256 match self.src_ref {
1257 SrcRef::Imm32(i) => {
1258 assert!(self.src_mod.is_none());
1259 if (i & 0xfff) == 0 {
1260 None
1261 } else {
1262 Some(i)
1263 }
1264 }
1265 _ => None,
1266 }
1267 }
1268
/// Iterates over the SSA values of the underlying reference; forwards to
/// `SrcRef::iter_ssa`.
pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
    self.src_ref.iter_ssa()
}
1272
/// Mutably iterates over the SSA values of the underlying reference;
/// forwards to `SrcRef::iter_ssa_mut`.
pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
    self.src_ref.iter_ssa_mut()
}
1276
is_uniform(&self) -> bool1277 pub fn is_uniform(&self) -> bool {
1278 match self.src_ref {
1279 SrcRef::Zero
1280 | SrcRef::True
1281 | SrcRef::False
1282 | SrcRef::Imm32(_)
1283 | SrcRef::CBuf(_) => true,
1284 SrcRef::SSA(ssa) => ssa.is_uniform(),
1285 SrcRef::Reg(reg) => reg.is_uniform(),
1286 }
1287 }
1288
/// Reports whether the underlying reference is a predicate; forwards to
/// `SrcRef::is_predicate`.
pub fn is_predicate(&self) -> bool {
    self.src_ref.is_predicate()
}
1292
is_zero(&self) -> bool1293 pub fn is_zero(&self) -> bool {
1294 match self.src_ref {
1295 SrcRef::Zero | SrcRef::Imm32(0) => match self.src_mod {
1296 SrcMod::None | SrcMod::FAbs => true,
1297 SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::BNot => false,
1298 // INeg affects more than just the 32 bits of input data so -0
1299 // may not be equivalent to 0.
1300 SrcMod::INeg => false,
1301 },
1302 _ => false,
1303 }
1304 }
1305
is_fneg_zero(&self, src_type: SrcType) -> bool1306 pub fn is_fneg_zero(&self, src_type: SrcType) -> bool {
1307 match self.fold_imm(src_type).src_ref {
1308 SrcRef::Imm32(0x00008000) => src_type == SrcType::F16,
1309 SrcRef::Imm32(0x80000000) => src_type == SrcType::F32,
1310 SrcRef::Imm32(0x80008000) => src_type == SrcType::F16v2,
1311 _ => false,
1312 }
1313 }
1314
1315 #[allow(dead_code)]
supports_type(&self, src_type: &SrcType) -> bool1316 pub fn supports_type(&self, src_type: &SrcType) -> bool {
1317 match src_type {
1318 SrcType::SSA => {
1319 if !self.src_mod.is_none() {
1320 return false;
1321 }
1322
1323 matches!(self.src_ref, SrcRef::SSA(_) | SrcRef::Reg(_))
1324 }
1325 SrcType::GPR => {
1326 if !self.src_mod.is_none() {
1327 return false;
1328 }
1329
1330 matches!(
1331 self.src_ref,
1332 SrcRef::Zero | SrcRef::SSA(_) | SrcRef::Reg(_)
1333 )
1334 }
1335 SrcType::ALU => self.src_mod.is_none() && self.src_ref.is_alu(),
1336 SrcType::F16 | SrcType::F32 | SrcType::F64 | SrcType::F16v2 => {
1337 match self.src_mod {
1338 SrcMod::None
1339 | SrcMod::FAbs
1340 | SrcMod::FNeg
1341 | SrcMod::FNegAbs => (),
1342 _ => return false,
1343 }
1344
1345 self.src_ref.is_alu()
1346 }
1347 SrcType::I32 => {
1348 match self.src_mod {
1349 SrcMod::None | SrcMod::INeg => (),
1350 _ => return false,
1351 }
1352
1353 self.src_ref.is_alu()
1354 }
1355 SrcType::B32 => {
1356 match self.src_mod {
1357 SrcMod::None | SrcMod::BNot => (),
1358 _ => return false,
1359 }
1360
1361 self.src_ref.is_alu()
1362 }
1363 SrcType::Pred => {
1364 match self.src_mod {
1365 SrcMod::None | SrcMod::BNot => (),
1366 _ => return false,
1367 }
1368
1369 self.src_ref.is_predicate()
1370 }
1371 SrcType::Carry => self.src_mod.is_none() && self.src_ref.is_carry(),
1372 SrcType::Bar => self.src_mod.is_none() && self.src_ref.is_barrier(),
1373 }
1374 }
1375 }
1376
1377 impl<T: Into<SrcRef>> From<T> for Src {
from(value: T) -> Src1378 fn from(value: T) -> Src {
1379 Src {
1380 src_ref: value.into(),
1381 src_mod: SrcMod::None,
1382 src_swizzle: SrcSwizzle::None,
1383 }
1384 }
1385 }
1386
1387 impl fmt::Display for Src {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1388 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1389 match self.src_mod {
1390 SrcMod::None => write!(f, "{}{}", self.src_ref, self.src_swizzle),
1391 SrcMod::FAbs => write!(f, "|{}{}|", self.src_ref, self.src_swizzle),
1392 SrcMod::FNeg => write!(f, "-{}{}", self.src_ref, self.src_swizzle),
1393 SrcMod::FNegAbs => {
1394 write!(f, "-|{}{}|", self.src_ref, self.src_swizzle)
1395 }
1396 SrcMod::INeg => write!(f, "-{}{}", self.src_ref, self.src_swizzle),
1397 SrcMod::BNot => write!(f, "!{}{}", self.src_ref, self.src_swizzle),
1398 }
1399 }
1400 }
1401
/// Classifies how an instruction source is interpreted.
///
/// `Src::supports_type` spells out which modifiers and references each
/// variant accepts; the per-variant notes below summarize that function.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum SrcType {
    /// Unmodified SSA or register reference.
    SSA,
    /// Unmodified zero, SSA or register reference.
    GPR,
    /// Unmodified reference for which `SrcRef::is_alu` holds.
    ALU,
    /// ALU reference that may carry a float modifier.
    F16,
    /// ALU reference that may carry a float modifier.
    F16v2,
    /// ALU reference that may carry a float modifier.
    F32,
    /// ALU reference that may carry a float modifier.
    F64,
    /// ALU reference that may carry `INeg`.
    I32,
    /// ALU reference that may carry `BNot`.
    B32,
    /// Predicate reference that may carry `BNot`.
    Pred,
    /// Unmodified reference for which `SrcRef::is_carry` holds.
    Carry,
    /// Unmodified reference for which `SrcRef::is_barrier` holds.
    Bar,
}
1418
impl SrcType {
    /// Source type used when none is specified.
    // NOTE(review): presumably consumed by the `nak_ir_proc` derive macros;
    // confirm against the macro crate.
    const DEFAULT: SrcType = SrcType::GPR;
}
1422
/// Attribute list of `SrcType`s, as returned by `SrcsAsSlice::src_types`.
pub type SrcTypeList = AttrList<SrcType>;
1424
/// Source-flavoured view of `AsSlice<Src, Attr = SrcType>`.
pub trait SrcsAsSlice: AsSlice<Src, Attr = SrcType> {
    /// Returns the sources as a shared slice.
    fn srcs_as_slice(&self) -> &[Src] {
        self.as_slice()
    }

    /// Returns the sources as a mutable slice.
    fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
        self.as_mut_slice()
    }

    /// Returns the type attributes of the sources.
    fn src_types(&self) -> SrcTypeList {
        self.attrs()
    }

    /// Returns the index of `src` within `srcs_as_slice()`.
    ///
    /// `src` is identified by address, not by value.
    ///
    /// # Panics
    ///
    /// Panics if `src` does not point into this op's source slice.
    fn src_idx(&self, src: &Src) -> usize {
        let r = self.srcs_as_slice().as_ptr_range();
        assert!(r.contains(&(src as *const Src)));
        // SAFETY: the assert above guarantees `src` lies inside the slice
        // whose start is `r.start`, so both pointers belong to the same
        // allocation and the offset is non-negative.
        unsafe { (src as *const Src).offset_from(r.start) as usize }
    }
}
1444
/// Every `AsSlice<Src, Attr = SrcType>` type gets `SrcsAsSlice` for free.
impl<T: AsSlice<Src, Attr = SrcType>> SrcsAsSlice for T {}
1446
all_dsts_uniform(dsts: &[Dst]) -> bool1447 fn all_dsts_uniform(dsts: &[Dst]) -> bool {
1448 let mut uniform = None;
1449 for dst in dsts {
1450 let dst_uniform = match dst {
1451 Dst::None => continue,
1452 Dst::Reg(r) => r.is_uniform(),
1453 Dst::SSA(r) => r.file().unwrap().is_uniform(),
1454 };
1455 assert!(uniform == None || uniform == Some(dst_uniform));
1456 uniform = Some(dst_uniform);
1457 }
1458 uniform == Some(true)
1459 }
1460
/// Type tag attached to each instruction destination; it is the `Attr` of
/// `DstsAsSlice`.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum DstType {
    Pred,
    GPR,
    F16,
    F16v2,
    F32,
    F64,
    Carry,
    Bar,
    Vec,
}
1474
impl DstType {
    /// Destination type used when none is specified.
    // NOTE(review): presumably consumed by the `nak_ir_proc` derive macros;
    // confirm against the macro crate.
    const DEFAULT: DstType = DstType::Vec;
}
1478
/// Attribute list of `DstType`s, as returned by `DstsAsSlice::dst_types`.
pub type DstTypeList = AttrList<DstType>;
1480
/// Destination-flavoured view of `AsSlice<Dst, Attr = DstType>`.
pub trait DstsAsSlice: AsSlice<Dst, Attr = DstType> {
    /// Returns the destinations as a shared slice.
    fn dsts_as_slice(&self) -> &[Dst] {
        self.as_slice()
    }

    /// Returns the destinations as a mutable slice.
    fn dsts_as_mut_slice(&mut self) -> &mut [Dst] {
        self.as_mut_slice()
    }

    /// Returns the type attributes of the destinations.
    fn dst_types(&self) -> DstTypeList {
        self.attrs()
    }

    /// Returns the index of `dst` within `dsts_as_slice()`.
    ///
    /// `dst` is identified by address, not by value.
    ///
    /// # Panics
    ///
    /// Panics if `dst` does not point into this op's destination slice.
    fn dst_idx(&self, dst: &Dst) -> usize {
        let r = self.dsts_as_slice().as_ptr_range();
        assert!(r.contains(&(dst as *const Dst)));
        // SAFETY: the assert above guarantees `dst` lies inside the slice
        // whose start is `r.start`, so both pointers belong to the same
        // allocation and the offset is non-negative.
        unsafe { (dst as *const Dst).offset_from(r.start) as usize }
    }
}
1500
/// Every `AsSlice<Dst, Attr = DstType>` type gets `DstsAsSlice` for free.
impl<T: AsSlice<Dst, Attr = DstType>> DstsAsSlice for T {}
1502
/// Query for whether a value is uniform.
pub trait IsUniform {
    /// Returns `true` if `self` is uniform.
    fn is_uniform(&self) -> bool;
}
1506
/// Anything exposing destinations is uniform exactly when
/// `all_dsts_uniform` says its destinations are.
impl<T: DstsAsSlice> IsUniform for T {
    fn is_uniform(&self) -> bool {
        all_dsts_uniform(self.dsts_as_slice())
    }
}
1512
fmt_dst_slice(f: &mut fmt::Formatter<'_>, dsts: &[Dst]) -> fmt::Result1513 fn fmt_dst_slice(f: &mut fmt::Formatter<'_>, dsts: &[Dst]) -> fmt::Result {
1514 if dsts.is_empty() {
1515 return Ok(());
1516 }
1517
1518 // Figure out the last non-null dst
1519 //
1520 // Note: By making the top inclusive and starting at 0, we ensure that
1521 // at least one dst always gets printed.
1522 let mut last_dst = 0;
1523 for (i, dst) in dsts.iter().enumerate() {
1524 if !dst.is_none() {
1525 last_dst = i;
1526 }
1527 }
1528
1529 for i in 0..(last_dst + 1) {
1530 if i != 0 {
1531 write!(f, " ")?;
1532 }
1533 write!(f, "{}", &dsts[i])?;
1534 }
1535 Ok(())
1536 }
1537
/// A constant value flowing through constant folding.
#[allow(dead_code)]
#[derive(Clone, Copy)]
pub enum FoldData {
    /// Predicate value.
    Pred(bool),
    /// Carry value.
    Carry(bool),
    /// Single 32-bit value.
    U32(u32),
    /// Two 32-bit values; `OpFoldData` stores an `f64` here as
    /// `[low, high]` words.
    Vec2([u32; 2]),
}
1546
/// Inputs and outputs for folding a single op.
pub struct OpFoldData<'a> {
    /// Destination values, indexed like the op's destination slice.
    pub dsts: &'a mut [FoldData],
    /// Source values, indexed like the op's source slice.
    pub srcs: &'a [FoldData],
}
1551
1552 impl OpFoldData<'_> {
get_pred_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool1553 pub fn get_pred_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool {
1554 let i = op.src_idx(src);
1555 let b = match src.src_ref {
1556 SrcRef::Zero | SrcRef::Imm32(_) => panic!("Expected a predicate"),
1557 SrcRef::True => true,
1558 SrcRef::False => false,
1559 _ => {
1560 if let FoldData::Pred(b) = self.srcs[i] {
1561 b
1562 } else {
1563 panic!("FoldData is not a predicate");
1564 }
1565 }
1566 };
1567 b ^ src.src_mod.is_bnot()
1568 }
1569
get_u32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u321570 pub fn get_u32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u32 {
1571 let i = op.src_idx(src);
1572 match src.src_ref {
1573 SrcRef::Zero => 0,
1574 SrcRef::Imm32(imm) => imm,
1575 SrcRef::True | SrcRef::False => panic!("Unexpected predicate"),
1576 _ => {
1577 if let FoldData::U32(u) = self.srcs[i] {
1578 u
1579 } else {
1580 panic!("FoldData is not a U32");
1581 }
1582 }
1583 }
1584 }
1585
get_u32_bnot_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u321586 pub fn get_u32_bnot_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u32 {
1587 let x = self.get_u32_src(op, src);
1588 if src.src_mod.is_bnot() {
1589 !x
1590 } else {
1591 x
1592 }
1593 }
1594
get_carry_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool1595 pub fn get_carry_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool {
1596 assert!(src.src_ref.as_ssa().is_some());
1597 let i = op.src_idx(src);
1598 if let FoldData::Carry(b) = self.srcs[i] {
1599 b
1600 } else {
1601 panic!("FoldData is not a predicate");
1602 }
1603 }
1604
1605 #[allow(dead_code)]
get_f32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f321606 pub fn get_f32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f32 {
1607 f32::from_bits(self.get_u32_src(op, src))
1608 }
1609
1610 #[allow(dead_code)]
get_f64_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f641611 pub fn get_f64_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f64 {
1612 let i = op.src_idx(src);
1613 match src.src_ref {
1614 SrcRef::Zero => 0.0,
1615 SrcRef::Imm32(imm) => f64::from_bits(u64::from(imm) << 32),
1616 SrcRef::True | SrcRef::False => panic!("Unexpected predicate"),
1617 _ => {
1618 if let FoldData::Vec2(v) = self.srcs[i] {
1619 let u = u64::from(v[0]) | (u64::from(v[1]) << 32);
1620 f64::from_bits(u)
1621 } else {
1622 panic!("FoldData is not a U32");
1623 }
1624 }
1625 }
1626 }
1627
set_pred_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool)1628 pub fn set_pred_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool) {
1629 self.dsts[op.dst_idx(dst)] = FoldData::Pred(b);
1630 }
1631
set_carry_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool)1632 pub fn set_carry_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool) {
1633 self.dsts[op.dst_idx(dst)] = FoldData::Carry(b);
1634 }
1635
set_u32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, u: u32)1636 pub fn set_u32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, u: u32) {
1637 self.dsts[op.dst_idx(dst)] = FoldData::U32(u);
1638 }
1639
1640 #[allow(dead_code)]
set_f32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f32)1641 pub fn set_f32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f32) {
1642 self.set_u32_dst(op, dst, f.to_bits());
1643 }
1644
1645 #[allow(dead_code)]
set_f64_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f64)1646 pub fn set_f64_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f64) {
1647 let u = f.to_bits();
1648 let v = [u as u32, (u >> 32) as u32];
1649 self.dsts[op.dst_idx(dst)] = FoldData::Vec2(v);
1650 }
1651 }
1652
/// An op that can be constant-folded.
pub trait Foldable: SrcsAsSlice + DstsAsSlice {
    /// Folds this op for shader model `sm`, reading sources from and
    /// writing destinations to `f`.
    fn fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>);
}
1656
/// Formatting hooks for an op: its destinations and the operation itself.
pub trait DisplayOp: DstsAsSlice {
    /// Writes the destinations; defaults to `fmt_dst_slice`.
    fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt_dst_slice(f, self.dsts_as_slice())
    }

    /// Writes the operation itself.
    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result;
}
1664
// Hack struct so we can re-use Formatters.  Shamelessly stolen from
// https://users.rust-lang.org/t/reusing-an-fmt-formatter/8531/4
//
// Wraps a closure so that it can be used wherever a `fmt::Display` value is
// expected.
pub struct Fmt<F>(pub F)
where
    F: Fn(&mut fmt::Formatter) -> fmt::Result;
1670
/// Displaying a `Fmt` simply invokes the wrapped closure on the formatter.
impl<F> fmt::Display for Fmt<F>
where
    F: Fn(&mut fmt::Formatter) -> fmt::Result,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        (self.0)(f)
    }
}
1679
/// Implements `fmt::Display` for the op type `$op`.
///
/// The destinations are first rendered into a temporary `String` through
/// `fmt_dsts`.  If that text is non-empty it is written followed by
/// `" = "`; then `fmt_op` writes the operation itself.
macro_rules! impl_display_for_op {
    ($op: ident) => {
        impl fmt::Display for $op {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let mut s = String::new();
                write!(s, "{}", Fmt(|f| self.fmt_dsts(f)))?;
                if !s.is_empty() {
                    write!(f, "{} = ", s)?;
                }
                self.fmt_op(f)
            }
        }
    };
}
1694
/// Boolean operation used to combine two predicates (see
/// `PredSetOp::eval`).
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum PredSetOp {
    And,
    Or,
    Xor,
}
1701
1702 impl PredSetOp {
eval(&self, a: bool, b: bool) -> bool1703 pub fn eval(&self, a: bool, b: bool) -> bool {
1704 match self {
1705 PredSetOp::And => a & b,
1706 PredSetOp::Or => a | b,
1707 PredSetOp::Xor => a ^ b,
1708 }
1709 }
1710
is_trivial(&self, accum: &Src) -> bool1711 pub fn is_trivial(&self, accum: &Src) -> bool {
1712 if let Some(b) = accum.as_bool() {
1713 match self {
1714 PredSetOp::And => b,
1715 PredSetOp::Or => !b,
1716 PredSetOp::Xor => !b,
1717 }
1718 } else {
1719 false
1720 }
1721 }
1722 }
1723
1724 impl fmt::Display for PredSetOp {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1725 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1726 match self {
1727 PredSetOp::And => write!(f, ".and"),
1728 PredSetOp::Or => write!(f, ".or"),
1729 PredSetOp::Xor => write!(f, ".xor"),
1730 }
1731 }
1732 }
1733
/// Floating-point comparison operation.
///
/// Comes in `Ord*` and `Unord*` families plus the unary `IsNum`/`IsNan`
/// tests; the `Display` impl prints the `Unord*` forms with a trailing
/// `u` (e.g. `.ltu`).
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FloatCmpOp {
    OrdEq,
    OrdNe,
    OrdLt,
    OrdLe,
    OrdGt,
    OrdGe,
    UnordEq,
    UnordNe,
    UnordLt,
    UnordLe,
    UnordGt,
    UnordGe,
    IsNum,
    IsNan,
}
1752
1753 impl FloatCmpOp {
flip(self) -> FloatCmpOp1754 pub fn flip(self) -> FloatCmpOp {
1755 match self {
1756 FloatCmpOp::OrdEq | FloatCmpOp::OrdNe => self,
1757 FloatCmpOp::OrdLt => FloatCmpOp::OrdGt,
1758 FloatCmpOp::OrdLe => FloatCmpOp::OrdGe,
1759 FloatCmpOp::OrdGt => FloatCmpOp::OrdLt,
1760 FloatCmpOp::OrdGe => FloatCmpOp::OrdLe,
1761 FloatCmpOp::UnordEq | FloatCmpOp::UnordNe => self,
1762 FloatCmpOp::UnordLt => FloatCmpOp::UnordGt,
1763 FloatCmpOp::UnordLe => FloatCmpOp::UnordGe,
1764 FloatCmpOp::UnordGt => FloatCmpOp::UnordLt,
1765 FloatCmpOp::UnordGe => FloatCmpOp::UnordLe,
1766 FloatCmpOp::IsNum | FloatCmpOp::IsNan => panic!("Cannot flip unop"),
1767 }
1768 }
1769 }
1770
1771 impl fmt::Display for FloatCmpOp {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1772 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1773 match self {
1774 FloatCmpOp::OrdEq => write!(f, ".eq"),
1775 FloatCmpOp::OrdNe => write!(f, ".ne"),
1776 FloatCmpOp::OrdLt => write!(f, ".lt"),
1777 FloatCmpOp::OrdLe => write!(f, ".le"),
1778 FloatCmpOp::OrdGt => write!(f, ".gt"),
1779 FloatCmpOp::OrdGe => write!(f, ".ge"),
1780 FloatCmpOp::UnordEq => write!(f, ".equ"),
1781 FloatCmpOp::UnordNe => write!(f, ".neu"),
1782 FloatCmpOp::UnordLt => write!(f, ".ltu"),
1783 FloatCmpOp::UnordLe => write!(f, ".leu"),
1784 FloatCmpOp::UnordGt => write!(f, ".gtu"),
1785 FloatCmpOp::UnordGe => write!(f, ".geu"),
1786 FloatCmpOp::IsNum => write!(f, ".num"),
1787 FloatCmpOp::IsNan => write!(f, ".nan"),
1788 }
1789 }
1790 }
1791
/// Integer comparison operation.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum IntCmpOp {
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,
}
1801
1802 impl IntCmpOp {
flip(self) -> IntCmpOp1803 pub fn flip(self) -> IntCmpOp {
1804 match self {
1805 IntCmpOp::Eq | IntCmpOp::Ne => self,
1806 IntCmpOp::Lt => IntCmpOp::Gt,
1807 IntCmpOp::Le => IntCmpOp::Ge,
1808 IntCmpOp::Gt => IntCmpOp::Lt,
1809 IntCmpOp::Ge => IntCmpOp::Le,
1810 }
1811 }
1812 }
1813
1814 impl fmt::Display for IntCmpOp {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1815 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1816 match self {
1817 IntCmpOp::Eq => write!(f, ".eq"),
1818 IntCmpOp::Ne => write!(f, ".ne"),
1819 IntCmpOp::Lt => write!(f, ".lt"),
1820 IntCmpOp::Le => write!(f, ".le"),
1821 IntCmpOp::Gt => write!(f, ".gt"),
1822 IntCmpOp::Ge => write!(f, ".ge"),
1823 }
1824 }
1825 }
1826
/// Signedness of an integer comparison: unsigned (`U32`) or signed (`I32`).
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum IntCmpType {
    U32,
    I32,
}
1832
1833 impl IntCmpType {
1834 #[allow(dead_code)]
is_signed(&self) -> bool1835 pub fn is_signed(&self) -> bool {
1836 match self {
1837 IntCmpType::U32 => false,
1838 IntCmpType::I32 => true,
1839 }
1840 }
1841 }
1842
1843 impl fmt::Display for IntCmpType {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1844 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1845 match self {
1846 IntCmpType::U32 => write!(f, ".u32"),
1847 IntCmpType::I32 => write!(f, ".i32"),
1848 }
1849 }
1850 }
1851
/// Two-input logic operation; `PassB` yields the second input unchanged
/// (see `LogicOp2::to_lut`).
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum LogicOp2 {
    And,
    Or,
    Xor,
    PassB,
}
1859
1860 impl fmt::Display for LogicOp2 {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1861 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1862 match self {
1863 LogicOp2::And => write!(f, "and"),
1864 LogicOp2::Or => write!(f, "or"),
1865 LogicOp2::Xor => write!(f, "xor"),
1866 LogicOp2::PassB => write!(f, "pass_b"),
1867 }
1868 }
1869 }
1870
1871 impl LogicOp2 {
to_lut(self) -> LogicOp31872 pub fn to_lut(self) -> LogicOp3 {
1873 match self {
1874 LogicOp2::And => LogicOp3::new_lut(&|x, y, _| x & y),
1875 LogicOp2::Or => LogicOp3::new_lut(&|x, y, _| x | y),
1876 LogicOp2::Xor => LogicOp3::new_lut(&|x, y, _| x ^ y),
1877 LogicOp2::PassB => LogicOp3::new_lut(&|_, b, _| b),
1878 }
1879 }
1880 }
1881
/// Three-input logic operation encoded as an 8-entry truth table.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct LogicOp3 {
    /// Truth table: bit `(x << 2) | (y << 1) | z` holds the result for
    /// input bits `x`, `y`, `z` (see `LogicOp3::eval`).
    pub lut: u8,
}
1886
1887 impl LogicOp3 {
1888 pub const SRC_MASKS: [u8; 3] = [0xf0, 0xcc, 0xaa];
1889
1890 #[inline]
new_lut<F: Fn(u8, u8, u8) -> u8>(f: &F) -> LogicOp31891 pub fn new_lut<F: Fn(u8, u8, u8) -> u8>(f: &F) -> LogicOp3 {
1892 LogicOp3 {
1893 lut: f(
1894 LogicOp3::SRC_MASKS[0],
1895 LogicOp3::SRC_MASKS[1],
1896 LogicOp3::SRC_MASKS[2],
1897 ),
1898 }
1899 }
1900
new_const(val: bool) -> LogicOp31901 pub fn new_const(val: bool) -> LogicOp3 {
1902 LogicOp3 {
1903 lut: if val { !0 } else { 0 },
1904 }
1905 }
1906
src_used(&self, src_idx: usize) -> bool1907 pub fn src_used(&self, src_idx: usize) -> bool {
1908 let mask = LogicOp3::SRC_MASKS[src_idx];
1909 let shift = LogicOp3::SRC_MASKS[src_idx].trailing_zeros();
1910 self.lut & !mask != (self.lut >> shift) & !mask
1911 }
1912
fix_src(&mut self, src_idx: usize, val: bool)1913 pub fn fix_src(&mut self, src_idx: usize, val: bool) {
1914 let mask = LogicOp3::SRC_MASKS[src_idx];
1915 let shift = LogicOp3::SRC_MASKS[src_idx].trailing_zeros();
1916 if val {
1917 let t_bits = self.lut & mask;
1918 self.lut = t_bits | (t_bits >> shift)
1919 } else {
1920 let f_bits = self.lut & !mask;
1921 self.lut = (f_bits << shift) | f_bits
1922 };
1923 }
1924
invert_src(&mut self, src_idx: usize)1925 pub fn invert_src(&mut self, src_idx: usize) {
1926 let mask = LogicOp3::SRC_MASKS[src_idx];
1927 let shift = LogicOp3::SRC_MASKS[src_idx].trailing_zeros();
1928 let t_bits = self.lut & mask;
1929 let f_bits = self.lut & !mask;
1930 self.lut = (f_bits << shift) | (t_bits >> shift);
1931 }
1932
eval< T: BitAnd<Output = T> + BitOr<Output = T> + Copy + Not<Output = T>, >( &self, x: T, y: T, z: T, ) -> T1933 pub fn eval<
1934 T: BitAnd<Output = T> + BitOr<Output = T> + Copy + Not<Output = T>,
1935 >(
1936 &self,
1937 x: T,
1938 y: T,
1939 z: T,
1940 ) -> T {
1941 let mut res = x & !x; // zero
1942 if (self.lut & (1 << 0)) != 0 {
1943 res = res | (!x & !y & !z);
1944 }
1945 if (self.lut & (1 << 1)) != 0 {
1946 res = res | (!x & !y & z);
1947 }
1948 if (self.lut & (1 << 2)) != 0 {
1949 res = res | (!x & y & !z);
1950 }
1951 if (self.lut & (1 << 3)) != 0 {
1952 res = res | (!x & y & z);
1953 }
1954 if (self.lut & (1 << 4)) != 0 {
1955 res = res | (x & !y & !z);
1956 }
1957 if (self.lut & (1 << 5)) != 0 {
1958 res = res | (x & !y & z);
1959 }
1960 if (self.lut & (1 << 6)) != 0 {
1961 res = res | (x & y & !z);
1962 }
1963 if (self.lut & (1 << 7)) != 0 {
1964 res = res | (x & y & z);
1965 }
1966 res
1967 }
1968 }
1969
1970 impl fmt::Display for LogicOp3 {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1971 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1972 write!(f, "LUT[{:#x}]", self.lut)
1973 }
1974 }
1975
/// Floating-point width: 16, 32 or 64 bits (see `FloatType::bits`).
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FloatType {
    F16,
    F32,
    F64,
}
1982
1983 impl FloatType {
from_bits(bytes: usize) -> FloatType1984 pub fn from_bits(bytes: usize) -> FloatType {
1985 match bytes {
1986 16 => FloatType::F16,
1987 32 => FloatType::F32,
1988 64 => FloatType::F64,
1989 _ => panic!("Invalid float type size"),
1990 }
1991 }
1992
bits(&self) -> usize1993 pub fn bits(&self) -> usize {
1994 match self {
1995 FloatType::F16 => 16,
1996 FloatType::F32 => 32,
1997 FloatType::F64 => 64,
1998 }
1999 }
2000 }
2001
2002 impl fmt::Display for FloatType {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2003 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2004 match self {
2005 FloatType::F16 => write!(f, ".f16"),
2006 FloatType::F32 => write!(f, ".f32"),
2007 FloatType::F64 => write!(f, ".f64"),
2008 }
2009 }
2010 }
2011
/// Floating-point rounding mode; printed as `.re`, `.rm`, `.rp` or `.rz`
/// by its `Display` impl.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FRndMode {
    NearestEven,
    NegInf,
    PosInf,
    Zero,
}
2019
2020 impl fmt::Display for FRndMode {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2021 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2022 match self {
2023 FRndMode::NearestEven => write!(f, ".re"),
2024 FRndMode::NegInf => write!(f, ".rm"),
2025 FRndMode::PosInf => write!(f, ".rp"),
2026 FRndMode::Zero => write!(f, ".rz"),
2027 }
2028 }
2029 }
2030
/// Texture dimensionality, including array and cube variants.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexDim {
    _1D,
    Array1D,
    _2D,
    Array2D,
    _3D,
    Cube,
    ArrayCube,
}
2041
2042 impl fmt::Display for TexDim {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2043 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2044 match self {
2045 TexDim::_1D => write!(f, ".1d"),
2046 TexDim::Array1D => write!(f, ".a1d"),
2047 TexDim::_2D => write!(f, ".2d"),
2048 TexDim::Array2D => write!(f, ".a2d"),
2049 TexDim::_3D => write!(f, ".3d"),
2050 TexDim::Cube => write!(f, ".cube"),
2051 TexDim::ArrayCube => write!(f, ".acube"),
2052 }
2053 }
2054 }
2055
/// Texture level-of-detail mode; printed as `la`, `lz`, `lb`, `ll`, `lc`
/// or `lb.lc` by its `Display` impl.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexLodMode {
    Auto,
    Zero,
    Bias,
    Lod,
    Clamp,
    BiasClamp,
}
2065
2066 impl fmt::Display for TexLodMode {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2067 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2068 match self {
2069 TexLodMode::Auto => write!(f, "la"),
2070 TexLodMode::Zero => write!(f, "lz"),
2071 TexLodMode::Bias => write!(f, "lb"),
2072 TexLodMode::Lod => write!(f, "ll"),
2073 TexLodMode::Clamp => write!(f, "lc"),
2074 TexLodMode::BiasClamp => write!(f, "lb.lc"),
2075 }
2076 }
2077 }
2078
/// Offset mode selector; printed as `no_off`, `aoffi` or `ptp` by its
/// `Display` impl.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum Tld4OffsetMode {
    None,
    AddOffI,
    PerPx,
}
2085
2086 impl fmt::Display for Tld4OffsetMode {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2087 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2088 match self {
2089 Tld4OffsetMode::None => write!(f, "no_off"),
2090 Tld4OffsetMode::AddOffI => write!(f, "aoffi"),
2091 Tld4OffsetMode::PerPx => write!(f, "ptp"),
2092 }
2093 }
2094 }
2095
/// Kind of texture query; printed as `dimension`, `texture_type` or
/// `sampler_pos` by its `Display` impl.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexQuery {
    Dimension,
    TextureType,
    SamplerPos,
}
2103
2104 impl fmt::Display for TexQuery {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2105 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2106 match self {
2107 TexQuery::Dimension => write!(f, "dimension"),
2108 TexQuery::TextureType => write!(f, "texture_type"),
2109 TexQuery::SamplerPos => write!(f, "sampler_pos"),
2110 }
2111 }
2112 }
2113
/// Image dimensionality; `ImageDim::coord_comps` gives the number of
/// coordinate components for each variant.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum ImageDim {
    _1D,
    _1DBuffer,
    _1DArray,
    _2D,
    _2DArray,
    _3D,
}
2123
2124 impl ImageDim {
coord_comps(&self) -> u82125 pub fn coord_comps(&self) -> u8 {
2126 match self {
2127 ImageDim::_1D => 1,
2128 ImageDim::_1DBuffer => 1,
2129 ImageDim::_1DArray => 2,
2130 ImageDim::_2D => 2,
2131 ImageDim::_2DArray => 3,
2132 ImageDim::_3D => 3,
2133 }
2134 }
2135 }
2136
2137 impl fmt::Display for ImageDim {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2138 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2139 match self {
2140 ImageDim::_1D => write!(f, ".1d"),
2141 ImageDim::_1DBuffer => write!(f, ".buf"),
2142 ImageDim::_1DArray => write!(f, ".a1d"),
2143 ImageDim::_2D => write!(f, ".2d"),
2144 ImageDim::_2DArray => write!(f, ".a2d"),
2145 ImageDim::_3D => write!(f, ".3d"),
2146 }
2147 }
2148 }
2149
/// Integer type: unsigned (`U*`) or signed (`I*`) at 8, 16, 32 or 64 bits.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum IntType {
    U8,
    I8,
    U16,
    I16,
    U32,
    I32,
    U64,
    I64,
}
2161
2162 impl IntType {
from_bits(bits: usize, is_signed: bool) -> IntType2163 pub fn from_bits(bits: usize, is_signed: bool) -> IntType {
2164 match bits {
2165 8 => {
2166 if is_signed {
2167 IntType::I8
2168 } else {
2169 IntType::U8
2170 }
2171 }
2172 16 => {
2173 if is_signed {
2174 IntType::I16
2175 } else {
2176 IntType::U16
2177 }
2178 }
2179 32 => {
2180 if is_signed {
2181 IntType::I32
2182 } else {
2183 IntType::U32
2184 }
2185 }
2186 64 => {
2187 if is_signed {
2188 IntType::I64
2189 } else {
2190 IntType::U64
2191 }
2192 }
2193 _ => panic!("Invalid integer type size"),
2194 }
2195 }
2196
is_signed(&self) -> bool2197 pub fn is_signed(&self) -> bool {
2198 match self {
2199 IntType::U8 | IntType::U16 | IntType::U32 | IntType::U64 => false,
2200 IntType::I8 | IntType::I16 | IntType::I32 | IntType::I64 => true,
2201 }
2202 }
2203
bits(&self) -> usize2204 pub fn bits(&self) -> usize {
2205 match self {
2206 IntType::U8 | IntType::I8 => 8,
2207 IntType::U16 | IntType::I16 => 16,
2208 IntType::U32 | IntType::I32 => 32,
2209 IntType::U64 | IntType::I64 => 64,
2210 }
2211 }
2212 }
2213
2214 impl fmt::Display for IntType {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2215 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2216 match self {
2217 IntType::U8 => write!(f, ".u8"),
2218 IntType::I8 => write!(f, ".i8"),
2219 IntType::U16 => write!(f, ".u16"),
2220 IntType::I16 => write!(f, ".i16"),
2221 IntType::U32 => write!(f, ".u32"),
2222 IntType::I32 => write!(f, ".i32"),
2223 IntType::U64 => write!(f, ".u64"),
2224 IntType::I64 => write!(f, ".i64"),
2225 }
2226 }
2227 }
2228
/// Memory address width selector; printed as `.a32` or `.a64`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemAddrType {
    A32,
    A64,
}
2234
2235 impl fmt::Display for MemAddrType {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2236 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2237 match self {
2238 MemAddrType::A32 => write!(f, ".a32"),
2239 MemAddrType::A64 => write!(f, ".a64"),
2240 }
2241 }
2242 }
2243
/// Data type of a memory load/store.
///
/// 8- and 16-bit accesses distinguish signedness; 32-, 64- and 128-bit
/// accesses do not (see `MemType::from_size`).
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemType {
    U8,
    I8,
    U16,
    I16,
    B32,
    B64,
    B128,
}
2254
2255 impl MemType {
from_size(size: u8, is_signed: bool) -> MemType2256 pub fn from_size(size: u8, is_signed: bool) -> MemType {
2257 match size {
2258 1 => {
2259 if is_signed {
2260 MemType::I8
2261 } else {
2262 MemType::U8
2263 }
2264 }
2265 2 => {
2266 if is_signed {
2267 MemType::I16
2268 } else {
2269 MemType::U16
2270 }
2271 }
2272 4 => MemType::B32,
2273 8 => MemType::B64,
2274 16 => MemType::B128,
2275 _ => panic!("Invalid memory load/store size"),
2276 }
2277 }
2278
2279 #[allow(dead_code)]
bits(&self) -> usize2280 pub fn bits(&self) -> usize {
2281 match self {
2282 MemType::U8 | MemType::I8 => 8,
2283 MemType::U16 | MemType::I16 => 16,
2284 MemType::B32 => 32,
2285 MemType::B64 => 64,
2286 MemType::B128 => 128,
2287 }
2288 }
2289 }
2290
2291 impl fmt::Display for MemType {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2292 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2293 match self {
2294 MemType::U8 => write!(f, ".u8"),
2295 MemType::I8 => write!(f, ".i8"),
2296 MemType::U16 => write!(f, ".u16"),
2297 MemType::I16 => write!(f, ".i16"),
2298 MemType::B32 => write!(f, ".b32"),
2299 MemType::B64 => write!(f, ".b64"),
2300 MemType::B128 => write!(f, ".b128"),
2301 }
2302 }
2303 }
2304
/// Memory ordering of an access; `Strong` additionally carries the scope
/// it applies to.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemOrder {
    Constant,
    Weak,
    Strong(MemScope),
}
2312
2313 impl fmt::Display for MemOrder {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2314 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2315 match self {
2316 MemOrder::Constant => write!(f, ".constant"),
2317 MemOrder::Weak => write!(f, ".weak"),
2318 MemOrder::Strong(scope) => write!(f, ".strong{}", scope),
2319 }
2320 }
2321 }
2322
/// Scope of a memory ordering; printed as `.cta`, `.gpu` or `.sys`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemScope {
    CTA,
    GPU,
    System,
}
2329
2330 impl fmt::Display for MemScope {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2331 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2332 match self {
2333 MemScope::CTA => write!(f, ".cta"),
2334 MemScope::GPU => write!(f, ".gpu"),
2335 MemScope::System => write!(f, ".sys"),
2336 }
2337 }
2338 }
2339
/// Memory space of an access; `Global` carries its address type, while
/// `Local` and `Shared` always use `MemAddrType::A32` (see
/// `MemSpace::addr_type`).
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemSpace {
    Global(MemAddrType),
    Local,
    Shared,
}
2346
2347 impl MemSpace {
addr_type(&self) -> MemAddrType2348 pub fn addr_type(&self) -> MemAddrType {
2349 match self {
2350 MemSpace::Global(t) => *t,
2351 MemSpace::Local => MemAddrType::A32,
2352 MemSpace::Shared => MemAddrType::A32,
2353 }
2354 }
2355 }
2356
2357 impl fmt::Display for MemSpace {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2358 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2359 match self {
2360 MemSpace::Global(t) => write!(f, ".global{t}"),
2361 MemSpace::Local => write!(f, ".local"),
2362 MemSpace::Shared => write!(f, ".shared"),
2363 }
2364 }
2365 }
2366
/// Eviction-priority modifier of a memory access.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemEvictionPriority {
    First,
    Normal,
    Last,
    Unchanged,
}

impl fmt::Display for MemEvictionPriority {
    /// Prints `.ef`, `.el` or `.lu`; `Normal` prints nothing at all.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            // The normal priority has no textual form.
            MemEvictionPriority::Normal => return Ok(()),
            MemEvictionPriority::First => ".ef",
            MemEvictionPriority::Last => ".el",
            MemEvictionPriority::Unchanged => ".lu",
        };
        f.write_str(suffix)
    }
}
2386
2387 #[derive(Clone)]
2388 pub struct MemAccess {
2389 pub mem_type: MemType,
2390 pub space: MemSpace,
2391 pub order: MemOrder,
2392 pub eviction_priority: MemEvictionPriority,
2393 }
2394
2395 impl fmt::Display for MemAccess {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2396 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2397 write!(
2398 f,
2399 "{}{}{}{}",
2400 self.space, self.order, self.eviction_priority, self.mem_type,
2401 )
2402 }
2403 }
2404
/// Data type operated on by an atomic.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomType {
    F16x2,
    U32,
    I32,
    F32,
    U64,
    I64,
    F64,
}

impl AtomType {
    /// Returns the float atomic type that is `bits` wide.
    ///
    /// # Panics
    ///
    /// Panics for 16 bits (not yet supported) and for any width other
    /// than 32 or 64.
    pub fn F(bits: u8) -> AtomType {
        if bits == 16 {
            panic!("16-bit float atomics not yet supported");
        }
        match bits {
            32 => Self::F32,
            64 => Self::F64,
            _ => panic!("Invalid float atomic type"),
        }
    }

    /// Returns the unsigned integer atomic type that is `bits` wide.
    ///
    /// # Panics
    ///
    /// Panics for any width other than 32 or 64.
    pub fn U(bits: u8) -> AtomType {
        match bits {
            32 => Self::U32,
            64 => Self::U64,
            _ => panic!("Invalid uint atomic type"),
        }
    }

    /// Returns the signed integer atomic type that is `bits` wide.
    ///
    /// # Panics
    ///
    /// Panics for any width other than 32 or 64.
    pub fn I(bits: u8) -> AtomType {
        match bits {
            32 => Self::I32,
            64 => Self::I64,
            _ => panic!("Invalid int atomic type"),
        }
    }

    /// Returns the width of the type in bits (`F16x2` counts as 32).
    pub fn bits(&self) -> usize {
        match self {
            AtomType::F16x2
            | AtomType::U32
            | AtomType::I32
            | AtomType::F32 => 32,
            AtomType::U64 | AtomType::I64 | AtomType::F64 => 64,
        }
    }
}

impl fmt::Display for AtomType {
    /// Prints the type as a dotted suffix such as `.u32`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            AtomType::F16x2 => ".f16x2",
            AtomType::U32 => ".u32",
            AtomType::I32 => ".i32",
            AtomType::F32 => ".f32",
            AtomType::U64 => ".u64",
            AtomType::I64 => ".i64",
            AtomType::F64 => ".f64",
        };
        f.write_str(suffix)
    }
}
2465
/// Where a compare-exchange atomic takes its compare value from.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomCmpSrc {
    /// The cmpr value is passed as a separate source
    Separate,
    /// The cmpr value is packed in with the data with cmpr coming first
    Packed,
}

/// Operation performed by an atomic.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomOp {
    Add,
    Min,
    Max,
    Inc,
    Dec,
    And,
    Or,
    Xor,
    Exch,
    CmpExch(AtomCmpSrc),
}

impl AtomOp {
    /// Returns `true` for every operation except `Exch` and `CmpExch`.
    pub fn is_reduction(&self) -> bool {
        match self {
            AtomOp::Exch | AtomOp::CmpExch(_) => false,
            AtomOp::Add
            | AtomOp::Min
            | AtomOp::Max
            | AtomOp::Inc
            | AtomOp::Dec
            | AtomOp::And
            | AtomOp::Or
            | AtomOp::Xor => true,
        }
    }
}

impl fmt::Display for AtomOp {
    /// Prints the operation as a dotted suffix; a packed compare-exchange
    /// gets an extra `.packed`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            AtomOp::Add => ".add",
            AtomOp::Min => ".min",
            AtomOp::Max => ".max",
            AtomOp::Inc => ".inc",
            AtomOp::Dec => ".dec",
            AtomOp::And => ".and",
            AtomOp::Or => ".or",
            AtomOp::Xor => ".xor",
            AtomOp::Exch => ".exch",
            AtomOp::CmpExch(AtomCmpSrc::Separate) => ".cmpexch",
            AtomOp::CmpExch(AtomCmpSrc::Packed) => ".cmpexch.packed",
        };
        f.write_str(suffix)
    }
}
2522
/// Interpolation frequency modifier.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum InterpFreq {
    Pass,
    PassMulW,
    Constant,
    State,
}

impl fmt::Display for InterpFreq {
    /// Prints the frequency as a dotted suffix such as `.pass_mul_w`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            InterpFreq::Pass => ".pass",
            InterpFreq::PassMulW => ".pass_mul_w",
            InterpFreq::Constant => ".constant",
            InterpFreq::State => ".state",
        };
        f.write_str(suffix)
    }
}
/// Interpolation location modifier.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum InterpLoc {
    Default,
    Centroid,
    Offset,
}

impl fmt::Display for InterpLoc {
    /// Prints `.centroid` or `.offset`; `Default` prints nothing.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            // The default location has no textual form.
            InterpLoc::Default => return Ok(()),
            InterpLoc::Centroid => ".centroid",
            InterpLoc::Offset => ".offset",
        };
        f.write_str(suffix)
    }
}
2557
/// Parameters describing an attribute access.
///
/// NOTE(review): the per-field notes below are inferred from the field
/// names only; nothing in this part of the file shows how the fields are
/// consumed, so confirm them against the users of this struct.
pub struct AttrAccess {
    /// Attribute address (unit not visible here — TODO confirm).
    pub addr: u16,
    /// Presumably the number of components accessed — TODO confirm.
    pub comps: u8,
    /// Presumably selects per-patch attributes — TODO confirm.
    pub patch: bool,
    /// Presumably selects output rather than input attributes — TODO
    /// confirm.
    pub output: bool,
    /// Presumably marks `addr` as a physical address — TODO confirm.
    pub phys: bool,
}
2565
2566 #[repr(C)]
2567 #[derive(SrcsAsSlice, DstsAsSlice)]
2568 pub struct OpFAdd {
2569 #[dst_type(F32)]
2570 pub dst: Dst,
2571
2572 #[src_type(F32)]
2573 pub srcs: [Src; 2],
2574
2575 pub saturate: bool,
2576 pub rnd_mode: FRndMode,
2577 pub ftz: bool,
2578 }
2579
2580 impl DisplayOp for OpFAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2581 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2582 let sat = if self.saturate { ".sat" } else { "" };
2583 write!(f, "fadd{sat}")?;
2584 if self.rnd_mode != FRndMode::NearestEven {
2585 write!(f, "{}", self.rnd_mode)?;
2586 }
2587 if self.ftz {
2588 write!(f, ".ftz")?;
2589 }
2590 write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2591 }
2592 }
2593 impl_display_for_op!(OpFAdd);
2594
2595 #[repr(C)]
2596 #[derive(SrcsAsSlice, DstsAsSlice)]
2597 pub struct OpFFma {
2598 #[dst_type(F32)]
2599 pub dst: Dst,
2600
2601 #[src_type(F32)]
2602 pub srcs: [Src; 3],
2603
2604 pub saturate: bool,
2605 pub rnd_mode: FRndMode,
2606 pub ftz: bool,
2607 pub dnz: bool,
2608 }
2609
2610 impl DisplayOp for OpFFma {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2611 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2612 let sat = if self.saturate { ".sat" } else { "" };
2613 write!(f, "ffma{sat}")?;
2614 if self.rnd_mode != FRndMode::NearestEven {
2615 write!(f, "{}", self.rnd_mode)?;
2616 }
2617 if self.dnz {
2618 write!(f, ".dnz")?;
2619 } else if self.ftz {
2620 write!(f, ".ftz")?;
2621 }
2622 write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
2623 }
2624 }
2625 impl_display_for_op!(OpFFma);
2626
2627 #[repr(C)]
2628 #[derive(SrcsAsSlice, DstsAsSlice)]
2629 pub struct OpFMnMx {
2630 #[dst_type(F32)]
2631 pub dst: Dst,
2632
2633 #[src_type(F32)]
2634 pub srcs: [Src; 2],
2635
2636 #[src_type(Pred)]
2637 pub min: Src,
2638
2639 pub ftz: bool,
2640 }
2641
2642 impl DisplayOp for OpFMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2643 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2644 let ftz = if self.ftz { ".ftz" } else { "" };
2645 write!(
2646 f,
2647 "fmnmx{ftz} {} {} {}",
2648 self.srcs[0], self.srcs[1], self.min
2649 )
2650 }
2651 }
2652 impl_display_for_op!(OpFMnMx);
2653
2654 #[repr(C)]
2655 #[derive(SrcsAsSlice, DstsAsSlice)]
2656 pub struct OpFMul {
2657 #[dst_type(F32)]
2658 pub dst: Dst,
2659
2660 #[src_type(F32)]
2661 pub srcs: [Src; 2],
2662
2663 pub saturate: bool,
2664 pub rnd_mode: FRndMode,
2665 pub ftz: bool,
2666 pub dnz: bool,
2667 }
2668
2669 impl DisplayOp for OpFMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2670 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2671 let sat = if self.saturate { ".sat" } else { "" };
2672 write!(f, "fmul{sat}")?;
2673 if self.rnd_mode != FRndMode::NearestEven {
2674 write!(f, "{}", self.rnd_mode)?;
2675 }
2676 if self.dnz {
2677 write!(f, ".dnz")?;
2678 } else if self.ftz {
2679 write!(f, ".ftz")?;
2680 }
2681 write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2682 }
2683 }
2684 impl_display_for_op!(OpFMul);
2685
2686 #[repr(C)]
2687 #[derive(SrcsAsSlice, DstsAsSlice)]
2688 pub struct OpFSet {
2689 #[dst_type(F32)]
2690 pub dst: Dst,
2691
2692 pub cmp_op: FloatCmpOp,
2693
2694 #[src_type(F32)]
2695 pub srcs: [Src; 2],
2696
2697 pub ftz: bool,
2698 }
2699
2700 impl DisplayOp for OpFSet {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2701 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2702 let ftz = if self.ftz { ".ftz" } else { "" };
2703 write!(
2704 f,
2705 "fset{}{ftz} {} {}",
2706 self.cmp_op, self.srcs[0], self.srcs[1]
2707 )
2708 }
2709 }
2710 impl_display_for_op!(OpFSet);
2711
2712 #[repr(C)]
2713 #[derive(SrcsAsSlice, DstsAsSlice)]
2714 pub struct OpFSetP {
2715 #[dst_type(Pred)]
2716 pub dst: Dst,
2717
2718 pub set_op: PredSetOp,
2719 pub cmp_op: FloatCmpOp,
2720
2721 #[src_type(F32)]
2722 pub srcs: [Src; 2],
2723
2724 #[src_type(Pred)]
2725 pub accum: Src,
2726
2727 pub ftz: bool,
2728 }
2729
2730 impl DisplayOp for OpFSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2731 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2732 let ftz = if self.ftz { ".ftz" } else { "" };
2733 write!(f, "fsetp{}{ftz}", self.cmp_op)?;
2734 if !self.set_op.is_trivial(&self.accum) {
2735 write!(f, "{}", self.set_op)?;
2736 }
2737 write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
2738 if !self.set_op.is_trivial(&self.accum) {
2739 write!(f, " {}", self.accum)?;
2740 }
2741 Ok(())
2742 }
2743 }
2744 impl_display_for_op!(OpFSetP);
2745
/// One of the four per-slot operations of [`OpFSwzAdd`].
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum FSwzAddOp {
    Add,
    SubRight,
    SubLeft,
    MoveLeft,
}

impl fmt::Display for FSwzAddOp {
    /// Prints the operation name without a leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            FSwzAddOp::Add => "add",
            FSwzAddOp::SubRight => "subr",
            FSwzAddOp::SubLeft => "sub",
            FSwzAddOp::MoveLeft => "mov2",
        };
        f.write_str(name)
    }
}
2765
2766 #[repr(C)]
2767 #[derive(SrcsAsSlice, DstsAsSlice)]
2768 pub struct OpFSwzAdd {
2769 #[dst_type(F32)]
2770 pub dst: Dst,
2771
2772 #[src_type(GPR)]
2773 pub srcs: [Src; 2],
2774
2775 pub rnd_mode: FRndMode,
2776 pub ftz: bool,
2777
2778 pub ops: [FSwzAddOp; 4],
2779 }
2780
2781 impl DisplayOp for OpFSwzAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2782 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2783 write!(f, "fswzadd",)?;
2784 if self.rnd_mode != FRndMode::NearestEven {
2785 write!(f, "{}", self.rnd_mode)?;
2786 }
2787 if self.ftz {
2788 write!(f, ".ftz")?;
2789 }
2790 write!(
2791 f,
2792 " {} {} [{}, {}, {}, {}]",
2793 self.srcs[0],
2794 self.srcs[1],
2795 self.ops[0],
2796 self.ops[1],
2797 self.ops[2],
2798 self.ops[3],
2799 )
2800 }
2801 }
2802 impl_display_for_op!(OpFSwzAdd);
2803
/// Selects the variant of [`OpRro`].
pub enum RroOp {
    SinCos,
    Exp2,
}

impl fmt::Display for RroOp {
    /// Prints the variant as `.sincos` or `.exp2`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            RroOp::SinCos => ".sincos",
            RroOp::Exp2 => ".exp2",
        };
        f.write_str(suffix)
    }
}
2817
2818 /// MuFu range reduction operator
2819 ///
2820 /// Not available on SM70+
2821 #[repr(C)]
2822 #[derive(SrcsAsSlice, DstsAsSlice)]
2823 pub struct OpRro {
2824 #[dst_type(F32)]
2825 pub dst: Dst,
2826
2827 pub op: RroOp,
2828
2829 #[src_type(F32)]
2830 pub src: Src,
2831 }
2832
2833 impl DisplayOp for OpRro {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2834 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2835 write!(f, "rro{} {}", self.op, self.src)
2836 }
2837 }
2838 impl_display_for_op!(OpRro);
2839
/// Function selector of [`OpMuFu`].
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum MuFuOp {
    Cos,
    Sin,
    Exp2,
    Log2,
    Rcp,
    Rsq,
    Rcp64H,
    Rsq64H,
    Sqrt,
    Tanh,
}

impl fmt::Display for MuFuOp {
    /// Prints the lower-case function name without a leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            MuFuOp::Cos => "cos",
            MuFuOp::Sin => "sin",
            MuFuOp::Exp2 => "exp2",
            MuFuOp::Log2 => "log2",
            MuFuOp::Rcp => "rcp",
            MuFuOp::Rsq => "rsq",
            MuFuOp::Rcp64H => "rcp64h",
            MuFuOp::Rsq64H => "rsq64h",
            MuFuOp::Sqrt => "sqrt",
            MuFuOp::Tanh => "tanh",
        };
        f.write_str(name)
    }
}
2871
2872 #[repr(C)]
2873 #[derive(SrcsAsSlice, DstsAsSlice)]
2874 pub struct OpMuFu {
2875 #[dst_type(F32)]
2876 pub dst: Dst,
2877
2878 pub op: MuFuOp,
2879
2880 #[src_type(F32)]
2881 pub src: Src,
2882 }
2883
2884 impl DisplayOp for OpMuFu {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2885 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2886 write!(f, "mufu.{} {}", self.op, self.src)
2887 }
2888 }
2889 impl_display_for_op!(OpMuFu);
2890
2891 #[repr(C)]
2892 #[derive(SrcsAsSlice, DstsAsSlice)]
2893 pub struct OpDAdd {
2894 #[dst_type(F64)]
2895 pub dst: Dst,
2896
2897 #[src_type(F64)]
2898 pub srcs: [Src; 2],
2899
2900 pub rnd_mode: FRndMode,
2901 }
2902
2903 impl DisplayOp for OpDAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2904 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2905 write!(f, "dadd")?;
2906 if self.rnd_mode != FRndMode::NearestEven {
2907 write!(f, "{}", self.rnd_mode)?;
2908 }
2909 write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2910 }
2911 }
2912 impl_display_for_op!(OpDAdd);
2913
2914 #[repr(C)]
2915 #[derive(SrcsAsSlice, DstsAsSlice)]
2916 pub struct OpDMul {
2917 #[dst_type(F64)]
2918 pub dst: Dst,
2919
2920 #[src_type(F64)]
2921 pub srcs: [Src; 2],
2922
2923 pub rnd_mode: FRndMode,
2924 }
2925
2926 impl DisplayOp for OpDMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2927 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2928 write!(f, "dmul")?;
2929 if self.rnd_mode != FRndMode::NearestEven {
2930 write!(f, "{}", self.rnd_mode)?;
2931 }
2932 write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2933 }
2934 }
2935 impl_display_for_op!(OpDMul);
2936
2937 #[repr(C)]
2938 #[derive(SrcsAsSlice, DstsAsSlice)]
2939 pub struct OpDFma {
2940 #[dst_type(F64)]
2941 pub dst: Dst,
2942
2943 #[src_type(F64)]
2944 pub srcs: [Src; 3],
2945
2946 pub rnd_mode: FRndMode,
2947 }
2948
2949 impl DisplayOp for OpDFma {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2950 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2951 write!(f, "dfma")?;
2952 if self.rnd_mode != FRndMode::NearestEven {
2953 write!(f, "{}", self.rnd_mode)?;
2954 }
2955 write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
2956 }
2957 }
2958 impl_display_for_op!(OpDFma);
2959
2960 #[repr(C)]
2961 #[derive(SrcsAsSlice, DstsAsSlice)]
2962 pub struct OpDMnMx {
2963 #[dst_type(F64)]
2964 pub dst: Dst,
2965
2966 #[src_type(F64)]
2967 pub srcs: [Src; 2],
2968
2969 #[src_type(Pred)]
2970 pub min: Src,
2971 }
2972
2973 impl DisplayOp for OpDMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2974 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2975 write!(f, "dmnmx {} {} {}", self.srcs[0], self.srcs[1], self.min)
2976 }
2977 }
2978 impl_display_for_op!(OpDMnMx);
2979
2980 #[repr(C)]
2981 #[derive(SrcsAsSlice, DstsAsSlice)]
2982 pub struct OpDSetP {
2983 #[dst_type(Pred)]
2984 pub dst: Dst,
2985
2986 pub set_op: PredSetOp,
2987 pub cmp_op: FloatCmpOp,
2988
2989 #[src_type(F64)]
2990 pub srcs: [Src; 2],
2991
2992 #[src_type(Pred)]
2993 pub accum: Src,
2994 }
2995
2996 impl DisplayOp for OpDSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2997 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2998 write!(f, "dsetp{}", self.cmp_op)?;
2999 if !self.set_op.is_trivial(&self.accum) {
3000 write!(f, "{}", self.set_op)?;
3001 }
3002 write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3003 if !self.set_op.is_trivial(&self.accum) {
3004 write!(f, " {}", self.accum)?;
3005 }
3006 Ok(())
3007 }
3008 }
3009 impl_display_for_op!(OpDSetP);
3010
3011 #[repr(C)]
3012 #[derive(SrcsAsSlice, DstsAsSlice)]
3013 pub struct OpHAdd2 {
3014 #[dst_type(F16v2)]
3015 pub dst: Dst,
3016
3017 #[src_type(F16v2)]
3018 pub srcs: [Src; 2],
3019
3020 pub saturate: bool,
3021 pub ftz: bool,
3022 pub f32: bool,
3023 }
3024
3025 impl DisplayOp for OpHAdd2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3026 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3027 let sat = if self.saturate { ".sat" } else { "" };
3028 let f32 = if self.f32 { ".f32" } else { "" };
3029 write!(f, "hadd2{sat}{f32}")?;
3030 if self.ftz {
3031 write!(f, ".ftz")?;
3032 }
3033 write!(f, " {} {}", self.srcs[0], self.srcs[1])
3034 }
3035 }
3036 impl_display_for_op!(OpHAdd2);
3037
3038 #[repr(C)]
3039 #[derive(SrcsAsSlice, DstsAsSlice)]
3040 pub struct OpHSet2 {
3041 #[dst_type(F16v2)]
3042 pub dst: Dst,
3043
3044 pub set_op: PredSetOp,
3045 pub cmp_op: FloatCmpOp,
3046
3047 #[src_type(F16v2)]
3048 pub srcs: [Src; 2],
3049
3050 #[src_type(Pred)]
3051 pub accum: Src,
3052
3053 pub ftz: bool,
3054 }
3055
3056 impl DisplayOp for OpHSet2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3057 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3058 let ftz = if self.ftz { ".ftz" } else { "" };
3059 write!(f, "hset2{}{ftz}", self.cmp_op)?;
3060 if !self.set_op.is_trivial(&self.accum) {
3061 write!(f, "{}", self.set_op)?;
3062 }
3063 write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3064 if !self.set_op.is_trivial(&self.accum) {
3065 write!(f, " {}", self.accum)?;
3066 }
3067 Ok(())
3068 }
3069 }
3070 impl_display_for_op!(OpHSet2);
3071
3072 #[repr(C)]
3073 #[derive(SrcsAsSlice, DstsAsSlice)]
3074 pub struct OpHSetP2 {
3075 #[dst_type(Pred)]
3076 pub dsts: [Dst; 2],
3077
3078 pub set_op: PredSetOp,
3079 pub cmp_op: FloatCmpOp,
3080
3081 #[src_type(F16v2)]
3082 pub srcs: [Src; 2],
3083
3084 #[src_type(Pred)]
3085 pub accum: Src,
3086
3087 pub ftz: bool,
3088
3089 // When not set, each dsts get the result of each lanes.
3090 // When set, the first dst gets the result of both lanes (res0 && res1)
3091 // and the second dst gets the negation !(res0 && res1)
3092 // before applying the accumulator.
3093 pub horizontal: bool,
3094 }
3095
3096 impl DisplayOp for OpHSetP2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3097 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3098 let ftz = if self.ftz { ".ftz" } else { "" };
3099 write!(f, "hsetp2{}{ftz}", self.cmp_op)?;
3100 if !self.set_op.is_trivial(&self.accum) {
3101 write!(f, "{}", self.set_op)?;
3102 }
3103 write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3104 if !self.set_op.is_trivial(&self.accum) {
3105 write!(f, " {}", self.accum)?;
3106 }
3107 Ok(())
3108 }
3109 }
3110 impl_display_for_op!(OpHSetP2);
3111
3112 #[repr(C)]
3113 #[derive(SrcsAsSlice, DstsAsSlice)]
3114 pub struct OpHMul2 {
3115 #[dst_type(F16v2)]
3116 pub dst: Dst,
3117
3118 #[src_type(F16v2)]
3119 pub srcs: [Src; 2],
3120
3121 pub saturate: bool,
3122 pub ftz: bool,
3123 pub dnz: bool,
3124 }
3125
3126 impl DisplayOp for OpHMul2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3127 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3128 let sat = if self.saturate { ".sat" } else { "" };
3129 write!(f, "hmul2{sat}")?;
3130 if self.dnz {
3131 write!(f, ".dnz")?;
3132 } else if self.ftz {
3133 write!(f, ".ftz")?;
3134 }
3135 write!(f, " {} {}", self.srcs[0], self.srcs[1])
3136 }
3137 }
3138 impl_display_for_op!(OpHMul2);
3139
3140 #[repr(C)]
3141 #[derive(SrcsAsSlice, DstsAsSlice)]
3142 pub struct OpHFma2 {
3143 #[dst_type(F16v2)]
3144 pub dst: Dst,
3145
3146 #[src_type(F16v2)]
3147 pub srcs: [Src; 3],
3148
3149 pub saturate: bool,
3150 pub ftz: bool,
3151 pub dnz: bool,
3152 pub f32: bool,
3153 }
3154
3155 impl DisplayOp for OpHFma2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3156 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3157 let sat = if self.saturate { ".sat" } else { "" };
3158 let f32 = if self.f32 { ".f32" } else { "" };
3159 write!(f, "hfma2{sat}{f32}")?;
3160 if self.dnz {
3161 write!(f, ".dnz")?;
3162 } else if self.ftz {
3163 write!(f, ".ftz")?;
3164 }
3165 write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
3166 }
3167 }
3168 impl_display_for_op!(OpHFma2);
3169
3170 #[repr(C)]
3171 #[derive(SrcsAsSlice, DstsAsSlice)]
3172 pub struct OpHMnMx2 {
3173 #[dst_type(F16v2)]
3174 pub dst: Dst,
3175
3176 #[src_type(F16v2)]
3177 pub srcs: [Src; 2],
3178
3179 #[src_type(Pred)]
3180 pub min: Src,
3181
3182 pub ftz: bool,
3183 }
3184
3185 impl DisplayOp for OpHMnMx2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3186 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3187 let ftz = if self.ftz { ".ftz" } else { "" };
3188 write!(
3189 f,
3190 "hmnmx2{ftz} {} {} {}",
3191 self.srcs[0], self.srcs[1], self.min
3192 )
3193 }
3194 }
3195 impl_display_for_op!(OpHMnMx2);
3196
3197 #[repr(C)]
3198 #[derive(SrcsAsSlice, DstsAsSlice)]
3199 pub struct OpBMsk {
3200 #[dst_type(GPR)]
3201 pub dst: Dst,
3202
3203 #[src_type(ALU)]
3204 pub pos: Src,
3205
3206 #[src_type(ALU)]
3207 pub width: Src,
3208
3209 pub wrap: bool,
3210 }
3211
3212 impl DisplayOp for OpBMsk {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3213 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3214 let wrap = if self.wrap { ".wrap" } else { ".clamp" };
3215 write!(f, "bmsk{} {} {}", wrap, self.pos, self.width)
3216 }
3217 }
3218 impl_display_for_op!(OpBMsk);
3219
3220 #[repr(C)]
3221 #[derive(SrcsAsSlice, DstsAsSlice)]
3222 pub struct OpBRev {
3223 #[dst_type(GPR)]
3224 pub dst: Dst,
3225
3226 #[src_type(ALU)]
3227 pub src: Src,
3228 }
3229
3230 impl DisplayOp for OpBRev {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3231 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3232 write!(f, "brev {}", self.src)
3233 }
3234 }
3235 impl_display_for_op!(OpBRev);
3236
3237 /// Bitfield extract. Extracts all bits from `base` starting at `offset` into
3238 /// `dst`.
3239 #[repr(C)]
3240 #[derive(SrcsAsSlice, DstsAsSlice)]
3241 pub struct OpBfe {
3242 /// Where to insert the bits.
3243 #[dst_type(GPR)]
3244 pub dst: Dst,
3245
3246 /// The source of bits to extract.
3247 #[src_type(ALU)]
3248 pub base: Src,
3249
3250 /// The range of bits to extract. This source is interpreted as four
3251 /// separate bytes, [b0, b1, b2, b3].
3252 ///
3253 /// b0 and b1: unused
3254 /// b2: the number of bits to extract.
3255 /// b3: the offset of the first bit to extract.
3256 ///
3257 /// This matches the way the hardware works.
3258 #[src_type(ALU)]
3259 pub range: Src,
3260
3261 /// Whether the output is signed
3262 pub signed: bool,
3263
3264 /// Whether to reverse the bits before inserting them into `dst`.
3265 pub reverse: bool,
3266 }
3267
3268 impl DisplayOp for OpBfe {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3269 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3270 write!(f, "bfe")?;
3271 if self.signed {
3272 write!(f, ".s")?;
3273 }
3274 if self.reverse {
3275 write!(f, ".rev")?;
3276 }
3277 write!(f, " {} {}", self.base, self.range,)
3278 }
3279 }
3280 impl_display_for_op!(OpBfe);
3281
3282 #[repr(C)]
3283 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3284 pub struct OpFlo {
3285 #[dst_type(GPR)]
3286 pub dst: Dst,
3287
3288 #[src_type(ALU)]
3289 pub src: Src,
3290
3291 pub signed: bool,
3292 pub return_shift_amount: bool,
3293 }
3294
3295 impl Foldable for OpFlo {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3296 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3297 let src = f.get_u32_src(self, &self.src);
3298 let leading = if self.signed && (src & 0x80000000) != 0 {
3299 (!src).leading_zeros()
3300 } else {
3301 src.leading_zeros()
3302 };
3303 let dst = if self.return_shift_amount {
3304 leading
3305 } else {
3306 31 - leading
3307 };
3308 f.set_u32_dst(self, &self.dst, dst);
3309 }
3310 }
3311
3312 impl DisplayOp for OpFlo {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3313 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3314 write!(f, "flo")?;
3315 if self.return_shift_amount {
3316 write!(f, ".samt")?;
3317 }
3318 write!(f, " {}", self.src)
3319 }
3320 }
3321 impl_display_for_op!(OpFlo);
3322
3323 #[repr(C)]
3324 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3325 pub struct OpIAbs {
3326 #[dst_type(GPR)]
3327 pub dst: Dst,
3328
3329 #[src_type(ALU)]
3330 pub src: Src,
3331 }
3332
3333 impl Foldable for OpIAbs {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3334 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3335 let src = f.get_u32_src(self, &self.src);
3336 let dst = (src as i32).abs() as u32;
3337 f.set_u32_dst(self, &self.dst, dst);
3338 }
3339 }
3340
3341 impl DisplayOp for OpIAbs {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3342 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3343 write!(f, "iabs {}", self.src)
3344 }
3345 }
3346 impl_display_for_op!(OpIAbs);
3347
3348 /// Only used on SM50
3349 #[repr(C)]
3350 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3351 pub struct OpIAdd2 {
3352 #[dst_type(GPR)]
3353 pub dst: Dst,
3354 #[dst_type(Carry)]
3355 pub carry_out: Dst,
3356
3357 #[src_type(I32)]
3358 pub srcs: [Src; 2],
3359 }
3360
3361 impl Foldable for OpIAdd2 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3362 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3363 let srcs = [
3364 f.get_u32_src(self, &self.srcs[0]),
3365 f.get_u32_src(self, &self.srcs[1]),
3366 ];
3367
3368 let mut sum = 0_u64;
3369 for i in 0..2 {
3370 if self.srcs[i].src_mod.is_ineg() {
3371 // This is a very literal interpretation of 2's compliment.
3372 // This is not -u64::from(src) or u64::from(-src).
3373 sum += u64::from(!srcs[i]) + 1;
3374 } else {
3375 sum += u64::from(srcs[i]);
3376 }
3377 }
3378
3379 f.set_u32_dst(self, &self.dst, sum as u32);
3380 f.set_carry_dst(self, &self.carry_out, sum >= (1 << 32));
3381 }
3382 }
3383
3384 impl DisplayOp for OpIAdd2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3385 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3386 write!(f, "iadd2 {} {}", self.srcs[0], self.srcs[1])
3387 }
3388 }
3389
3390 /// Only used on SM50
3391 #[repr(C)]
3392 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3393 pub struct OpIAdd2X {
3394 #[dst_type(GPR)]
3395 pub dst: Dst,
3396 #[dst_type(Carry)]
3397 pub carry_out: Dst,
3398
3399 #[src_type(B32)]
3400 pub srcs: [Src; 2],
3401 #[src_type(Carry)]
3402 pub carry_in: Src,
3403 }
3404
3405 impl Foldable for OpIAdd2X {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3406 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3407 let srcs = [
3408 f.get_u32_bnot_src(self, &self.srcs[0]),
3409 f.get_u32_bnot_src(self, &self.srcs[1]),
3410 ];
3411 let carry_in = f.get_carry_src(self, &self.carry_in);
3412
3413 let sum = u64::from(srcs[0]) + u64::from(srcs[1]) + u64::from(carry_in);
3414
3415 f.set_u32_dst(self, &self.dst, sum as u32);
3416 f.set_carry_dst(self, &self.carry_out, sum >= (1 << 32));
3417 }
3418 }
3419
3420 impl DisplayOp for OpIAdd2X {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3421 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3422 write!(f, "iadd2.x {} {}", self.srcs[0], self.srcs[1])?;
3423 if !self.carry_in.is_zero() {
3424 write!(f, " {}", self.carry_in)?;
3425 }
3426 Ok(())
3427 }
3428 }
3429
3430 #[repr(C)]
3431 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3432 pub struct OpIAdd3 {
3433 #[dst_type(GPR)]
3434 pub dst: Dst,
3435
3436 #[dst_type(Pred)]
3437 pub overflow: [Dst; 2],
3438
3439 #[src_type(I32)]
3440 pub srcs: [Src; 3],
3441 }
3442
3443 impl Foldable for OpIAdd3 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3444 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3445 let srcs = [
3446 f.get_u32_src(self, &self.srcs[0]),
3447 f.get_u32_src(self, &self.srcs[1]),
3448 f.get_u32_src(self, &self.srcs[2]),
3449 ];
3450
3451 let mut sum = 0_u64;
3452 for i in 0..3 {
3453 if self.srcs[i].src_mod.is_ineg() {
3454 // This is a very literal interpretation of 2's compliment.
3455 // This is not -u64::from(src) or u64::from(-src).
3456 sum += u64::from(!srcs[i]) + 1;
3457 } else {
3458 sum += u64::from(srcs[i]);
3459 }
3460 }
3461
3462 f.set_u32_dst(self, &self.dst, sum as u32);
3463 f.set_pred_dst(self, &self.overflow[0], sum >= 1_u64 << 32);
3464 f.set_pred_dst(self, &self.overflow[1], sum >= 2_u64 << 32);
3465 }
3466 }
3467
3468 impl DisplayOp for OpIAdd3 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3469 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3470 write!(
3471 f,
3472 "iadd3 {} {} {}",
3473 self.srcs[0], self.srcs[1], self.srcs[2],
3474 )
3475 }
3476 }
3477 impl_display_for_op!(OpIAdd3);
3478
3479 #[repr(C)]
3480 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3481 pub struct OpIAdd3X {
3482 #[dst_type(GPR)]
3483 pub dst: Dst,
3484
3485 #[dst_type(Pred)]
3486 pub overflow: [Dst; 2],
3487
3488 #[src_type(B32)]
3489 pub srcs: [Src; 3],
3490
3491 #[src_type(Pred)]
3492 pub carry: [Src; 2],
3493 }
3494
3495 impl Foldable for OpIAdd3X {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3496 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3497 let srcs = [
3498 f.get_u32_bnot_src(self, &self.srcs[0]),
3499 f.get_u32_bnot_src(self, &self.srcs[1]),
3500 f.get_u32_bnot_src(self, &self.srcs[2]),
3501 ];
3502 let carry = [
3503 f.get_pred_src(self, &self.carry[0]),
3504 f.get_pred_src(self, &self.carry[1]),
3505 ];
3506
3507 let mut sum = 0_u64;
3508 for i in 0..3 {
3509 sum += u64::from(srcs[i]);
3510 }
3511
3512 for i in 0..2 {
3513 sum += u64::from(carry[i]);
3514 }
3515
3516 f.set_u32_dst(self, &self.dst, sum as u32);
3517 f.set_pred_dst(self, &self.overflow[0], sum >= 1_u64 << 32);
3518 f.set_pred_dst(self, &self.overflow[1], sum >= 2_u64 << 32);
3519 }
3520 }
3521
3522 impl DisplayOp for OpIAdd3X {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3523 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3524 write!(
3525 f,
3526 "iadd3.x {} {} {} {} {}",
3527 self.srcs[0],
3528 self.srcs[1],
3529 self.srcs[2],
3530 self.carry[0],
3531 self.carry[1]
3532 )
3533 }
3534 }
3535 impl_display_for_op!(OpIAdd3X);
3536
3537 #[repr(C)]
3538 #[derive(SrcsAsSlice, DstsAsSlice)]
3539 pub struct OpIDp4 {
3540 #[dst_type(GPR)]
3541 pub dst: Dst,
3542
3543 pub src_types: [IntType; 2],
3544
3545 #[src_type(I32)]
3546 pub srcs: [Src; 3],
3547 }
3548
3549 impl DisplayOp for OpIDp4 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3550 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3551 write!(
3552 f,
3553 "idp4{}{} {} {} {}",
3554 self.src_types[0],
3555 self.src_types[1],
3556 self.srcs[0],
3557 self.srcs[1],
3558 self.srcs[2],
3559 )
3560 }
3561 }
3562 impl_display_for_op!(OpIDp4);
3563
3564 #[repr(C)]
3565 #[derive(SrcsAsSlice, DstsAsSlice)]
3566 pub struct OpIMad {
3567 #[dst_type(GPR)]
3568 pub dst: Dst,
3569
3570 #[src_type(ALU)]
3571 pub srcs: [Src; 3],
3572
3573 pub signed: bool,
3574 }
3575
3576 impl DisplayOp for OpIMad {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3577 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3578 write!(f, "imad {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2],)
3579 }
3580 }
3581 impl_display_for_op!(OpIMad);
3582
3583 /// Only used on SM50
3584 #[repr(C)]
3585 #[derive(SrcsAsSlice, DstsAsSlice)]
3586 pub struct OpIMul {
3587 #[dst_type(GPR)]
3588 pub dst: Dst,
3589
3590 #[src_type(ALU)]
3591 pub srcs: [Src; 2],
3592
3593 pub signed: [bool; 2],
3594 pub high: bool,
3595 }
3596
3597 impl DisplayOp for OpIMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3598 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3599 write!(f, "imul")?;
3600 if self.high {
3601 write!(f, ".hi")?;
3602 }
3603 let src_type = |signed| if signed { ".s32" } else { ".u32" };
3604 write!(
3605 f,
3606 "{}{}",
3607 src_type(self.signed[0]),
3608 src_type(self.signed[1])
3609 )?;
3610 write!(f, " {} {}", self.srcs[0], self.srcs[1])
3611 }
3612 }
3613
3614 #[repr(C)]
3615 #[derive(SrcsAsSlice, DstsAsSlice)]
3616 pub struct OpIMad64 {
3617 #[dst_type(Vec)]
3618 pub dst: Dst,
3619
3620 #[src_type(ALU)]
3621 pub srcs: [Src; 3],
3622
3623 pub signed: bool,
3624 }
3625
3626 impl DisplayOp for OpIMad64 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3627 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3628 write!(
3629 f,
3630 "imad64 {} {} {}",
3631 self.srcs[0], self.srcs[1], self.srcs[2],
3632 )
3633 }
3634 }
3635 impl_display_for_op!(OpIMad64);
3636
3637 #[repr(C)]
3638 #[derive(SrcsAsSlice, DstsAsSlice)]
3639 pub struct OpIMnMx {
3640 #[dst_type(GPR)]
3641 pub dst: Dst,
3642
3643 pub cmp_type: IntCmpType,
3644
3645 #[src_type(ALU)]
3646 pub srcs: [Src; 2],
3647
3648 #[src_type(Pred)]
3649 pub min: Src,
3650 }
3651
3652 impl DisplayOp for OpIMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3653 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3654 write!(
3655 f,
3656 "imnmx{} {} {} {}",
3657 self.cmp_type, self.srcs[0], self.srcs[1], self.min
3658 )
3659 }
3660 }
3661 impl_display_for_op!(OpIMnMx);
3662
/// Integer comparison producing a predicate, printed as `isetp`
///
/// When folded, the two `srcs` are compared with `cmp_op` (as signed or
/// unsigned values depending on `cmp_type`) and the result is combined with
/// `accum` through `set_op`.
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
pub struct OpISetP {
    #[dst_type(Pred)]
    pub dst: Dst,

    // How the comparison result is combined with `accum`
    pub set_op: PredSetOp,
    pub cmp_op: IntCmpOp,
    // Selects between a signed and an unsigned comparison
    pub cmp_type: IntCmpType,
    // When set and both sources are equal, `low_cmp` is used in place of
    // the comparison result
    pub ex: bool,

    #[src_type(ALU)]
    pub srcs: [Src; 2],

    #[src_type(Pred)]
    pub accum: Src,

    // Only printed and consulted when `ex` is set
    #[src_type(Pred)]
    pub low_cmp: Src,
}
3683
3684 impl Foldable for OpISetP {
fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3685 fn fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3686 let x = f.get_u32_src(self, &self.srcs[0]);
3687 let y = f.get_u32_src(self, &self.srcs[1]);
3688 let accum = f.get_pred_src(self, &self.accum);
3689 let low_cmp = f.get_pred_src(self, &self.low_cmp);
3690
3691 let cmp = if self.cmp_type.is_signed() {
3692 let x = x as i32;
3693 let y = y as i32;
3694 match &self.cmp_op {
3695 IntCmpOp::Eq => x == y,
3696 IntCmpOp::Ne => x != y,
3697 IntCmpOp::Lt => x < y,
3698 IntCmpOp::Le => x <= y,
3699 IntCmpOp::Gt => x > y,
3700 IntCmpOp::Ge => x >= y,
3701 }
3702 } else {
3703 match &self.cmp_op {
3704 IntCmpOp::Eq => x == y,
3705 IntCmpOp::Ne => x != y,
3706 IntCmpOp::Lt => x < y,
3707 IntCmpOp::Le => x <= y,
3708 IntCmpOp::Gt => x > y,
3709 IntCmpOp::Ge => x >= y,
3710 }
3711 };
3712
3713 let cmp = if self.ex && x == y {
3714 // Pre-Volta, isetp.x takes the accumulator into account. If we
3715 // want to support this, we need to take an an accumulator into
3716 // account. Disallow it for now.
3717 assert!(sm.sm() >= 70);
3718 low_cmp
3719 } else {
3720 cmp
3721 };
3722
3723 let dst = self.set_op.eval(cmp, accum);
3724
3725 f.set_pred_dst(self, &self.dst, dst);
3726 }
3727 }
3728
3729 impl DisplayOp for OpISetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3730 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3731 write!(f, "isetp{}{}", self.cmp_op, self.cmp_type)?;
3732 if !self.set_op.is_trivial(&self.accum) {
3733 write!(f, "{}", self.set_op)?;
3734 }
3735 if self.ex {
3736 write!(f, ".ex")?;
3737 }
3738 write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3739 if !self.set_op.is_trivial(&self.accum) {
3740 write!(f, " {}", self.accum)?;
3741 }
3742 if self.ex {
3743 write!(f, " {}", self.low_cmp)?;
3744 }
3745 Ok(())
3746 }
3747 }
3748 impl_display_for_op!(OpISetP);
3749
3750 #[repr(C)]
3751 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3752 pub struct OpLop2 {
3753 #[dst_type(GPR)]
3754 pub dst: Dst,
3755
3756 #[src_type(B32)]
3757 pub srcs: [Src; 2],
3758
3759 pub op: LogicOp2,
3760 }
3761
3762 impl DisplayOp for OpLop2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3763 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3764 write!(f, "lop2.{} {} {}", self.op, self.srcs[0], self.srcs[1],)
3765 }
3766 }
3767
3768 impl Foldable for OpLop2 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3769 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3770 let srcs = [
3771 f.get_u32_bnot_src(self, &self.srcs[0]),
3772 f.get_u32_bnot_src(self, &self.srcs[1]),
3773 ];
3774 let dst = match self.op {
3775 LogicOp2::And => srcs[0] & srcs[1],
3776 LogicOp2::Or => srcs[0] | srcs[1],
3777 LogicOp2::Xor => srcs[0] ^ srcs[1],
3778 LogicOp2::PassB => srcs[1],
3779 };
3780 f.set_u32_dst(self, &self.dst, dst);
3781 }
3782 }
3783
3784 #[repr(C)]
3785 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3786 pub struct OpLop3 {
3787 #[dst_type(GPR)]
3788 pub dst: Dst,
3789
3790 #[src_type(ALU)]
3791 pub srcs: [Src; 3],
3792
3793 pub op: LogicOp3,
3794 }
3795
3796 impl Foldable for OpLop3 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3797 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3798 let srcs = [
3799 f.get_u32_bnot_src(self, &self.srcs[0]),
3800 f.get_u32_bnot_src(self, &self.srcs[1]),
3801 f.get_u32_bnot_src(self, &self.srcs[2]),
3802 ];
3803 let dst = self.op.eval(srcs[0], srcs[1], srcs[2]);
3804 f.set_u32_dst(self, &self.dst, dst);
3805 }
3806 }
3807
3808 impl DisplayOp for OpLop3 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3809 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3810 write!(
3811 f,
3812 "lop3.{} {} {} {}",
3813 self.op, self.srcs[0], self.srcs[1], self.srcs[2],
3814 )
3815 }
3816 }
3817 impl_display_for_op!(OpLop3);
3818
/// Mode of a shuffle operation, printed as the suffix of `shfl.`
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum ShflOp {
    Idx,
    Up,
    Down,
    Bfly,
}

impl fmt::Display for ShflOp {
    /// Writes the lower-case name of the mode.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            ShflOp::Idx => "idx",
            ShflOp::Up => "up",
            ShflOp::Down => "down",
            ShflOp::Bfly => "bfly",
        };
        f.write_str(name)
    }
}
3837
/// Funnel shift, printed as `shf`
///
/// When folded, `low` and `high` are concatenated into one 64-bit value
/// (with `high` in the upper 32 bits) which is shifted by `shift`; one
/// 32-bit half of the shifted value is written to `dst`.
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
pub struct OpShf {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(GPR)]
    pub low: Src,

    #[src_type(ALU)]
    pub high: Src,

    #[src_type(ALU)]
    pub shift: Src,

    // Shift right (`.r`) when set, otherwise shift left (`.l`)
    pub right: bool,
    // When set, the shift amount is masked to the bit size of `data_type`;
    // otherwise it is clamped to that bit size
    pub wrap: bool,
    pub data_type: IntType,
    // Printed as `.hi`; when folded, selects the upper 32 bits of the
    // shifted value
    pub dst_high: bool,
}
3858
3859 impl Foldable for OpShf {
fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3860 fn fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3861 let low = f.get_u32_src(self, &self.low);
3862 let high = f.get_u32_src(self, &self.high);
3863 let shift = f.get_u32_src(self, &self.shift);
3864
3865 let bits: u32 = self.data_type.bits().try_into().unwrap();
3866 let shift = if self.wrap {
3867 shift & (bits - 1)
3868 } else {
3869 min(shift, bits)
3870 };
3871
3872 let x = u64::from(low) | (u64::from(high) << 32);
3873 let shifted = if sm.sm() < 70
3874 && self.dst_high
3875 && self.data_type != IntType::I64
3876 {
3877 if self.right {
3878 x.checked_shr(shift).unwrap_or(0) as u64
3879 } else {
3880 x.checked_shl(shift).unwrap_or(0) as u64
3881 }
3882 } else if self.data_type.is_signed() {
3883 if self.right {
3884 (x as i64).checked_shr(shift).unwrap_or(0) as u64
3885 } else {
3886 (x as i64).checked_shl(shift).unwrap_or(0) as u64
3887 }
3888 } else {
3889 if self.right {
3890 x.checked_shr(shift).unwrap_or(0) as u64
3891 } else {
3892 x.checked_shl(shift).unwrap_or(0) as u64
3893 }
3894 };
3895
3896 let dst = if (sm.sm() < 70 && !self.right) || self.dst_high {
3897 (shifted >> 32) as u32
3898 } else {
3899 shifted as u32
3900 };
3901
3902 f.set_u32_dst(self, &self.dst, dst);
3903 }
3904 }
3905
3906 impl DisplayOp for OpShf {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3907 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3908 write!(f, "shf")?;
3909 if self.right {
3910 write!(f, ".r")?;
3911 } else {
3912 write!(f, ".l")?;
3913 }
3914 if self.wrap {
3915 write!(f, ".w")?;
3916 }
3917 write!(f, "{}", self.data_type)?;
3918 if self.dst_high {
3919 write!(f, ".hi")?;
3920 }
3921 write!(f, " {} {} {}", self.low, self.high, self.shift)
3922 }
3923 }
3924 impl_display_for_op!(OpShf);
3925
3926 /// Only used on SM50
3927 #[repr(C)]
3928 #[derive(SrcsAsSlice, DstsAsSlice)]
3929 pub struct OpShl {
3930 #[dst_type(GPR)]
3931 pub dst: Dst,
3932
3933 #[src_type(GPR)]
3934 pub src: Src,
3935
3936 #[src_type(ALU)]
3937 pub shift: Src,
3938
3939 pub wrap: bool,
3940 }
3941
3942 impl DisplayOp for OpShl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3943 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3944 write!(f, "shl")?;
3945 if self.wrap {
3946 write!(f, ".w")?;
3947 }
3948 write!(f, " {} {}", self.src, self.shift)
3949 }
3950 }
3951
3952 /// Only used on SM50
3953 #[repr(C)]
3954 #[derive(SrcsAsSlice, DstsAsSlice)]
3955 pub struct OpShr {
3956 #[dst_type(GPR)]
3957 pub dst: Dst,
3958
3959 #[src_type(GPR)]
3960 pub src: Src,
3961
3962 #[src_type(ALU)]
3963 pub shift: Src,
3964
3965 pub wrap: bool,
3966 pub signed: bool,
3967 }
3968
3969 impl DisplayOp for OpShr {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3970 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3971 write!(f, "shr")?;
3972 if self.wrap {
3973 write!(f, ".w")?;
3974 }
3975 if !self.signed {
3976 write!(f, ".u32")?;
3977 }
3978 write!(f, " {} {}", self.src, self.shift)
3979 }
3980 }
3981
3982 #[repr(C)]
3983 pub struct OpF2F {
3984 pub dst: Dst,
3985 pub src: Src,
3986
3987 pub src_type: FloatType,
3988 pub dst_type: FloatType,
3989 pub rnd_mode: FRndMode,
3990 pub ftz: bool,
3991 /// For 16-bit up-conversions, take the high 16 bits of the source register.
3992 /// For 16-bit down-conversions, place the result into the upper 16 bits of
3993 /// the destination register
3994 pub high: bool,
3995 /// Round to the nearest integer rather than nearest float
3996 ///
3997 /// Not available on SM70+
3998 pub integer_rnd: bool,
3999 }
4000
4001 impl AsSlice<Src> for OpF2F {
4002 type Attr = SrcType;
4003
as_slice(&self) -> &[Src]4004 fn as_slice(&self) -> &[Src] {
4005 std::slice::from_ref(&self.src)
4006 }
4007
as_mut_slice(&mut self) -> &mut [Src]4008 fn as_mut_slice(&mut self) -> &mut [Src] {
4009 std::slice::from_mut(&mut self.src)
4010 }
4011
attrs(&self) -> SrcTypeList4012 fn attrs(&self) -> SrcTypeList {
4013 let src_type = match self.src_type {
4014 FloatType::F16 => SrcType::F16,
4015 FloatType::F32 => SrcType::F32,
4016 FloatType::F64 => SrcType::F64,
4017 };
4018 SrcTypeList::Uniform(src_type)
4019 }
4020 }
4021
4022 impl AsSlice<Dst> for OpF2F {
4023 type Attr = DstType;
4024
as_slice(&self) -> &[Dst]4025 fn as_slice(&self) -> &[Dst] {
4026 std::slice::from_ref(&self.dst)
4027 }
4028
as_mut_slice(&mut self) -> &mut [Dst]4029 fn as_mut_slice(&mut self) -> &mut [Dst] {
4030 std::slice::from_mut(&mut self.dst)
4031 }
4032
attrs(&self) -> DstTypeList4033 fn attrs(&self) -> DstTypeList {
4034 let dst_type = match self.dst_type {
4035 FloatType::F16 => DstType::F16,
4036 FloatType::F32 => DstType::F32,
4037 FloatType::F64 => DstType::F64,
4038 };
4039 DstTypeList::Uniform(dst_type)
4040 }
4041 }
4042
4043 impl DisplayOp for OpF2F {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4044 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4045 write!(f, "f2f")?;
4046 if self.ftz {
4047 write!(f, ".ftz")?;
4048 }
4049 if self.integer_rnd {
4050 write!(f, ".int")?;
4051 }
4052 write!(
4053 f,
4054 "{}{}{} {}",
4055 self.dst_type, self.src_type, self.rnd_mode, self.src,
4056 )
4057 }
4058 }
4059 impl_display_for_op!(OpF2F);
4060
4061 #[repr(C)]
4062 #[derive(DstsAsSlice, SrcsAsSlice)]
4063 pub struct OpF2FP {
4064 #[dst_type(GPR)]
4065 pub dst: Dst,
4066
4067 #[src_type(ALU)]
4068 pub srcs: [Src; 2],
4069
4070 pub rnd_mode: FRndMode,
4071 }
4072
4073 impl DisplayOp for OpF2FP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4074 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4075 write!(f, "f2fp.pack_ab")?;
4076 if self.rnd_mode != FRndMode::NearestEven {
4077 write!(f, "{}", self.rnd_mode)?;
4078 }
4079 write!(f, " {}, {}", self.srcs[0], self.srcs[1],)
4080 }
4081 }
4082 impl_display_for_op!(OpF2FP);
4083
4084 #[repr(C)]
4085 #[derive(DstsAsSlice)]
4086 pub struct OpF2I {
4087 #[dst_type(GPR)]
4088 pub dst: Dst,
4089
4090 pub src: Src,
4091
4092 pub src_type: FloatType,
4093 pub dst_type: IntType,
4094 pub rnd_mode: FRndMode,
4095 pub ftz: bool,
4096 }
4097
4098 impl AsSlice<Src> for OpF2I {
4099 type Attr = SrcType;
4100
as_slice(&self) -> &[Src]4101 fn as_slice(&self) -> &[Src] {
4102 std::slice::from_ref(&self.src)
4103 }
4104
as_mut_slice(&mut self) -> &mut [Src]4105 fn as_mut_slice(&mut self) -> &mut [Src] {
4106 std::slice::from_mut(&mut self.src)
4107 }
4108
attrs(&self) -> SrcTypeList4109 fn attrs(&self) -> SrcTypeList {
4110 let src_type = match self.src_type {
4111 FloatType::F16 => SrcType::F16,
4112 FloatType::F32 => SrcType::F32,
4113 FloatType::F64 => SrcType::F64,
4114 };
4115 SrcTypeList::Uniform(src_type)
4116 }
4117 }
4118
4119 impl DisplayOp for OpF2I {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4120 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4121 let ftz = if self.ftz { ".ftz" } else { "" };
4122 write!(
4123 f,
4124 "f2i{}{}{}{ftz} {}",
4125 self.dst_type, self.src_type, self.rnd_mode, self.src,
4126 )
4127 }
4128 }
4129 impl_display_for_op!(OpF2I);
4130
4131 #[repr(C)]
4132 pub struct OpI2F {
4133 pub dst: Dst,
4134 pub src: Src,
4135
4136 pub dst_type: FloatType,
4137 pub src_type: IntType,
4138 pub rnd_mode: FRndMode,
4139 }
4140
4141 impl AsSlice<Src> for OpI2F {
4142 type Attr = SrcType;
4143
as_slice(&self) -> &[Src]4144 fn as_slice(&self) -> &[Src] {
4145 std::slice::from_ref(&self.src)
4146 }
4147
as_mut_slice(&mut self) -> &mut [Src]4148 fn as_mut_slice(&mut self) -> &mut [Src] {
4149 std::slice::from_mut(&mut self.src)
4150 }
4151
attrs(&self) -> SrcTypeList4152 fn attrs(&self) -> SrcTypeList {
4153 if self.src_type.bits() <= 32 {
4154 SrcTypeList::Uniform(SrcType::ALU)
4155 } else {
4156 SrcTypeList::Uniform(SrcType::GPR)
4157 }
4158 }
4159 }
4160
4161 impl AsSlice<Dst> for OpI2F {
4162 type Attr = DstType;
4163
as_slice(&self) -> &[Dst]4164 fn as_slice(&self) -> &[Dst] {
4165 std::slice::from_ref(&self.dst)
4166 }
4167
as_mut_slice(&mut self) -> &mut [Dst]4168 fn as_mut_slice(&mut self) -> &mut [Dst] {
4169 std::slice::from_mut(&mut self.dst)
4170 }
4171
attrs(&self) -> DstTypeList4172 fn attrs(&self) -> DstTypeList {
4173 let dst_type = match self.dst_type {
4174 FloatType::F16 => DstType::F16,
4175 FloatType::F32 => DstType::F32,
4176 FloatType::F64 => DstType::F64,
4177 };
4178 DstTypeList::Uniform(dst_type)
4179 }
4180 }
4181
4182 impl DisplayOp for OpI2F {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4183 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4184 write!(
4185 f,
4186 "i2f{}{}{} {}",
4187 self.dst_type, self.src_type, self.rnd_mode, self.src,
4188 )
4189 }
4190 }
4191 impl_display_for_op!(OpI2F);
4192
4193 /// Not used on SM70+
4194 #[repr(C)]
4195 #[derive(SrcsAsSlice, DstsAsSlice)]
4196 pub struct OpI2I {
4197 #[dst_type(GPR)]
4198 pub dst: Dst,
4199
4200 #[src_type(ALU)]
4201 pub src: Src,
4202
4203 pub src_type: IntType,
4204 pub dst_type: IntType,
4205
4206 pub saturate: bool,
4207 pub abs: bool,
4208 pub neg: bool,
4209 }
4210
4211 impl DisplayOp for OpI2I {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4212 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4213 write!(f, "i2i")?;
4214 if self.saturate {
4215 write!(f, ".sat ")?;
4216 }
4217 write!(f, "{}{} {}", self.dst_type, self.src_type, self.src,)?;
4218 if self.abs {
4219 write!(f, ".abs")?;
4220 }
4221 if self.neg {
4222 write!(f, ".neg")?;
4223 }
4224 Ok(())
4225 }
4226 }
4227 impl_display_for_op!(OpI2I);
4228
4229 #[repr(C)]
4230 #[derive(DstsAsSlice)]
4231 pub struct OpFRnd {
4232 #[dst_type(F32)]
4233 pub dst: Dst,
4234
4235 pub src: Src,
4236
4237 pub dst_type: FloatType,
4238 pub src_type: FloatType,
4239 pub rnd_mode: FRndMode,
4240 pub ftz: bool,
4241 }
4242
4243 impl AsSlice<Src> for OpFRnd {
4244 type Attr = SrcType;
4245
as_slice(&self) -> &[Src]4246 fn as_slice(&self) -> &[Src] {
4247 std::slice::from_ref(&self.src)
4248 }
4249
as_mut_slice(&mut self) -> &mut [Src]4250 fn as_mut_slice(&mut self) -> &mut [Src] {
4251 std::slice::from_mut(&mut self.src)
4252 }
4253
attrs(&self) -> SrcTypeList4254 fn attrs(&self) -> SrcTypeList {
4255 let src_type = match self.src_type {
4256 FloatType::F16 => SrcType::F16,
4257 FloatType::F32 => SrcType::F32,
4258 FloatType::F64 => SrcType::F64,
4259 };
4260 SrcTypeList::Uniform(src_type)
4261 }
4262 }
4263
4264 impl DisplayOp for OpFRnd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4265 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4266 let ftz = if self.ftz { ".ftz" } else { "" };
4267 write!(
4268 f,
4269 "frnd{}{}{}{ftz} {}",
4270 self.dst_type, self.src_type, self.rnd_mode, self.src,
4271 )
4272 }
4273 }
4274 impl_display_for_op!(OpFRnd);
4275
4276 #[repr(C)]
4277 #[derive(SrcsAsSlice, DstsAsSlice)]
4278 pub struct OpMov {
4279 #[dst_type(GPR)]
4280 pub dst: Dst,
4281
4282 #[src_type(ALU)]
4283 pub src: Src,
4284
4285 pub quad_lanes: u8,
4286 }
4287
4288 impl DisplayOp for OpMov {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4289 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4290 if self.quad_lanes == 0xf {
4291 write!(f, "mov {}", self.src)
4292 } else {
4293 write!(f, "mov[{:#x}] {}", self.quad_lanes, self.src)
4294 }
4295 }
4296 }
4297 impl_display_for_op!(OpMov);
4298
/// One 4-bit byte selector of a permute selection
///
/// Bits 0..2 hold the byte index, bit 2 holds the source index and bit 3 is
/// the "msb" flag.
#[derive(Copy, Clone)]
pub struct PrmtSelByte(u8);

impl PrmtSelByte {
    pub const INVALID: PrmtSelByte = PrmtSelByte(u8::MAX);

    /// Builds a selector for byte `byte_idx` of source `src_idx`.
    ///
    /// # Panics
    ///
    /// Panics if `src_idx >= 2` or `byte_idx >= 4`.
    pub fn new(src_idx: usize, byte_idx: usize, msb: bool) -> PrmtSelByte {
        assert!(src_idx < 2);
        assert!(byte_idx < 4);

        let msb_bit: u8 = if msb { 0x8 } else { 0x0 };
        PrmtSelByte(msb_bit | ((src_idx as u8) << 2) | (byte_idx as u8))
    }

    /// Index of the selected source (0 or 1)
    pub fn src(&self) -> usize {
        usize::from((self.0 >> 2) & 0x1)
    }

    /// Index of the selected byte within the source (0 to 3)
    pub fn byte(&self) -> usize {
        usize::from(self.0 & 0x3)
    }

    /// Whether the "msb" flag is set
    pub fn msb(&self) -> bool {
        self.0 & 0x8 != 0
    }

    /// Applies this selector to the source value `u`.
    ///
    /// Returns the selected byte of `u` or, if the "msb" flag is set, a byte
    /// whose bits are all copies of the top bit of the selected byte.
    pub fn fold_u32(&self, u: u32) -> u8 {
        let selected = (u >> (self.byte() * 8)) as u8;
        if !self.msb() {
            return selected;
        }
        if selected & 0x80 != 0 {
            0xff
        } else {
            0x00
        }
    }
}

/// A full permute selection: four `PrmtSelByte`s packed into 16 bits, with
/// the selector for byte `i` stored in bits `4 * i .. 4 * i + 4`
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct PrmtSel(pub u16);

impl PrmtSel {
    /// Packs four byte selectors into a selection.
    ///
    /// # Panics
    ///
    /// Panics if any selector does not fit in 4 bits, which is the case for
    /// `PrmtSelByte::INVALID`.
    pub fn new(bytes: [PrmtSelByte; 4]) -> PrmtSel {
        let sel = bytes.iter().enumerate().fold(0_u16, |sel, (i, b)| {
            assert!(b.0 <= 0xf);
            sel | (u16::from(b.0) << (i * 4))
        });
        PrmtSel(sel)
    }

    /// Returns the selector for destination byte `byte_idx`.
    ///
    /// # Panics
    ///
    /// Panics if `byte_idx >= 4`.
    pub fn get(&self, byte_idx: usize) -> PrmtSelByte {
        assert!(byte_idx < 4);
        let nibble = (self.0 >> (byte_idx * 4)) & 0xf;
        PrmtSelByte(nibble as u8)
    }
}
4357
/// Mode of a permute operation
///
/// `Index` is the default mode and prints as nothing; every other mode
/// prints as its own modifier.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum PrmtMode {
    Index,
    Forward4Extract,
    Backward4Extract,
    Replicate8,
    EdgeClampLeft,
    EdgeClampRight,
    Replicate16,
}

impl fmt::Display for PrmtMode {
    /// Writes the mode modifier, including its leading dot, or nothing at
    /// all for `Index`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            PrmtMode::Index => Ok(()),
            PrmtMode::Forward4Extract => write!(f, ".f4e"),
            PrmtMode::Backward4Extract => write!(f, ".b4e"),
            PrmtMode::Replicate8 => write!(f, ".rc8"),
            PrmtMode::EdgeClampLeft => write!(f, ".ecl"),
            // Must differ from EdgeClampLeft so that the two modes can be
            // told apart in printed IR.
            PrmtMode::EdgeClampRight => write!(f, ".ecr"),
            PrmtMode::Replicate16 => write!(f, ".rc16"),
        }
    }
}
4383
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
/// Permutes the bytes of `srcs` into `dst` using the selection in `sel`.
///
/// Constant folding is only implemented for `PrmtMode::Index`, where the low
/// 16 bits of `sel` are interpreted as a `PrmtSel`.
pub struct OpPrmt {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(ALU)]
    pub srcs: [Src; 2],

    // Printed in square brackets between the two sources
    #[src_type(ALU)]
    pub sel: Src,

    pub mode: PrmtMode,
}
4399
4400 impl OpPrmt {
get_sel(&self) -> Option<PrmtSel>4401 pub fn get_sel(&self) -> Option<PrmtSel> {
4402 // TODO: We could construct a PrmtSel for the other modes but we don't
4403 // use them right now because they're kinda pointless.
4404 if self.mode != PrmtMode::Index {
4405 return None;
4406 }
4407
4408 if let Some(sel) = self.sel.as_u32() {
4409 // The top 16 bits are ignored
4410 Some(PrmtSel(sel as u16))
4411 } else {
4412 None
4413 }
4414 }
4415
as_u32(&self) -> Option<u32>4416 pub fn as_u32(&self) -> Option<u32> {
4417 let Some(sel) = self.get_sel() else {
4418 return None;
4419 };
4420
4421 let mut imm = 0_u32;
4422 for b in 0..4 {
4423 let sel_byte = sel.get(b);
4424 let Some(src_u32) = self.srcs[sel_byte.src()].as_u32() else {
4425 return None;
4426 };
4427
4428 let sb = sel_byte.fold_u32(src_u32);
4429 imm |= u32::from(sb) << (b * 8);
4430 }
4431 Some(imm)
4432 }
4433 }
4434
4435 impl Foldable for OpPrmt {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)4436 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
4437 let srcs = [
4438 f.get_u32_src(self, &self.srcs[0]),
4439 f.get_u32_src(self, &self.srcs[1]),
4440 ];
4441 let sel = f.get_u32_src(self, &self.sel);
4442
4443 assert!(self.mode == PrmtMode::Index);
4444 let sel = PrmtSel(sel as u16);
4445
4446 let mut dst = 0_u32;
4447 for b in 0..4 {
4448 let sel_byte = sel.get(b);
4449 let src = srcs[sel_byte.src()];
4450 let sb = sel_byte.fold_u32(src);
4451 dst |= u32::from(sb) << (b * 8);
4452 }
4453
4454 f.set_u32_dst(self, &self.dst, dst);
4455 }
4456 }
4457
4458 impl DisplayOp for OpPrmt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4459 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4460 write!(
4461 f,
4462 "prmt{} {} [{}] {}",
4463 self.mode, self.srcs[0], self.sel, self.srcs[1],
4464 )
4465 }
4466 }
4467 impl_display_for_op!(OpPrmt);
4468
4469 #[repr(C)]
4470 #[derive(SrcsAsSlice, DstsAsSlice)]
4471 pub struct OpSel {
4472 #[dst_type(GPR)]
4473 pub dst: Dst,
4474
4475 #[src_type(Pred)]
4476 pub cond: Src,
4477
4478 #[src_type(ALU)]
4479 pub srcs: [Src; 2],
4480 }
4481
4482 impl DisplayOp for OpSel {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4483 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4484 write!(f, "sel {} {} {}", self.cond, self.srcs[0], self.srcs[1],)
4485 }
4486 }
4487 impl_display_for_op!(OpSel);
4488
4489 #[repr(C)]
4490 #[derive(SrcsAsSlice, DstsAsSlice)]
4491 pub struct OpShfl {
4492 #[dst_type(GPR)]
4493 pub dst: Dst,
4494
4495 #[dst_type(Pred)]
4496 pub in_bounds: Dst,
4497
4498 #[src_type(SSA)]
4499 pub src: Src,
4500
4501 #[src_type(ALU)]
4502 pub lane: Src,
4503
4504 #[src_type(ALU)]
4505 pub c: Src,
4506
4507 pub op: ShflOp,
4508 }
4509
4510 impl DisplayOp for OpShfl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4511 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4512 write!(f, "shfl.{} {} {} {}", self.op, self.src, self.lane, self.c)
4513 }
4514 }
4515 impl_display_for_op!(OpShfl);
4516
4517 #[repr(C)]
4518 #[derive(SrcsAsSlice, DstsAsSlice)]
4519 pub struct OpPLop3 {
4520 #[dst_type(Pred)]
4521 pub dsts: [Dst; 2],
4522
4523 #[src_type(Pred)]
4524 pub srcs: [Src; 3],
4525
4526 pub ops: [LogicOp3; 2],
4527 }
4528
4529 impl DisplayOp for OpPLop3 {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4530 fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4531 write!(f, "{} {}", self.dsts[0], self.dsts[1])
4532 }
4533
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4534 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4535 write!(
4536 f,
4537 "plop3 {} {} {} {} {}",
4538 self.srcs[0], self.srcs[1], self.srcs[2], self.ops[0], self.ops[1],
4539 )
4540 }
4541 }
4542 impl_display_for_op!(OpPLop3);
4543
4544 #[repr(C)]
4545 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
4546 pub struct OpPSetP {
4547 #[dst_type(Pred)]
4548 pub dsts: [Dst; 2],
4549
4550 pub ops: [PredSetOp; 2],
4551
4552 #[src_type(Pred)]
4553 pub srcs: [Src; 3],
4554 }
4555
4556 impl Foldable for OpPSetP {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)4557 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
4558 let srcs = [
4559 f.get_pred_src(self, &self.srcs[0]),
4560 f.get_pred_src(self, &self.srcs[1]),
4561 f.get_pred_src(self, &self.srcs[2]),
4562 ];
4563
4564 let tmp = self.ops[0].eval(srcs[0], srcs[1]);
4565 let dst0 = self.ops[1].eval(srcs[2], tmp);
4566
4567 let tmp = self.ops[0].eval(!srcs[0], srcs[1]);
4568 let dst1 = self.ops[1].eval(srcs[2], tmp);
4569
4570 f.set_pred_dst(self, &self.dsts[0], dst0);
4571 f.set_pred_dst(self, &self.dsts[1], dst1);
4572 }
4573 }
4574
4575 impl DisplayOp for OpPSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4576 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4577 write!(
4578 f,
4579 "psetp{}{} {} {} {}",
4580 self.ops[0], self.ops[1], self.srcs[0], self.srcs[1], self.srcs[2],
4581 )
4582 }
4583 }
4584
4585 #[repr(C)]
4586 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
4587 pub struct OpPopC {
4588 #[dst_type(GPR)]
4589 pub dst: Dst,
4590
4591 #[src_type(B32)]
4592 pub src: Src,
4593 }
4594
4595 impl Foldable for OpPopC {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)4596 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
4597 let src = f.get_u32_bnot_src(self, &self.src);
4598 let dst = src.count_ones();
4599 f.set_u32_dst(self, &self.dst, dst);
4600 }
4601 }
4602
4603 impl DisplayOp for OpPopC {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4604 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4605 write!(f, "popc {}", self.src,)
4606 }
4607 }
4608 impl_display_for_op!(OpPopC);
4609
4610 #[repr(C)]
4611 #[derive(SrcsAsSlice, DstsAsSlice)]
4612 pub struct OpR2UR {
4613 #[dst_type(GPR)]
4614 pub dst: Dst,
4615
4616 #[src_type(GPR)]
4617 pub src: Src,
4618 }
4619
4620 impl DisplayOp for OpR2UR {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4621 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4622 write!(f, "r2ur {}", self.src)
4623 }
4624 }
4625 impl_display_for_op!(OpR2UR);
4626
4627 #[repr(C)]
4628 #[derive(SrcsAsSlice, DstsAsSlice)]
4629 pub struct OpTex {
4630 pub dsts: [Dst; 2],
4631 pub fault: Dst,
4632
4633 #[src_type(SSA)]
4634 pub srcs: [Src; 2],
4635
4636 pub dim: TexDim,
4637 pub lod_mode: TexLodMode,
4638 pub z_cmpr: bool,
4639 pub offset: bool,
4640 pub mask: u8,
4641 }
4642
4643 impl DisplayOp for OpTex {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4644 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4645 write!(f, "tex.b{}", self.dim)?;
4646 if self.lod_mode != TexLodMode::Auto {
4647 write!(f, ".{}", self.lod_mode)?;
4648 }
4649 if self.offset {
4650 write!(f, ".aoffi")?;
4651 }
4652 if self.z_cmpr {
4653 write!(f, ".dc")?;
4654 }
4655 write!(f, " {} {}", self.srcs[0], self.srcs[1])
4656 }
4657 }
4658 impl_display_for_op!(OpTex);
4659
4660 #[repr(C)]
4661 #[derive(SrcsAsSlice, DstsAsSlice)]
4662 pub struct OpTld {
4663 pub dsts: [Dst; 2],
4664 pub fault: Dst,
4665
4666 #[src_type(SSA)]
4667 pub srcs: [Src; 2],
4668
4669 pub dim: TexDim,
4670 pub is_ms: bool,
4671 pub lod_mode: TexLodMode,
4672 pub offset: bool,
4673 pub mask: u8,
4674 }
4675
4676 impl DisplayOp for OpTld {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4677 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4678 write!(f, "tld.b{}", self.dim)?;
4679 if self.lod_mode != TexLodMode::Auto {
4680 write!(f, ".{}", self.lod_mode)?;
4681 }
4682 if self.offset {
4683 write!(f, ".aoffi")?;
4684 }
4685 if self.is_ms {
4686 write!(f, ".ms")?;
4687 }
4688 write!(f, " {} {}", self.srcs[0], self.srcs[1])
4689 }
4690 }
4691 impl_display_for_op!(OpTld);
4692
4693 #[repr(C)]
4694 #[derive(SrcsAsSlice, DstsAsSlice)]
4695 pub struct OpTld4 {
4696 pub dsts: [Dst; 2],
4697 pub fault: Dst,
4698
4699 #[src_type(SSA)]
4700 pub srcs: [Src; 2],
4701
4702 pub dim: TexDim,
4703 pub comp: u8,
4704 pub offset_mode: Tld4OffsetMode,
4705 pub z_cmpr: bool,
4706 pub mask: u8,
4707 }
4708
4709 impl DisplayOp for OpTld4 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4710 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4711 write!(f, "tld4.g.b{}", self.dim)?;
4712 if self.offset_mode != Tld4OffsetMode::None {
4713 write!(f, ".{}", self.offset_mode)?;
4714 }
4715 write!(f, " {} {}", self.srcs[0], self.srcs[1])
4716 }
4717 }
4718 impl_display_for_op!(OpTld4);
4719
4720 #[repr(C)]
4721 #[derive(SrcsAsSlice, DstsAsSlice)]
4722 pub struct OpTmml {
4723 pub dsts: [Dst; 2],
4724
4725 #[src_type(SSA)]
4726 pub srcs: [Src; 2],
4727
4728 pub dim: TexDim,
4729 pub mask: u8,
4730 }
4731
4732 impl DisplayOp for OpTmml {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4733 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4734 write!(
4735 f,
4736 "tmml.b.lod{} {} {}",
4737 self.dim, self.srcs[0], self.srcs[1]
4738 )
4739 }
4740 }
4741 impl_display_for_op!(OpTmml);
4742
4743 #[repr(C)]
4744 #[derive(SrcsAsSlice, DstsAsSlice)]
4745 pub struct OpTxd {
4746 pub dsts: [Dst; 2],
4747 pub fault: Dst,
4748
4749 #[src_type(SSA)]
4750 pub srcs: [Src; 2],
4751
4752 pub dim: TexDim,
4753 pub offset: bool,
4754 pub mask: u8,
4755 }
4756
4757 impl DisplayOp for OpTxd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4758 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4759 write!(f, "txd.b{}", self.dim)?;
4760 if self.offset {
4761 write!(f, ".aoffi")?;
4762 }
4763 write!(f, " {} {}", self.srcs[0], self.srcs[1])
4764 }
4765 }
4766 impl_display_for_op!(OpTxd);
4767
4768 #[repr(C)]
4769 #[derive(SrcsAsSlice, DstsAsSlice)]
4770 pub struct OpTxq {
4771 pub dsts: [Dst; 2],
4772
4773 #[src_type(SSA)]
4774 pub src: Src,
4775
4776 pub query: TexQuery,
4777 pub mask: u8,
4778 }
4779
4780 impl DisplayOp for OpTxq {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4781 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4782 write!(f, "txq.b {} {}", self.src, self.query)
4783 }
4784 }
4785 impl_display_for_op!(OpTxq);
4786
4787 #[repr(C)]
4788 #[derive(SrcsAsSlice, DstsAsSlice)]
4789 pub struct OpSuLd {
4790 pub dst: Dst,
4791 pub fault: Dst,
4792
4793 pub image_dim: ImageDim,
4794 pub mem_order: MemOrder,
4795 pub mem_eviction_priority: MemEvictionPriority,
4796 pub mask: u8,
4797
4798 #[src_type(GPR)]
4799 pub handle: Src,
4800
4801 #[src_type(SSA)]
4802 pub coord: Src,
4803 }
4804
4805 impl DisplayOp for OpSuLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4806 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4807 write!(
4808 f,
4809 "suld.p{}{}{} [{}] {}",
4810 self.image_dim,
4811 self.mem_order,
4812 self.mem_eviction_priority,
4813 self.coord,
4814 self.handle,
4815 )
4816 }
4817 }
4818 impl_display_for_op!(OpSuLd);
4819
4820 #[repr(C)]
4821 #[derive(SrcsAsSlice, DstsAsSlice)]
4822 pub struct OpSuSt {
4823 pub image_dim: ImageDim,
4824 pub mem_order: MemOrder,
4825 pub mem_eviction_priority: MemEvictionPriority,
4826 pub mask: u8,
4827
4828 #[src_type(GPR)]
4829 pub handle: Src,
4830
4831 #[src_type(SSA)]
4832 pub coord: Src,
4833
4834 #[src_type(SSA)]
4835 pub data: Src,
4836 }
4837
4838 impl DisplayOp for OpSuSt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4839 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4840 write!(
4841 f,
4842 "sust.p{}{}{} [{}] {} {}",
4843 self.image_dim,
4844 self.mem_order,
4845 self.mem_eviction_priority,
4846 self.coord,
4847 self.data,
4848 self.handle,
4849 )
4850 }
4851 }
4852 impl_display_for_op!(OpSuSt);
4853
4854 #[repr(C)]
4855 #[derive(SrcsAsSlice, DstsAsSlice)]
4856 pub struct OpSuAtom {
4857 pub dst: Dst,
4858 pub fault: Dst,
4859
4860 pub image_dim: ImageDim,
4861
4862 pub atom_op: AtomOp,
4863 pub atom_type: AtomType,
4864
4865 pub mem_order: MemOrder,
4866 pub mem_eviction_priority: MemEvictionPriority,
4867
4868 #[src_type(GPR)]
4869 pub handle: Src,
4870
4871 #[src_type(SSA)]
4872 pub coord: Src,
4873
4874 #[src_type(SSA)]
4875 pub data: Src,
4876 }
4877
4878 impl DisplayOp for OpSuAtom {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4879 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4880 write!(
4881 f,
4882 "suatom.p{}{}{}{}{} [{}] {} {}",
4883 self.image_dim,
4884 self.atom_op,
4885 self.atom_type,
4886 self.mem_order,
4887 self.mem_eviction_priority,
4888 self.coord,
4889 self.data,
4890 self.handle,
4891 )
4892 }
4893 }
4894 impl_display_for_op!(OpSuAtom);
4895
4896 #[repr(C)]
4897 #[derive(SrcsAsSlice, DstsAsSlice)]
4898 pub struct OpLd {
4899 pub dst: Dst,
4900
4901 #[src_type(GPR)]
4902 pub addr: Src,
4903
4904 pub offset: i32,
4905 pub access: MemAccess,
4906 }
4907
4908 impl DisplayOp for OpLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4909 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4910 write!(f, "ld{} [{}", self.access, self.addr)?;
4911 if self.offset > 0 {
4912 write!(f, "+{:#x}", self.offset)?;
4913 }
4914 write!(f, "]")
4915 }
4916 }
4917 impl_display_for_op!(OpLd);
4918
/// Mode modifier printed on `ldc` ops.
///
/// `Indexed` prints nothing; the other modes print `.il`, `.is` and `.isl`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum LdcMode {
    Indexed,
    IndexedLinear,
    IndexedSegmented,
    IndexedSegmentedLinear,
}

impl fmt::Display for LdcMode {
    /// Writes the mnemonic suffix for this mode (nothing for `Indexed`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            // The default mode has no suffix, so nothing is written at all.
            LdcMode::Indexed => return Ok(()),
            LdcMode::IndexedLinear => ".il",
            LdcMode::IndexedSegmented => ".is",
            LdcMode::IndexedSegmentedLinear => ".isl",
        };
        f.write_str(suffix)
    }
}
4938
4939 #[repr(C)]
4940 #[derive(SrcsAsSlice, DstsAsSlice)]
4941 pub struct OpLdc {
4942 pub dst: Dst,
4943
4944 #[src_type(ALU)]
4945 pub cb: Src,
4946
4947 #[src_type(GPR)]
4948 pub offset: Src,
4949
4950 pub mode: LdcMode,
4951 pub mem_type: MemType,
4952 }
4953
4954 impl DisplayOp for OpLdc {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4955 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4956 let SrcRef::CBuf(cb) = self.cb.src_ref else {
4957 panic!("Not a cbuf");
4958 };
4959 write!(f, "ldc{}{} {}[", self.mode, self.mem_type, cb.buf)?;
4960 if self.offset.is_zero() {
4961 write!(f, "+{:#x}", cb.offset)?;
4962 } else if cb.offset == 0 {
4963 write!(f, "{}", self.offset)?;
4964 } else {
4965 write!(f, "{}+{:#x}", self.offset, cb.offset)?;
4966 }
4967 write!(f, "]")
4968 }
4969 }
4970 impl_display_for_op!(OpLdc);
4971
4972 #[repr(C)]
4973 #[derive(SrcsAsSlice, DstsAsSlice)]
4974 pub struct OpSt {
4975 #[src_type(GPR)]
4976 pub addr: Src,
4977
4978 #[src_type(SSA)]
4979 pub data: Src,
4980
4981 pub offset: i32,
4982 pub access: MemAccess,
4983 }
4984
4985 impl DisplayOp for OpSt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4986 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4987 write!(f, "st{} [{}", self.access, self.addr)?;
4988 if self.offset > 0 {
4989 write!(f, "+{:#x}", self.offset)?;
4990 }
4991 write!(f, "] {}", self.data)
4992 }
4993 }
4994 impl_display_for_op!(OpSt);
4995
4996 #[repr(C)]
4997 #[derive(SrcsAsSlice, DstsAsSlice)]
4998 pub struct OpAtom {
4999 pub dst: Dst,
5000
5001 #[src_type(GPR)]
5002 pub addr: Src,
5003
5004 #[src_type(GPR)]
5005 pub cmpr: Src,
5006
5007 #[src_type(SSA)]
5008 pub data: Src,
5009
5010 pub atom_op: AtomOp,
5011 pub atom_type: AtomType,
5012
5013 pub addr_offset: i32,
5014
5015 pub mem_space: MemSpace,
5016 pub mem_order: MemOrder,
5017 pub mem_eviction_priority: MemEvictionPriority,
5018 }
5019
5020 impl DisplayOp for OpAtom {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5021 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5022 write!(
5023 f,
5024 "atom{}{}{}{}{}",
5025 self.atom_op,
5026 self.atom_type,
5027 self.mem_space,
5028 self.mem_order,
5029 self.mem_eviction_priority,
5030 )?;
5031 write!(f, " [")?;
5032 if !self.addr.is_zero() {
5033 write!(f, "{}", self.addr)?;
5034 }
5035 if self.addr_offset > 0 {
5036 if !self.addr.is_zero() {
5037 write!(f, "+")?;
5038 }
5039 write!(f, "{:#x}", self.addr_offset)?;
5040 }
5041 write!(f, "]")?;
5042 if self.atom_op == AtomOp::CmpExch(AtomCmpSrc::Separate) {
5043 write!(f, " {}", self.cmpr)?;
5044 }
5045 write!(f, " {}", self.data)
5046 }
5047 }
5048 impl_display_for_op!(OpAtom);
5049
5050 #[repr(C)]
5051 #[derive(SrcsAsSlice, DstsAsSlice)]
5052 pub struct OpAL2P {
5053 pub dst: Dst,
5054
5055 #[src_type(GPR)]
5056 pub offset: Src,
5057
5058 pub access: AttrAccess,
5059 }
5060
5061 impl DisplayOp for OpAL2P {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5062 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5063 write!(f, "al2p")?;
5064 if self.access.output {
5065 write!(f, ".o")?;
5066 }
5067 if self.access.patch {
5068 write!(f, ".p")?;
5069 }
5070 write!(f, " a[{:#x}", self.access.addr)?;
5071 if !self.offset.is_zero() {
5072 write!(f, "+{}", self.offset)?;
5073 }
5074 write!(f, "]")
5075 }
5076 }
5077 impl_display_for_op!(OpAL2P);
5078
5079 #[repr(C)]
5080 #[derive(SrcsAsSlice, DstsAsSlice)]
5081 pub struct OpALd {
5082 pub dst: Dst,
5083
5084 #[src_type(GPR)]
5085 pub vtx: Src,
5086
5087 #[src_type(GPR)]
5088 pub offset: Src,
5089
5090 pub access: AttrAccess,
5091 }
5092
5093 impl DisplayOp for OpALd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5094 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5095 write!(f, "ald")?;
5096 if self.access.output {
5097 write!(f, ".o")?;
5098 }
5099 if self.access.patch {
5100 write!(f, ".p")?;
5101 }
5102 if self.access.phys {
5103 write!(f, ".phys")?;
5104 }
5105 write!(f, " a")?;
5106 if !self.vtx.is_zero() {
5107 write!(f, "[{}]", self.vtx)?;
5108 }
5109 write!(f, "[{:#x}", self.access.addr)?;
5110 if !self.offset.is_zero() {
5111 write!(f, "+{}", self.offset)?;
5112 }
5113 write!(f, "]")
5114 }
5115 }
5116 impl_display_for_op!(OpALd);
5117
5118 #[repr(C)]
5119 #[derive(SrcsAsSlice, DstsAsSlice)]
5120 pub struct OpASt {
5121 #[src_type(GPR)]
5122 pub vtx: Src,
5123
5124 #[src_type(GPR)]
5125 pub offset: Src,
5126
5127 #[src_type(SSA)]
5128 pub data: Src,
5129
5130 pub access: AttrAccess,
5131 }
5132
5133 impl DisplayOp for OpASt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5134 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5135 write!(f, "ast")?;
5136 if self.access.patch {
5137 write!(f, ".p")?;
5138 }
5139 if self.access.phys {
5140 write!(f, ".phys")?;
5141 }
5142 write!(f, " a")?;
5143 if !self.vtx.is_zero() {
5144 write!(f, "[{}]", self.vtx)?;
5145 }
5146 write!(f, "[{:#x}", self.access.addr)?;
5147 if !self.offset.is_zero() {
5148 write!(f, "+{}", self.offset)?;
5149 }
5150 write!(f, "] {}", self.data)
5151 }
5152 }
5153 impl_display_for_op!(OpASt);
5154
5155 #[repr(C)]
5156 #[derive(SrcsAsSlice, DstsAsSlice)]
5157 pub struct OpIpa {
5158 pub dst: Dst,
5159 pub addr: u16,
5160 pub freq: InterpFreq,
5161 pub loc: InterpLoc,
5162 pub inv_w: Src,
5163 pub offset: Src,
5164 }
5165
5166 impl DisplayOp for OpIpa {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5167 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5168 write!(
5169 f,
5170 "ipa{}{} a[{:#x}] {}",
5171 self.freq, self.loc, self.addr, self.inv_w
5172 )?;
5173 if self.loc == InterpLoc::Offset {
5174 write!(f, " {}", self.offset)?;
5175 }
5176 Ok(())
5177 }
5178 }
5179 impl_display_for_op!(OpIpa);
5180
5181 #[repr(C)]
5182 #[derive(SrcsAsSlice, DstsAsSlice)]
5183 pub struct OpLdTram {
5184 pub dst: Dst,
5185 pub addr: u16,
5186 pub use_c: bool,
5187 }
5188
5189 impl DisplayOp for OpLdTram {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5190 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5191 write!(f, "ldtram")?;
5192 if self.use_c {
5193 write!(f, ".c")?;
5194 } else {
5195 write!(f, ".ab")?;
5196 }
5197 write!(f, " a[{:#x}]", self.addr)?;
5198 Ok(())
5199 }
5200 }
5201 impl_display_for_op!(OpLdTram);
5202
/// Cache-control operation kinds.
#[allow(dead_code)]
#[derive(Copy, Clone, Debug)]
pub enum CCtlOp {
    Qry1, // Only available pre-Volta
    PF1,
    PF1_5, // Only available pre-Volta
    PF2,
    WB,
    IV,
    IVAll,
    RS,
    RSLB,   // Only available pre-Volta
    IVAllP, // Only available on Volta+
    WBAll,  // Only available on Volta+
    WBAllP, // Only available on Volta+
}

impl CCtlOp {
    /// Returns `true` for the `*All` and `*AllP` variants and `false` for
    /// every other variant.
    pub fn is_all(&self) -> bool {
        // Every variant is listed so that adding one forces a decision here.
        match self {
            CCtlOp::IVAll
            | CCtlOp::IVAllP
            | CCtlOp::WBAll
            | CCtlOp::WBAllP => true,
            CCtlOp::Qry1
            | CCtlOp::PF1
            | CCtlOp::PF1_5
            | CCtlOp::PF2
            | CCtlOp::WB
            | CCtlOp::IV
            | CCtlOp::RS
            | CCtlOp::RSLB => false,
        }
    }
}

impl fmt::Display for CCtlOp {
    /// Writes the lower-case name of the operation (no leading dot).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            CCtlOp::Qry1 => "qry1",
            CCtlOp::PF1 => "pf1",
            CCtlOp::PF1_5 => "pf1.5",
            CCtlOp::PF2 => "pf2",
            CCtlOp::WB => "wb",
            CCtlOp::IV => "iv",
            CCtlOp::IVAll => "ivall",
            CCtlOp::RS => "rs",
            CCtlOp::RSLB => "rslb",
            CCtlOp::IVAllP => "ivallp",
            CCtlOp::WBAll => "wball",
            CCtlOp::WBAllP => "wballp",
        };
        f.write_str(name)
    }
}
5256
5257 #[repr(C)]
5258 #[derive(SrcsAsSlice, DstsAsSlice)]
5259 pub struct OpCCtl {
5260 pub op: CCtlOp,
5261
5262 pub mem_space: MemSpace,
5263
5264 #[src_type(GPR)]
5265 pub addr: Src,
5266
5267 pub addr_offset: i32,
5268 }
5269
5270 impl DisplayOp for OpCCtl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5271 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5272 write!(f, "cctl{}", self.mem_space)?;
5273 if !self.op.is_all() {
5274 write!(f, " [{}", self.addr)?;
5275 if self.addr_offset > 0 {
5276 write!(f, "+{:#x}", self.addr_offset)?;
5277 }
5278 write!(f, "]")?;
5279 }
5280 Ok(())
5281 }
5282 }
5283 impl_display_for_op!(OpCCtl);
5284
5285 #[repr(C)]
5286 #[derive(SrcsAsSlice, DstsAsSlice)]
5287 pub struct OpMemBar {
5288 pub scope: MemScope,
5289 }
5290
5291 impl DisplayOp for OpMemBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5292 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5293 write!(f, "membar.sc.{}", self.scope)
5294 }
5295 }
5296 impl_display_for_op!(OpMemBar);
5297
5298 #[repr(C)]
5299 #[derive(SrcsAsSlice, DstsAsSlice)]
5300 pub struct OpBClear {
5301 pub dst: Dst,
5302 }
5303
5304 impl DisplayOp for OpBClear {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5305 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5306 write!(f, "bclear")
5307 }
5308 }
5309 impl_display_for_op!(OpBClear);
5310
5311 #[repr(C)]
5312 #[derive(SrcsAsSlice, DstsAsSlice)]
5313 pub struct OpBMov {
5314 pub dst: Dst,
5315 pub src: Src,
5316 pub clear: bool,
5317 }
5318
5319 impl DisplayOp for OpBMov {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5320 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5321 write!(f, "bmov.32")?;
5322 if self.clear {
5323 write!(f, ".clear")?;
5324 }
5325 write!(f, " {}", self.src)
5326 }
5327 }
5328 impl_display_for_op!(OpBMov);
5329
5330 #[repr(C)]
5331 #[derive(SrcsAsSlice, DstsAsSlice)]
5332 pub struct OpBreak {
5333 #[dst_type(Bar)]
5334 pub bar_out: Dst,
5335
5336 #[src_type(Bar)]
5337 pub bar_in: Src,
5338
5339 #[src_type(Pred)]
5340 pub cond: Src,
5341 }
5342
5343 impl DisplayOp for OpBreak {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5344 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5345 write!(f, "break {} {}", self.bar_in, self.cond)
5346 }
5347 }
5348 impl_display_for_op!(OpBreak);
5349
5350 #[repr(C)]
5351 #[derive(SrcsAsSlice, DstsAsSlice)]
5352 pub struct OpBSSy {
5353 #[dst_type(Bar)]
5354 pub bar_out: Dst,
5355
5356 #[src_type(Pred)]
5357 pub bar_in: Src,
5358
5359 #[src_type(Pred)]
5360 pub cond: Src,
5361
5362 pub target: Label,
5363 }
5364
5365 impl DisplayOp for OpBSSy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5366 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5367 write!(f, "bssy {} {} {}", self.bar_in, self.cond, self.target)
5368 }
5369 }
5370 impl_display_for_op!(OpBSSy);
5371
5372 #[repr(C)]
5373 #[derive(SrcsAsSlice, DstsAsSlice)]
5374 pub struct OpBSync {
5375 #[src_type(Bar)]
5376 pub bar: Src,
5377
5378 #[src_type(Pred)]
5379 pub cond: Src,
5380 }
5381
5382 impl DisplayOp for OpBSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5383 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5384 write!(f, "bsync {} {}", self.bar, self.cond)
5385 }
5386 }
5387 impl_display_for_op!(OpBSync);
5388
5389 #[repr(C)]
5390 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
5391 pub struct OpBra {
5392 pub target: Label,
5393 }
5394
5395 impl DisplayOp for OpBra {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5396 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5397 write!(f, "bra {}", self.target)
5398 }
5399 }
5400 impl_display_for_op!(OpBra);
5401
5402 #[repr(C)]
5403 #[derive(SrcsAsSlice, DstsAsSlice)]
5404 pub struct OpSSy {
5405 pub target: Label,
5406 }
5407
5408 impl DisplayOp for OpSSy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5409 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5410 write!(f, "ssy {}", self.target)
5411 }
5412 }
5413 impl_display_for_op!(OpSSy);
5414
5415 #[repr(C)]
5416 #[derive(SrcsAsSlice, DstsAsSlice)]
5417 pub struct OpSync {
5418 pub target: Label,
5419 }
5420
5421 impl DisplayOp for OpSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5422 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5423 write!(f, "sync {}", self.target)
5424 }
5425 }
5426 impl_display_for_op!(OpSync);
5427
5428 #[repr(C)]
5429 #[derive(SrcsAsSlice, DstsAsSlice)]
5430 pub struct OpBrk {
5431 pub target: Label,
5432 }
5433
5434 impl DisplayOp for OpBrk {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5435 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5436 write!(f, "brk {}", self.target)
5437 }
5438 }
5439 impl_display_for_op!(OpBrk);
5440
5441 #[repr(C)]
5442 #[derive(SrcsAsSlice, DstsAsSlice)]
5443 pub struct OpPBk {
5444 pub target: Label,
5445 }
5446
5447 impl DisplayOp for OpPBk {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5448 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5449 write!(f, "pbk {}", self.target)
5450 }
5451 }
5452 impl_display_for_op!(OpPBk);
5453
5454 #[repr(C)]
5455 #[derive(SrcsAsSlice, DstsAsSlice)]
5456 pub struct OpCont {
5457 pub target: Label,
5458 }
5459
5460 impl DisplayOp for OpCont {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5461 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5462 write!(f, "cont {}", self.target)
5463 }
5464 }
5465 impl_display_for_op!(OpCont);
5466
5467 #[repr(C)]
5468 #[derive(SrcsAsSlice, DstsAsSlice)]
5469 pub struct OpPCnt {
5470 pub target: Label,
5471 }
5472
5473 impl DisplayOp for OpPCnt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5474 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5475 write!(f, "pcnt {}", self.target)
5476 }
5477 }
5478 impl_display_for_op!(OpPCnt);
5479
5480 #[repr(C)]
5481 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
5482 pub struct OpExit {}
5483
5484 impl DisplayOp for OpExit {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5485 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5486 write!(f, "exit")
5487 }
5488 }
5489 impl_display_for_op!(OpExit);
5490
5491 #[repr(C)]
5492 #[derive(SrcsAsSlice, DstsAsSlice)]
5493 pub struct OpWarpSync {
5494 pub mask: u32,
5495 }
5496
5497 impl DisplayOp for OpWarpSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5498 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5499 write!(f, "warpsync 0x{:x}", self.mask)
5500 }
5501 }
5502 impl_display_for_op!(OpWarpSync);
5503
5504 #[repr(C)]
5505 #[derive(SrcsAsSlice, DstsAsSlice)]
5506 pub struct OpBar {}
5507
5508 impl DisplayOp for OpBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5509 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5510 write!(f, "bar.sync")
5511 }
5512 }
5513 impl_display_for_op!(OpBar);
5514
5515 #[repr(C)]
5516 #[derive(SrcsAsSlice, DstsAsSlice)]
5517 pub struct OpCS2R {
5518 pub dst: Dst,
5519 pub idx: u8,
5520 }
5521
5522 impl DisplayOp for OpCS2R {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5523 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5524 write!(f, "cs2r sr[{:#x}]", self.idx)
5525 }
5526 }
5527 impl_display_for_op!(OpCS2R);
5528
5529 #[repr(C)]
5530 #[derive(SrcsAsSlice, DstsAsSlice)]
5531 pub struct OpIsberd {
5532 #[dst_type(GPR)]
5533 pub dst: Dst,
5534
5535 #[src_type(SSA)]
5536 pub idx: Src,
5537 }
5538
5539 impl DisplayOp for OpIsberd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5540 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5541 write!(f, "isberd [{}]", self.idx)
5542 }
5543 }
5544 impl_display_for_op!(OpIsberd);
5545
5546 #[repr(C)]
5547 #[derive(SrcsAsSlice, DstsAsSlice)]
5548 pub struct OpKill {}
5549
5550 impl DisplayOp for OpKill {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5551 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5552 write!(f, "kill")
5553 }
5554 }
5555 impl_display_for_op!(OpKill);
5556
5557 #[repr(C)]
5558 #[derive(SrcsAsSlice, DstsAsSlice)]
5559 pub struct OpNop {
5560 pub label: Option<Label>,
5561 }
5562
5563 impl DisplayOp for OpNop {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5564 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5565 write!(f, "nop")?;
5566 if let Some(label) = &self.label {
5567 write!(f, " {}", label)?;
5568 }
5569 Ok(())
5570 }
5571 }
5572 impl_display_for_op!(OpNop);
5573
/// Value selector printed as a dotted suffix (for example `.covmask`).
#[allow(dead_code)]
pub enum PixVal {
    MsCount,
    CovMask,
    Covered,
    Offset,
    CentroidOffset,
    MyIndex,
    InnerCoverage,
}

impl fmt::Display for PixVal {
    /// Writes the dotted, lower-case suffix for this selector.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            PixVal::MsCount => ".mscount",
            PixVal::CovMask => ".covmask",
            PixVal::Covered => ".covered",
            PixVal::Offset => ".offset",
            PixVal::CentroidOffset => ".centroid_offset",
            PixVal::MyIndex => ".my_index",
            PixVal::InnerCoverage => ".inner_coverage",
        };
        f.write_str(suffix)
    }
}
5598
5599 #[repr(C)]
5600 #[derive(SrcsAsSlice, DstsAsSlice)]
5601 pub struct OpPixLd {
5602 pub dst: Dst,
5603 pub val: PixVal,
5604 }
5605
5606 impl DisplayOp for OpPixLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5607 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5608 write!(f, "pixld{}", self.val)
5609 }
5610 }
5611 impl_display_for_op!(OpPixLd);
5612
5613 #[repr(C)]
5614 #[derive(SrcsAsSlice, DstsAsSlice)]
5615 pub struct OpS2R {
5616 pub dst: Dst,
5617 pub idx: u8,
5618 }
5619
5620 impl DisplayOp for OpS2R {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5621 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5622 write!(f, "s2r sr[{:#x}]", self.idx)
5623 }
5624 }
5625 impl_display_for_op!(OpS2R);
5626
/// Vote kind, printed as `any`, `all` or `eq`.
pub enum VoteOp {
    Any,
    All,
    Eq,
}

impl fmt::Display for VoteOp {
    /// Writes the lower-case name of the vote kind (no leading dot).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            VoteOp::Any => "any",
            VoteOp::All => "all",
            VoteOp::Eq => "eq",
        };
        f.write_str(name)
    }
}
5642
5643 #[repr(C)]
5644 #[derive(SrcsAsSlice, DstsAsSlice)]
5645 pub struct OpVote {
5646 pub op: VoteOp,
5647
5648 #[dst_type(GPR)]
5649 pub ballot: Dst,
5650
5651 #[dst_type(Pred)]
5652 pub vote: Dst,
5653
5654 #[src_type(Pred)]
5655 pub pred: Src,
5656 }
5657
5658 impl DisplayOp for OpVote {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5659 fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5660 if self.ballot.is_none() && self.vote.is_none() {
5661 write!(f, "none")
5662 } else {
5663 if !self.ballot.is_none() {
5664 write!(f, "{}", self.ballot)?;
5665 }
5666 if !self.vote.is_none() {
5667 write!(f, "{}", self.vote)?;
5668 }
5669 Ok(())
5670 }
5671 }
5672
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5673 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5674 write!(f, "vote.{} {}", self.op, self.pred)
5675 }
5676 }
5677 impl_display_for_op!(OpVote);
5678
5679 #[repr(C)]
5680 #[derive(SrcsAsSlice, DstsAsSlice)]
5681 pub struct OpUndef {
5682 pub dst: Dst,
5683 }
5684
5685 impl DisplayOp for OpUndef {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5686 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5687 write!(f, "undef {}", self.dst)
5688 }
5689 }
5690 impl_display_for_op!(OpUndef);
5691
5692 #[repr(C)]
5693 #[derive(SrcsAsSlice, DstsAsSlice)]
5694 pub struct OpSrcBar {
5695 pub src: Src,
5696 }
5697
5698 impl DisplayOp for OpSrcBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5699 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5700 write!(f, "src_bar {}", self.src)
5701 }
5702 }
5703 impl_display_for_op!(OpSrcBar);
5704
/// Two parallel vectors that are always kept at the same length.
///
/// Element `i` of the first vector is paired with element `i` of the
/// second; pairs are pushed, iterated and removed together.
pub struct VecPair<A, B> {
    a: Vec<A>,
    b: Vec<B>,
}

impl<A, B> VecPair<A, B> {
    /// Moves every pair out of `other` onto the end of `self`, leaving
    /// `other` empty.
    pub fn append(&mut self, other: &mut VecPair<A, B>) {
        self.a.append(&mut other.a);
        self.b.append(&mut other.b);
    }

    /// Returns `true` if there are no pairs.
    pub fn is_empty(&self) -> bool {
        debug_assert!(self.a.len() == self.b.len());
        self.a.is_empty()
    }

    /// Iterates over the pairs by shared reference, in insertion order.
    pub fn iter(&self) -> Zip<slice::Iter<'_, A>, slice::Iter<'_, B>> {
        debug_assert!(self.a.len() == self.b.len());
        self.a.iter().zip(self.b.iter())
    }

    /// Iterates over the pairs by mutable reference, in insertion order.
    pub fn iter_mut(
        &mut self,
    ) -> Zip<slice::IterMut<'_, A>, slice::IterMut<'_, B>> {
        debug_assert!(self.a.len() == self.b.len());
        self.a.iter_mut().zip(self.b.iter_mut())
    }

    /// Returns the number of pairs.
    pub fn len(&self) -> usize {
        debug_assert!(self.a.len() == self.b.len());
        self.a.len()
    }

    /// Creates an empty `VecPair`.
    pub fn new() -> Self {
        Self {
            a: Vec::new(),
            b: Vec::new(),
        }
    }

    /// Appends the pair `(a, b)`.
    pub fn push(&mut self, a: A, b: B) {
        debug_assert!(self.a.len() == self.b.len());
        self.a.push(a);
        self.b.push(b);
    }

    /// Keeps only the pairs for which `f` returns `true`.
    ///
    /// `f` is called exactly once per pair, in order, and the relative
    /// order of the surviving pairs is preserved.
    pub fn retain(&mut self, mut f: impl FnMut(&A, &B) -> bool) {
        debug_assert!(self.a.len() == self.b.len());
        let len = self.a.len();

        // Index of the next slot a surviving pair is compacted into.
        let mut new_len = 0_usize;
        for i in 0..len {
            if !f(&self.a[i], &self.b[i]) {
                continue;
            }
            if new_len != i {
                // Swapping moves the survivor down without cloning it; the
                // rejected pair lands at `i`, which is never visited again
                // and is dropped by the truncate below.
                self.a.swap(new_len, i);
                self.b.swap(new_len, i);
            }
            new_len += 1;
        }

        self.a.truncate(new_len);
        self.b.truncate(new_len);
    }
}
5787
/// Hands out consecutive `u32` indices, starting at zero.
pub struct PhiAllocator {
    count: u32,
}

impl PhiAllocator {
    /// Creates an allocator whose first returned index is `0`.
    pub fn new() -> PhiAllocator {
        Self { count: 0 }
    }

    /// Returns the next unused index and advances the counter by one.
    pub fn alloc(&mut self) -> u32 {
        let next = self.count + 1;
        // Store the bumped counter and hand back the value it had before.
        std::mem::replace(&mut self.count, next)
    }
}
5803
5804 #[repr(C)]
5805 #[derive(DstsAsSlice)]
5806 pub struct OpPhiSrcs {
5807 pub srcs: VecPair<u32, Src>,
5808 }
5809
5810 impl OpPhiSrcs {
new() -> OpPhiSrcs5811 pub fn new() -> OpPhiSrcs {
5812 OpPhiSrcs {
5813 srcs: VecPair::new(),
5814 }
5815 }
5816 }
5817
5818 impl AsSlice<Src> for OpPhiSrcs {
5819 type Attr = SrcType;
5820
as_slice(&self) -> &[Src]5821 fn as_slice(&self) -> &[Src] {
5822 &self.srcs.b
5823 }
5824
as_mut_slice(&mut self) -> &mut [Src]5825 fn as_mut_slice(&mut self) -> &mut [Src] {
5826 &mut self.srcs.b
5827 }
5828
attrs(&self) -> SrcTypeList5829 fn attrs(&self) -> SrcTypeList {
5830 SrcTypeList::Uniform(SrcType::GPR)
5831 }
5832 }
5833
5834 impl DisplayOp for OpPhiSrcs {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result5835 fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
5836 Ok(())
5837 }
5838
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5839 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5840 write!(f, "phi_src ")?;
5841 for (i, (id, src)) in self.srcs.iter().enumerate() {
5842 if i > 0 {
5843 write!(f, ", ")?;
5844 }
5845 write!(f, "φ{} = {}", id, src)?;
5846 }
5847 Ok(())
5848 }
5849 }
5850 impl_display_for_op!(OpPhiSrcs);
5851
5852 #[repr(C)]
5853 #[derive(SrcsAsSlice)]
5854 pub struct OpPhiDsts {
5855 pub dsts: VecPair<u32, Dst>,
5856 }
5857
5858 impl OpPhiDsts {
new() -> OpPhiDsts5859 pub fn new() -> OpPhiDsts {
5860 OpPhiDsts {
5861 dsts: VecPair::new(),
5862 }
5863 }
5864 }
5865
5866 impl AsSlice<Dst> for OpPhiDsts {
5867 type Attr = DstType;
5868
as_slice(&self) -> &[Dst]5869 fn as_slice(&self) -> &[Dst] {
5870 &self.dsts.b
5871 }
5872
as_mut_slice(&mut self) -> &mut [Dst]5873 fn as_mut_slice(&mut self) -> &mut [Dst] {
5874 &mut self.dsts.b
5875 }
5876
attrs(&self) -> DstTypeList5877 fn attrs(&self) -> DstTypeList {
5878 DstTypeList::Uniform(DstType::Vec)
5879 }
5880 }
5881
5882 impl DisplayOp for OpPhiDsts {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result5883 fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
5884 Ok(())
5885 }
5886
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5887 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5888 write!(f, "phi_dst ")?;
5889 for (i, (id, dst)) in self.dsts.iter().enumerate() {
5890 if i > 0 {
5891 write!(f, ", ")?;
5892 }
5893 write!(f, "{} = φ{}", dst, id)?;
5894 }
5895 Ok(())
5896 }
5897 }
5898 impl_display_for_op!(OpPhiDsts);
5899
5900 #[repr(C)]
5901 #[derive(SrcsAsSlice, DstsAsSlice)]
5902 pub struct OpCopy {
5903 pub dst: Dst,
5904 pub src: Src,
5905 }
5906
5907 impl DisplayOp for OpCopy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5908 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5909 write!(f, "copy {}", self.src)
5910 }
5911 }
5912 impl_display_for_op!(OpCopy);
5913
5914 #[repr(C)]
5915 #[derive(SrcsAsSlice, DstsAsSlice)]
5916 /// Copies a value and pins its destination in the register file
5917 pub struct OpPin {
5918 pub dst: Dst,
5919 #[src_type(SSA)]
5920 pub src: Src,
5921 }
5922
5923 impl DisplayOp for OpPin {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5924 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5925 write!(f, "pin {}", self.src)
5926 }
5927 }
5928 impl_display_for_op!(OpPin);
5929
5930 #[repr(C)]
5931 #[derive(SrcsAsSlice, DstsAsSlice)]
5932 /// Copies a pinned value to an unpinned value
5933 pub struct OpUnpin {
5934 pub dst: Dst,
5935 #[src_type(SSA)]
5936 pub src: Src,
5937 }
5938
5939 impl DisplayOp for OpUnpin {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5940 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5941 write!(f, "unpin {}", self.src)
5942 }
5943 }
5944 impl_display_for_op!(OpUnpin);
5945
5946 #[repr(C)]
5947 #[derive(SrcsAsSlice, DstsAsSlice)]
5948 pub struct OpSwap {
5949 pub dsts: [Dst; 2],
5950 pub srcs: [Src; 2],
5951 }
5952
5953 impl DisplayOp for OpSwap {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5954 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5955 write!(f, "swap {} {}", self.srcs[0], self.srcs[1])
5956 }
5957 }
5958 impl_display_for_op!(OpSwap);
5959
5960 #[repr(C)]
5961 pub struct OpParCopy {
5962 pub dsts_srcs: VecPair<Dst, Src>,
5963 pub tmp: Option<RegRef>,
5964 }
5965
5966 impl OpParCopy {
new() -> OpParCopy5967 pub fn new() -> OpParCopy {
5968 OpParCopy {
5969 dsts_srcs: VecPair::new(),
5970 tmp: None,
5971 }
5972 }
5973
is_empty(&self) -> bool5974 pub fn is_empty(&self) -> bool {
5975 self.dsts_srcs.is_empty()
5976 }
5977
push(&mut self, dst: Dst, src: Src)5978 pub fn push(&mut self, dst: Dst, src: Src) {
5979 self.dsts_srcs.push(dst, src);
5980 }
5981 }
5982
5983 impl AsSlice<Src> for OpParCopy {
5984 type Attr = SrcType;
5985
as_slice(&self) -> &[Src]5986 fn as_slice(&self) -> &[Src] {
5987 &self.dsts_srcs.b
5988 }
5989
as_mut_slice(&mut self) -> &mut [Src]5990 fn as_mut_slice(&mut self) -> &mut [Src] {
5991 &mut self.dsts_srcs.b
5992 }
5993
attrs(&self) -> SrcTypeList5994 fn attrs(&self) -> SrcTypeList {
5995 SrcTypeList::Uniform(SrcType::GPR)
5996 }
5997 }
5998
5999 impl AsSlice<Dst> for OpParCopy {
6000 type Attr = DstType;
6001
as_slice(&self) -> &[Dst]6002 fn as_slice(&self) -> &[Dst] {
6003 &self.dsts_srcs.a
6004 }
6005
as_mut_slice(&mut self) -> &mut [Dst]6006 fn as_mut_slice(&mut self) -> &mut [Dst] {
6007 &mut self.dsts_srcs.a
6008 }
6009
attrs(&self) -> DstTypeList6010 fn attrs(&self) -> DstTypeList {
6011 DstTypeList::Uniform(DstType::Vec)
6012 }
6013 }
6014
6015 impl DisplayOp for OpParCopy {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result6016 fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
6017 Ok(())
6018 }
6019
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6020 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6021 write!(f, "par_copy")?;
6022 for (i, (dst, src)) in self.dsts_srcs.iter().enumerate() {
6023 if i > 0 {
6024 write!(f, ",")?;
6025 }
6026 write!(f, " {} = {}", dst, src)?;
6027 }
6028 Ok(())
6029 }
6030 }
6031 impl_display_for_op!(OpParCopy);
6032
6033 #[repr(C)]
6034 #[derive(DstsAsSlice)]
6035 pub struct OpRegOut {
6036 pub srcs: Vec<Src>,
6037 }
6038
6039 impl AsSlice<Src> for OpRegOut {
6040 type Attr = SrcType;
6041
as_slice(&self) -> &[Src]6042 fn as_slice(&self) -> &[Src] {
6043 &self.srcs
6044 }
6045
as_mut_slice(&mut self) -> &mut [Src]6046 fn as_mut_slice(&mut self) -> &mut [Src] {
6047 &mut self.srcs
6048 }
6049
attrs(&self) -> SrcTypeList6050 fn attrs(&self) -> SrcTypeList {
6051 SrcTypeList::Uniform(SrcType::GPR)
6052 }
6053 }
6054
6055 impl DisplayOp for OpRegOut {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6056 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6057 write!(f, "reg_out {{")?;
6058 for (i, src) in self.srcs.iter().enumerate() {
6059 if i > 0 {
6060 write!(f, ",")?;
6061 }
6062 write!(f, " {}", src)?;
6063 }
6064 write!(f, " }}")
6065 }
6066 }
6067 impl_display_for_op!(OpRegOut);
6068
/// Selects the action performed by an `OpOut`.
///
/// The variants print as `emit`, `cut` and `emit_then_cut`.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum OutType {
    Emit,
    Cut,
    EmitThenCut,
}

impl fmt::Display for OutType {
    /// Writes the lower-case mnemonic of the variant; any width or fill in
    /// the format spec is ignored.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mnemonic = match self {
            OutType::Emit => "emit",
            OutType::Cut => "cut",
            OutType::EmitThenCut => "emit_then_cut",
        };
        f.write_str(mnemonic)
    }
}
6085
6086 #[repr(C)]
6087 #[derive(SrcsAsSlice, DstsAsSlice)]
6088 pub struct OpOut {
6089 pub dst: Dst,
6090
6091 #[src_type(SSA)]
6092 pub handle: Src,
6093
6094 #[src_type(ALU)]
6095 pub stream: Src,
6096
6097 pub out_type: OutType,
6098 }
6099
6100 impl DisplayOp for OpOut {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6101 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6102 write!(f, "out.{} {} {}", self.out_type, self.handle, self.stream)
6103 }
6104 }
6105 impl_display_for_op!(OpOut);
6106
6107 #[repr(C)]
6108 #[derive(SrcsAsSlice, DstsAsSlice)]
6109 pub struct OpOutFinal {
6110 #[src_type(SSA)]
6111 pub handle: Src,
6112 }
6113
6114 impl DisplayOp for OpOutFinal {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6115 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6116 write!(f, "out.final {{ {} }}", self.handle)
6117 }
6118 }
6119 impl_display_for_op!(OpOutFinal);
6120
6121 /// Describes an annotation on an instruction.
6122 #[repr(C)]
6123 #[derive(SrcsAsSlice, DstsAsSlice)]
6124 pub struct OpAnnotate {
6125 /// The annotation
6126 pub annotation: String,
6127 }
6128
6129 impl DisplayOp for OpAnnotate {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6130 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6131 write!(f, "// {}", self.annotation)
6132 }
6133 }
6134
6135 impl fmt::Display for OpAnnotate {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6136 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6137 self.fmt_op(f)
6138 }
6139 }
6140
/// Every operation an [`Instr`] can carry.
///
/// Each variant wraps the struct holding that operation's operands and
/// modifiers. The variants from `Undef` through `Annotate`, except `Out` and
/// `OutFinal`, are the ones `Instr::has_fixed_latency` rejects as "Not a
/// hardware opcode".
#[derive(DisplayOp, DstsAsSlice, SrcsAsSlice, FromVariants)]
pub enum Op {
    FAdd(OpFAdd),
    FFma(OpFFma),
    FMnMx(OpFMnMx),
    FMul(OpFMul),
    Rro(OpRro),
    MuFu(OpMuFu),
    FSet(OpFSet),
    FSetP(OpFSetP),
    FSwzAdd(OpFSwzAdd),
    DAdd(OpDAdd),
    DFma(OpDFma),
    DMnMx(OpDMnMx),
    DMul(OpDMul),
    DSetP(OpDSetP),
    HAdd2(OpHAdd2),
    HFma2(OpHFma2),
    HMul2(OpHMul2),
    HSet2(OpHSet2),
    HSetP2(OpHSetP2),
    HMnMx2(OpHMnMx2),
    BMsk(OpBMsk),
    BRev(OpBRev),
    Bfe(OpBfe),
    Flo(OpFlo),
    IAbs(OpIAbs),
    IAdd2(OpIAdd2),
    IAdd2X(OpIAdd2X),
    IAdd3(OpIAdd3),
    IAdd3X(OpIAdd3X),
    IDp4(OpIDp4),
    IMad(OpIMad),
    IMad64(OpIMad64),
    IMul(OpIMul),
    IMnMx(OpIMnMx),
    ISetP(OpISetP),
    Lop2(OpLop2),
    Lop3(OpLop3),
    PopC(OpPopC),
    Shf(OpShf),
    Shl(OpShl),
    Shr(OpShr),
    F2F(OpF2F),
    F2FP(OpF2FP),
    F2I(OpF2I),
    I2F(OpI2F),
    I2I(OpI2I),
    FRnd(OpFRnd),
    Mov(OpMov),
    Prmt(OpPrmt),
    Sel(OpSel),
    Shfl(OpShfl),
    PLop3(OpPLop3),
    PSetP(OpPSetP),
    R2UR(OpR2UR),
    Tex(OpTex),
    Tld(OpTld),
    Tld4(OpTld4),
    Tmml(OpTmml),
    Txd(OpTxd),
    Txq(OpTxq),
    SuLd(OpSuLd),
    SuSt(OpSuSt),
    SuAtom(OpSuAtom),
    Ld(OpLd),
    Ldc(OpLdc),
    St(OpSt),
    Atom(OpAtom),
    AL2P(OpAL2P),
    ALd(OpALd),
    ASt(OpASt),
    Ipa(OpIpa),
    LdTram(OpLdTram),
    CCtl(OpCCtl),
    MemBar(OpMemBar),
    BClear(OpBClear),
    BMov(OpBMov),
    Break(OpBreak),
    BSSy(OpBSSy),
    BSync(OpBSync),
    Bra(OpBra),
    SSy(OpSSy),
    Sync(OpSync),
    Brk(OpBrk),
    PBk(OpPBk),
    Cont(OpCont),
    PCnt(OpPCnt),
    Exit(OpExit),
    WarpSync(OpWarpSync),
    Bar(OpBar),
    CS2R(OpCS2R),
    Isberd(OpIsberd),
    Kill(OpKill),
    Nop(OpNop),
    PixLd(OpPixLd),
    S2R(OpS2R),
    Vote(OpVote),
    Undef(OpUndef),
    SrcBar(OpSrcBar),
    PhiSrcs(OpPhiSrcs),
    PhiDsts(OpPhiDsts),
    Copy(OpCopy),
    Pin(OpPin),
    Unpin(OpUnpin),
    Swap(OpSwap),
    ParCopy(OpParCopy),
    RegOut(OpRegOut),
    Out(OpOut),
    OutFinal(OpOutFinal),
    Annotate(OpAnnotate),
}
impl_display_for_op!(Op);
6254
6255 impl Op {
is_branch(&self) -> bool6256 pub fn is_branch(&self) -> bool {
6257 match self {
6258 Op::Bra(_)
6259 | Op::Sync(_)
6260 | Op::Brk(_)
6261 | Op::Cont(_)
6262 | Op::Exit(_) => true,
6263 _ => false,
6264 }
6265 }
6266 }
6267
6268 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
6269 pub enum PredRef {
6270 None,
6271 SSA(SSAValue),
6272 Reg(RegRef),
6273 }
6274
6275 impl PredRef {
6276 #[allow(dead_code)]
as_reg(&self) -> Option<&RegRef>6277 pub fn as_reg(&self) -> Option<&RegRef> {
6278 match self {
6279 PredRef::Reg(r) => Some(r),
6280 _ => None,
6281 }
6282 }
6283
6284 #[allow(dead_code)]
as_ssa(&self) -> Option<&SSAValue>6285 pub fn as_ssa(&self) -> Option<&SSAValue> {
6286 match self {
6287 PredRef::SSA(r) => Some(r),
6288 _ => None,
6289 }
6290 }
6291
is_none(&self) -> bool6292 pub fn is_none(&self) -> bool {
6293 matches!(self, PredRef::None)
6294 }
6295
iter_ssa(&self) -> slice::Iter<'_, SSAValue>6296 pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
6297 match self {
6298 PredRef::None | PredRef::Reg(_) => &[],
6299 PredRef::SSA(ssa) => slice::from_ref(ssa),
6300 }
6301 .iter()
6302 }
6303
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>6304 pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
6305 match self {
6306 PredRef::None | PredRef::Reg(_) => &mut [],
6307 PredRef::SSA(ssa) => slice::from_mut(ssa),
6308 }
6309 .iter_mut()
6310 }
6311 }
6312
6313 impl From<RegRef> for PredRef {
from(reg: RegRef) -> PredRef6314 fn from(reg: RegRef) -> PredRef {
6315 PredRef::Reg(reg)
6316 }
6317 }
6318
6319 impl From<SSAValue> for PredRef {
from(ssa: SSAValue) -> PredRef6320 fn from(ssa: SSAValue) -> PredRef {
6321 PredRef::SSA(ssa)
6322 }
6323 }
6324
6325 impl fmt::Display for PredRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6326 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6327 match self {
6328 PredRef::None => write!(f, "pT"),
6329 PredRef::SSA(ssa) => ssa.fmt_plain(f),
6330 PredRef::Reg(reg) => reg.fmt(f),
6331 }
6332 }
6333 }
6334
6335 #[derive(Clone, Copy)]
6336 pub struct Pred {
6337 pub pred_ref: PredRef,
6338 pub pred_inv: bool,
6339 }
6340
6341 impl Pred {
is_true(&self) -> bool6342 pub fn is_true(&self) -> bool {
6343 self.pred_ref.is_none() && !self.pred_inv
6344 }
6345
is_false(&self) -> bool6346 pub fn is_false(&self) -> bool {
6347 self.pred_ref.is_none() && self.pred_inv
6348 }
6349
iter_ssa(&self) -> slice::Iter<'_, SSAValue>6350 pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
6351 self.pred_ref.iter_ssa()
6352 }
6353
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>6354 pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
6355 self.pred_ref.iter_ssa_mut()
6356 }
6357
bnot(self) -> Self6358 pub fn bnot(self) -> Self {
6359 Pred {
6360 pred_ref: self.pred_ref,
6361 pred_inv: !self.pred_inv,
6362 }
6363 }
6364 }
6365
6366 impl From<bool> for Pred {
from(b: bool) -> Self6367 fn from(b: bool) -> Self {
6368 Pred {
6369 pred_ref: PredRef::None,
6370 pred_inv: !b,
6371 }
6372 }
6373 }
6374
6375 impl<T: Into<PredRef>> From<T> for Pred {
from(p: T) -> Self6376 fn from(p: T) -> Self {
6377 Pred {
6378 pred_ref: p.into(),
6379 pred_inv: false,
6380 }
6381 }
6382 }
6383
6384 impl fmt::Display for Pred {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6385 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6386 if self.pred_inv {
6387 write!(f, "!")?;
6388 }
6389 self.pred_ref.fmt(f)
6390 }
6391 }
6392
/// Smallest delay value; note that `InstrDeps::set_delay` does not check it.
pub const MIN_INSTR_DELAY: u8 = 1;
/// Largest delay value accepted by `InstrDeps::set_delay`.
pub const MAX_INSTR_DELAY: u8 = 15;

/// Scheduling dependency information attached to an instruction.
///
/// Barrier indices are restricted to `0..6`; the read and write barriers
/// are stored as `-1` when unset and exposed as `Option<u8>` through
/// [`InstrDeps::rd_bar`] and [`InstrDeps::wr_bar`].
pub struct InstrDeps {
    /// Delay value, at most [`MAX_INSTR_DELAY`] when set via `set_delay`.
    pub delay: u8,
    /// Yield flag.
    pub yld: bool,
    /// Write barrier index, or `-1` when unset.
    wr_bar: i8,
    /// Read barrier index, or `-1` when unset.
    rd_bar: i8,
    /// Bit mask of barriers to wait on (bit `i` = barrier `i`).
    pub wt_bar_mask: u8,
    /// Bit mask built by `add_reuse` (bit `i` = index `i`).
    pub reuse_mask: u8,
}

impl InstrDeps {
    /// Creates dependency info with no delay, no barriers, no reuse and no
    /// yield.
    pub fn new() -> InstrDeps {
        InstrDeps {
            delay: 0,
            yld: false,
            wr_bar: -1,
            rd_bar: -1,
            wt_bar_mask: 0,
            reuse_mask: 0,
        }
    }

    /// Returns the read barrier index, or `None` if unset.
    pub fn rd_bar(&self) -> Option<u8> {
        // Negative values (the `-1` sentinel) fail the conversion.
        u8::try_from(self.rd_bar).ok()
    }

    /// Returns the write barrier index, or `None` if unset.
    pub fn wr_bar(&self) -> Option<u8> {
        u8::try_from(self.wr_bar).ok()
    }

    /// Sets the delay.
    ///
    /// # Panics
    ///
    /// Panics if `delay` exceeds [`MAX_INSTR_DELAY`].
    pub fn set_delay(&mut self, delay: u8) {
        assert!(delay <= MAX_INSTR_DELAY);
        self.delay = delay;
    }

    /// Sets the yield flag.
    pub fn set_yield(&mut self, yld: bool) {
        self.yld = yld;
    }

    /// Sets the read barrier index.
    ///
    /// # Panics
    ///
    /// Panics if `idx >= 6`.
    pub fn set_rd_bar(&mut self, idx: u8) {
        assert!(idx < 6);
        self.rd_bar = idx.try_into().unwrap();
    }

    /// Sets the write barrier index.
    ///
    /// # Panics
    ///
    /// Panics if `idx >= 6`.
    pub fn set_wr_bar(&mut self, idx: u8) {
        assert!(idx < 6);
        self.wr_bar = idx.try_into().unwrap();
    }

    /// Adds barrier `idx` to the set of barriers to wait on.
    ///
    /// # Panics
    ///
    /// Panics if `idx >= 6`.
    pub fn add_wt_bar(&mut self, idx: u8) {
        // Check the index before shifting: without overflow checks
        // `1_u8 << idx` wraps for `idx >= 8` and would silently select the
        // wrong barrier instead of tripping the mask assertion.
        assert!(idx < 6);
        self.add_wt_bar_mask(1 << idx);
    }

    /// Adds every barrier in `bar_mask` to the set of barriers to wait on.
    ///
    /// # Panics
    ///
    /// Panics if `bar_mask` has bits set above bit 5.
    pub fn add_wt_bar_mask(&mut self, bar_mask: u8) {
        assert!(bar_mask < 1 << 6);
        self.wt_bar_mask |= bar_mask;
    }

    /// Sets bit `idx` of the reuse mask.
    ///
    /// # Panics
    ///
    /// Panics if `idx >= 6`.
    #[allow(dead_code)]
    pub fn add_reuse(&mut self, idx: u8) {
        assert!(idx < 6);
        self.reuse_mask |= 1_u8 << idx;
    }
}

impl fmt::Display for InstrDeps {
    /// Prints each field that is set, each preceded by a space, in the order
    /// delay, wait mask, read barrier, write barrier, reuse mask, yield.
    /// Prints nothing when no field is set.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.delay > 0 {
            write!(f, " delay={}", self.delay)?;
        }
        if self.wt_bar_mask != 0 {
            write!(f, " wt={:06b}", self.wt_bar_mask)?;
        }
        if self.rd_bar >= 0 {
            write!(f, " rd:{}", self.rd_bar)?;
        }
        if self.wr_bar >= 0 {
            write!(f, " wr:{}", self.wr_bar)?;
        }
        if self.reuse_mask != 0 {
            write!(f, " reuse={:06b}", self.reuse_mask)?;
        }
        if self.yld {
            write!(f, " yld")?;
        }
        Ok(())
    }
}
6491
/// A single instruction: an operation guarded by a predicate, plus its
/// scheduling dependency information.
pub struct Instr {
    /// Predicate guarding the instruction; `Instr::new` sets it to the
    /// constant-true predicate.
    pub pred: Pred,
    /// The operation itself.
    pub op: Op,
    /// Scheduling dependencies (delay, barriers, reuse, yield).
    pub deps: InstrDeps,
}
6497
6498 impl Instr {
new(op: impl Into<Op>) -> Instr6499 pub fn new(op: impl Into<Op>) -> Instr {
6500 Instr {
6501 op: op.into(),
6502 pred: true.into(),
6503 deps: InstrDeps::new(),
6504 }
6505 }
6506
new_boxed(op: impl Into<Op>) -> Box<Self>6507 pub fn new_boxed(op: impl Into<Op>) -> Box<Self> {
6508 Box::new(Instr::new(op))
6509 }
6510
dsts(&self) -> &[Dst]6511 pub fn dsts(&self) -> &[Dst] {
6512 self.op.dsts_as_slice()
6513 }
6514
dsts_mut(&mut self) -> &mut [Dst]6515 pub fn dsts_mut(&mut self) -> &mut [Dst] {
6516 self.op.dsts_as_mut_slice()
6517 }
6518
srcs(&self) -> &[Src]6519 pub fn srcs(&self) -> &[Src] {
6520 self.op.srcs_as_slice()
6521 }
6522
srcs_mut(&mut self) -> &mut [Src]6523 pub fn srcs_mut(&mut self) -> &mut [Src] {
6524 self.op.srcs_as_mut_slice()
6525 }
6526
src_types(&self) -> SrcTypeList6527 pub fn src_types(&self) -> SrcTypeList {
6528 self.op.src_types()
6529 }
6530
for_each_ssa_use(&self, mut f: impl FnMut(&SSAValue))6531 pub fn for_each_ssa_use(&self, mut f: impl FnMut(&SSAValue)) {
6532 for ssa in self.pred.iter_ssa() {
6533 f(ssa);
6534 }
6535 for src in self.srcs() {
6536 for ssa in src.iter_ssa() {
6537 f(ssa);
6538 }
6539 }
6540 }
6541
for_each_ssa_use_mut(&mut self, mut f: impl FnMut(&mut SSAValue))6542 pub fn for_each_ssa_use_mut(&mut self, mut f: impl FnMut(&mut SSAValue)) {
6543 for ssa in self.pred.iter_ssa_mut() {
6544 f(ssa);
6545 }
6546 for src in self.srcs_mut() {
6547 for ssa in src.iter_ssa_mut() {
6548 f(ssa);
6549 }
6550 }
6551 }
6552
for_each_ssa_def(&self, mut f: impl FnMut(&SSAValue))6553 pub fn for_each_ssa_def(&self, mut f: impl FnMut(&SSAValue)) {
6554 for dst in self.dsts() {
6555 for ssa in dst.iter_ssa() {
6556 f(ssa);
6557 }
6558 }
6559 }
6560
for_each_ssa_def_mut(&mut self, mut f: impl FnMut(&mut SSAValue))6561 pub fn for_each_ssa_def_mut(&mut self, mut f: impl FnMut(&mut SSAValue)) {
6562 for dst in self.dsts_mut() {
6563 for ssa in dst.iter_ssa_mut() {
6564 f(ssa);
6565 }
6566 }
6567 }
6568
is_branch(&self) -> bool6569 pub fn is_branch(&self) -> bool {
6570 self.op.is_branch()
6571 }
6572
uses_global_mem(&self) -> bool6573 pub fn uses_global_mem(&self) -> bool {
6574 match &self.op {
6575 Op::Atom(op) => op.mem_space != MemSpace::Local,
6576 Op::Ld(op) => op.access.space != MemSpace::Local,
6577 Op::St(op) => op.access.space != MemSpace::Local,
6578 Op::SuAtom(_) | Op::SuLd(_) | Op::SuSt(_) => true,
6579 _ => false,
6580 }
6581 }
6582
writes_global_mem(&self) -> bool6583 pub fn writes_global_mem(&self) -> bool {
6584 match &self.op {
6585 Op::Atom(op) => matches!(op.mem_space, MemSpace::Global(_)),
6586 Op::St(op) => matches!(op.access.space, MemSpace::Global(_)),
6587 Op::SuAtom(_) | Op::SuSt(_) => true,
6588 _ => false,
6589 }
6590 }
6591
can_eliminate(&self) -> bool6592 pub fn can_eliminate(&self) -> bool {
6593 match &self.op {
6594 Op::ASt(_)
6595 | Op::SuSt(_)
6596 | Op::SuAtom(_)
6597 | Op::St(_)
6598 | Op::Atom(_)
6599 | Op::CCtl(_)
6600 | Op::MemBar(_)
6601 | Op::Kill(_)
6602 | Op::Nop(_)
6603 | Op::BSync(_)
6604 | Op::Bra(_)
6605 | Op::SSy(_)
6606 | Op::Sync(_)
6607 | Op::Brk(_)
6608 | Op::PBk(_)
6609 | Op::Cont(_)
6610 | Op::PCnt(_)
6611 | Op::Exit(_)
6612 | Op::WarpSync(_)
6613 | Op::Bar(_)
6614 | Op::RegOut(_)
6615 | Op::Out(_)
6616 | Op::OutFinal(_)
6617 | Op::Annotate(_) => false,
6618 Op::BMov(op) => !op.clear,
6619 _ => true,
6620 }
6621 }
6622
is_uniform(&self) -> bool6623 pub fn is_uniform(&self) -> bool {
6624 match &self.op {
6625 Op::PhiDsts(_) => false,
6626 op => op.is_uniform(),
6627 }
6628 }
6629
has_fixed_latency(&self, sm: u8) -> bool6630 pub fn has_fixed_latency(&self, sm: u8) -> bool {
6631 match &self.op {
6632 // Float ALU
6633 Op::F2FP(_)
6634 | Op::FAdd(_)
6635 | Op::FFma(_)
6636 | Op::FMnMx(_)
6637 | Op::FMul(_)
6638 | Op::FSet(_)
6639 | Op::FSetP(_)
6640 | Op::HAdd2(_)
6641 | Op::HFma2(_)
6642 | Op::HMul2(_)
6643 | Op::HSet2(_)
6644 | Op::HSetP2(_)
6645 | Op::HMnMx2(_)
6646 | Op::FSwzAdd(_) => true,
6647
6648 // Multi-function unit is variable latency
6649 Op::Rro(_) | Op::MuFu(_) => false,
6650
6651 // Double-precision float ALU
6652 Op::DAdd(_)
6653 | Op::DFma(_)
6654 | Op::DMnMx(_)
6655 | Op::DMul(_)
6656 | Op::DSetP(_) => false,
6657
6658 // Integer ALU
6659 Op::BRev(_) | Op::Flo(_) | Op::PopC(_) => false,
6660 Op::IMad(_) | Op::IMul(_) => sm >= 70,
6661 Op::BMsk(_)
6662 | Op::IAbs(_)
6663 | Op::IAdd2(_)
6664 | Op::IAdd2X(_)
6665 | Op::IAdd3(_)
6666 | Op::IAdd3X(_)
6667 | Op::IDp4(_)
6668 | Op::IMad64(_)
6669 | Op::IMnMx(_)
6670 | Op::ISetP(_)
6671 | Op::Lop2(_)
6672 | Op::Lop3(_)
6673 | Op::Shf(_)
6674 | Op::Shl(_)
6675 | Op::Shr(_)
6676 | Op::Bfe(_) => true,
6677
6678 // Conversions are variable latency?!?
6679 Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::I2I(_) | Op::FRnd(_) => {
6680 false
6681 }
6682
6683 // Move ops
6684 Op::Mov(_) | Op::Prmt(_) | Op::Sel(_) => true,
6685 Op::Shfl(_) => false,
6686
6687 // Predicate ops
6688 Op::PLop3(_) | Op::PSetP(_) => true,
6689
6690 // Uniform ops
6691 Op::R2UR(_) => false,
6692
6693 // Texture ops
6694 Op::Tex(_)
6695 | Op::Tld(_)
6696 | Op::Tld4(_)
6697 | Op::Tmml(_)
6698 | Op::Txd(_)
6699 | Op::Txq(_) => false,
6700
6701 // Surface ops
6702 Op::SuLd(_) | Op::SuSt(_) | Op::SuAtom(_) => false,
6703
6704 // Memory ops
6705 Op::Ld(_)
6706 | Op::Ldc(_)
6707 | Op::St(_)
6708 | Op::Atom(_)
6709 | Op::AL2P(_)
6710 | Op::ALd(_)
6711 | Op::ASt(_)
6712 | Op::Ipa(_)
6713 | Op::CCtl(_)
6714 | Op::LdTram(_)
6715 | Op::MemBar(_) => false,
6716
6717 // Control-flow ops
6718 Op::BClear(_) | Op::Break(_) | Op::BSSy(_) | Op::BSync(_) => true,
6719 Op::SSy(_)
6720 | Op::Sync(_)
6721 | Op::Brk(_)
6722 | Op::PBk(_)
6723 | Op::Cont(_)
6724 | Op::PCnt(_) => true,
6725 Op::Bra(_) | Op::Exit(_) => true,
6726 Op::WarpSync(_) => false,
6727
6728 // The barrier half is HW scoreboarded by the GPR isn't. When
6729 // moving from a GPR to a barrier, we still need a token for WaR
6730 // hazards.
6731 Op::BMov(_) => false,
6732
6733 // Geometry ops
6734 Op::Out(_) | Op::OutFinal(_) => false,
6735
6736 // Miscellaneous ops
6737 Op::Bar(_)
6738 | Op::CS2R(_)
6739 | Op::Isberd(_)
6740 | Op::Kill(_)
6741 | Op::PixLd(_)
6742 | Op::S2R(_) => false,
6743 Op::Nop(_) | Op::Vote(_) => true,
6744
6745 // Virtual ops
6746 Op::Undef(_)
6747 | Op::SrcBar(_)
6748 | Op::PhiSrcs(_)
6749 | Op::PhiDsts(_)
6750 | Op::Copy(_)
6751 | Op::Pin(_)
6752 | Op::Unpin(_)
6753 | Op::Swap(_)
6754 | Op::ParCopy(_)
6755 | Op::RegOut(_)
6756 | Op::Annotate(_) => {
6757 panic!("Not a hardware opcode")
6758 }
6759 }
6760 }
6761
needs_yield(&self) -> bool6762 pub fn needs_yield(&self) -> bool {
6763 matches!(&self.op, Op::Bar(_) | Op::BSync(_))
6764 }
6765
fmt_pred(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6766 fn fmt_pred(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6767 if !self.pred.is_true() {
6768 write!(f, "@{} ", self.pred)?;
6769 }
6770 Ok(())
6771 }
6772 }
6773
/// Prints the predicate prefix, a space, the op, and then the scheduling
/// dependencies.
impl fmt::Display for Instr {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // NOTE(review): `Fmt` is defined outside this chunk; it presumably
        // adapts the closure into a `Display` value — confirm.
        write!(f, "{} {}{}", Fmt(|f| self.fmt_pred(f)), self.op, self.deps)
    }
}
6779
6780 impl<T: Into<Op>> From<T> for Instr {
from(value: T) -> Self6781 fn from(value: T) -> Self {
6782 Self::new(value)
6783 }
6784 }
6785
6786 /// The result of map() done on a Box<Instr>. A Vec is only allocated if the
6787 /// mapping results in multiple instructions. This helps to reduce the amount of
6788 /// Vec's allocated in the optimization passes.
6789 pub enum MappedInstrs {
6790 None,
6791 One(Box<Instr>),
6792 Many(Vec<Box<Instr>>),
6793 }
6794
6795 impl MappedInstrs {
push(&mut self, i: Box<Instr>)6796 pub fn push(&mut self, i: Box<Instr>) {
6797 match self {
6798 MappedInstrs::None => {
6799 *self = MappedInstrs::One(i);
6800 }
6801 MappedInstrs::One(_) => {
6802 *self = match std::mem::replace(self, MappedInstrs::None) {
6803 MappedInstrs::One(o) => MappedInstrs::Many(vec![o, i]),
6804 _ => panic!("Not a One"),
6805 };
6806 }
6807 MappedInstrs::Many(v) => {
6808 v.push(i);
6809 }
6810 }
6811 }
6812
last_mut(&mut self) -> Option<&mut Box<Instr>>6813 pub fn last_mut(&mut self) -> Option<&mut Box<Instr>> {
6814 match self {
6815 MappedInstrs::None => None,
6816 MappedInstrs::One(instr) => Some(instr),
6817 MappedInstrs::Many(v) => v.last_mut(),
6818 }
6819 }
6820 }
6821
/// A labelled sequence of instructions; `Function::blocks` stores these in a
/// `CFG`.
pub struct BasicBlock {
    /// Label identifying this block.
    pub label: Label,

    /// Whether or not this block is uniform
    ///
    /// If true, then all non-exited lanes in a warp which execute this block
    /// are guaranteed to execute it together
    pub uniform: bool,

    /// The block's instructions, in order.
    pub instrs: Vec<Box<Instr>>,
}
6833
6834 impl BasicBlock {
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>) -> MappedInstrs, )6835 pub fn map_instrs(
6836 &mut self,
6837 mut map: impl FnMut(Box<Instr>) -> MappedInstrs,
6838 ) {
6839 let mut instrs = Vec::new();
6840 for i in self.instrs.drain(..) {
6841 match map(i) {
6842 MappedInstrs::None => (),
6843 MappedInstrs::One(i) => {
6844 instrs.push(i);
6845 }
6846 MappedInstrs::Many(mut v) => {
6847 instrs.append(&mut v);
6848 }
6849 }
6850 }
6851 self.instrs = instrs;
6852 }
6853
phi_dsts_ip(&self) -> Option<usize>6854 pub fn phi_dsts_ip(&self) -> Option<usize> {
6855 for (ip, instr) in self.instrs.iter().enumerate() {
6856 match &instr.op {
6857 Op::Annotate(_) => (),
6858 Op::PhiDsts(_) => return Some(ip),
6859 _ => break,
6860 }
6861 }
6862 None
6863 }
6864
phi_dsts(&self) -> Option<&OpPhiDsts>6865 pub fn phi_dsts(&self) -> Option<&OpPhiDsts> {
6866 self.phi_dsts_ip().map(|ip| match &self.instrs[ip].op {
6867 Op::PhiDsts(phi) => phi,
6868 _ => panic!("Expected to find the phi"),
6869 })
6870 }
6871
6872 #[allow(dead_code)]
phi_dsts_mut(&mut self) -> Option<&mut OpPhiDsts>6873 pub fn phi_dsts_mut(&mut self) -> Option<&mut OpPhiDsts> {
6874 self.phi_dsts_ip().map(|ip| match &mut self.instrs[ip].op {
6875 Op::PhiDsts(phi) => phi,
6876 _ => panic!("Expected to find the phi"),
6877 })
6878 }
6879
phi_srcs_ip(&self) -> Option<usize>6880 pub fn phi_srcs_ip(&self) -> Option<usize> {
6881 for (ip, instr) in self.instrs.iter().enumerate().rev() {
6882 match &instr.op {
6883 Op::Annotate(_) => (),
6884 Op::PhiSrcs(_) => return Some(ip),
6885 _ if instr.is_branch() => (),
6886 _ => break,
6887 }
6888 }
6889 None
6890 }
phi_srcs(&self) -> Option<&OpPhiSrcs>6891 pub fn phi_srcs(&self) -> Option<&OpPhiSrcs> {
6892 self.phi_srcs_ip().map(|ip| match &self.instrs[ip].op {
6893 Op::PhiSrcs(phi) => phi,
6894 _ => panic!("Expected to find the phi"),
6895 })
6896 }
6897
phi_srcs_mut(&mut self) -> Option<&mut OpPhiSrcs>6898 pub fn phi_srcs_mut(&mut self) -> Option<&mut OpPhiSrcs> {
6899 self.phi_srcs_ip().map(|ip| match &mut self.instrs[ip].op {
6900 Op::PhiSrcs(phi) => phi,
6901 _ => panic!("Expected to find the phi"),
6902 })
6903 }
6904
branch(&self) -> Option<&Instr>6905 pub fn branch(&self) -> Option<&Instr> {
6906 if let Some(i) = self.instrs.last() {
6907 if i.is_branch() {
6908 Some(i)
6909 } else {
6910 None
6911 }
6912 } else {
6913 None
6914 }
6915 }
6916
branch_ip(&self) -> Option<usize>6917 pub fn branch_ip(&self) -> Option<usize> {
6918 if let Some(i) = self.instrs.last() {
6919 if i.is_branch() {
6920 Some(self.instrs.len() - 1)
6921 } else {
6922 None
6923 }
6924 } else {
6925 None
6926 }
6927 }
6928
6929 #[allow(dead_code)]
branch_mut(&mut self) -> Option<&mut Instr>6930 pub fn branch_mut(&mut self) -> Option<&mut Instr> {
6931 if let Some(i) = self.instrs.last_mut() {
6932 if i.is_branch() {
6933 Some(i)
6934 } else {
6935 None
6936 }
6937 } else {
6938 None
6939 }
6940 }
6941
falls_through(&self) -> bool6942 pub fn falls_through(&self) -> bool {
6943 if let Some(i) = self.branch() {
6944 !i.pred.is_true()
6945 } else {
6946 true
6947 }
6948 }
6949 }
6950
/// A function: a control-flow graph of basic blocks plus the allocators used
/// while building and transforming it.
pub struct Function {
    /// Allocator for SSA values; handed to the callback of
    /// `Function::map_instrs`.
    pub ssa_alloc: SSAValueAllocator,
    /// Allocator for phis.
    pub phi_alloc: PhiAllocator,
    /// The function's basic blocks.
    pub blocks: CFG<BasicBlock>,
}
6956
6957 impl Function {
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, )6958 pub fn map_instrs(
6959 &mut self,
6960 mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
6961 ) {
6962 let alloc = &mut self.ssa_alloc;
6963 for b in &mut self.blocks {
6964 b.map_instrs(|i| map(i, alloc));
6965 }
6966 }
6967 }
6968
6969 impl fmt::Display for Function {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6970 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6971 let mut pred_width = 0;
6972 let mut dsts_width = 0;
6973 let mut op_width = 0;
6974
6975 let mut blocks = Vec::new();
6976 for b in &self.blocks {
6977 let mut instrs = Vec::new();
6978 for i in &b.instrs {
6979 let mut pred = String::new();
6980 write!(pred, "{}", Fmt(|f| i.fmt_pred(f)))?;
6981 let mut dsts = String::new();
6982 write!(dsts, "{}", Fmt(|f| i.op.fmt_dsts(f)))?;
6983 let mut op = String::new();
6984 write!(op, "{}", Fmt(|f| i.op.fmt_op(f)))?;
6985 let mut deps = String::new();
6986 write!(deps, "{}", i.deps)?;
6987
6988 pred_width = max(pred_width, pred.len());
6989 dsts_width = max(dsts_width, dsts.len());
6990 op_width = max(op_width, op.len());
6991 let is_annotation = matches!(i.op, Op::Annotate(_));
6992
6993 instrs.push((pred, dsts, op, deps, is_annotation));
6994 }
6995 blocks.push(instrs);
6996 }
6997
6998 for (i, mut b) in blocks.drain(..).enumerate() {
6999 let u = if self.blocks[i].uniform { ".u" } else { "" };
7000 write!(f, "block{u} {} {} [", i, self.blocks[i].label)?;
7001 for (pi, p) in self.blocks.pred_indices(i).iter().enumerate() {
7002 if pi > 0 {
7003 write!(f, ", ")?;
7004 }
7005 write!(f, "{}", p)?;
7006 }
7007 write!(f, "] -> {{\n")?;
7008
7009 for (pred, dsts, op, deps, is_annotation) in b.drain(..) {
7010 let eq_sym = if dsts.is_empty() { " " } else { "=" };
7011 if is_annotation {
7012 write!(f, "\n{}\n", op)?;
7013 } else if deps.is_empty() {
7014 write!(
7015 f,
7016 "{:<pred_width$} {:<dsts_width$} {} {}\n",
7017 pred, dsts, eq_sym, op,
7018 )?;
7019 } else {
7020 write!(
7021 f,
7022 "{:<pred_width$} {:<dsts_width$} {} \
7023 {:<op_width$} //{}\n",
7024 pred, dsts, eq_sym, op, deps,
7025 )?;
7026 }
7027 }
7028
7029 write!(f, "}} -> [")?;
7030 for (si, s) in self.blocks.succ_indices(i).iter().enumerate() {
7031 if si > 0 {
7032 write!(f, ", ")?;
7033 }
7034 write!(f, "{}", s)?;
7035 }
7036 write!(f, "]\n")?;
7037 }
7038 Ok(())
7039 }
7040 }
7041
/// Stage-specific information carried by `ShaderStageInfo::Compute`.
#[derive(Debug)]
pub struct ComputeShaderInfo {
    // Presumably the workgroup size in x/y/z — TODO confirm.
    pub local_size: [u16; 3],
    // Presumably the shared memory size — TODO confirm units.
    pub smem_size: u16,
}
7047
/// Stage-specific information carried by `ShaderStageInfo::Fragment`.
///
/// All fields are boolean flags; their exact meaning is defined by the code
/// that fills them in, which is outside this part of the file.
#[derive(Debug)]
pub struct FragmentShaderInfo {
    pub uses_kill: bool,
    pub does_interlock: bool,
    pub post_depth_coverage: bool,
    pub early_fragment_tests: bool,
    pub uses_sample_shading: bool,
}
7056
7057 #[derive(Debug)]
7058 pub struct GeometryShaderInfo {
7059 pub passthrough_enable: bool,
7060 pub stream_out_mask: u8,
7061 pub threads_per_input_primitive: u8,
7062 pub output_topology: OutputTopology,
7063 pub max_output_vertex_count: u16,
7064 }
7065
7066 impl Default for GeometryShaderInfo {
default() -> Self7067 fn default() -> Self {
7068 Self {
7069 passthrough_enable: false,
7070 stream_out_mask: 0,
7071 threads_per_input_primitive: 0,
7072 output_topology: OutputTopology::LineStrip,
7073 max_output_vertex_count: 0,
7074 }
7075 }
7076 }
7077
/// Stage-specific information carried by `ShaderStageInfo::TessellationInit`.
#[derive(Debug)]
pub struct TessellationInitShaderInfo {
    pub per_patch_attribute_count: u8,
    pub threads_per_patch: u8,
}
7083
/// Tessellation domain.
///
/// Discriminants are the `NAK_TS_DOMAIN_*` constants.
#[repr(u8)]
#[derive(Clone, Copy, Debug)]
pub enum TessellationDomain {
    Isoline = NAK_TS_DOMAIN_ISOLINE,
    Triangle = NAK_TS_DOMAIN_TRIANGLE,
    Quad = NAK_TS_DOMAIN_QUAD,
}
7091
/// Tessellation spacing mode.
///
/// Discriminants are the `NAK_TS_SPACING_*` constants.
#[repr(u8)]
#[derive(Clone, Copy, Debug)]
pub enum TessellationSpacing {
    Integer = NAK_TS_SPACING_INTEGER,
    FractionalOdd = NAK_TS_SPACING_FRACT_ODD,
    FractionalEven = NAK_TS_SPACING_FRACT_EVEN,
}
7099
/// Primitive type produced by tessellation.
///
/// Discriminants are the `NAK_TS_PRIMS_*` constants.
#[repr(u8)]
#[derive(Clone, Copy, Debug)]
pub enum TessellationPrimitives {
    Points = NAK_TS_PRIMS_POINTS,
    Lines = NAK_TS_PRIMS_LINES,
    TrianglesCW = NAK_TS_PRIMS_TRIANGLES_CW,
    TrianglesCCW = NAK_TS_PRIMS_TRIANGLES_CCW,
}
7108
/// Stage-specific information carried by `ShaderStageInfo::Tessellation`:
/// the domain, spacing and primitive settings.
#[derive(Debug)]
pub struct TessellationShaderInfo {
    pub domain: TessellationDomain,
    pub spacing: TessellationSpacing,
    pub primitives: TessellationPrimitives,
}
7115
/// The shader stage, together with that stage's specific information.
///
/// `Vertex` is the only stage without a payload.
#[derive(Debug)]
pub enum ShaderStageInfo {
    Compute(ComputeShaderInfo),
    Vertex,
    Fragment(FragmentShaderInfo),
    Geometry(GeometryShaderInfo),
    TessellationInit(TessellationInitShaderInfo),
    Tessellation(TessellationShaderInfo),
}
7125
/// Bit masks recording which system-value I/O slots are used.
///
/// Each bit stands for one 4-byte slot.
#[derive(Debug, Default)]
pub struct SysValInfo {
    /// Slots at addresses below `0x080`; bit index is `addr / 4`.
    pub ab: u32,
    /// Slots at addresses `0x2c0..0x300`; bit index is `(addr - 0x2c0) / 4`.
    pub c: u16,
}
7131
/// I/O usage information carried by `ShaderIoInfo::Vtg`.
///
/// The `*_in` fields are updated by `mark_attrs_read`, the `*_out` fields by
/// `mark_attrs_written`.
#[derive(Debug)]
pub struct VtgIoInfo {
    /// System values read (addresses below `0x080` and `0x2c0..0x300`).
    pub sysvals_in: SysValInfo,
    /// System values read at `0x3a0..0x3c0`, one bit per 4-byte slot.
    pub sysvals_in_d: u8,
    /// System values written (addresses below `0x080` and `0x2c0..0x300`).
    pub sysvals_out: SysValInfo,
    /// System values written at `0x3a0..0x3c0`, one bit per 4-byte slot.
    pub sysvals_out_d: u8,
    /// Bit set of attributes read at `0x080..0x280`, one bit per 4-byte slot.
    pub attr_in: [u32; 4],
    /// Bit set of attributes written at `0x080..0x280`, one bit per 4-byte
    /// slot.
    pub attr_out: [u32; 4],
    /// Lowest slot index (`addr / 4`) passed to `mark_store_req` so far.
    pub store_req_start: u8,
    /// Highest slot index (`addr / 4`) passed to `mark_store_req` so far.
    pub store_req_end: u8,
    pub clip_enable: u8,
    pub cull_enable: u8,
    // Presumably transform feedback information — TODO confirm.
    pub xfb: Option<Box<nak_xfb_info>>,
}
7146
7147 impl VtgIoInfo {
mark_attrs(&mut self, addrs: Range<u16>, written: bool)7148 fn mark_attrs(&mut self, addrs: Range<u16>, written: bool) {
7149 let sysvals = if written {
7150 &mut self.sysvals_out
7151 } else {
7152 &mut self.sysvals_in
7153 };
7154
7155 let sysvals_d = if written {
7156 &mut self.sysvals_out_d
7157 } else {
7158 &mut self.sysvals_in_d
7159 };
7160
7161 let mut attr = BitMutView::new(if written {
7162 &mut self.attr_out
7163 } else {
7164 &mut self.attr_in
7165 });
7166
7167 let mut addrs = addrs;
7168 addrs.start &= !3;
7169 for addr in addrs.step_by(4) {
7170 if addr < 0x080 {
7171 sysvals.ab |= 1 << (addr / 4);
7172 } else if addr < 0x280 {
7173 let attr_idx = (addr - 0x080) as usize / 4;
7174 attr.set_bit(attr_idx, true);
7175 } else if addr < 0x2c0 {
7176 panic!("FF color I/O not supported");
7177 } else if addr < 0x300 {
7178 sysvals.c |= 1 << ((addr - 0x2c0) / 4);
7179 } else if addr >= 0x3a0 && addr < 0x3c0 {
7180 *sysvals_d |= 1 << ((addr - 0x3a0) / 4);
7181 }
7182 }
7183 }
7184
mark_attrs_read(&mut self, addrs: Range<u16>)7185 pub fn mark_attrs_read(&mut self, addrs: Range<u16>) {
7186 self.mark_attrs(addrs, false);
7187 }
7188
mark_attrs_written(&mut self, addrs: Range<u16>)7189 pub fn mark_attrs_written(&mut self, addrs: Range<u16>) {
7190 self.mark_attrs(addrs, true);
7191 }
7192
attr_written(&self, addr: u16) -> bool7193 pub fn attr_written(&self, addr: u16) -> bool {
7194 if addr < 0x080 {
7195 self.sysvals_out.ab & (1 << (addr / 4)) != 0
7196 } else if addr < 0x280 {
7197 let attr_idx = (addr - 0x080) as usize / 4;
7198 BitView::new(&self.attr_out).get_bit(attr_idx)
7199 } else if addr < 0x2c0 {
7200 panic!("FF color I/O not supported");
7201 } else if addr < 0x300 {
7202 self.sysvals_out.c & (1 << ((addr - 0x2c0) / 4)) != 0
7203 } else if addr >= 0x3a0 && addr < 0x3c0 {
7204 self.sysvals_out_d & (1 << ((addr - 0x3a0) / 4)) != 0
7205 } else {
7206 panic!("Unknown I/O address");
7207 }
7208 }
7209
mark_store_req(&mut self, addrs: Range<u16>)7210 pub fn mark_store_req(&mut self, addrs: Range<u16>) {
7211 let start = (addrs.start / 4).try_into().unwrap();
7212 let end = ((addrs.end - 1) / 4).try_into().unwrap();
7213 self.store_req_start = min(self.store_req_start, start);
7214 self.store_req_end = max(self.store_req_end, end);
7215 }
7216 }
7217
/// I/O usage information carried by `ShaderIoInfo::Fragment`.
#[derive(Debug)]
pub struct FragmentIoInfo {
    /// System values read (addresses below `0x080` and `0x2c0..0x300`).
    pub sysvals_in: SysValInfo,
    /// Interpolation mode per 4-byte slot at `0x3a0..0x3c0`.
    pub sysvals_in_d: [PixelImap; 8],
    /// Interpolation mode per 4-byte slot at `0x080..0x280`.
    pub attr_in: [PixelImap; 128],
    /// Bit set filled by `mark_barycentric_attr_in`, one bit per 4-byte slot
    /// at `0x080..0x280`.
    pub barycentric_attr_in: [u32; 4],

    pub reads_sample_mask: bool,
    pub writes_color: u32,
    pub writes_sample_mask: bool,
    pub writes_depth: bool,
}
7230
7231 impl FragmentIoInfo {
mark_attr_read(&mut self, addr: u16, interp: PixelImap)7232 pub fn mark_attr_read(&mut self, addr: u16, interp: PixelImap) {
7233 if addr < 0x080 {
7234 self.sysvals_in.ab |= 1 << (addr / 4);
7235 } else if addr < 0x280 {
7236 let attr_idx = (addr - 0x080) as usize / 4;
7237 self.attr_in[attr_idx] = interp;
7238 } else if addr < 0x2c0 {
7239 panic!("FF color I/O not supported");
7240 } else if addr < 0x300 {
7241 self.sysvals_in.c |= 1 << ((addr - 0x2c0) / 4);
7242 } else if addr >= 0x3a0 && addr < 0x3c0 {
7243 let attr_idx = (addr - 0x3a0) as usize / 4;
7244 self.sysvals_in_d[attr_idx] = interp;
7245 }
7246 }
7247
mark_barycentric_attr_in(&mut self, addr: u16)7248 pub fn mark_barycentric_attr_in(&mut self, addr: u16) {
7249 assert!(addr >= 0x80 && addr < 0x280);
7250
7251 let mut attr = BitMutView::new(&mut self.barycentric_attr_in);
7252
7253 let attr_idx = (addr - 0x080) as usize / 4;
7254 attr.set_bit(attr_idx, true);
7255 }
7256 }
7257
/// I/O usage information for a shader, in one of the stage-dependent forms.
#[derive(Debug)]
pub enum ShaderIoInfo {
    /// No I/O information.
    None,
    /// See [`VtgIoInfo`].
    Vtg(VtgIoInfo),
    /// See [`FragmentIoInfo`].
    Fragment(FragmentIoInfo),
}
7264
/// Summary information about a compiled shader.
#[derive(Debug)]
pub struct ShaderInfo {
    pub num_gprs: u8,
    pub num_control_barriers: u8,
    /// Total instruction count; recomputed by `Shader::gather_info`.
    pub num_instrs: u32,
    pub slm_size: u32,
    pub max_crs_depth: u32,
    /// Whether any instruction reports `Instr::uses_global_mem`; recomputed
    /// by `Shader::gather_info`.
    pub uses_global_mem: bool,
    /// Whether any instruction reports `Instr::writes_global_mem`;
    /// recomputed by `Shader::gather_info`.
    pub writes_global_mem: bool,
    pub uses_fp64: bool,
    /// Stage and stage-specific information.
    pub stage: ShaderStageInfo,
    /// I/O usage information.
    pub io: ShaderIoInfo,
}
7278
/// Target-specific behaviour; a `Shader` holds a `&dyn ShaderModel`.
pub trait ShaderModel {
    /// The shader model number (compared against values such as `70` by
    /// `Instr::has_fixed_latency`).
    fn sm(&self) -> u8;
    /// Number of registers for `file`.
    fn num_regs(&self, file: RegFile) -> u32;
    // Presumably the call/return stack size needed for the given maximum
    // depth — TODO confirm.
    fn crs_size(&self, max_crs_depth: u32) -> u32;

    /// Whether `op` can be a uniform op on this target.
    fn op_can_be_uniform(&self, op: &Op) -> bool;

    /// Legalizes `op` in place, using `b` to build any additional
    /// instructions.
    fn legalize_op(&self, b: &mut LegalizeBuilder, op: &mut Op);
    /// Encodes the shader into 32-bit words.
    fn encode_shader(&self, s: &Shader<'_>) -> Vec<u32>;
}
7289
/// A shader: a list of functions plus summary information, tied to the
/// [`ShaderModel`] it is being compiled for.
pub struct Shader<'a> {
    /// The shader model implementation this shader targets.
    pub sm: &'a dyn ShaderModel,
    /// Summary information; partly recomputed by `gather_info`.
    pub info: ShaderInfo,
    /// The functions making up the shader, visited in order by
    /// `for_each_instr`, `map_instrs` and the `Display` implementation.
    pub functions: Vec<Function>,
}
7295
7296 impl Shader<'_> {
for_each_instr(&self, f: &mut impl FnMut(&Instr))7297 pub fn for_each_instr(&self, f: &mut impl FnMut(&Instr)) {
7298 for func in &self.functions {
7299 for b in &func.blocks {
7300 for i in &b.instrs {
7301 f(i);
7302 }
7303 }
7304 }
7305 }
7306
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, )7307 pub fn map_instrs(
7308 &mut self,
7309 mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
7310 ) {
7311 for f in &mut self.functions {
7312 f.map_instrs(&mut map);
7313 }
7314 }
7315
7316 /// Remove all annotations, presumably before encoding the shader.
remove_annotations(&mut self)7317 pub fn remove_annotations(&mut self) {
7318 self.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs {
7319 if matches!(instr.op, Op::Annotate(_)) {
7320 MappedInstrs::None
7321 } else {
7322 MappedInstrs::One(instr)
7323 }
7324 })
7325 }
7326
gather_info(&mut self)7327 pub fn gather_info(&mut self) {
7328 let mut num_instrs = 0;
7329 let mut uses_global_mem = false;
7330 let mut writes_global_mem = false;
7331
7332 self.for_each_instr(&mut |instr| {
7333 num_instrs += 1;
7334
7335 if !uses_global_mem {
7336 uses_global_mem = instr.uses_global_mem();
7337 }
7338
7339 if !writes_global_mem {
7340 writes_global_mem = instr.writes_global_mem();
7341 }
7342 });
7343
7344 self.info.num_instrs = num_instrs;
7345 self.info.uses_global_mem = uses_global_mem;
7346 self.info.writes_global_mem = writes_global_mem;
7347 }
7348 }
7349
7350 impl fmt::Display for Shader<'_> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result7351 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
7352 for func in &self.functions {
7353 write!(f, "{}", func)?;
7354 }
7355 Ok(())
7356 }
7357 }
7358