// Copyright © 2023 Collabora, Ltd.
// SPDX-License-Identifier: MIT

#![allow(unstable_name_collisions)]

use crate::api::{GetDebugFlags, DEBUG};
use crate::ir::*;
use crate::liveness::{
    BlockLiveness, LiveSet, Liveness, NextUseBlockLiveness, NextUseLiveness,
};

use compiler::bitset::BitSet;
use std::cell::RefCell;
use std::cmp::{max, Ordering, Reverse};
use std::collections::{BinaryHeap, HashMap, HashSet};

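/// Maps each phi destination SSA value back to its phi index.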
struct PhiDstMap {
    ssa_phi: HashMap<SSAValue, u32>,
}

impl PhiDstMap {
    fn new() -> PhiDstMap {
        PhiDstMap {
            ssa_phi: HashMap::new(),
        }
    }

    fn add_phi_dst(&mut self, phi_idx: u32, dst: Dst) {
        let vec = dst.as_ssa().expect("Not an SSA destination");
        debug_assert!(vec.comps() == 1);
        self.ssa_phi.insert(vec[0], phi_idx);
    }

    pub fn from_block(block: &BasicBlock) -> PhiDstMap {
        let mut map = PhiDstMap::new();
        if let Some(phi) = block.phi_dsts() {
            for (idx, dst) in phi.dsts.iter() {
                map.add_phi_dst(*idx, *dst);
            }
        }
        map
    }

    fn get_phi_idx(&self, ssa: &SSAValue) -> Option<&u32> {
        self.ssa_phi.get(ssa)
    }
}

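/// Maps each phi index to the SSA value which feeds it from a given
/// predecessor block.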
struct PhiSrcMap {
    phi_src: HashMap<u32, SSAValue>,
}

impl PhiSrcMap {
    fn new() -> PhiSrcMap {
        PhiSrcMap {
            phi_src: HashMap::new(),
        }
    }

    fn add_phi_src(&mut self, phi_idx: u32, src: Src) {
        debug_assert!(src.src_mod.is_none());
        let vec = src.src_ref.as_ssa().expect("Not an SSA source");
        debug_assert!(vec.comps() == 1);
        self.phi_src.insert(phi_idx, vec[0]);
    }

    pub fn from_block(block: &BasicBlock) -> PhiSrcMap {
        let mut map = PhiSrcMap::new();
        if let Some(phi) = block.phi_srcs() {
            for (idx, src) in phi.srcs.iter() {
                map.add_phi_src(*idx, *src);
            }
        }
        map
    }

    pub fn get_src_ssa(&self, phi_idx: &u32) -> &SSAValue {
        self.phi_src.get(phi_idx).expect("Phi source missing")
    }
}

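/// How to spill values of a given register file: spill_file() names the
/// register file in which spilled values live, and spill()/fill() build the
/// instructions that move a value out to that file and back.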
trait Spill {
    fn spill_file(&self, file: RegFile) -> RegFile;
    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr>;
    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr>;
}

struct SpillUniform {}

impl SpillUniform {
    fn new() -> Self {
        Self {}
    }
}

impl Spill for SpillUniform {
    fn spill_file(&self, file: RegFile) -> RegFile {
        debug_assert!(file.is_uniform());
        file.to_warp()
    }

    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr> {
        Instr::new_boxed(OpCopy {
            dst: dst.into(),
            src,
        })
    }

    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr> {
        Instr::new_boxed(OpR2UR {
            dst,
            src: src.into(),
        })
    }
}

struct SpillPred {}

impl SpillPred {
    fn new() -> Self {
        Self {}
    }
}

impl Spill for SpillPred {
    fn spill_file(&self, file: RegFile) -> RegFile {
        match file {
            RegFile::Pred => RegFile::GPR,
            RegFile::UPred => RegFile::UGPR,
            _ => panic!("Unsupported register file"),
        }
    }

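    // Predicates are spilled as 0 (false) or !0 (true) in a 32-bit
    // register and filled back by comparing that register against zero.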
    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr> {
        assert!(matches!(dst.file(), RegFile::GPR | RegFile::UGPR));
        if let Some(b) = src.as_bool() {
            let u32_src = if b {
                Src::new_imm_u32(!0)
            } else {
                Src::new_zero()
            };
            Instr::new_boxed(OpCopy {
                dst: dst.into(),
                src: u32_src,
            })
        } else {
            Instr::new_boxed(OpSel {
                dst: dst.into(),
                cond: src.bnot(),
                srcs: [Src::new_zero(), Src::new_imm_u32(!0)],
            })
        }
    }

    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr> {
        assert!(matches!(src.file(), RegFile::GPR | RegFile::UGPR));
        Instr::new_boxed(OpISetP {
            dst,
            set_op: PredSetOp::And,
            cmp_op: IntCmpOp::Ne,
            cmp_type: IntCmpType::U32,
            ex: false,
            srcs: [Src::new_zero(), src.into()],
            accum: true.into(),
            low_cmp: true.into(),
        })
    }
}

struct SpillBar {}

impl SpillBar {
    fn new() -> Self {
        Self {}
    }
}

impl Spill for SpillBar {
    fn spill_file(&self, file: RegFile) -> RegFile {
        assert!(file == RegFile::Bar);
        RegFile::GPR
    }

    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr> {
        assert!(dst.file() == RegFile::GPR);
        Instr::new_boxed(OpBMov {
            dst: dst.into(),
            src,
            clear: false,
        })
    }

    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr> {
        assert!(src.file() == RegFile::GPR);
        Instr::new_boxed(OpBMov {
            dst,
            src: src.into(),
            clear: false,
        })
    }
}

struct SpillGPR {}

impl SpillGPR {
    fn new() -> Self {
        Self {}
    }
}

impl Spill for SpillGPR {
    fn spill_file(&self, file: RegFile) -> RegFile {
        assert!(file == RegFile::GPR);
        RegFile::Mem
    }

    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr> {
        assert!(dst.file() == RegFile::Mem);
        Instr::new_boxed(OpCopy {
            dst: dst.into(),
            src,
        })
    }

    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr> {
        assert!(src.file() == RegFile::Mem);
        Instr::new_boxed(OpCopy {
            dst,
            src: src.into(),
        })
    }
}

#[derive(Eq, PartialEq)]
struct SSANextUse {
    ssa: SSAValue,
    next_use: usize,
}

impl SSANextUse {
    fn new(ssa: SSAValue, next_use: usize) -> SSANextUse {
        SSANextUse { ssa, next_use }
    }
}

impl Ord for SSANextUse {
    fn cmp(&self, other: &Self) -> Ordering {
        self.next_use
            .cmp(&other.next_use)
            .then_with(|| self.ssa.idx().cmp(&other.ssa.idx()))
    }
}

impl PartialOrd for SSANextUse {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
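
// A minimal, std-only sketch (not used by the pass itself) illustrating the
// Reverse-wrapped heap pattern used throughout this file: BinaryHeap is a
// max-heap, so pushing Reverse(key) makes pop() return the smallest key.
// SpillChooser relies on this to discard the candidate with the nearest
// next use once it holds more than `count` entries.
#[cfg(test)]
mod reverse_heap_sketch {
    use std::cmp::Reverse;
    use std::collections::BinaryHeap;

    #[test]
    fn pop_returns_smallest_key() {
        let mut heap = BinaryHeap::new();
        for next_use in [7_usize, 3, 9] {
            heap.push(Reverse(next_use));
        }
        // Reverse flips the ordering, so the max-heap yields 3 first.
        assert_eq!(heap.pop(), Some(Reverse(3)));
        assert_eq!(heap.pop(), Some(Reverse(7)));
        assert_eq!(heap.pop(), Some(Reverse(9)));
    }
}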

struct SpillCache<'a, S: Spill> {
    alloc: &'a mut SSAValueAllocator,
    spill: S,
    val_spill: HashMap<SSAValue, SSAValue>,
}

impl<'a, S: Spill> SpillCache<'a, S> {
    fn new(alloc: &'a mut SSAValueAllocator, spill: S) -> SpillCache<'a, S> {
        SpillCache {
            alloc,
            spill,
            val_spill: HashMap::new(),
        }
    }

    fn spill_file(&self, file: RegFile) -> RegFile {
        self.spill.spill_file(file)
    }

    fn get_spill(&mut self, ssa: SSAValue) -> SSAValue {
        *self.val_spill.entry(ssa).or_insert_with(|| {
            self.alloc.alloc(self.spill.spill_file(ssa.file()))
        })
    }

    fn spill_src(&mut self, ssa: SSAValue, src: Src) -> Box<Instr> {
        let dst = self.get_spill(ssa);
        self.spill.spill(dst, src)
    }

    fn spill(&mut self, ssa: SSAValue) -> Box<Instr> {
        self.spill_src(ssa, ssa.into())
    }

    fn fill_dst(&mut self, dst: Dst, ssa: SSAValue) -> Box<Instr> {
        let src = self.get_spill(ssa);
        self.spill.fill(dst, src)
    }

    fn fill(&mut self, ssa: SSAValue) -> Box<Instr> {
        self.fill_dst(ssa.into(), ssa)
    }
}

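/// Picks up to `count` spill candidates at instruction `ip`, keeping the
/// values whose next use is furthest away and never choosing pinned values.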
struct SpillChooser<'a> {
    bl: &'a NextUseBlockLiveness,
    pinned: &'a HashSet<SSAValue>,
    ip: usize,
    count: usize,
    spills: BinaryHeap<Reverse<SSANextUse>>,
    min_next_use: usize,
}

struct SpillChoiceIter {
    spills: BinaryHeap<Reverse<SSANextUse>>,
}

impl<'a> SpillChooser<'a> {
    pub fn new(
        bl: &'a NextUseBlockLiveness,
        pinned: &'a HashSet<SSAValue>,
        ip: usize,
        count: usize,
    ) -> Self {
        Self {
            bl,
            pinned,
            ip,
            count,
            spills: BinaryHeap::new(),
            min_next_use: ip + 1,
        }
    }

    pub fn add_candidate(&mut self, ssa: SSAValue) {
        // Don't spill anything that's pinned
        if self.pinned.contains(&ssa) {
            return;
        }

        // Ignore anything used sooner than spill options we've already
        // rejected.
        let next_use = self.bl.next_use_after_or_at_ip(&ssa, self.ip).unwrap();
        if next_use < self.min_next_use {
            return;
        }

        self.spills.push(Reverse(SSANextUse::new(ssa, next_use)));

        if self.spills.len() > self.count {
            // Because we reversed the heap, pop actually removes the
            // one with the lowest next_use which is what we want here.
            let old = self.spills.pop().unwrap();
            debug_assert!(self.spills.len() == self.count);
            self.min_next_use = max(self.min_next_use, old.0.next_use);
        }
    }
}

impl<'a> IntoIterator for SpillChooser<'a> {
    type Item = SSAValue;
    type IntoIter = SpillChoiceIter;

    fn into_iter(self) -> SpillChoiceIter {
        SpillChoiceIter {
            spills: self.spills,
        }
    }
}

impl Iterator for SpillChoiceIter {
    type Item = SSAValue;

    fn size_hint(&self) -> (usize, Option<usize>) {
        let len = self.spills.len();
        (len, Some(len))
    }

    fn next(&mut self) -> Option<SSAValue> {
        self.spills.pop().map(|x| x.0.ssa)
    }
}

#[derive(Clone)]
struct SSAState {
    // The set of variables which currently exist in registers
    w: LiveSet,
    // The set of variables which have already been spilled.  These don't need
    // to be spilled again.
    s: HashSet<SSAValue>,
    // The set of variables pinned by OpPin.  These are never spilled.
    p: HashSet<SSAValue>,
}

fn spill_values<S: Spill>(
    func: &mut Function,
    file: RegFile,
    limit: u32,
    spill: S,
) {
    let files = RegFileSet::from_iter([file]);
    let live = NextUseLiveness::for_function(func, &files);
    let blocks = &mut func.blocks;

    // Record the set of SSA values used within each loop
    let mut phi_dst_maps = Vec::new();
    let mut phi_src_maps = Vec::new();
    let mut loop_uses = HashMap::new();
    for b_idx in 0..blocks.len() {
        phi_dst_maps.push(PhiDstMap::from_block(&blocks[b_idx]));
        phi_src_maps.push(PhiSrcMap::from_block(&blocks[b_idx]));

        if let Some(lh_idx) = blocks.loop_header_index(b_idx) {
            let uses = loop_uses
                .entry(lh_idx)
                .or_insert_with(|| RefCell::new(HashSet::new()));
            let uses = uses.get_mut();

            for instr in &blocks[b_idx].instrs {
                instr.for_each_ssa_use(|ssa| {
                    if ssa.file() == file {
                        uses.insert(*ssa);
                    }
                });
            }
        }
    }

    if !loop_uses.is_empty() {
        // The previous loop only added values to the uses set for the
        // innermost loop.  Propagate from inner loops to outer loops.
        // Iterating over blocks in reverse order ensures that each inner
        // loop's set is complete before it gets merged into its parent's.
        for b_idx in (0..blocks.len()).rev() {
            let Some(uses) = loop_uses.get(&b_idx) else {
                continue;
            };
            let uses = uses.borrow();

            let Some(dom) = blocks.dom_parent_index(b_idx) else {
                continue;
            };

            let Some(dom_lh_idx) = blocks.loop_header_index(dom) else {
                continue;
            };

            let mut parent_uses =
                loop_uses.get(&dom_lh_idx).unwrap().borrow_mut();
            for ssa in uses.iter() {
                parent_uses.insert(*ssa);
            }
        }
    }

    let mut spill = SpillCache::new(&mut func.ssa_alloc, spill);
    let mut spilled_phis = BitSet::new();

    let mut ssa_state_in: Vec<SSAState> = Vec::new();
    let mut ssa_state_out: Vec<SSAState> = Vec::new();

    for b_idx in 0..blocks.len() {
        let bl = live.block_live(b_idx);

        let preds = blocks.pred_indices(b_idx).to_vec();
        let w = if preds.is_empty() {
            // This is the start block so we start with nothing in
            // registers.
            LiveSet::new()
        } else if preds.len() == 1 {
            // If we only have one predecessor then it can't possibly be a
            // loop header and we can just copy the predecessor's w.
            assert!(!blocks.is_loop_header(b_idx));
            assert!(preds[0] < b_idx);
            let p_w = &ssa_state_out[preds[0]].w;
            LiveSet::from_iter(
                p_w.iter().filter(|ssa| bl.is_live_in(ssa)).cloned(),
            )
        } else if !blocks[b_idx].uniform && file.is_uniform() {
            // If this is a non-uniform block, then we can't spill or fill any
            // uniform registers.  The good news is that none of our non-uniform
            // predecessors could spill, either, so we know that everything that
            // was resident coming in will fit in the register file.
            let mut w = LiveSet::new();
            for p_idx in &preds {
                if *p_idx < b_idx {
                    let p_w = &ssa_state_out[*p_idx].w;
                    w.extend(
                        p_w.iter().filter(|ssa| bl.is_live_in(ssa)).cloned(),
                    );
                }
            }
            debug_assert!(w.count(file) <= limit);
            w
        } else if blocks.is_loop_header(b_idx) {
            let mut i_b: HashSet<SSAValue> =
                HashSet::from_iter(bl.iter_live_in().cloned());

            if let Some(phi) = blocks[b_idx].phi_dsts() {
                for (_, dst) in phi.dsts.iter() {
                    if let Dst::SSA(vec) = dst {
                        assert!(vec.comps() == 1);
                        let ssa = vec[0];
                        if ssa.file() == file {
                            i_b.insert(ssa);
                        }
                    }
                }
            }

            let lu = loop_uses.get(&b_idx).unwrap().borrow();
            let mut w = LiveSet::new();

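            // Values used inside the loop get first claim on registers,
            // nearest first use first; Reverse turns the max-heap into a
            // min-heap on first-use IP.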
            let mut some = BinaryHeap::new();
            for ssa in i_b.iter() {
                if lu.contains(ssa) {
                    let next_use = bl.first_use(ssa).unwrap();
                    some.push(Reverse(SSANextUse::new(*ssa, next_use)));
                }
            }
            while w.count(file) < limit {
                let Some(entry) = some.pop() else {
                    break;
                };
                w.insert(entry.0.ssa);
            }

            // If we still have room, consider values which aren't used
            // inside the loop.
            if w.count(file) < limit {
                for ssa in i_b.iter() {
                    debug_assert!(ssa.file() == file);
                    if !lu.contains(ssa) {
                        let next_use = bl.first_use(ssa).unwrap();
                        some.push(Reverse(SSANextUse::new(*ssa, next_use)));
                    }
                }

                while w.count(file) < limit {
                    let Some(entry) = some.pop() else {
                        break;
                    };
                    w.insert(entry.0.ssa);
                }
            }

            w
        } else {
            let phi_dst_map = &phi_dst_maps[b_idx];

            struct SSAPredInfo {
                num_preds: usize,
                next_use: usize,
            }
            let mut live: HashMap<SSAValue, SSAPredInfo> = HashMap::new();

            for p_idx in &preds {
                let phi_src_map = &phi_src_maps[*p_idx];

                for mut ssa in ssa_state_out[*p_idx].w.iter().cloned() {
                    if let Some(phi) = phi_dst_map.get_phi_idx(&ssa) {
                        ssa = *phi_src_map.get_src_ssa(phi);
                    }

                    if let Some(next_use) = bl.first_use(&ssa) {
                        live.entry(ssa)
                            .and_modify(|e| e.num_preds += 1)
                            .or_insert_with(|| SSAPredInfo {
                                num_preds: 1,
                                next_use,
                            });
                    }
                }
            }

            let mut w = LiveSet::new();
            let mut some = BinaryHeap::new();

            for (ssa, info) in live.drain() {
                if info.num_preds == preds.len() {
                    // This one is in all the input sets
                    w.insert(ssa);
                } else {
                    some.push(Reverse(SSANextUse::new(ssa, info.next_use)));
                }
            }
            while w.count(file) < limit {
                let Some(entry) = some.pop() else {
                    break;
                };
                let ssa = entry.0.ssa;
                assert!(ssa.file() == file);
                w.insert(ssa);
            }

            w
        };

        let s = if preds.is_empty() {
            HashSet::new()
        } else if preds.len() == 1 {
            let p_s = &ssa_state_out[preds[0]].s;
            HashSet::from_iter(
                p_s.iter().filter(|ssa| bl.is_live_in(ssa)).cloned(),
            )
        } else {
            let mut s = HashSet::new();
            for p_idx in &preds {
                if *p_idx >= b_idx {
                    continue;
                }

                // We diverge a bit from Braun and Hack here.  They assume that
                // S is a subset of W, which is clearly bogus.  Instead, we
                // take the union of all forward edge predecessor S_out and
                // intersect with live-in for the current block.
                for ssa in ssa_state_out[*p_idx].s.iter() {
                    if bl.is_live_in(ssa) {
                        s.insert(*ssa);
                    }
                }
            }

            // The loop header heuristic sometimes drops stuff from W that has
            // never been spilled so we need to make sure everything live-in
            // which isn't in W is included in the spill set so that it gets
            // properly spilled when we spill across CF edges.
            if blocks.is_loop_header(b_idx) {
                for ssa in bl.iter_live_in() {
                    if !w.contains(ssa) {
                        s.insert(*ssa);
                    }
                }
            }

            s
        };

        let mut p = HashSet::new();
        for p_idx in &preds {
            if *p_idx < b_idx {
                let p_p = &ssa_state_out[*p_idx].p;
                p.extend(p_p.iter().filter(|ssa| bl.is_live_in(ssa)).cloned());
            }
        }

        for ssa in bl.iter_live_in() {
            debug_assert!(w.contains(ssa) || s.contains(ssa));
        }

        let mut b = SSAState { w, s, p };

        assert!(ssa_state_in.len() == b_idx);
        ssa_state_in.push(b.clone());

        let bb = &mut blocks[b_idx];

        let mut instrs = Vec::new();
        for (ip, mut instr) in bb.instrs.drain(..).enumerate() {
            match &mut instr.op {
                Op::PhiDsts(phi) => {
                    // For phis, anything that is not in W needs to be spilled
                    // by setting the destination to some spill value.
                    for (idx, dst) in phi.dsts.iter_mut() {
                        let vec = dst.as_ssa().unwrap();
                        debug_assert!(vec.comps() == 1);
                        let ssa = &vec[0];

                        if ssa.file() == file && !b.w.contains(ssa) {
                            spilled_phis.insert((*idx).try_into().unwrap());
                            b.s.insert(*ssa);
                            *dst = spill.get_spill(*ssa).into();
                        }
                    }
                }
                Op::PhiSrcs(_) => {
                    // We handle phi sources later.  For now, leave them be.
                }
                Op::ParCopy(pcopy) => {
                    let mut num_w_dsts = 0_u32;
                    for (dst, src) in pcopy.dsts_srcs.iter_mut() {
                        let dst_vec = dst.as_ssa().unwrap();
                        debug_assert!(dst_vec.comps() == 1);
                        let dst_ssa = &dst_vec[0];

                        debug_assert!(src.src_mod.is_none());
                        let src_vec = src.src_ref.as_ssa().unwrap();
                        debug_assert!(src_vec.comps() == 1);
                        let src_ssa = &src_vec[0];

                        debug_assert!(dst_ssa.file() == src_ssa.file());
                        if src_ssa.file() != file {
                            continue;
                        }

                        // If it's not resident, rewrite to just move from one
                        // spill to another, assuming that copying in spill
                        // space is efficient.
                        if b.w.contains(src_ssa) {
                            num_w_dsts += 1;
                        } else {
                            debug_assert!(b.s.contains(src_ssa));
                            b.s.insert(*dst_ssa);
                            *src = spill.get_spill(*src_ssa).into();
                            *dst = spill.get_spill(*dst_ssa).into();
                        }
                    }

                    // We can now assume that a source is in W if and only if
                    // the file matches.  Remove all killed sources from W.
                    for (_, src) in pcopy.dsts_srcs.iter() {
                        let src_ssa = &src.src_ref.as_ssa().unwrap()[0];
                        if !bl.is_live_after_ip(src_ssa, ip) {
                            b.w.remove(src_ssa);
                        }
                    }

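                    // rel_limit is how many more values W can hold.  Any
                    // parallel copy destinations beyond that have to be
                    // spilled.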
                    let rel_limit = limit - b.w.count(file);
                    if num_w_dsts > rel_limit {
                        // We can't spill uniform registers in a non-uniform
                        // block.
                        assert!(bb.uniform || !file.is_uniform());

                        let count = num_w_dsts - rel_limit;
                        let count = count.try_into().unwrap();

                        let mut spills = SpillChooser::new(bl, &b.p, ip, count);
                        for (dst, _) in pcopy.dsts_srcs.iter() {
                            let dst_ssa = &dst.as_ssa().unwrap()[0];
                            if dst_ssa.file() == file {
                                spills.add_candidate(*dst_ssa);
                            }
                        }

                        let spills: HashSet<SSAValue> =
                            HashSet::from_iter(spills);

                        for (dst, src) in pcopy.dsts_srcs.iter_mut() {
                            let dst_ssa = &dst.as_ssa().unwrap()[0];
                            let src_ssa = &src.src_ref.as_ssa().unwrap()[0];
                            if spills.contains(dst_ssa) {
                                if b.s.insert(*src_ssa) {
                                    if DEBUG.annotate() {
                                        instrs.push(Instr::new_boxed(
                                            OpAnnotate {
                                                annotation:
                                                    "generated by spill_values"
                                                        .into(),
                                            },
                                        ));
                                    }
                                    instrs.push(spill.spill(*src_ssa));
                                }
                                b.s.insert(*dst_ssa);
                                *src = spill.get_spill(*src_ssa).into();
                                *dst = spill.get_spill(*dst_ssa).into();
                            }
                        }
                    }

                    for (dst, _) in pcopy.dsts_srcs.iter() {
                        let dst_ssa = &dst.as_ssa().unwrap()[0];
                        if dst_ssa.file() == file {
                            b.w.insert(*dst_ssa);
                        }
                    }
                }
                _ => {
                    if file == RegFile::UGPR && !bb.uniform {
                        // We can't spill UGPRs in a non-uniform block.
                        // Instead, we depend on two facts:
                        //
                        //  1. Uniform instructions are not allowed in
                        //     non-uniform blocks
                        //
                        //  2. Non-uniform instructions can always accept a wave
                        //     register in lieu of a uniform register
                        //
                        debug_assert!(spill.spill_file(file) == RegFile::GPR);
                        instr.for_each_ssa_use_mut(|ssa| {
                            if ssa.file() == file && !b.w.contains(ssa) {
                                *ssa = spill.get_spill(*ssa).into();
                            }
                        });
                    } else if file == RegFile::UPred && !bb.uniform {
                        // We can't spill UPreds in a non-uniform block.
                        // Instead, we depend on three facts:
                        //
                        //  1. Uniform instructions are not allowed in
                        //     non-uniform blocks
                        //
                        //  2. Non-uniform instructions can always accept a wave
                        //     register in lieu of a uniform register
                        //
                        //  3. We can un-spill from a UGPR directly to a Pred
                        //
                        // This also shouldn't come up that often in practice
                        // so it's okay to un-spill every time on the spot.
                        //
                        instr.for_each_ssa_use_mut(|ssa| {
                            if ssa.file() == file && !b.w.contains(ssa) {
                                if DEBUG.annotate() {
                                    instrs.push(Instr::new_boxed(OpAnnotate {
                                        annotation: "generated by spill_values"
                                            .into(),
                                    }));
                                }
                                let tmp = spill.alloc.alloc(RegFile::Pred);
                                instrs.push(spill.fill_dst(tmp.into(), *ssa));
                                *ssa = tmp;
                            }
                        });
                    } else {
                        // First compute fills even though those have to come
                        // after spills.
                        let mut fills = Vec::new();
                        instr.for_each_ssa_use(|ssa| {
                            if ssa.file() == file && !b.w.contains(ssa) {
                                debug_assert!(b.s.contains(ssa));
                                debug_assert!(bb.uniform || !ssa.is_uniform());
                                fills.push(spill.fill(*ssa));
                                b.w.insert(*ssa);
                            }
                        });

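                        // If this instruction would push the resident set
                        // past the limit, evict the values whose next use is
                        // furthest away before appending the fills.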
                        let rel_pressure =
                            bl.get_instr_pressure(ip, &instr)[file];
                        let abs_pressure =
                            b.w.count(file) + u32::from(rel_pressure);

                        if abs_pressure > limit {
                            let count = abs_pressure - limit;
                            let count = count.try_into().unwrap();

                            let mut spills =
                                SpillChooser::new(bl, &b.p, ip, count);
                            for ssa in b.w.iter() {
                                spills.add_candidate(*ssa);
                            }

                            for ssa in spills {
                                debug_assert!(ssa.file() == file);
                                b.w.remove(&ssa);
                                if DEBUG.annotate() {
                                    instrs.push(Instr::new_boxed(OpAnnotate {
                                        annotation: "generated by spill_values"
                                            .into(),
                                    }));
                                }
                                instrs.push(spill.spill(ssa));
                                b.s.insert(ssa);
                            }
                        }

                        if DEBUG.annotate() {
                            instrs.push(Instr::new_boxed(OpAnnotate {
                                annotation: "generated by spill_values".into(),
                            }));
                        }
                        instrs.append(&mut fills);

                        instr.for_each_ssa_use(|ssa| {
                            if ssa.file() == file {
                                debug_assert!(b.w.contains(ssa));
                            }
                        });

                        b.w.insert_instr_top_down(ip, &instr, bl);
                    }
                }
            }

            // OpPin takes the normal spilling path but we want to also mark
            // any of its destination SSA values as pinned.
            if matches!(&instr.op, Op::Pin(_)) {
                instr.for_each_ssa_def(|ssa| {
                    b.p.insert(*ssa);
                });
            }

            instrs.push(instr);
        }
        bb.instrs = instrs;

        assert!(ssa_state_out.len() == b_idx);
        ssa_state_out.push(b);
    }

    // Now that everything is spilled, we handle phi sources and connect the
    // blocks by adding spills and fills as needed along edges.
    for p_idx in 0..blocks.len() {
        let succ = blocks.succ_indices(p_idx);
        if succ.len() != 1 {
            // We don't have any critical edges
            for s_idx in succ {
                debug_assert!(blocks.pred_indices(*s_idx).len() == 1);
            }
            continue;
        }
        let s_idx = succ[0];

        let pb = &mut blocks[p_idx];
        let p_out = &ssa_state_out[p_idx];
        let s_in = &ssa_state_in[s_idx];
        let phi_dst_map = &phi_dst_maps[s_idx];

        let mut spills = Vec::new();
        let mut fills = Vec::new();

        if let Some(phi) = pb.phi_srcs_mut() {
            for (idx, src) in phi.srcs.iter_mut() {
                debug_assert!(src.src_mod.is_none());
                let vec = src.src_ref.as_ssa().unwrap();
                debug_assert!(vec.comps() == 1);
                let ssa = &vec[0];

                if ssa.file() != file {
                    continue;
                }

                if spilled_phis.get((*idx).try_into().unwrap()) {
                    if !p_out.s.contains(ssa) {
                        spills.push(*ssa);
                    }
                    *src = spill.get_spill(*ssa).into();
                } else {
                    if !p_out.w.contains(ssa) {
                        fills.push(*ssa);
                    }
                }
            }
        }

        for ssa in s_in.s.iter() {
            if p_out.w.contains(ssa) && !p_out.s.contains(ssa) {
                spills.push(*ssa);
            }
        }

        for ssa in s_in.w.iter() {
            if phi_dst_map.get_phi_idx(ssa).is_some() {
                continue;
            }

            if !p_out.w.contains(ssa) {
                fills.push(*ssa);
            }
        }

        if spills.is_empty() && fills.is_empty() {
            continue;
        }

        // Sort to ensure stability of the algorithm
        spills.sort_by_key(|ssa| ssa.idx());
        fills.sort_by_key(|ssa| ssa.idx());

        let mut instrs = Vec::new();
        for ssa in spills {
            instrs.push(spill.spill(ssa));
        }
        for ssa in fills {
            debug_assert!(pb.uniform || !ssa.is_uniform());
            instrs.push(spill.fill(ssa));
        }

        // Insert spills and fills right before the phi (if any)
        let ip = pb
            .phi_srcs_ip()
            .or_else(|| pb.branch_ip())
            .unwrap_or_else(|| pb.instrs.len());
        pb.instrs.splice(ip..ip, instrs.into_iter());
    }
}

impl Function {
    /// Spill values from @file to fit within @limit registers
    ///
    /// This pass assumes that the function is already in CSSA form.  See
    /// @to_cssa for more details.
    ///
    /// The algorithm implemented here is roughly based on "Register Spilling
    /// and Live-Range Splitting for SSA-Form Programs" by Braun and Hack.  The
    /// primary contributions of the Braun and Hack paper are the global
    /// next-use distances which are implemented by @NextUseLiveness and a
    /// heuristic for computing spill sets at block boundaries.  The paper
    /// describes two sets:
    ///
    ///  - W, the set of variables currently resident
    ///
    ///  - S, the set of variables which have been spilled
    ///
    /// These sets are tracked as we walk instructions and [un]spill values to
    /// satisfy the given limit.  When spills are required we spill the value
    /// with the highest next-use IP.  At block boundaries, Braun and Hack
    /// describe a heuristic for determining the starting W and S sets based on
    /// the W and S from the end of each of the forward edge predecessor blocks.
    ///
    /// What Braun and Hack do not describe is how to handle phis and parallel
    /// copies.  Because we assume the function is already in CSSA form, we can
    /// use a fairly simple algorithm.  On the first pass, we ignore phi sources
    /// and assign phi destinations based on W at the start of the block.  If
    /// the phi destination is in W, we leave it alone.  If it is not in W, then
    /// we allocate a new spill value and assign it to the phi destination.  In
    /// a second pass, we handle phi sources based on the destination.  If the
    /// destination is in W, we leave it alone.  If the destination is spilled,
    /// we read from the spill value corresponding to the source, spilling first
    /// if needed.  In the second pass, we also handle spilling across blocks as
    /// needed for values that do not pass through a phi.
    ///
    /// A special case is also required for parallel copies because they can
    /// have an unbounded number of destinations.  For any source values not in
    /// W, we allocate a spill value for the destination and copy in the spill
    /// register file.  For any sources which are in W, we try to leave as much
    /// in W as possible.  However, since source values may not be killed by the
    /// copy and because one source value may be copied to arbitrarily many
    /// destinations, that is not always possible.  Whenever we need to spill
    /// values, we spill according to the highest next-use of the destination
    /// and we spill the source first and then parallel copy the source into a
    /// spilled destination value.
    ///
    /// This all assumes that it's better to copy in spill space than to unspill
    /// just for the sake of a parallel copy.  While this may not be true in
    /// general, especially not when spilling to memory, the register allocator
    /// is good at eliding unnecessary copies.
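    ///
    /// As a schematic illustration (hand-written pseudo-IR, not actual pass
    /// output), suppose limit = 3 and W = {a, b, x} where x's next use is
    /// furthest away:
    ///
    /// ```text
    /// c = op a, b   // dst c would push pressure to 4 > limit:
    ///               // spill x  =>  W = {a, b, c}, S = {x}
    /// ...
    /// e = op c, x   // x not in W: fill x from its spill value first,
    ///               // spilling something else if W is full
    /// ```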
    pub fn spill_values(&mut self, file: RegFile, limit: u32) {
        match file {
            RegFile::GPR => {
                let spill = SpillGPR::new();
                spill_values(self, file, limit, spill);
            }
            RegFile::UGPR => {
                let spill = SpillUniform::new();
                spill_values(self, file, limit, spill);
            }
            RegFile::Pred | RegFile::UPred => {
                let spill = SpillPred::new();
                spill_values(self, file, limit, spill);
            }
            RegFile::Bar => {
                let spill = SpillBar::new();
                spill_values(self, file, limit, spill);
            }
            _ => panic!("Don't know how to spill {} registers", file),
        }

        self.repair_ssa();
        self.opt_dce();

        if DEBUG.print() {
            eprintln!("NAK IR after spilling {}:\n{}", file, self);
        }
    }
}
1059