// Copyright © 2023 Collabora, Ltd.
// SPDX-License-Identifier: MIT

#![allow(unstable_name_collisions)]

use crate::api::{GetDebugFlags, DEBUG};
use crate::ir::*;
use crate::liveness::{
    BlockLiveness, LiveSet, Liveness, NextUseBlockLiveness, NextUseLiveness,
};

use compiler::bitset::BitSet;
use std::cell::RefCell;
use std::cmp::{max, Ordering, Reverse};
use std::collections::{BinaryHeap, HashMap, HashSet};

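/// Maps each SSA value written by an OpPhiDsts destination to the index of
/// its phi, so that phi destinations can be looked up by value.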
struct PhiDstMap {
    ssa_phi: HashMap<SSAValue, u32>,
}

impl PhiDstMap {
    fn new() -> PhiDstMap {
        PhiDstMap {
            ssa_phi: HashMap::new(),
        }
    }

    fn add_phi_dst(&mut self, phi_idx: u32, dst: Dst) {
        let vec = dst.as_ssa().expect("Not an SSA destination");
        debug_assert!(vec.comps() == 1);
        self.ssa_phi.insert(vec[0], phi_idx);
    }

    pub fn from_block(block: &BasicBlock) -> PhiDstMap {
        let mut map = PhiDstMap::new();
        if let Some(phi) = block.phi_dsts() {
            for (idx, dst) in phi.dsts.iter() {
                map.add_phi_dst(*idx, *dst);
            }
        }
        map
    }

    fn get_phi_idx(&self, ssa: &SSAValue) -> Option<&u32> {
        self.ssa_phi.get(ssa)
    }
}

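/// Maps a phi index to the SSA value read by the corresponding OpPhiSrcs
/// source in a predecessor block.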
struct PhiSrcMap {
    phi_src: HashMap<u32, SSAValue>,
}

impl PhiSrcMap {
    fn new() -> PhiSrcMap {
        PhiSrcMap {
            phi_src: HashMap::new(),
        }
    }

    fn add_phi_src(&mut self, phi_idx: u32, src: Src) {
        debug_assert!(src.src_mod.is_none());
        let vec = src.src_ref.as_ssa().expect("Not an SSA source");
        debug_assert!(vec.comps() == 1);
        self.phi_src.insert(phi_idx, vec[0]);
    }

    pub fn from_block(block: &BasicBlock) -> PhiSrcMap {
        let mut map = PhiSrcMap::new();
        if let Some(phi) = block.phi_srcs() {
            for (idx, src) in phi.srcs.iter() {
                map.add_phi_src(*idx, *src);
            }
        }
        map
    }

    pub fn get_src_ssa(&self, phi_idx: &u32) -> &SSAValue {
        self.phi_src.get(phi_idx).expect("Phi source missing")
    }
}

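/// Abstracts how values from a particular register file are spilled: which
/// register file spill values live in and which instructions move a value
/// into and out of spill space.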
trait Spill {
    fn spill_file(&self, file: RegFile) -> RegFile;
    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr>;
    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr>;
}

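/// Spills uniform values into the corresponding non-uniform register file,
/// using OpCopy to spill and OpR2UR to fill back into a uniform register.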
struct SpillUniform {}

impl SpillUniform {
    fn new() -> Self {
        Self {}
    }
}

impl Spill for SpillUniform {
    fn spill_file(&self, file: RegFile) -> RegFile {
        debug_assert!(file.is_uniform());
        file.to_warp()
    }

    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr> {
        Instr::new_boxed(OpCopy {
            dst: dst.into(),
            src: src,
        })
    }

    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr> {
        Instr::new_boxed(OpR2UR {
            dst: dst,
            src: src.into(),
        })
    }
}

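/// Spills predicates to GPRs (and uniform predicates to uniform GPRs) by
/// materializing 0 or !0 with OpSel and converting back with OpISetP.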
struct SpillPred {}

impl SpillPred {
    fn new() -> Self {
        Self {}
    }
}

impl Spill for SpillPred {
    fn spill_file(&self, file: RegFile) -> RegFile {
        match file {
            RegFile::Pred => RegFile::GPR,
            RegFile::UPred => RegFile::UGPR,
            _ => panic!("Unsupported register file"),
        }
    }

    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr> {
        assert!(matches!(dst.file(), RegFile::GPR | RegFile::UGPR));
        if let Some(b) = src.as_bool() {
            let u32_src = if b {
                Src::new_imm_u32(!0)
            } else {
                Src::new_zero()
            };
            Instr::new_boxed(OpCopy {
                dst: dst.into(),
                src: u32_src,
            })
        } else {
            Instr::new_boxed(OpSel {
                dst: dst.into(),
                cond: src.bnot(),
                srcs: [Src::new_zero(), Src::new_imm_u32(!0)],
            })
        }
    }

    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr> {
        assert!(matches!(src.file(), RegFile::GPR | RegFile::UGPR));
        Instr::new_boxed(OpISetP {
            dst: dst,
            set_op: PredSetOp::And,
            cmp_op: IntCmpOp::Ne,
            cmp_type: IntCmpType::U32,
            ex: false,
            srcs: [Src::new_zero(), src.into()],
            accum: true.into(),
            low_cmp: true.into(),
        })
    }
}

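/// Spills barrier registers to GPRs, using OpBMov in both directions.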
struct SpillBar {}

impl SpillBar {
    fn new() -> Self {
        Self {}
    }
}

impl Spill for SpillBar {
    fn spill_file(&self, file: RegFile) -> RegFile {
        assert!(file == RegFile::Bar);
        RegFile::GPR
    }

    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr> {
        assert!(dst.file() == RegFile::GPR);
        Instr::new_boxed(OpBMov {
            dst: dst.into(),
            src: src,
            clear: false,
        })
    }

    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr> {
        assert!(src.file() == RegFile::GPR);
        Instr::new_boxed(OpBMov {
            dst: dst,
            src: src.into(),
            clear: false,
        })
    }
}

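/// Spills GPRs to the Mem register file using plain copies.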
struct SpillGPR {}

impl SpillGPR {
    fn new() -> Self {
        Self {}
    }
}

impl Spill for SpillGPR {
    fn spill_file(&self, file: RegFile) -> RegFile {
        assert!(file == RegFile::GPR);
        RegFile::Mem
    }

    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr> {
        assert!(dst.file() == RegFile::Mem);
        Instr::new_boxed(OpCopy {
            dst: dst.into(),
            src: src,
        })
    }

    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr> {
        assert!(src.file() == RegFile::Mem);
        Instr::new_boxed(OpCopy {
            dst: dst,
            src: src.into(),
        })
    }
}

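/// An (SSA value, next-use IP) pair, ordered by next-use distance and then by
/// SSA index for determinism, so it can be stored in a BinaryHeap.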
#[derive(Eq, PartialEq)]
struct SSANextUse {
    ssa: SSAValue,
    next_use: usize,
}

impl SSANextUse {
    fn new(ssa: SSAValue, next_use: usize) -> SSANextUse {
        SSANextUse {
            ssa: ssa,
            next_use: next_use,
        }
    }
}

impl Ord for SSANextUse {
    fn cmp(&self, other: &Self) -> Ordering {
        self.next_use
            .cmp(&other.next_use)
            .then_with(|| self.ssa.idx().cmp(&other.ssa.idx()))
    }
}

impl PartialOrd for SSANextUse {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

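/// Wraps a Spill implementation and allocates at most one spill value per
/// spilled SSA value so that repeated spills and fills of the same value
/// reuse the same spill value.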
struct SpillCache<'a, S: Spill> {
    alloc: &'a mut SSAValueAllocator,
    spill: S,
    val_spill: HashMap<SSAValue, SSAValue>,
}

impl<'a, S: Spill> SpillCache<'a, S> {
    fn new(alloc: &'a mut SSAValueAllocator, spill: S) -> SpillCache<'a, S> {
        SpillCache {
            alloc: alloc,
            spill: spill,
            val_spill: HashMap::new(),
        }
    }

    fn spill_file(&self, file: RegFile) -> RegFile {
        self.spill.spill_file(file)
    }

    fn get_spill(&mut self, ssa: SSAValue) -> SSAValue {
        *self.val_spill.entry(ssa).or_insert_with(|| {
            self.alloc.alloc(self.spill.spill_file(ssa.file()))
        })
    }

    fn spill_src(&mut self, ssa: SSAValue, src: Src) -> Box<Instr> {
        let dst = self.get_spill(ssa);
        self.spill.spill(dst, src)
    }

    fn spill(&mut self, ssa: SSAValue) -> Box<Instr> {
        self.spill_src(ssa, ssa.into())
    }

    fn fill_dst(&mut self, dst: Dst, ssa: SSAValue) -> Box<Instr> {
        let src = self.get_spill(ssa);
        self.spill.fill(dst, src)
    }

    fn fill(&mut self, ssa: SSAValue) -> Box<Instr> {
        self.fill_dst(ssa.into(), ssa)
    }
}

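/// Chooses which values to spill at a given IP.  Out of all the candidates
/// added, it keeps the `count` values whose next use is furthest away;
/// pinned values are never chosen.  Iterating yields the chosen values.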
struct SpillChooser<'a> {
    bl: &'a NextUseBlockLiveness,
    pinned: &'a HashSet<SSAValue>,
    ip: usize,
    count: usize,
    spills: BinaryHeap<Reverse<SSANextUse>>,
    min_next_use: usize,
}

struct SpillChoiceIter {
    spills: BinaryHeap<Reverse<SSANextUse>>,
}

impl<'a> SpillChooser<'a> {
    pub fn new(
        bl: &'a NextUseBlockLiveness,
        pinned: &'a HashSet<SSAValue>,
        ip: usize,
        count: usize,
    ) -> Self {
        Self {
            bl: bl,
            pinned: pinned,
            ip: ip,
            count: count,
            spills: BinaryHeap::new(),
            min_next_use: ip + 1,
        }
    }

    pub fn add_candidate(&mut self, ssa: SSAValue) {
        // Don't spill anything that's pinned
        if self.pinned.contains(&ssa) {
            return;
        }

        // Ignore anything used sooner than spill options we've already
        // rejected.
        let next_use = self.bl.next_use_after_or_at_ip(&ssa, self.ip).unwrap();
        if next_use < self.min_next_use {
            return;
        }

        self.spills.push(Reverse(SSANextUse::new(ssa, next_use)));

        if self.spills.len() > self.count {
            // Because we reversed the heap, pop actually removes the
            // one with the lowest next_use which is what we want here.
            let old = self.spills.pop().unwrap();
            debug_assert!(self.spills.len() == self.count);
            self.min_next_use = max(self.min_next_use, old.0.next_use);
        }
    }
}

impl<'a> IntoIterator for SpillChooser<'a> {
    type Item = SSAValue;
    type IntoIter = SpillChoiceIter;

    fn into_iter(self) -> SpillChoiceIter {
        SpillChoiceIter {
            spills: self.spills,
        }
    }
}

impl Iterator for SpillChoiceIter {
    type Item = SSAValue;

    fn size_hint(&self) -> (usize, Option<usize>) {
        let len = self.spills.len();
        (len, Some(len))
    }

    fn next(&mut self) -> Option<SSAValue> {
        self.spills.pop().map(|x| x.0.ssa)
    }
}

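/// Per-block spilling state in the style of Braun and Hack.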
#[derive(Clone)]
struct SSAState {
    // The set of variables which currently exist in registers
    w: LiveSet,
    // The set of variables which have already been spilled. These don't need
    // to be spilled again.
    s: HashSet<SSAValue>,
    // The set of pinned variables
    p: HashSet<SSAValue>,
}

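/// Walks the blocks of @func, computing the resident (W), spilled (S), and
/// pinned (P) sets at each block boundary and rewriting instructions so that
/// at most @limit values of @file are resident at any point.  See
/// Function::spill_values() below for a description of the algorithm.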
fn spill_values<S: Spill>(
    func: &mut Function,
    file: RegFile,
    limit: u32,
    spill: S,
) {
    let files = RegFileSet::from_iter([file]);
    let live = NextUseLiveness::for_function(func, &files);
    let blocks = &mut func.blocks;

    // Record the set of SSA values used within each loop
    let mut phi_dst_maps = Vec::new();
    let mut phi_src_maps = Vec::new();
    let mut loop_uses = HashMap::new();
    for b_idx in 0..blocks.len() {
        phi_dst_maps.push(PhiDstMap::from_block(&blocks[b_idx]));
        phi_src_maps.push(PhiSrcMap::from_block(&blocks[b_idx]));

        if let Some(lh_idx) = blocks.loop_header_index(b_idx) {
            let uses = loop_uses
                .entry(lh_idx)
                .or_insert_with(|| RefCell::new(HashSet::new()));
            let uses = uses.get_mut();

            for instr in &blocks[b_idx].instrs {
                instr.for_each_ssa_use(|ssa| {
                    if ssa.file() == file {
                        uses.insert(*ssa);
                    }
                });
            }
        }
    }

    if !loop_uses.is_empty() {
        // The previous loop only added values to the uses set for the
        // inner-most loop. Propagate from inner loops to outer loops.
        for b_idx in (0..blocks.len()).rev() {
            let Some(uses) = loop_uses.get(&b_idx) else {
                continue;
            };
            let uses = uses.borrow();

            let Some(dom) = blocks.dom_parent_index(b_idx) else {
                continue;
            };

            let Some(dom_lh_idx) = blocks.loop_header_index(dom) else {
                continue;
            };

            let mut parent_uses =
                loop_uses.get(&dom_lh_idx).unwrap().borrow_mut();
            for ssa in uses.iter() {
                parent_uses.insert(*ssa);
            }
        }
    }

    let mut spill = SpillCache::new(&mut func.ssa_alloc, spill);
    let mut spilled_phis = BitSet::new();

    let mut ssa_state_in: Vec<SSAState> = Vec::new();
    let mut ssa_state_out: Vec<SSAState> = Vec::new();

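    // First pass: walk the blocks in order, compute the W/S/P state at each
    // block entry, and rewrite the block's instructions, spilling and filling
    // as needed to stay within the limit.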
    for b_idx in 0..blocks.len() {
        let bl = live.block_live(b_idx);

        let preds = blocks.pred_indices(b_idx).to_vec();
        let w = if preds.is_empty() {
            // This is the start block so we start with nothing in
            // registers.
            LiveSet::new()
        } else if preds.len() == 1 {
            // If we only have one predecessor then it can't possibly be a
            // loop header and we can just copy the predecessor's w.
            assert!(!blocks.is_loop_header(b_idx));
            assert!(preds[0] < b_idx);
            let p_w = &ssa_state_out[preds[0]].w;
            LiveSet::from_iter(
                p_w.iter().filter(|ssa| bl.is_live_in(ssa)).cloned(),
            )
        } else if !blocks[b_idx].uniform && file.is_uniform() {
            // If this is a non-uniform block, then we can't spill or fill any
            // uniform registers. The good news is that none of our non-uniform
            // predecessors could spill, either, so we know that everything
            // that was resident coming in will fit in the register file.
            let mut w = LiveSet::new();
            for p_idx in &preds {
                if *p_idx < b_idx {
                    let p_w = &ssa_state_out[*p_idx].w;
                    w.extend(
                        p_w.iter().filter(|ssa| bl.is_live_in(ssa)).cloned(),
                    );
                }
            }
            debug_assert!(w.count(file) <= limit);
            w
        } else if blocks.is_loop_header(b_idx) {
            let mut i_b: HashSet<SSAValue> =
                HashSet::from_iter(bl.iter_live_in().cloned());

            if let Some(phi) = blocks[b_idx].phi_dsts() {
                for (_, dst) in phi.dsts.iter() {
                    if let Dst::SSA(vec) = dst {
                        assert!(vec.comps() == 1);
                        let ssa = vec[0];
                        if ssa.file() == file {
                            i_b.insert(ssa);
                        }
                    }
                }
            }

            let lu = loop_uses.get(&b_idx).unwrap().borrow();
            let mut w = LiveSet::new();

            let mut some = BinaryHeap::new();
            for ssa in i_b.iter() {
                if lu.contains(ssa) {
                    let next_use = bl.first_use(ssa).unwrap();
                    some.push(Reverse(SSANextUse::new(*ssa, next_use)));
                }
            }
            while w.count(file) < limit {
                let Some(entry) = some.pop() else {
                    break;
                };
                w.insert(entry.0.ssa);
            }

            // If we still have room, consider values which aren't used
            // inside the loop.
            if w.count(file) < limit {
                for ssa in i_b.iter() {
                    debug_assert!(ssa.file() == file);
                    if !lu.contains(ssa) {
                        let next_use = bl.first_use(ssa).unwrap();
                        some.push(Reverse(SSANextUse::new(*ssa, next_use)));
                    }
                }

                while w.count(file) < limit {
                    let Some(entry) = some.pop() else {
                        break;
                    };
                    w.insert(entry.0.ssa);
                }
            }

            w
        } else {
            let phi_dst_map = &phi_dst_maps[b_idx];

            struct SSAPredInfo {
                num_preds: usize,
                next_use: usize,
            }
            let mut live: HashMap<SSAValue, SSAPredInfo> = HashMap::new();

            for p_idx in &preds {
                let phi_src_map = &phi_src_maps[*p_idx];

                for mut ssa in ssa_state_out[*p_idx].w.iter().cloned() {
                    if let Some(phi) = phi_dst_map.get_phi_idx(&ssa) {
                        ssa = *phi_src_map.get_src_ssa(phi);
                    }

                    if let Some(next_use) = bl.first_use(&ssa) {
                        live.entry(ssa)
                            .and_modify(|e| e.num_preds += 1)
                            .or_insert_with(|| SSAPredInfo {
                                num_preds: 1,
                                next_use: next_use,
                            });
                    }
                }
            }

            let mut w = LiveSet::new();
            let mut some = BinaryHeap::new();

            for (ssa, info) in live.drain() {
                if info.num_preds == preds.len() {
                    // This one is in all the input sets
                    w.insert(ssa);
                } else {
                    some.push(Reverse(SSANextUse::new(ssa, info.next_use)));
                }
            }
            while w.count(file) < limit {
                let Some(entry) = some.pop() else {
                    break;
                };
                let ssa = entry.0.ssa;
                assert!(ssa.file() == file);
                w.insert(ssa);
            }

            w
        };

        let s = if preds.is_empty() {
            HashSet::new()
        } else if preds.len() == 1 {
            let p_s = &ssa_state_out[preds[0]].s;
            HashSet::from_iter(
                p_s.iter().filter(|ssa| bl.is_live_in(ssa)).cloned(),
            )
        } else {
            let mut s = HashSet::new();
            for p_idx in &preds {
                if *p_idx >= b_idx {
                    continue;
                }

                // We diverge a bit from Braun and Hack here. They assume that
                // S is a subset of W which is clearly bogus. Instead, we take
                // the union of all forward edge predecessor S_out and
                // intersect with live-in for the current block.
                for ssa in ssa_state_out[*p_idx].s.iter() {
                    if bl.is_live_in(ssa) {
                        s.insert(*ssa);
                    }
                }
            }

            // The loop header heuristic sometimes drops stuff from W that has
            // never been spilled so we need to make sure everything live-in
            // which isn't in W is included in the spill set so that it gets
            // properly spilled when we spill across CF edges.
            if blocks.is_loop_header(b_idx) {
                for ssa in bl.iter_live_in() {
                    if !w.contains(ssa) {
                        s.insert(*ssa);
                    }
                }
            }

            s
        };

        let mut p = HashSet::new();
        for p_idx in &preds {
            if *p_idx < b_idx {
                let p_p = &ssa_state_out[*p_idx].p;
                p.extend(p_p.iter().filter(|ssa| bl.is_live_in(ssa)).cloned());
            }
        }

        for ssa in bl.iter_live_in() {
            debug_assert!(w.contains(ssa) || s.contains(ssa));
        }

        let mut b = SSAState { w: w, s: s, p: p };

        assert!(ssa_state_in.len() == b_idx);
        ssa_state_in.push(b.clone());

        let bb = &mut blocks[b_idx];

        let mut instrs = Vec::new();
        for (ip, mut instr) in bb.instrs.drain(..).enumerate() {
            match &mut instr.op {
                Op::PhiDsts(phi) => {
                    // For phis, anything that is not in W needs to be spilled
                    // by setting the destination to some spill value.
                    for (idx, dst) in phi.dsts.iter_mut() {
                        let vec = dst.as_ssa().unwrap();
                        debug_assert!(vec.comps() == 1);
                        let ssa = &vec[0];

                        if ssa.file() == file && !b.w.contains(ssa) {
                            spilled_phis.insert((*idx).try_into().unwrap());
                            b.s.insert(*ssa);
                            *dst = spill.get_spill(*ssa).into();
                        }
                    }
                }
                Op::PhiSrcs(_) => {
                    // We handle phi sources later. For now, leave them be.
                }
                Op::ParCopy(pcopy) => {
                    let mut num_w_dsts = 0_u32;
                    for (dst, src) in pcopy.dsts_srcs.iter_mut() {
                        let dst_vec = dst.as_ssa().unwrap();
                        debug_assert!(dst_vec.comps() == 1);
                        let dst_ssa = &dst_vec[0];

                        debug_assert!(src.src_mod.is_none());
                        let src_vec = src.src_ref.as_ssa().unwrap();
                        debug_assert!(src_vec.comps() == 1);
                        let src_ssa = &src_vec[0];

                        debug_assert!(dst_ssa.file() == src_ssa.file());
                        if src_ssa.file() != file {
                            continue;
                        }

                        // If it's not resident, rewrite to just move from one
                        // spill to another, assuming that copying in spill
                        // space is efficient
                        if b.w.contains(src_ssa) {
                            num_w_dsts += 1;
                        } else {
                            debug_assert!(b.s.contains(src_ssa));
                            b.s.insert(*dst_ssa);
                            *src = spill.get_spill(*src_ssa).into();
                            *dst = spill.get_spill(*dst_ssa).into();
                        }
                    }

                    // We can now assume that a source is in W if and only if
                    // the file matches. Remove all killed sources from W.
                    for (_, src) in pcopy.dsts_srcs.iter() {
                        let src_ssa = &src.src_ref.as_ssa().unwrap()[0];
                        if !bl.is_live_after_ip(src_ssa, ip) {
                            b.w.remove(src_ssa);
                        }
                    }

                    let rel_limit = limit - b.w.count(file);
                    if num_w_dsts > rel_limit {
                        // We can't spill uniform registers in a non-uniform
                        // block
                        assert!(bb.uniform || !file.is_uniform());

                        let count = num_w_dsts - rel_limit;
                        let count = count.try_into().unwrap();

                        let mut spills = SpillChooser::new(bl, &b.p, ip, count);
                        for (dst, _) in pcopy.dsts_srcs.iter() {
                            let dst_ssa = &dst.as_ssa().unwrap()[0];
                            if dst_ssa.file() == file {
                                spills.add_candidate(*dst_ssa);
                            }
                        }

                        let spills: HashSet<SSAValue> =
                            HashSet::from_iter(spills);

                        for (dst, src) in pcopy.dsts_srcs.iter_mut() {
                            let dst_ssa = &dst.as_ssa().unwrap()[0];
                            let src_ssa = &src.src_ref.as_ssa().unwrap()[0];
                            if spills.contains(dst_ssa) {
                                if b.s.insert(*src_ssa) {
                                    if DEBUG.annotate() {
                                        instrs.push(Instr::new_boxed(
                                            OpAnnotate {
                                                annotation:
                                                    "generated by spill_values"
                                                        .into(),
                                            },
                                        ));
                                    }
                                    instrs.push(spill.spill(*src_ssa));
                                }
                                b.s.insert(*dst_ssa);
                                *src = spill.get_spill(*src_ssa).into();
                                *dst = spill.get_spill(*dst_ssa).into();
                            }
                        }
                    }

                    for (dst, _) in pcopy.dsts_srcs.iter() {
                        let dst_ssa = &dst.as_ssa().unwrap()[0];
                        if dst_ssa.file() == file {
                            b.w.insert(*dst_ssa);
                        }
                    }
                }
                _ => {
                    if file == RegFile::UGPR && !bb.uniform {
                        // We can't spill UGPRs in a non-uniform block.
                        // Instead, we depend on two facts:
                        //
                        // 1. Uniform instructions are not allowed in
                        //    non-uniform blocks
                        //
                        // 2. Non-uniform instructions can always accept a wave
                        //    register in lieu of a uniform register
                        //
                        debug_assert!(spill.spill_file(file) == RegFile::GPR);
                        instr.for_each_ssa_use_mut(|ssa| {
                            if ssa.file() == file && !b.w.contains(ssa) {
                                *ssa = spill.get_spill(*ssa).into();
                            }
                        });
                    } else if file == RegFile::UPred && !bb.uniform {
                        // We can't spill UPreds in a non-uniform block.
                        // Instead, we depend on three facts:
                        //
                        // 1. Uniform instructions are not allowed in
                        //    non-uniform blocks
                        //
                        // 2. Non-uniform instructions can always accept a wave
                        //    register in lieu of a uniform register
                        //
                        // 3. We can un-spill from a UGPR directly to a Pred
                        //
                        // This also shouldn't come up that often in practice
                        // so it's okay to un-spill every time on the spot.
                        //
                        instr.for_each_ssa_use_mut(|ssa| {
                            if ssa.file() == file && !b.w.contains(ssa) {
                                if DEBUG.annotate() {
                                    instrs.push(Instr::new_boxed(OpAnnotate {
                                        annotation: "generated by spill_values"
                                            .into(),
                                    }));
                                }
                                let tmp = spill.alloc.alloc(RegFile::Pred);
                                instrs.push(spill.fill_dst(tmp.into(), *ssa));
                                *ssa = tmp;
                            }
                        });
                    } else {
                        // First compute fills even though those have to come
                        // after spills.
                        let mut fills = Vec::new();
                        instr.for_each_ssa_use(|ssa| {
                            if ssa.file() == file && !b.w.contains(ssa) {
                                debug_assert!(b.s.contains(ssa));
                                debug_assert!(bb.uniform || !ssa.is_uniform());
                                fills.push(spill.fill(*ssa));
                                b.w.insert(*ssa);
                            }
                        });

                        let rel_pressure =
                            bl.get_instr_pressure(ip, &instr)[file];
                        let abs_pressure =
                            b.w.count(file) + u32::from(rel_pressure);

                        if abs_pressure > limit {
                            let count = abs_pressure - limit;
                            let count = count.try_into().unwrap();

                            let mut spills =
                                SpillChooser::new(bl, &b.p, ip, count);
                            for ssa in b.w.iter() {
                                spills.add_candidate(*ssa);
                            }

                            for ssa in spills {
                                debug_assert!(ssa.file() == file);
                                b.w.remove(&ssa);
                                if DEBUG.annotate() {
                                    instrs.push(Instr::new_boxed(OpAnnotate {
                                        annotation: "generated by spill_values"
                                            .into(),
                                    }));
                                }
                                instrs.push(spill.spill(ssa));
                                b.s.insert(ssa);
                            }
                        }

                        if DEBUG.annotate() {
                            instrs.push(Instr::new_boxed(OpAnnotate {
                                annotation: "generated by spill_values".into(),
                            }));
                        }
                        instrs.append(&mut fills);

                        instr.for_each_ssa_use(|ssa| {
                            if ssa.file() == file {
                                debug_assert!(b.w.contains(ssa));
                            }
                        });

                        b.w.insert_instr_top_down(ip, &instr, bl);
                    }
                }
            }

            // OpPin takes the normal spilling path but we want to also mark
            // any of its destination SSA values as pinned.
            if matches!(&instr.op, Op::Pin(_)) {
                instr.for_each_ssa_def(|ssa| {
                    b.p.insert(*ssa);
                });
            }

            instrs.push(instr);
        }
        bb.instrs = instrs;

        assert!(ssa_state_out.len() == b_idx);
        ssa_state_out.push(b);
    }

    // Now that everything is spilled, we handle phi sources and connect the
    // blocks by adding spills and fills as needed along edges.
    for p_idx in 0..blocks.len() {
        let succ = blocks.succ_indices(p_idx);
        if succ.len() != 1 {
            // We don't have any critical edges
            for s_idx in succ {
                debug_assert!(blocks.pred_indices(*s_idx).len() == 1);
            }
            continue;
        }
        let s_idx = succ[0];

        let pb = &mut blocks[p_idx];
        let p_out = &ssa_state_out[p_idx];
        let s_in = &ssa_state_in[s_idx];
        let phi_dst_map = &phi_dst_maps[s_idx];

        let mut spills = Vec::new();
        let mut fills = Vec::new();

        if let Some(phi) = pb.phi_srcs_mut() {
            for (idx, src) in phi.srcs.iter_mut() {
                debug_assert!(src.src_mod.is_none());
                let vec = src.src_ref.as_ssa().unwrap();
                debug_assert!(vec.comps() == 1);
                let ssa = &vec[0];

                if ssa.file() != file {
                    continue;
                }

                if spilled_phis.get((*idx).try_into().unwrap()) {
                    if !p_out.s.contains(ssa) {
                        spills.push(*ssa);
                    }
                    *src = spill.get_spill(*ssa).into();
                } else {
                    if !p_out.w.contains(ssa) {
                        fills.push(*ssa);
                    }
                }
            }
        }

        for ssa in s_in.s.iter() {
            if p_out.w.contains(ssa) && !p_out.s.contains(ssa) {
                spills.push(*ssa);
            }
        }

        for ssa in s_in.w.iter() {
            if phi_dst_map.get_phi_idx(ssa).is_some() {
                continue;
            }

            if !p_out.w.contains(ssa) {
                fills.push(*ssa);
            }
        }

        if spills.is_empty() && fills.is_empty() {
            continue;
        }

        // Sort to ensure stability of the algorithm
        spills.sort_by_key(|ssa| ssa.idx());
        fills.sort_by_key(|ssa| ssa.idx());

        let mut instrs = Vec::new();
        for ssa in spills {
            instrs.push(spill.spill(ssa));
        }
        for ssa in fills {
            debug_assert!(pb.uniform || !ssa.is_uniform());
            instrs.push(spill.fill(ssa));
        }

        // Insert the spills and fills right before the phi sources or the
        // branch (if any)
        let ip = pb
            .phi_srcs_ip()
            .or_else(|| pb.branch_ip())
            .unwrap_or_else(|| pb.instrs.len());
        pb.instrs.splice(ip..ip, instrs.into_iter());
    }
}

impl Function {
    /// Spill values from @file to fit within @limit registers
    ///
    /// This pass assumes that the function is already in CSSA form. See
    /// @to_cssa for more details.
    ///
    /// The algorithm implemented here is roughly based on "Register Spilling
    /// and Live-Range Splitting for SSA-Form Programs" by Braun and Hack. The
    /// primary contributions of the Braun and Hack paper are the global
    /// next-use distances which are implemented by @NextUseLiveness and a
    /// heuristic for computing spill sets at block boundaries. The paper
    /// describes two sets:
    ///
    /// - W, the set of variables currently resident
    ///
    /// - S, the set of variables which have been spilled
    ///
    /// These sets are tracked as we walk instructions and [un]spill values to
    /// satisfy the given limit. When spills are required we spill the value
    /// with the highest next-use IP. At block boundaries, Braun and Hack
    /// describe a heuristic for determining the starting W and S sets based
    /// on the W and S from the end of each of the forward edge predecessor
    /// blocks.
    ///
    /// What Braun and Hack do not describe is how to handle phis and parallel
    /// copies. Because we assume the function is already in CSSA form, we can
    /// use a fairly simple algorithm. On the first pass, we ignore phi
    /// sources and assign phi destinations based on W at the start of the
    /// block. If the phi destination is in W, we leave it alone. If it is not
    /// in W, then we allocate a new spill value and assign it to the phi
    /// destination. In a second pass, we handle phi sources based on the
    /// destination. If the destination is in W, we leave it alone. If the
    /// destination is spilled, we read from the spill value corresponding to
    /// the source, spilling first if needed. In the second pass, we also
    /// handle spilling across blocks as needed for values that do not pass
    /// through a phi.
    ///
    /// A special case is also required for parallel copies because they can
    /// have an unbounded number of destinations. For any source values not in
    /// W, we allocate a spill value for the destination and copy in the spill
    /// register file. For any sources which are in W, we try to leave as much
    /// in W as possible. However, since source values may not be killed by
    /// the copy and because one source value may be copied to arbitrarily
    /// many destinations, that is not always possible. Whenever we need to
    /// spill values, we spill according to the highest next-use of the
    /// destination and we spill the source first and then parallel copy the
    /// source into a spilled destination value.
    ///
    /// This all assumes that it's better to copy in spill space than to
    /// unspill just for the sake of a parallel copy. While this may not be
    /// true in general, especially not when spilling to memory, the register
    /// allocator is good at eliding unnecessary copies.
    pub fn spill_values(&mut self, file: RegFile, limit: u32) {
        match file {
            RegFile::GPR => {
                let spill = SpillGPR::new();
                spill_values(self, file, limit, spill);
            }
            RegFile::UGPR => {
                let spill = SpillUniform::new();
                spill_values(self, file, limit, spill);
            }
            RegFile::Pred => {
                let spill = SpillPred::new();
                spill_values(self, file, limit, spill);
            }
            RegFile::UPred => {
                let spill = SpillPred::new();
                spill_values(self, file, limit, spill);
            }
            RegFile::Bar => {
                let spill = SpillBar::new();
                spill_values(self, file, limit, spill);
            }
            _ => panic!("Don't know how to spill {} registers", file),
        }

        self.repair_ssa();
        self.opt_dce();

        if DEBUG.print() {
            eprintln!("NAK IR after spilling {}:\n{}", file, self);
        }
    }
}