// Copyright © 2022 Collabora, Ltd.
// SPDX-License-Identifier: MIT

use crate::api::{GetDebugFlags, DEBUG};
use crate::ir::*;
use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness};

use compiler::bitset::BitSet;
use std::cmp::{max, Ordering};
use std::collections::{HashMap, HashSet};

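// Tracks the SSA values killed (read or written for the last time) at an
// instruction.  The HashSet gives O(1) de-duplication on insert while the Vec
// preserves insertion order so that iteration, and therefore register
// allocation, stays deterministic from run to run.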
struct KillSet {
    set: HashSet<SSAValue>,
    vec: Vec<SSAValue>,
}

impl KillSet {
    pub fn new() -> KillSet {
        KillSet {
            set: HashSet::new(),
            vec: Vec::new(),
        }
    }

    pub fn len(&self) -> usize {
        self.vec.len()
    }

    pub fn clear(&mut self) {
        self.set.clear();
        self.vec.clear();
    }

    pub fn insert(&mut self, ssa: SSAValue) {
        if self.set.insert(ssa) {
            self.vec.push(ssa);
        }
    }

    pub fn iter(&self) -> std::slice::Iter<'_, SSAValue> {
        self.vec.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.vec.is_empty()
    }
}

// These two helpers are carefully paired for the purposes of RA.
// src_ssa_ref() returns whatever SSARef is present in the source, if any.
// src_set_reg() overwrites that SSARef with a RegRef.
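//
// A typical (hypothetical) use of the pair during RA looks like:
//
//     if let Some(ssa) = src_ssa_ref(src) {
//         let reg = ...; // pick a RegRef for ssa in the matching file
//         src_set_reg(src, reg);
//     }
//
// For a bindless cbuf source, this rewrites CBuf::BindlessSSA(handle) into
// CBuf::BindlessUGPR(reg), where reg is a 2-component UGPR holding the handle.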
#[inline]
fn src_ssa_ref(src: &Src) -> Option<&SSARef> {
    match &src.src_ref {
        SrcRef::SSA(ssa) => Some(ssa),
        SrcRef::CBuf(CBufRef {
            buf: CBuf::BindlessSSA(ssa),
            ..
        }) => Some(ssa),
        _ => None,
    }
}

#[inline]
fn src_set_reg(src: &mut Src, reg: RegRef) {
    match &mut src.src_ref {
        SrcRef::SSA(_) => {
            src.src_ref = reg.into();
        }
        SrcRef::CBuf(cb) => {
            debug_assert!(matches!(&cb.buf, CBuf::BindlessSSA(_)));
            debug_assert!(reg.file() == RegFile::UGPR && reg.comps() == 2);
            cb.buf = CBuf::BindlessUGPR(reg);
        }
        _ => (),
    }
}

enum SSAUse {
    FixedReg(u32),
    Vec(SSARef),
}

struct SSAUseMap {
    ssa_map: HashMap<SSAValue, Vec<(usize, SSAUse)>>,
}

impl SSAUseMap {
    fn add_fixed_reg_use(&mut self, ip: usize, ssa: SSAValue, reg: u32) {
        let v = self.ssa_map.entry(ssa).or_default();
        v.push((ip, SSAUse::FixedReg(reg)));
    }

    fn add_vec_use(&mut self, ip: usize, vec: SSARef) {
        if vec.comps() == 1 {
            return;
        }

        for ssa in vec.iter() {
            let v = self.ssa_map.entry(*ssa).or_default();
            v.push((ip, SSAUse::Vec(vec)));
        }
    }

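    // Uses for a given value are recorded in instruction order, so each
    // per-value Vec is sorted by ip and partition_point() can binary-search
    // it.  As a hypothetical example, with uses recorded at ips 3, 7, and 9
    // and a query at ip = 7, partition_point() returns 2 and we report the
    // use at ip 9: the first use strictly after the query point.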
    fn find_vec_use_after(&self, ssa: SSAValue, ip: usize) -> Option<&SSAUse> {
        if let Some(v) = self.ssa_map.get(&ssa) {
            let p = v.partition_point(|(uip, _)| *uip <= ip);
            if p == v.len() {
                None
            } else {
                let (_, u) = &v[p];
                Some(u)
            }
        } else {
            None
        }
    }

    pub fn add_block(&mut self, b: &BasicBlock) {
        for (ip, instr) in b.instrs.iter().enumerate() {
            match &instr.op {
                Op::RegOut(op) => {
                    for (i, src) in op.srcs.iter().enumerate() {
                        let out_reg = u32::try_from(i).unwrap();
                        if let Some(ssa) = src_ssa_ref(src) {
                            assert!(ssa.comps() == 1);
                            self.add_fixed_reg_use(ip, ssa[0], out_reg);
                        }
                    }
                }
                _ => {
                    // We don't care about predicates because they're scalar
                    for src in instr.srcs() {
                        if let Some(ssa) = src_ssa_ref(src) {
                            self.add_vec_use(ip, *ssa);
                        }
                    }
                }
            }
        }
    }

    pub fn for_block(b: &BasicBlock) -> SSAUseMap {
        let mut am = SSAUseMap {
            ssa_map: HashMap::new(),
        };
        am.add_block(b);
        am
    }
}

#[derive(Clone, Copy, Eq, Hash, PartialEq)]
enum LiveRef {
    SSA(SSAValue),
    Phi(u32),
}

#[derive(Clone, Copy, Eq, Hash, PartialEq)]
struct LiveValue {
    pub live_ref: LiveRef,
    pub reg_ref: RegRef,
}

// We need a stable ordering of live values so that RA is deterministic
impl Ord for LiveValue {
    fn cmp(&self, other: &Self) -> Ordering {
        let s_file = u8::from(self.reg_ref.file());
        let o_file = u8::from(other.reg_ref.file());
        match s_file.cmp(&o_file) {
            Ordering::Equal => {
                let s_idx = self.reg_ref.base_idx();
                let o_idx = other.reg_ref.base_idx();
                s_idx.cmp(&o_idx)
            }
            ord => ord,
        }
    }
}

impl PartialOrd for LiveValue {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

#[derive(Clone)]
struct RegAllocator {
    file: RegFile,
    num_regs: u32,
    used: BitSet,
    pinned: BitSet,
    reg_ssa: Vec<SSAValue>,
    ssa_reg: HashMap<SSAValue, u32>,
}

impl RegAllocator {
    pub fn new(file: RegFile, num_regs: u32) -> Self {
        Self {
            file: file,
            num_regs: num_regs,
            used: BitSet::new(),
            pinned: BitSet::new(),
            reg_ssa: Vec::new(),
            ssa_reg: HashMap::new(),
        }
    }

    fn file(&self) -> RegFile {
        self.file
    }

    pub fn num_regs_used(&self) -> u32 {
        self.ssa_reg.len().try_into().unwrap()
    }

    pub fn reg_is_used(&self, reg: u32) -> bool {
        self.used.get(reg.try_into().unwrap())
    }

    pub fn reg_is_pinned(&self, reg: u32) -> bool {
        self.pinned.get(reg.try_into().unwrap())
    }

    pub fn try_get_reg(&self, ssa: SSAValue) -> Option<u32> {
        self.ssa_reg.get(&ssa).cloned()
    }

    pub fn try_get_ssa(&self, reg: u32) -> Option<SSAValue> {
        if self.reg_is_used(reg) {
            Some(self.reg_ssa[usize::try_from(reg).unwrap()])
        } else {
            None
        }
    }

    pub fn try_get_vec_reg(&self, vec: &SSARef) -> Option<u32> {
        let Some(reg) = self.try_get_reg(vec[0]) else {
            return None;
        };

        let align = u32::from(vec.comps()).next_power_of_two();
        if reg % align != 0 {
            return None;
        }

        for c in 1..vec.comps() {
            let ssa = vec[usize::from(c)];
            if self.try_get_reg(ssa) != Some(reg + u32::from(c)) {
                return None;
            }
        }
        Some(reg)
    }

    pub fn free_ssa(&mut self, ssa: SSAValue) -> u32 {
        assert!(ssa.file() == self.file);
        let reg = self.ssa_reg.remove(&ssa).unwrap();
        assert!(self.reg_is_used(reg));
        let reg_usize = usize::try_from(reg).unwrap();
        assert!(self.reg_ssa[reg_usize] == ssa);
        self.used.remove(reg_usize);
        self.pinned.remove(reg_usize);
        reg
    }

    pub fn assign_reg(&mut self, ssa: SSAValue, reg: u32) {
        assert!(ssa.file() == self.file);
        assert!(reg < self.num_regs);
        assert!(!self.reg_is_used(reg));

        let reg_usize = usize::try_from(reg).unwrap();
        if reg_usize >= self.reg_ssa.len() {
            self.reg_ssa.resize(reg_usize + 1, SSAValue::NONE);
        }
        self.reg_ssa[reg_usize] = ssa;
        let old = self.ssa_reg.insert(ssa, reg);
        assert!(old.is_none());
        self.used.insert(reg_usize);
    }

    pub fn pin_reg(&mut self, reg: u32) {
        assert!(self.reg_is_used(reg));
        self.pinned.insert(reg.try_into().unwrap());
    }

    fn reg_range_is_unset(set: &BitSet, reg: u32, comps: u8) -> bool {
        for c in 0..u32::from(comps) {
            if set.get((reg + c).try_into().unwrap()) {
                return false;
            }
        }
        true
    }

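    // Finds the lowest properly-aligned range of `comps` registers, at or
    // after start_reg, whose bits are all clear in `set`.  As a hypothetical
    // example: with start_reg = 0, align = 4, and comps = 3, the candidate
    // bases are 0, 4, 8, ... and we return the first one whose 3-register
    // range is entirely unset.  Callers pass the component count rounded up
    // to a power of two as the alignment.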
    fn try_find_unset_reg_range(
        &self,
        set: &BitSet,
        start_reg: u32,
        align: u32,
        comps: u8,
    ) -> Option<u32> {
        assert!(comps > 0 && u32::from(comps) <= self.num_regs);

        let mut next_reg = start_reg;
        loop {
            let reg: u32 = set
                .next_unset(usize::try_from(next_reg).unwrap())
                .try_into()
                .unwrap();

            // Ensure we're properly aligned
            let reg = reg.next_multiple_of(align);

            // Ensure we're in-bounds. This also serves as a check to ensure
            // that u8::try_from(reg + i) will succeed.
            if reg > self.num_regs - u32::from(comps) {
                return None;
            }

            if Self::reg_range_is_unset(set, reg, comps) {
                return Some(reg);
            }

            next_reg = reg + align;
        }
    }

    pub fn try_find_unused_reg_range(
        &self,
        start_reg: u32,
        align: u32,
        comps: u8,
    ) -> Option<u32> {
        self.try_find_unset_reg_range(&self.used, start_reg, align, comps)
    }

    pub fn alloc_scalar(
        &mut self,
        ip: usize,
        sum: &SSAUseMap,
        ssa: SSAValue,
    ) -> u32 {
        if let Some(u) = sum.find_vec_use_after(ssa, ip) {
            match u {
                SSAUse::FixedReg(reg) => {
                    if !self.reg_is_used(*reg) {
                        self.assign_reg(ssa, *reg);
                        return *reg;
                    }
                }
                SSAUse::Vec(vec) => {
                    let mut comp = u8::MAX;
                    for c in 0..vec.comps() {
                        if vec[usize::from(c)] == ssa {
                            comp = c;
                            break;
                        }
                    }
                    assert!(comp < vec.comps());

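                    // Try to place ssa so it lines up with components of the
                    // future vector that already have registers.  As a
                    // hypothetical example: if ssa is component 1 of a vec4
                    // (align = 4) and component 2 already lives in r10, then
                    // vec_reg = 10 & !3 = 8 and we try to put ssa in r9.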
                    let align = u32::from(vec.comps()).next_power_of_two();
                    for c in 0..vec.comps() {
                        if c == comp {
                            continue;
                        }

                        let other = vec[usize::from(c)];
                        let Some(other_reg) = self.try_get_reg(other) else {
                            continue;
                        };

                        let vec_reg = other_reg & !(align - 1);
                        if other_reg != vec_reg + u32::from(c) {
                            continue;
                        }

                        let reg = vec_reg + u32::from(comp);
                        if reg < self.num_regs && !self.reg_is_used(reg) {
                            self.assign_reg(ssa, reg);
                            return reg;
                        }
                    }

                    // We weren't able to pair it with an already allocated
                    // register but maybe we can at least find an aligned one.
                    if let Some(reg) =
                        self.try_find_unused_reg_range(0, align, 1)
                    {
                        self.assign_reg(ssa, reg);
                        return reg;
                    }
                }
            }
        }

        let reg = self
            .try_find_unused_reg_range(0, 1, 1)
            .expect("Failed to find free register");
        self.assign_reg(ssa, reg);
        reg
    }
}

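// A scratch allocator used to satisfy the vector and fixed-register
// constraints of a single instruction.  The usual protocol (a sketch of how
// the helpers below use it, not additional API):
//
//     let mut vra = VecRegAllocator::new(ra);
//     let reg = vra.collect_vector(vec);   // may evict or move scalars
//     vra.free_killed(killed);
//     vra.finish(pcopy);                   // re-homes evicted values and
//                                          // emits the needed parallel copies
//
// Values displaced along the way are parked in `evicted` and must all be
// re-assigned before the allocator is dropped.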
struct VecRegAllocator<'a> {
    ra: &'a mut RegAllocator,
    pcopy: OpParCopy,
    pinned: BitSet,
    evicted: HashMap<SSAValue, u32>,
}

impl<'a> VecRegAllocator<'a> {
    fn new(ra: &'a mut RegAllocator) -> Self {
        let pinned = ra.pinned.clone();
        VecRegAllocator {
            ra,
            pcopy: OpParCopy::new(),
            pinned,
            evicted: HashMap::new(),
        }
    }

    fn file(&self) -> RegFile {
        self.ra.file()
    }

    fn pin_reg(&mut self, reg: u32) {
        self.pinned.insert(reg.try_into().unwrap());
    }

    fn pin_reg_range(&mut self, reg: u32, comps: u8) {
        for c in 0..u32::from(comps) {
            self.pin_reg(reg + c);
        }
    }

    fn reg_is_pinned(&self, reg: u32) -> bool {
        self.pinned.get(reg.try_into().unwrap())
    }

    fn reg_range_is_unpinned(&self, reg: u32, comps: u8) -> bool {
        RegAllocator::reg_range_is_unset(&self.pinned, reg, comps)
    }

    fn assign_pin_reg(&mut self, ssa: SSAValue, reg: u32) -> RegRef {
        self.pin_reg(reg);
        self.ra.assign_reg(ssa, reg);
        RegRef::new(self.file(), reg, 1)
    }

    pub fn assign_pin_vec_reg(&mut self, vec: SSARef, reg: u32) -> RegRef {
        for c in 0..vec.comps() {
            let ssa = vec[usize::from(c)];
            self.assign_pin_reg(ssa, reg + u32::from(c));
        }
        RegRef::new(self.file(), reg, vec.comps())
    }

    fn try_find_unpinned_reg_range(
        &self,
        start_reg: u32,
        align: u32,
        comps: u8,
    ) -> Option<u32> {
        self.ra
            .try_find_unset_reg_range(&self.pinned, start_reg, align, comps)
    }

    pub fn evict_ssa(&mut self, ssa: SSAValue, old_reg: u32) {
        assert!(ssa.file() == self.file());
        assert!(!self.reg_is_pinned(old_reg));
        self.evicted.insert(ssa, old_reg);
    }

    pub fn evict_reg_if_used(&mut self, reg: u32) {
        assert!(!self.reg_is_pinned(reg));

        if let Some(ssa) = self.ra.try_get_ssa(reg) {
            self.ra.free_ssa(ssa);
            self.evict_ssa(ssa, reg);
        }
    }

    fn move_ssa_to_reg(&mut self, ssa: SSAValue, new_reg: u32) {
        if let Some(old_reg) = self.ra.try_get_reg(ssa) {
            assert!(self.evicted.get(&ssa).is_none());
            assert!(!self.reg_is_pinned(old_reg));

            if new_reg == old_reg {
                self.pin_reg(new_reg);
            } else {
                self.ra.free_ssa(ssa);
                self.evict_reg_if_used(new_reg);

                self.pcopy.push(
                    RegRef::new(self.file(), new_reg, 1).into(),
                    RegRef::new(self.file(), old_reg, 1).into(),
                );

                self.assign_pin_reg(ssa, new_reg);
            }
        } else if let Some(old_reg) = self.evicted.remove(&ssa) {
            self.evict_reg_if_used(new_reg);

            self.pcopy.push(
                RegRef::new(self.file(), new_reg, 1).into(),
                RegRef::new(self.file(), old_reg, 1).into(),
            );

            self.assign_pin_reg(ssa, new_reg);
        } else {
            panic!("Unknown SSA value");
        }
    }

    fn finish(mut self, pcopy: &mut OpParCopy) {
        pcopy.dsts_srcs.append(&mut self.pcopy.dsts_srcs);

        if !self.evicted.is_empty() {
            // Sort so we get determinism, even if the hash map order changes
            // from one run to another or due to rust compiler updates.
            let mut evicted: Vec<_> = self.evicted.drain().collect();
            evicted.sort_by_key(|(_, reg)| *reg);

            for (ssa, old_reg) in evicted {
                let mut next_reg = 0;
                let new_reg = loop {
                    let reg = self
                        .ra
                        .try_find_unused_reg_range(next_reg, 1, 1)
                        .expect("Failed to find free register");
                    if !self.reg_is_pinned(reg) {
                        break reg;
                    }
                    next_reg = reg + 1;
                };

                pcopy.push(
                    RegRef::new(self.file(), new_reg, 1).into(),
                    RegRef::new(self.file(), old_reg, 1).into(),
                );
                self.assign_pin_reg(ssa, new_reg);
            }
        }
    }

    pub fn try_get_vec_reg(&self, vec: &SSARef) -> Option<u32> {
        self.ra.try_get_vec_reg(vec)
    }

    pub fn collect_vector(&mut self, vec: &SSARef) -> RegRef {
        if let Some(reg) = self.try_get_vec_reg(vec) {
            self.pin_reg_range(reg, vec.comps());
            return RegRef::new(self.file(), reg, vec.comps());
        }

        let comps = vec.comps();
        let align = u32::from(comps).next_power_of_two();

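        // Pick a base register for the vector, trying progressively more
        // invasive strategies: first a completely unused aligned range, then
        // a range implied by a component that already has a register (so the
        // rest of the vector can form around it), and finally any unpinned
        // aligned range, evicting whatever currently lives there.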
        let reg = self
            .ra
            .try_find_unused_reg_range(0, align, comps)
            .or_else(|| {
                for c in 0..comps {
                    let ssa = vec[usize::from(c)];
                    let Some(comp_reg) = self.ra.try_get_reg(ssa) else {
                        continue;
                    };

                    let vec_reg = comp_reg & !(align - 1);
                    if comp_reg != vec_reg + u32::from(c) {
                        continue;
                    }

                    if vec_reg + u32::from(comps) > self.ra.num_regs {
                        continue;
                    }

                    if self.reg_range_is_unpinned(vec_reg, comps) {
                        return Some(vec_reg);
                    }
                }
                None
            })
            .or_else(|| self.try_find_unpinned_reg_range(0, align, comps))
            .expect("Failed to find an unpinned register range");

        for c in 0..comps {
            let ssa = vec[usize::from(c)];
            self.move_ssa_to_reg(ssa, reg + u32::from(c));
        }

        RegRef::new(self.file(), reg, comps)
    }

    pub fn alloc_vector(&mut self, vec: SSARef) -> RegRef {
        let comps = vec.comps();
        let align = u32::from(comps).next_power_of_two();

        if let Some(reg) = self.ra.try_find_unused_reg_range(0, align, comps) {
            return self.assign_pin_vec_reg(vec, reg);
        }

        let reg = self
            .try_find_unpinned_reg_range(0, align, comps)
            .expect("Failed to find an unpinned register range");

        for c in 0..comps {
            self.evict_reg_if_used(reg + u32::from(c));
        }
        self.assign_pin_vec_reg(vec, reg)
    }

    pub fn free_killed(&mut self, killed: &KillSet) {
        for ssa in killed.iter() {
            if ssa.file() == self.file() {
                self.ra.free_ssa(*ssa);
            }
        }
    }
}

impl Drop for VecRegAllocator<'_> {
    fn drop(&mut self) {
        assert!(self.evicted.is_empty());
    }
}

fn instr_remap_srcs_file(instr: &mut Instr, ra: &mut VecRegAllocator) {
    // Collect vector sources first since those may silently pin some of our
    // scalar sources.
    for src in instr.srcs_mut() {
        if let Some(ssa) = src_ssa_ref(src) {
            if ssa.file().unwrap() == ra.file() && ssa.comps() > 1 {
                let reg = ra.collect_vector(ssa);
                src_set_reg(src, reg);
            }
        }
    }

    if let PredRef::SSA(pred) = instr.pred.pred_ref {
        if pred.file() == ra.file() {
            instr.pred.pred_ref = ra.collect_vector(&pred.into()).into();
        }
    }

    for src in instr.srcs_mut() {
        if let Some(ssa) = src_ssa_ref(src) {
            if ssa.file().unwrap() == ra.file() && ssa.comps() == 1 {
                let reg = ra.collect_vector(ssa);
                src_set_reg(src, reg);
            }
        }
    }
}

fn instr_alloc_scalar_dsts_file(
    instr: &mut Instr,
    ip: usize,
    sum: &SSAUseMap,
    ra: &mut RegAllocator,
) {
    for dst in instr.dsts_mut() {
        if let Dst::SSA(ssa) = dst {
            if ssa.file().unwrap() == ra.file() {
                assert!(ssa.comps() == 1);
                let reg = ra.alloc_scalar(ip, sum, ssa[0]);
                *dst = RegRef::new(ra.file(), reg, 1).into();
            }
        }
    }
}

fn instr_assign_regs_file(
    instr: &mut Instr,
    ip: usize,
    sum: &SSAUseMap,
    killed: &KillSet,
    pcopy: &mut OpParCopy,
    ra: &mut RegAllocator,
) {
    struct VecDst {
        dst_idx: usize,
        comps: u8,
        killed: Option<SSARef>,
        reg: u32,
    }

    let mut vec_dsts = Vec::new();
    let mut vec_dst_comps = 0;
    for (i, dst) in instr.dsts().iter().enumerate() {
        if let Dst::SSA(ssa) = dst {
            if ssa.file().unwrap() == ra.file() && ssa.comps() > 1 {
                vec_dsts.push(VecDst {
                    dst_idx: i,
                    comps: ssa.comps(),
                    killed: None,
                    reg: u32::MAX,
                });
                vec_dst_comps += ssa.comps();
            }
        }
    }

    // No vector destinations is the easy case
    if vec_dst_comps == 0 {
        let mut vra = VecRegAllocator::new(ra);
        instr_remap_srcs_file(instr, &mut vra);
        vra.free_killed(killed);
        vra.finish(pcopy);
        instr_alloc_scalar_dsts_file(instr, ip, sum, ra);
        return;
    }

    // Predicates can't be vectors. This lets us ignore instr.pred in our
    // analysis for the cases below. Only the easy case above needs to care
    // about them.
    assert!(!ra.file().is_predicate());

    let mut avail = killed.set.clone();
    let mut killed_vecs = Vec::new();
    for src in instr.srcs() {
        if let Some(vec) = src_ssa_ref(src) {
            if vec.comps() > 1 {
                let mut vec_killed = true;
                for ssa in vec.iter() {
                    if ssa.file() != ra.file() || !avail.contains(ssa) {
                        vec_killed = false;
                        break;
                    }
                }
                if vec_killed {
                    for ssa in vec.iter() {
                        avail.remove(ssa);
                    }
                    killed_vecs.push(*vec);
                }
            }
        }
    }

    vec_dsts.sort_by_key(|v| v.comps);
    killed_vecs.sort_by_key(|v| v.comps());

    let mut next_dst_reg = 0;
    let mut vec_dsts_map_to_killed_srcs = true;
    let mut could_trivially_allocate = true;
    for vec_dst in vec_dsts.iter_mut().rev() {
        while let Some(src) = killed_vecs.pop() {
            if src.comps() >= vec_dst.comps {
                vec_dst.killed = Some(src);
                break;
            }
        }
        if vec_dst.killed.is_none() {
            vec_dsts_map_to_killed_srcs = false;
        }

        let align = u32::from(vec_dst.comps).next_power_of_two();
        if let Some(reg) =
            ra.try_find_unused_reg_range(next_dst_reg, align, vec_dst.comps)
        {
            vec_dst.reg = reg;
            next_dst_reg = reg + u32::from(vec_dst.comps);
        } else {
            could_trivially_allocate = false;
        }
    }

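    // Three strategies, from cheapest to most general:
    //
    //   1. Every vector destination can reuse the registers of a killed
    //      vector source of at least the same size, so sources are collected
    //      first and destinations simply take over their registers.
    //   2. Every vector destination found a free, aligned range above, so
    //      destinations are pinned first and sources collected afterwards.
    //   3. Otherwise, fall back to alloc_vector(), which may evict and
    //      shuffle values with parallel copies.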
    if vec_dsts_map_to_killed_srcs {
        let mut vra = VecRegAllocator::new(ra);
        instr_remap_srcs_file(instr, &mut vra);

        for vec_dst in &mut vec_dsts {
            let src_vec = vec_dst.killed.as_ref().unwrap();
            vec_dst.reg = vra.try_get_vec_reg(src_vec).unwrap();
        }

        vra.free_killed(killed);

        for vec_dst in vec_dsts {
            let dst = &mut instr.dsts_mut()[vec_dst.dst_idx];
            *dst = vra
                .assign_pin_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg)
                .into();
        }

        vra.finish(pcopy);

        instr_alloc_scalar_dsts_file(instr, ip, sum, ra);
    } else if could_trivially_allocate {
        let mut vra = VecRegAllocator::new(ra);
        for vec_dst in vec_dsts {
            let dst = &mut instr.dsts_mut()[vec_dst.dst_idx];
            *dst = vra
                .assign_pin_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg)
                .into();
        }

        instr_remap_srcs_file(instr, &mut vra);
        vra.free_killed(killed);
        vra.finish(pcopy);
        instr_alloc_scalar_dsts_file(instr, ip, sum, ra);
    } else {
        let mut vra = VecRegAllocator::new(ra);
        instr_remap_srcs_file(instr, &mut vra);

        // Allocate vector destinations first so we have the most freedom.
        // Scalar destinations can fill in holes.
        for dst in instr.dsts_mut() {
            if let Dst::SSA(ssa) = dst {
                if ssa.file().unwrap() == vra.file() && ssa.comps() > 1 {
                    *dst = vra.alloc_vector(*ssa).into();
                }
            }
        }

        vra.free_killed(killed);
        vra.finish(pcopy);

        instr_alloc_scalar_dsts_file(instr, ip, sum, ra);
    }
}

impl PerRegFile<RegAllocator> {
    pub fn assign_reg(&mut self, ssa: SSAValue, reg: RegRef) {
        assert!(reg.file() == ssa.file());
        assert!(reg.comps() == 1);
        self[ssa.file()].assign_reg(ssa, reg.base_idx());
    }

    pub fn free_killed(&mut self, killed: &KillSet) {
        for ssa in killed.iter() {
            self[ssa.file()].free_ssa(*ssa);
        }
    }
}

struct AssignRegsBlock {
    ra: PerRegFile<RegAllocator>,
    pcopy_tmp_gprs: u8,
    live_in: Vec<LiveValue>,
    phi_out: HashMap<u32, SrcRef>,
}

impl AssignRegsBlock {
    fn new(num_regs: &PerRegFile<u32>, pcopy_tmp_gprs: u8) -> AssignRegsBlock {
        AssignRegsBlock {
            ra: PerRegFile::new_with(|file| {
                RegAllocator::new(file, num_regs[file])
            }),
            pcopy_tmp_gprs: pcopy_tmp_gprs,
            live_in: Vec::new(),
            phi_out: HashMap::new(),
        }
    }

    fn get_scalar(&self, ssa: SSAValue) -> RegRef {
        let ra = &self.ra[ssa.file()];
        let reg = ra.try_get_reg(ssa).expect("Unknown SSA value");
        RegRef::new(ssa.file(), reg, 1)
    }

    fn alloc_scalar(
        &mut self,
        ip: usize,
        sum: &SSAUseMap,
        ssa: SSAValue,
    ) -> RegRef {
        let ra = &mut self.ra[ssa.file()];
        let reg = ra.alloc_scalar(ip, sum, ssa);
        RegRef::new(ssa.file(), reg, 1)
    }

    fn pin_vector(&mut self, reg: RegRef) {
        let ra = &mut self.ra[reg.file()];
        for c in 0..reg.comps() {
            ra.pin_reg(reg.comp(c).base_idx());
        }
    }

    fn try_coalesce(&mut self, ssa: SSAValue, src: &Src) -> bool {
        debug_assert!(src.src_mod.is_none());
        let SrcRef::Reg(src_reg) = src.src_ref else {
            return false;
        };
        debug_assert!(src_reg.comps() == 1);

        if src_reg.file() != ssa.file() {
            return false;
        }

        let ra = &mut self.ra[src_reg.file()];
        if ra.reg_is_used(src_reg.base_idx()) {
            return false;
        }

        ra.assign_reg(ssa, src_reg.base_idx());
        true
    }

    fn pcopy_tmp(&self) -> Option<RegRef> {
        if self.pcopy_tmp_gprs > 0 {
            Some(RegRef::new(
                RegFile::GPR,
                self.ra[RegFile::GPR].num_regs,
                self.pcopy_tmp_gprs,
            ))
        } else {
            None
        }
    }

    fn assign_regs_instr(
        &mut self,
        mut instr: Box<Instr>,
        ip: usize,
        sum: &SSAUseMap,
        srcs_killed: &KillSet,
        dsts_killed: &KillSet,
        pcopy: &mut OpParCopy,
    ) -> Option<Box<Instr>> {
        match &mut instr.op {
            Op::Undef(undef) => {
                if let Dst::SSA(ssa) = undef.dst {
                    assert!(ssa.comps() == 1);
                    self.alloc_scalar(ip, sum, ssa[0]);
                }
                assert!(srcs_killed.is_empty());
                self.ra.free_killed(dsts_killed);
                None
            }
            Op::PhiSrcs(phi) => {
                for (id, src) in phi.srcs.iter() {
                    assert!(src.src_mod.is_none());
                    if let Some(ssa) = src_ssa_ref(src) {
                        assert!(ssa.comps() == 1);
                        let reg = self.get_scalar(ssa[0]);
                        self.phi_out.insert(*id, reg.into());
                    } else {
                        self.phi_out.insert(*id, src.src_ref);
                    }
                }
                assert!(dsts_killed.is_empty());
                None
            }
            Op::PhiDsts(phi) => {
                assert!(instr.pred.is_true());

                for (id, dst) in phi.dsts.iter() {
                    if let Dst::SSA(ssa) = dst {
                        assert!(ssa.comps() == 1);
                        let reg = self.alloc_scalar(ip, sum, ssa[0]);
                        self.live_in.push(LiveValue {
                            live_ref: LiveRef::Phi(*id),
                            reg_ref: reg,
                        });
                    }
                }
                assert!(srcs_killed.is_empty());
                self.ra.free_killed(dsts_killed);

                None
            }
            Op::Break(op) => {
                for src in op.srcs_as_mut_slice() {
                    if let Some(ssa) = src_ssa_ref(src) {
                        assert!(ssa.comps() == 1);
                        let reg = self.get_scalar(ssa[0]);
                        src_set_reg(src, reg);
                    }
                }

                self.ra.free_killed(srcs_killed);

                if let Dst::SSA(ssa) = &op.bar_out {
                    let reg = *op.bar_in.src_ref.as_reg().unwrap();
                    self.ra.assign_reg(ssa[0], reg);
                    op.bar_out = reg.into();
                }

                self.ra.free_killed(dsts_killed);

                Some(instr)
            }
            Op::BSSy(op) => {
                for src in op.srcs_as_mut_slice() {
                    if let Some(ssa) = src_ssa_ref(src) {
                        assert!(ssa.comps() == 1);
                        let reg = self.get_scalar(ssa[0]);
                        src_set_reg(src, reg);
                    }
                }

                self.ra.free_killed(srcs_killed);

                if let Dst::SSA(ssa) = &op.bar_out {
                    let reg = *op.bar_in.src_ref.as_reg().unwrap();
                    self.ra.assign_reg(ssa[0], reg);
                    op.bar_out = reg.into();
                }

                self.ra.free_killed(dsts_killed);

                Some(instr)
            }
            Op::Copy(copy) => {
                if let Some(ssa) = src_ssa_ref(&copy.src) {
                    // This may be a CBuf::BindlessSSA source so we need to
                    // support vectors because cbuf handles are vec2s. However,
                    // since we only have a single scalar destination, we can
                    // just allocate and free killed up-front.
                    let ra = &mut self.ra[ssa.file().unwrap()];
                    let mut vra = VecRegAllocator::new(ra);
                    let reg = vra.collect_vector(ssa);
                    vra.free_killed(srcs_killed);
                    vra.finish(pcopy);
                    src_set_reg(&mut copy.src, reg);
                }

                let mut del_copy = false;
                if let Dst::SSA(dst_vec) = &mut copy.dst {
                    debug_assert!(dst_vec.comps() == 1);
                    let dst_ssa = &dst_vec[0];

                    if self.try_coalesce(*dst_ssa, &copy.src) {
                        del_copy = true;
                    } else {
                        copy.dst = self.alloc_scalar(ip, sum, *dst_ssa).into();
                    }
                }

                self.ra.free_killed(dsts_killed);

                if del_copy {
                    None
                } else {
                    Some(instr)
                }
            }
            Op::Pin(OpPin { src, dst }) | Op::Unpin(OpUnpin { src, dst }) => {
                assert!(instr.pred.is_true());

                // These basically act as a vector version of OpCopy except
                // that they only work on SSA values and we pin the
                // destination if it's OpPin.
                let src_vec = src.as_ssa().unwrap();
                let dst_vec = dst.as_ssa().unwrap();
                assert!(src_vec.comps() == dst_vec.comps());

                if srcs_killed.len() == src_vec.comps().into()
                    && src_vec.file() == dst_vec.file()
                {
                    let ra = &mut self.ra[src_vec.file().unwrap()];
                    let mut vra = VecRegAllocator::new(ra);
                    let reg = vra.collect_vector(src_vec);
                    vra.finish(pcopy);
                    for c in 0..src_vec.comps() {
                        let c_reg = ra.free_ssa(src_vec[usize::from(c)]);
                        debug_assert!(c_reg == reg.comp(c).base_idx());
                        ra.assign_reg(dst_vec[usize::from(c)], c_reg);
                    }

                    if matches!(&instr.op, Op::Pin(_)) {
                        self.pin_vector(reg);
                    }
                    self.ra.free_killed(dsts_killed);

                    None
                } else {
                    // Otherwise, turn into a parallel copy
                    //
                    // We can always allocate the destination first in this
                    // case.
                    assert!(dst_vec.comps() > 1 || srcs_killed.is_empty());

                    let dst_ra = &mut self.ra[dst_vec.file().unwrap()];
                    let mut vra = VecRegAllocator::new(dst_ra);
                    let dst_reg = vra.alloc_vector(*dst_vec);
                    vra.finish(pcopy);

                    let mut pin_copy = OpParCopy::new();
                    for c in 0..dst_reg.comps() {
                        let src_reg = self.get_scalar(src_vec[usize::from(c)]);
                        pin_copy.push(dst_reg.comp(c).into(), src_reg.into());
                    }

                    if matches!(&instr.op, Op::Pin(_)) {
                        self.pin_vector(dst_reg);
                    }
                    self.ra.free_killed(srcs_killed);
                    self.ra.free_killed(dsts_killed);

                    Some(Instr::new_boxed(pin_copy))
                }
            }
            Op::ParCopy(pcopy) => {
                for (_, src) in pcopy.dsts_srcs.iter_mut() {
                    if let Some(src_vec) = src_ssa_ref(src) {
                        debug_assert!(src_vec.comps() == 1);
                        let reg = self.get_scalar(src_vec[0]).into();
                        src_set_reg(src, reg);
                    }
                }

                self.ra.free_killed(srcs_killed);

                // Try to coalesce destinations into sources, if possible
                pcopy.dsts_srcs.retain(|dst, src| match dst {
                    Dst::None => false,
                    Dst::SSA(dst_vec) => {
                        debug_assert!(dst_vec.comps() == 1);
                        !self.try_coalesce(dst_vec[0], src)
                    }
                    Dst::Reg(_) => true,
                });

                for (dst, _) in pcopy.dsts_srcs.iter_mut() {
                    if let Dst::SSA(dst_vec) = dst {
                        debug_assert!(dst_vec.comps() == 1);
                        *dst = self.alloc_scalar(ip, sum, dst_vec[0]).into();
                    }
                }

                self.ra.free_killed(dsts_killed);

                pcopy.tmp = self.pcopy_tmp();
                if pcopy.is_empty() {
                    None
                } else {
                    Some(instr)
                }
            }
            Op::RegOut(out) => {
                for src in out.srcs.iter_mut() {
                    if let Some(src_vec) = src_ssa_ref(src) {
                        debug_assert!(src_vec.comps() == 1);
                        let reg = self.get_scalar(src_vec[0]).into();
                        src_set_reg(src, reg);
                    }
                }

                self.ra.free_killed(srcs_killed);
                assert!(dsts_killed.is_empty());

                // This should be the last instruction and its sources should
                // be the last free GPRs.
                debug_assert!(self.ra[RegFile::GPR].num_regs_used() == 0);

                for (i, src) in out.srcs.iter().enumerate() {
                    let reg = u32::try_from(i).unwrap();
                    let dst = RegRef::new(RegFile::GPR, reg, 1);
                    pcopy.push(dst.into(), *src);
                }

                None
            }
            _ => {
                for file in self.ra.values_mut() {
                    instr_assign_regs_file(
                        &mut instr,
                        ip,
                        sum,
                        srcs_killed,
                        pcopy,
                        file,
                    );
                }
                self.ra.free_killed(dsts_killed);
                Some(instr)
            }
        }
    }

    fn first_pass<BL: BlockLiveness>(
        &mut self,
        b: &mut BasicBlock,
        bl: &BL,
        pred_ra: Option<&PerRegFile<RegAllocator>>,
    ) {
        // Populate live in from the register file we're handed. We'll add more
        // live in when we process the OpPhiDst, if any.
        if let Some(pred_ra) = pred_ra {
            for (raf, pred_raf) in self.ra.values_mut().zip(pred_ra.values()) {
                for (ssa, reg) in &pred_raf.ssa_reg {
                    if bl.is_live_in(ssa) {
                        raf.assign_reg(*ssa, *reg);
                        if pred_raf.reg_is_pinned(*reg) {
                            raf.pin_reg(*reg);
                        }
                        self.live_in.push(LiveValue {
                            live_ref: LiveRef::SSA(*ssa),
                            reg_ref: RegRef::new(raf.file(), *reg, 1),
                        });
                    }
                }
            }
        }

        let sum = SSAUseMap::for_block(b);

        let mut instrs = Vec::new();
        let mut srcs_killed = KillSet::new();
        let mut dsts_killed = KillSet::new();

        for (ip, instr) in b.instrs.drain(..).enumerate() {
            // Build up the kill set
            srcs_killed.clear();
            if let PredRef::SSA(ssa) = &instr.pred.pred_ref {
                if !bl.is_live_after_ip(ssa, ip) {
                    srcs_killed.insert(*ssa);
                }
            }
            for src in instr.srcs() {
                for ssa in src.iter_ssa() {
                    if !bl.is_live_after_ip(ssa, ip) {
                        srcs_killed.insert(*ssa);
                    }
                }
            }

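            // dsts_killed covers values that are written but never read
            // (dead definitions); they still get a register for the write
            // but are freed again immediately after the instruction.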
            dsts_killed.clear();
            for dst in instr.dsts() {
                if let Dst::SSA(vec) = dst {
                    for ssa in vec.iter() {
                        if !bl.is_live_after_ip(ssa, ip) {
                            dsts_killed.insert(*ssa);
                        }
                    }
                }
            }

            let mut pcopy = OpParCopy::new();
            pcopy.tmp = self.pcopy_tmp();

            let instr = self.assign_regs_instr(
                instr,
                ip,
                &sum,
                &srcs_killed,
                &dsts_killed,
                &mut pcopy,
            );

            if !pcopy.is_empty() {
                if DEBUG.annotate() {
                    instrs.push(Instr::new_boxed(OpAnnotate {
                        annotation: "generated by assign_regs".into(),
                    }));
                }
                if !b.uniform {
                    for dst in pcopy.dsts_as_slice() {
                        if let Dst::Reg(reg) = dst {
                            assert!(!reg.is_uniform());
                        }
                    }
                }
                instrs.push(Instr::new_boxed(pcopy));
            }

            if let Some(instr) = instr {
                instrs.push(instr);
            }
        }

        // Sort live-in to maintain determinism
        self.live_in.sort();

        b.instrs = instrs;
    }

    fn second_pass(&self, target: &AssignRegsBlock, b: &mut BasicBlock) {
        let mut pcopy = OpParCopy::new();
        pcopy.tmp = self.pcopy_tmp();

        for lv in &target.live_in {
            let src = match lv.live_ref {
                LiveRef::SSA(ssa) => SrcRef::from(self.get_scalar(ssa)),
                LiveRef::Phi(phi) => *self.phi_out.get(&phi).unwrap(),
            };
            let dst = lv.reg_ref;
            if let SrcRef::Reg(src_reg) = src {
                if dst == src_reg {
                    continue;
                }
            }
            pcopy.push(dst.into(), src.into());
        }

        if DEBUG.annotate() {
            b.instrs.push(Instr::new_boxed(OpAnnotate {
                annotation: "generated by assign_regs".into(),
            }));
        }
        if b.branch().is_some() {
            b.instrs.insert(b.instrs.len() - 1, Instr::new_boxed(pcopy));
        } else {
            b.instrs.push(Instr::new_boxed(pcopy));
        }
    }
}

impl Shader<'_> {
    pub fn assign_regs(&mut self) {
        assert!(self.functions.len() == 1);
        let f = &mut self.functions[0];

        // Convert to CSSA before we spill or assign registers
        f.to_cssa();

        let mut live = SimpleLiveness::for_function(f);
        let mut max_live = live.calc_max_live(f);

        // We want at least one temporary GPR reserved for parallel copies.
        let mut tmp_gprs = 1_u8;

        let spill_files =
            [RegFile::UPred, RegFile::Pred, RegFile::UGPR, RegFile::Bar];
        for file in spill_files {
            let num_regs = self.sm.num_regs(file);
            if max_live[file] > num_regs {
                f.spill_values(file, num_regs);

                // Re-calculate liveness after we spill
                live = SimpleLiveness::for_function(f);
                max_live = live.calc_max_live(f);

                if file == RegFile::Bar {
                    tmp_gprs = max(tmp_gprs, 2);
                }
            }
        }

        // An instruction can have at most 4 vector sources/destinations. In
        // order to ensure we always succeed at allocation, regardless of
        // arbitrary choices, we need at least 16 GPRs.
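        // As a hypothetical example: with max_live[GPR] = 40 and tmp_gprs = 2,
        // gpr_limit is max(40, 16) = 40 and total_gprs is 42; if that exceeds
        // the hardware register count, we clamp below and spill.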
        let mut gpr_limit = max(max_live[RegFile::GPR], 16);
        let mut total_gprs = gpr_limit + u32::from(tmp_gprs);

        let max_gprs = if DEBUG.spill() {
            // We need at least 16 registers to satisfy RA constraints for
            // texture ops and another 2 for parallel copy lowering
            18
        } else {
            self.sm.num_regs(RegFile::GPR)
        };
        if total_gprs > max_gprs {
            // If we're spilling GPRs, we need to reserve 2 GPRs for OpParCopy
            // lowering because it needs to be able to lower Mem copies which
            // require a temporary
            tmp_gprs = max(tmp_gprs, 2);
            total_gprs = max_gprs;
            gpr_limit = total_gprs - u32::from(tmp_gprs);

            f.spill_values(RegFile::GPR, gpr_limit);

            // Re-calculate liveness one last time
            live = SimpleLiveness::for_function(f);
        }

        self.info.num_gprs = total_gprs.try_into().unwrap();

        let limit = PerRegFile::new_with(|file| {
            if file == RegFile::GPR {
                gpr_limit
            } else {
                self.sm.num_regs(file)
            }
        });

        let mut blocks: Vec<AssignRegsBlock> = Vec::new();
        for b_idx in 0..f.blocks.len() {
            let pred = f.blocks.pred_indices(b_idx);
            let pred_ra = if pred.is_empty() {
                None
            } else {
                // Start with the first predecessor's assignments.
                Some(&blocks[pred[0]].ra)
            };

            let bl = live.block_live(b_idx);

            let mut arb = AssignRegsBlock::new(&limit, tmp_gprs);
            arb.first_pass(&mut f.blocks[b_idx], bl, pred_ra);

            assert!(blocks.len() == b_idx);
            blocks.push(arb);
        }

        for b_idx in 0..f.blocks.len() {
            let arb = &blocks[b_idx];
            for sb_idx in f.blocks.succ_indices(b_idx).to_vec() {
                arb.second_pass(&blocks[sb_idx], &mut f.blocks[b_idx]);
            }
        }
    }
}