// Copyright © 2022 Collabora, Ltd.
// SPDX-License-Identifier: MIT

use crate::api::{GetDebugFlags, DEBUG};
use crate::ir::*;
use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness};

use compiler::bitset::BitSet;
use std::cmp::{max, Ordering};
use std::collections::{HashMap, HashSet};

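// Tracks the set of SSA values killed (used for the last time) at the
// current instruction.  The HashSet gives O(1) dedup on insert while the
// Vec preserves insertion order, so iteration is deterministic from one
// run to the next.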
struct KillSet {
    set: HashSet<SSAValue>,
    vec: Vec<SSAValue>,
}

impl KillSet {
    pub fn new() -> KillSet {
        KillSet {
            set: HashSet::new(),
            vec: Vec::new(),
        }
    }

    pub fn len(&self) -> usize {
        self.vec.len()
    }

    pub fn clear(&mut self) {
        self.set.clear();
        self.vec.clear();
    }

    pub fn insert(&mut self, ssa: SSAValue) {
        if self.set.insert(ssa) {
            self.vec.push(ssa);
        }
    }

    pub fn iter(&self) -> std::slice::Iter<'_, SSAValue> {
        self.vec.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.vec.is_empty()
    }
}

// These two helpers are carefully paired for the purposes of RA.
// src_ssa_ref() returns whatever SSARef is present in the source, if any.
// src_set_reg() overwrites that SSARef with a RegRef.
#[inline]
fn src_ssa_ref(src: &Src) -> Option<&SSARef> {
    match &src.src_ref {
        SrcRef::SSA(ssa) => Some(ssa),
        SrcRef::CBuf(CBufRef {
            buf: CBuf::BindlessSSA(ssa),
            ..
        }) => Some(ssa),
        _ => None,
    }
}

#[inline]
fn src_set_reg(src: &mut Src, reg: RegRef) {
    match &mut src.src_ref {
        SrcRef::SSA(_) => {
            src.src_ref = reg.into();
        }
        SrcRef::CBuf(cb) => {
            debug_assert!(matches!(&cb.buf, CBuf::BindlessSSA(_)));
            debug_assert!(reg.file() == RegFile::UGPR && reg.comps() == 2);
            cb.buf = CBuf::BindlessUGPR(reg);
        }
        _ => (),
    }
}

enum SSAUse {
    FixedReg(u32),
    Vec(SSARef),
}

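// Maps each SSA value to the (instruction-pointer, use) pairs within a
// block where it is consumed either by a fixed-register op (OpRegOut) or
// as a component of a vector source.  alloc_scalar() consults this to
// bias scalar placement toward wherever the value will be needed next.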
struct SSAUseMap {
    ssa_map: HashMap<SSAValue, Vec<(usize, SSAUse)>>,
}

impl SSAUseMap {
    fn add_fixed_reg_use(&mut self, ip: usize, ssa: SSAValue, reg: u32) {
        let v = self.ssa_map.entry(ssa).or_default();
        v.push((ip, SSAUse::FixedReg(reg)));
    }

    fn add_vec_use(&mut self, ip: usize, vec: SSARef) {
        if vec.comps() == 1 {
            return;
        }

        for ssa in vec.iter() {
            let v = self.ssa_map.entry(*ssa).or_default();
            v.push((ip, SSAUse::Vec(vec)));
        }
    }

    fn find_vec_use_after(&self, ssa: SSAValue, ip: usize) -> Option<&SSAUse> {
        if let Some(v) = self.ssa_map.get(&ssa) {
            let p = v.partition_point(|(uip, _)| *uip <= ip);
            if p == v.len() {
                None
            } else {
                let (_, u) = &v[p];
                Some(u)
            }
        } else {
            None
        }
    }

    pub fn add_block(&mut self, b: &BasicBlock) {
        for (ip, instr) in b.instrs.iter().enumerate() {
            match &instr.op {
                Op::RegOut(op) => {
                    for (i, src) in op.srcs.iter().enumerate() {
                        let out_reg = u32::try_from(i).unwrap();
                        if let Some(ssa) = src_ssa_ref(src) {
                            assert!(ssa.comps() == 1);
                            self.add_fixed_reg_use(ip, ssa[0], out_reg);
                        }
                    }
                }
                _ => {
                    // We don't care about predicates because they're scalar
                    for src in instr.srcs() {
                        if let Some(ssa) = src_ssa_ref(src) {
                            self.add_vec_use(ip, *ssa);
                        }
                    }
                }
            }
        }
    }

    pub fn for_block(b: &BasicBlock) -> SSAUseMap {
        let mut am = SSAUseMap {
            ssa_map: HashMap::new(),
        };
        am.add_block(b);
        am
    }
}

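// A value which is live across a block boundary: either a plain SSA value
// or the destination of a phi, identified by its index.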
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
enum LiveRef {
    SSA(SSAValue),
    Phi(u32),
}

#[derive(Clone, Copy, Eq, Hash, PartialEq)]
struct LiveValue {
    pub live_ref: LiveRef,
    pub reg_ref: RegRef,
}

// We need a stable ordering of live values so that RA is deterministic
impl Ord for LiveValue {
    fn cmp(&self, other: &Self) -> Ordering {
        let s_file = u8::from(self.reg_ref.file());
        let o_file = u8::from(other.reg_ref.file());
        match s_file.cmp(&o_file) {
            Ordering::Equal => {
                let s_idx = self.reg_ref.base_idx();
                let o_idx = other.reg_ref.base_idx();
                s_idx.cmp(&o_idx)
            }
            ord => ord,
        }
    }
}

impl PartialOrd for LiveValue {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

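// Scalar register allocator for a single register file.  It maintains a
// bidirectional SSA <-> register mapping: reg_ssa is indexed by register
// number and ssa_reg is the inverse map.  The used bitset tracks which
// registers are currently occupied and pinned marks registers that the
// vector allocator below must not move.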
#[derive(Clone)]
struct RegAllocator {
    file: RegFile,
    num_regs: u32,
    used: BitSet,
    pinned: BitSet,
    reg_ssa: Vec<SSAValue>,
    ssa_reg: HashMap<SSAValue, u32>,
}

impl RegAllocator {
    pub fn new(file: RegFile, num_regs: u32) -> Self {
        Self {
            file,
            num_regs,
            used: BitSet::new(),
            pinned: BitSet::new(),
            reg_ssa: Vec::new(),
            ssa_reg: HashMap::new(),
        }
    }

    fn file(&self) -> RegFile {
        self.file
    }

    pub fn num_regs_used(&self) -> u32 {
        self.ssa_reg.len().try_into().unwrap()
    }

    pub fn reg_is_used(&self, reg: u32) -> bool {
        self.used.get(reg.try_into().unwrap())
    }

    pub fn reg_is_pinned(&self, reg: u32) -> bool {
        self.pinned.get(reg.try_into().unwrap())
    }

    pub fn try_get_reg(&self, ssa: SSAValue) -> Option<u32> {
        self.ssa_reg.get(&ssa).cloned()
    }

    pub fn try_get_ssa(&self, reg: u32) -> Option<SSAValue> {
        if self.reg_is_used(reg) {
            Some(self.reg_ssa[usize::try_from(reg).unwrap()])
        } else {
            None
        }
    }

    pub fn try_get_vec_reg(&self, vec: &SSARef) -> Option<u32> {
        let Some(reg) = self.try_get_reg(vec[0]) else {
            return None;
        };

        let align = u32::from(vec.comps()).next_power_of_two();
        if reg % align != 0 {
            return None;
        }

        for c in 1..vec.comps() {
            let ssa = vec[usize::from(c)];
            if self.try_get_reg(ssa) != Some(reg + u32::from(c)) {
                return None;
            }
        }
        Some(reg)
    }

    pub fn free_ssa(&mut self, ssa: SSAValue) -> u32 {
        assert!(ssa.file() == self.file);
        let reg = self.ssa_reg.remove(&ssa).unwrap();
        assert!(self.reg_is_used(reg));
        let reg_usize = usize::try_from(reg).unwrap();
        assert!(self.reg_ssa[reg_usize] == ssa);
        self.used.remove(reg_usize);
        self.pinned.remove(reg_usize);
        reg
    }

    pub fn assign_reg(&mut self, ssa: SSAValue, reg: u32) {
        assert!(ssa.file() == self.file);
        assert!(reg < self.num_regs);
        assert!(!self.reg_is_used(reg));

        let reg_usize = usize::try_from(reg).unwrap();
        if reg_usize >= self.reg_ssa.len() {
            self.reg_ssa.resize(reg_usize + 1, SSAValue::NONE);
        }
        self.reg_ssa[reg_usize] = ssa;
        let old = self.ssa_reg.insert(ssa, reg);
        assert!(old.is_none());
        self.used.insert(reg_usize);
    }

    pub fn pin_reg(&mut self, reg: u32) {
        assert!(self.reg_is_used(reg));
        self.pinned.insert(reg.try_into().unwrap());
    }

    fn reg_range_is_unset(set: &BitSet, reg: u32, comps: u8) -> bool {
        for c in 0..u32::from(comps) {
            if set.get((reg + c).try_into().unwrap()) {
                return false;
            }
        }
        true
    }

    fn try_find_unset_reg_range(
        &self,
        set: &BitSet,
        start_reg: u32,
        align: u32,
        comps: u8,
    ) -> Option<u32> {
        assert!(comps > 0 && u32::from(comps) <= self.num_regs);

        let mut next_reg = start_reg;
        loop {
            let reg: u32 = set
                .next_unset(usize::try_from(next_reg).unwrap())
                .try_into()
                .unwrap();

            // Ensure we're properly aligned
            let reg = reg.next_multiple_of(align);

            // Ensure we're in-bounds. This also serves as a check to ensure
            // that u8::try_from(reg + i) will succeed.
            if reg > self.num_regs - u32::from(comps) {
                return None;
            }

            if Self::reg_range_is_unset(set, reg, comps) {
                return Some(reg);
            }

            next_reg = reg + align;
        }
    }

    pub fn try_find_unused_reg_range(
        &self,
        start_reg: u32,
        align: u32,
        comps: u8,
    ) -> Option<u32> {
        self.try_find_unset_reg_range(&self.used, start_reg, align, comps)
    }

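    // Allocates a register for a scalar SSA value.  If the use map says the
    // value later feeds a fixed-register op or a vector, try to place it in
    // the register (or at least a correctly aligned slot) that the later use
    // will want, so collecting the vector is more likely to be free.
    // Otherwise, just take the lowest free register.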
    pub fn alloc_scalar(
        &mut self,
        ip: usize,
        sum: &SSAUseMap,
        ssa: SSAValue,
    ) -> u32 {
        if let Some(u) = sum.find_vec_use_after(ssa, ip) {
            match u {
                SSAUse::FixedReg(reg) => {
                    if !self.reg_is_used(*reg) {
                        self.assign_reg(ssa, *reg);
                        return *reg;
                    }
                }
                SSAUse::Vec(vec) => {
                    let mut comp = u8::MAX;
                    for c in 0..vec.comps() {
                        if vec[usize::from(c)] == ssa {
                            comp = c;
                            break;
                        }
                    }
                    assert!(comp < vec.comps());

                    let align = u32::from(vec.comps()).next_power_of_two();
                    for c in 0..vec.comps() {
                        if c == comp {
                            continue;
                        }

                        let other = vec[usize::from(c)];
                        let Some(other_reg) = self.try_get_reg(other) else {
                            continue;
                        };

                        let vec_reg = other_reg & !(align - 1);
                        if other_reg != vec_reg + u32::from(c) {
                            continue;
                        }

                        let reg = vec_reg + u32::from(comp);
                        if reg < self.num_regs && !self.reg_is_used(reg) {
                            self.assign_reg(ssa, reg);
                            return reg;
                        }
                    }

                    // We weren't able to pair it with an already allocated
                    // register but maybe we can at least find an aligned one.
                    if let Some(reg) =
                        self.try_find_unused_reg_range(0, align, 1)
                    {
                        self.assign_reg(ssa, reg);
                        return reg;
                    }
                }
            }
        }

        let reg = self
            .try_find_unused_reg_range(0, 1, 1)
            .expect("Failed to find free register");
        self.assign_reg(ssa, reg);
        reg
    }
}

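// Helper for allocating contiguous, aligned register ranges for vectors.
// When no free range exists, it may shuffle already-allocated scalars out
// of the way: displaced values are recorded in `evicted` and the required
// moves accumulate in `pcopy`, a parallel copy emitted just before the
// instruction.  finish() re-homes any still-evicted values and the Drop
// impl asserts that nothing was left in limbo.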
struct VecRegAllocator<'a> {
    ra: &'a mut RegAllocator,
    pcopy: OpParCopy,
    pinned: BitSet,
    evicted: HashMap<SSAValue, u32>,
}

impl<'a> VecRegAllocator<'a> {
    fn new(ra: &'a mut RegAllocator) -> Self {
        let pinned = ra.pinned.clone();
        VecRegAllocator {
            ra,
            pcopy: OpParCopy::new(),
            pinned,
            evicted: HashMap::new(),
        }
    }

    fn file(&self) -> RegFile {
        self.ra.file()
    }

    fn pin_reg(&mut self, reg: u32) {
        self.pinned.insert(reg.try_into().unwrap());
    }

    fn pin_reg_range(&mut self, reg: u32, comps: u8) {
        for c in 0..u32::from(comps) {
            self.pin_reg(reg + c);
        }
    }

    fn reg_is_pinned(&self, reg: u32) -> bool {
        self.pinned.get(reg.try_into().unwrap())
    }

    fn reg_range_is_unpinned(&self, reg: u32, comps: u8) -> bool {
        RegAllocator::reg_range_is_unset(&self.pinned, reg, comps)
    }

    fn assign_pin_reg(&mut self, ssa: SSAValue, reg: u32) -> RegRef {
        self.pin_reg(reg);
        self.ra.assign_reg(ssa, reg);
        RegRef::new(self.file(), reg, 1)
    }

    pub fn assign_pin_vec_reg(&mut self, vec: SSARef, reg: u32) -> RegRef {
        for c in 0..vec.comps() {
            let ssa = vec[usize::from(c)];
            self.assign_pin_reg(ssa, reg + u32::from(c));
        }
        RegRef::new(self.file(), reg, vec.comps())
    }

    fn try_find_unpinned_reg_range(
        &self,
        start_reg: u32,
        align: u32,
        comps: u8,
    ) -> Option<u32> {
        self.ra
            .try_find_unset_reg_range(&self.pinned, start_reg, align, comps)
    }

    pub fn evict_ssa(&mut self, ssa: SSAValue, old_reg: u32) {
        assert!(ssa.file() == self.file());
        assert!(!self.reg_is_pinned(old_reg));
        self.evicted.insert(ssa, old_reg);
    }

    pub fn evict_reg_if_used(&mut self, reg: u32) {
        assert!(!self.reg_is_pinned(reg));

        if let Some(ssa) = self.ra.try_get_ssa(reg) {
            self.ra.free_ssa(ssa);
            self.evict_ssa(ssa, reg);
        }
    }

    fn move_ssa_to_reg(&mut self, ssa: SSAValue, new_reg: u32) {
        if let Some(old_reg) = self.ra.try_get_reg(ssa) {
            assert!(self.evicted.get(&ssa).is_none());
            assert!(!self.reg_is_pinned(old_reg));

            if new_reg == old_reg {
                self.pin_reg(new_reg);
            } else {
                self.ra.free_ssa(ssa);
                self.evict_reg_if_used(new_reg);

                self.pcopy.push(
                    RegRef::new(self.file(), new_reg, 1).into(),
                    RegRef::new(self.file(), old_reg, 1).into(),
                );

                self.assign_pin_reg(ssa, new_reg);
            }
        } else if let Some(old_reg) = self.evicted.remove(&ssa) {
            self.evict_reg_if_used(new_reg);

            self.pcopy.push(
                RegRef::new(self.file(), new_reg, 1).into(),
                RegRef::new(self.file(), old_reg, 1).into(),
            );

            self.assign_pin_reg(ssa, new_reg);
        } else {
            panic!("Unknown SSA value");
        }
    }

    fn finish(mut self, pcopy: &mut OpParCopy) {
        pcopy.dsts_srcs.append(&mut self.pcopy.dsts_srcs);

        if !self.evicted.is_empty() {
            // Sort so we get determinism, even if the hash map order changes
            // from one run to another or due to rust compiler updates.
            let mut evicted: Vec<_> = self.evicted.drain().collect();
            evicted.sort_by_key(|(_, reg)| *reg);

            for (ssa, old_reg) in evicted {
                let mut next_reg = 0;
                let new_reg = loop {
                    let reg = self
                        .ra
                        .try_find_unused_reg_range(next_reg, 1, 1)
                        .expect("Failed to find free register");
                    if !self.reg_is_pinned(reg) {
                        break reg;
                    }
                    next_reg = reg + 1;
                };

                pcopy.push(
                    RegRef::new(self.file(), new_reg, 1).into(),
                    RegRef::new(self.file(), old_reg, 1).into(),
                );
                self.assign_pin_reg(ssa, new_reg);
            }
        }
    }

    pub fn try_get_vec_reg(&self, vec: &SSARef) -> Option<u32> {
        self.ra.try_get_vec_reg(vec)
    }

    pub fn collect_vector(&mut self, vec: &SSARef) -> RegRef {
        if let Some(reg) = self.try_get_vec_reg(vec) {
            self.pin_reg_range(reg, vec.comps());
            return RegRef::new(self.file(), reg, vec.comps());
        }

        let comps = vec.comps();
        let align = u32::from(comps).next_power_of_two();

        let reg = self
            .ra
            .try_find_unused_reg_range(0, align, comps)
            .or_else(|| {
                for c in 0..comps {
                    let ssa = vec[usize::from(c)];
                    let Some(comp_reg) = self.ra.try_get_reg(ssa) else {
                        continue;
                    };

                    let vec_reg = comp_reg & !(align - 1);
                    if comp_reg != vec_reg + u32::from(c) {
                        continue;
                    }

                    if vec_reg + u32::from(comps) > self.ra.num_regs {
                        continue;
                    }

                    if self.reg_range_is_unpinned(vec_reg, comps) {
                        return Some(vec_reg);
                    }
                }
                None
            })
            .or_else(|| self.try_find_unpinned_reg_range(0, align, comps))
            .expect("Failed to find an unpinned register range");

        for c in 0..comps {
            let ssa = vec[usize::from(c)];
            self.move_ssa_to_reg(ssa, reg + u32::from(c));
        }

        RegRef::new(self.file(), reg, comps)
    }

    pub fn alloc_vector(&mut self, vec: SSARef) -> RegRef {
        let comps = vec.comps();
        let align = u32::from(comps).next_power_of_two();

        if let Some(reg) = self.ra.try_find_unused_reg_range(0, align, comps) {
            return self.assign_pin_vec_reg(vec, reg);
        }

        let reg = self
            .try_find_unpinned_reg_range(0, align, comps)
            .expect("Failed to find an unpinned register range");

        for c in 0..comps {
            self.evict_reg_if_used(reg + u32::from(c));
        }
        self.assign_pin_vec_reg(vec, reg)
    }

    pub fn free_killed(&mut self, killed: &KillSet) {
        for ssa in killed.iter() {
            if ssa.file() == self.file() {
                self.ra.free_ssa(*ssa);
            }
        }
    }
}

impl Drop for VecRegAllocator<'_> {
    fn drop(&mut self) {
        assert!(self.evicted.is_empty());
    }
}

fn instr_remap_srcs_file(instr: &mut Instr, ra: &mut VecRegAllocator) {
    // Collect vector sources first since those may silently pin some of our
    // scalar sources.
    for src in instr.srcs_mut() {
        if let Some(ssa) = src_ssa_ref(src) {
            if ssa.file().unwrap() == ra.file() && ssa.comps() > 1 {
                let reg = ra.collect_vector(ssa);
                src_set_reg(src, reg);
            }
        }
    }

    if let PredRef::SSA(pred) = instr.pred.pred_ref {
        if pred.file() == ra.file() {
            instr.pred.pred_ref = ra.collect_vector(&pred.into()).into();
        }
    }

    for src in instr.srcs_mut() {
        if let Some(ssa) = src_ssa_ref(src) {
            if ssa.file().unwrap() == ra.file() && ssa.comps() == 1 {
                let reg = ra.collect_vector(ssa);
                src_set_reg(src, reg);
            }
        }
    }
}

fn instr_alloc_scalar_dsts_file(
    instr: &mut Instr,
    ip: usize,
    sum: &SSAUseMap,
    ra: &mut RegAllocator,
) {
    for dst in instr.dsts_mut() {
        if let Dst::SSA(ssa) = dst {
            if ssa.file().unwrap() == ra.file() {
                assert!(ssa.comps() == 1);
                let reg = ra.alloc_scalar(ip, sum, ssa[0]);
                *dst = RegRef::new(ra.file(), reg, 1).into();
            }
        }
    }
}

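// Assigns registers for a single instruction within one register file.
// Vector destinations make this interesting; three strategies are tried:
//
//  1. If every vector destination can reuse the register range of a killed
//     vector source, write each destination over its dying source.
//  2. Otherwise, if all vector destinations fit in currently-unused
//     ranges, allocate those ranges up-front.
//  3. Otherwise, fall back to alloc_vector(), which may evict scalars and
//     emit parallel copies to make room.
//
// In all cases, scalar destinations are allocated last so they can fill
// in whatever holes remain.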
fn instr_assign_regs_file(
    instr: &mut Instr,
    ip: usize,
    sum: &SSAUseMap,
    killed: &KillSet,
    pcopy: &mut OpParCopy,
    ra: &mut RegAllocator,
) {
    struct VecDst {
        dst_idx: usize,
        comps: u8,
        killed: Option<SSARef>,
        reg: u32,
    }

    let mut vec_dsts = Vec::new();
    let mut vec_dst_comps = 0;
    for (i, dst) in instr.dsts().iter().enumerate() {
        if let Dst::SSA(ssa) = dst {
            if ssa.file().unwrap() == ra.file() && ssa.comps() > 1 {
                vec_dsts.push(VecDst {
                    dst_idx: i,
                    comps: ssa.comps(),
                    killed: None,
                    reg: u32::MAX,
                });
                vec_dst_comps += ssa.comps();
            }
        }
    }

    // No vector destinations is the easy case
    if vec_dst_comps == 0 {
        let mut vra = VecRegAllocator::new(ra);
        instr_remap_srcs_file(instr, &mut vra);
        vra.free_killed(killed);
        vra.finish(pcopy);
        instr_alloc_scalar_dsts_file(instr, ip, sum, ra);
        return;
    }

    // Predicates can't be vectors.  This lets us ignore instr.pred in our
    // analysis for the cases below. Only the easy case above needs to care
    // about them.
    assert!(!ra.file().is_predicate());

    let mut avail = killed.set.clone();
    let mut killed_vecs = Vec::new();
    for src in instr.srcs() {
        if let Some(vec) = src_ssa_ref(src) {
            if vec.comps() > 1 {
                let mut vec_killed = true;
                for ssa in vec.iter() {
                    if ssa.file() != ra.file() || !avail.contains(ssa) {
                        vec_killed = false;
                        break;
                    }
                }
                if vec_killed {
                    for ssa in vec.iter() {
                        avail.remove(ssa);
                    }
                    killed_vecs.push(*vec);
                }
            }
        }
    }

    vec_dsts.sort_by_key(|v| v.comps);
    killed_vecs.sort_by_key(|v| v.comps());

    let mut next_dst_reg = 0;
    let mut vec_dsts_map_to_killed_srcs = true;
    let mut could_trivially_allocate = true;
    for vec_dst in vec_dsts.iter_mut().rev() {
        while let Some(src) = killed_vecs.pop() {
            if src.comps() >= vec_dst.comps {
                vec_dst.killed = Some(src);
                break;
            }
        }
        if vec_dst.killed.is_none() {
            vec_dsts_map_to_killed_srcs = false;
        }

        let align = u32::from(vec_dst.comps).next_power_of_two();
        if let Some(reg) =
            ra.try_find_unused_reg_range(next_dst_reg, align, vec_dst.comps)
        {
            vec_dst.reg = reg;
            next_dst_reg = reg + u32::from(vec_dst.comps);
        } else {
            could_trivially_allocate = false;
        }
    }

    if vec_dsts_map_to_killed_srcs {
        let mut vra = VecRegAllocator::new(ra);
        instr_remap_srcs_file(instr, &mut vra);

        for vec_dst in &mut vec_dsts {
            let src_vec = vec_dst.killed.as_ref().unwrap();
            vec_dst.reg = vra.try_get_vec_reg(src_vec).unwrap();
        }

        vra.free_killed(killed);

        for vec_dst in vec_dsts {
            let dst = &mut instr.dsts_mut()[vec_dst.dst_idx];
            *dst = vra
                .assign_pin_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg)
                .into();
        }

        vra.finish(pcopy);

        instr_alloc_scalar_dsts_file(instr, ip, sum, ra);
    } else if could_trivially_allocate {
        let mut vra = VecRegAllocator::new(ra);
        for vec_dst in vec_dsts {
            let dst = &mut instr.dsts_mut()[vec_dst.dst_idx];
            *dst = vra
                .assign_pin_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg)
                .into();
        }

        instr_remap_srcs_file(instr, &mut vra);
        vra.free_killed(killed);
        vra.finish(pcopy);
        instr_alloc_scalar_dsts_file(instr, ip, sum, ra);
    } else {
        let mut vra = VecRegAllocator::new(ra);
        instr_remap_srcs_file(instr, &mut vra);

        // Allocate vector destinations first so we have the most freedom.
        // Scalar destinations can fill in holes.
        for dst in instr.dsts_mut() {
            if let Dst::SSA(ssa) = dst {
                if ssa.file().unwrap() == vra.file() && ssa.comps() > 1 {
                    *dst = vra.alloc_vector(*ssa).into();
                }
            }
        }

        vra.free_killed(killed);
        vra.finish(pcopy);

        instr_alloc_scalar_dsts_file(instr, ip, sum, ra);
    }
}

impl PerRegFile<RegAllocator> {
    pub fn assign_reg(&mut self, ssa: SSAValue, reg: RegRef) {
        assert!(reg.file() == ssa.file());
        assert!(reg.comps() == 1);
        self[ssa.file()].assign_reg(ssa, reg.base_idx());
    }

    pub fn free_killed(&mut self, killed: &KillSet) {
        for ssa in killed.iter() {
            self[ssa.file()].free_ssa(*ssa);
        }
    }
}

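// Per-basic-block register assignment state.  live_in records which
// (value, register) pairs this block expects on entry; phi_out records,
// for each phi index, where the corresponding value lives at the end of
// this block.  second_pass() uses both to stitch blocks together with
// parallel copies on the CFG edges.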
struct AssignRegsBlock {
    ra: PerRegFile<RegAllocator>,
    pcopy_tmp_gprs: u8,
    live_in: Vec<LiveValue>,
    phi_out: HashMap<u32, SrcRef>,
}

impl AssignRegsBlock {
    fn new(num_regs: &PerRegFile<u32>, pcopy_tmp_gprs: u8) -> AssignRegsBlock {
        AssignRegsBlock {
            ra: PerRegFile::new_with(|file| {
                RegAllocator::new(file, num_regs[file])
            }),
            pcopy_tmp_gprs,
            live_in: Vec::new(),
            phi_out: HashMap::new(),
        }
    }

    fn get_scalar(&self, ssa: SSAValue) -> RegRef {
        let ra = &self.ra[ssa.file()];
        let reg = ra.try_get_reg(ssa).expect("Unknown SSA value");
        RegRef::new(ssa.file(), reg, 1)
    }

    fn alloc_scalar(
        &mut self,
        ip: usize,
        sum: &SSAUseMap,
        ssa: SSAValue,
    ) -> RegRef {
        let ra = &mut self.ra[ssa.file()];
        let reg = ra.alloc_scalar(ip, sum, ssa);
        RegRef::new(ssa.file(), reg, 1)
    }

    fn pin_vector(&mut self, reg: RegRef) {
        let ra = &mut self.ra[reg.file()];
        for c in 0..reg.comps() {
            ra.pin_reg(reg.comp(c).base_idx());
        }
    }

    fn try_coalesce(&mut self, ssa: SSAValue, src: &Src) -> bool {
        debug_assert!(src.src_mod.is_none());
        let SrcRef::Reg(src_reg) = src.src_ref else {
            return false;
        };
        debug_assert!(src_reg.comps() == 1);

        if src_reg.file() != ssa.file() {
            return false;
        }

        let ra = &mut self.ra[src_reg.file()];
        if ra.reg_is_used(src_reg.base_idx()) {
            return false;
        }

        ra.assign_reg(ssa, src_reg.base_idx());
        true
    }

    fn pcopy_tmp(&self) -> Option<RegRef> {
        if self.pcopy_tmp_gprs > 0 {
            Some(RegRef::new(
                RegFile::GPR,
                self.ra[RegFile::GPR].num_regs,
                self.pcopy_tmp_gprs,
            ))
        } else {
            None
        }
    }

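    // Rewrites a single instruction from SSA values to registers.  Most ops
    // go through the generic per-file path at the bottom; phis, copies,
    // parallel copies, pins, and barrier ops get special handling.  Returns
    // None when the instruction can be dropped entirely (e.g. a copy that
    // was coalesced away).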
    fn assign_regs_instr(
        &mut self,
        mut instr: Box<Instr>,
        ip: usize,
        sum: &SSAUseMap,
        srcs_killed: &KillSet,
        dsts_killed: &KillSet,
        pcopy: &mut OpParCopy,
    ) -> Option<Box<Instr>> {
        match &mut instr.op {
            Op::Undef(undef) => {
                if let Dst::SSA(ssa) = undef.dst {
                    assert!(ssa.comps() == 1);
                    self.alloc_scalar(ip, sum, ssa[0]);
                }
                assert!(srcs_killed.is_empty());
                self.ra.free_killed(dsts_killed);
                None
            }
            Op::PhiSrcs(phi) => {
                for (id, src) in phi.srcs.iter() {
                    assert!(src.src_mod.is_none());
                    if let Some(ssa) = src_ssa_ref(src) {
                        assert!(ssa.comps() == 1);
                        let reg = self.get_scalar(ssa[0]);
                        self.phi_out.insert(*id, reg.into());
                    } else {
                        self.phi_out.insert(*id, src.src_ref);
                    }
                }
                assert!(dsts_killed.is_empty());
                None
            }
            Op::PhiDsts(phi) => {
                assert!(instr.pred.is_true());

                for (id, dst) in phi.dsts.iter() {
                    if let Dst::SSA(ssa) = dst {
                        assert!(ssa.comps() == 1);
                        let reg = self.alloc_scalar(ip, sum, ssa[0]);
                        self.live_in.push(LiveValue {
                            live_ref: LiveRef::Phi(*id),
                            reg_ref: reg,
                        });
                    }
                }
                assert!(srcs_killed.is_empty());
                self.ra.free_killed(dsts_killed);

                None
            }
            Op::Break(op) => {
                for src in op.srcs_as_mut_slice() {
                    if let Some(ssa) = src_ssa_ref(src) {
                        assert!(ssa.comps() == 1);
                        let reg = self.get_scalar(ssa[0]);
                        src_set_reg(src, reg);
                    }
                }

                self.ra.free_killed(srcs_killed);

                if let Dst::SSA(ssa) = &op.bar_out {
                    let reg = *op.bar_in.src_ref.as_reg().unwrap();
                    self.ra.assign_reg(ssa[0], reg);
                    op.bar_out = reg.into();
                }

                self.ra.free_killed(dsts_killed);

                Some(instr)
            }
            Op::BSSy(op) => {
                for src in op.srcs_as_mut_slice() {
                    if let Some(ssa) = src_ssa_ref(src) {
                        assert!(ssa.comps() == 1);
                        let reg = self.get_scalar(ssa[0]);
                        src_set_reg(src, reg);
                    }
                }

                self.ra.free_killed(srcs_killed);

                if let Dst::SSA(ssa) = &op.bar_out {
                    let reg = *op.bar_in.src_ref.as_reg().unwrap();
                    self.ra.assign_reg(ssa[0], reg);
                    op.bar_out = reg.into();
                }

                self.ra.free_killed(dsts_killed);

                Some(instr)
            }
            Op::Copy(copy) => {
                if let Some(ssa) = src_ssa_ref(&copy.src) {
                    // This may be a CBuf::BindlessSSA source so we need to
                    // support vectors because cbuf handles are vec2s. However,
                    // since we only have a single scalar destination, we can
                    // just allocate and free killed up-front.
                    let ra = &mut self.ra[ssa.file().unwrap()];
                    let mut vra = VecRegAllocator::new(ra);
                    let reg = vra.collect_vector(ssa);
                    vra.free_killed(srcs_killed);
                    vra.finish(pcopy);
                    src_set_reg(&mut copy.src, reg);
                }

                let mut del_copy = false;
                if let Dst::SSA(dst_vec) = &mut copy.dst {
                    debug_assert!(dst_vec.comps() == 1);
                    let dst_ssa = &dst_vec[0];

                    if self.try_coalesce(*dst_ssa, &copy.src) {
                        del_copy = true;
                    } else {
                        copy.dst = self.alloc_scalar(ip, sum, *dst_ssa).into();
                    }
                }

                self.ra.free_killed(dsts_killed);

                if del_copy {
                    None
                } else {
                    Some(instr)
                }
            }
            Op::Pin(OpPin { src, dst }) | Op::Unpin(OpUnpin { src, dst }) => {
                assert!(instr.pred.is_true());

                // These basically act as a vector version of OpCopy except that
                // they only work on SSA values and we pin the destination if
                // it's OpPin.
                let src_vec = src.as_ssa().unwrap();
                let dst_vec = dst.as_ssa().unwrap();
                assert!(src_vec.comps() == dst_vec.comps());

                if srcs_killed.len() == src_vec.comps().into()
                    && src_vec.file() == dst_vec.file()
                {
                    let ra = &mut self.ra[src_vec.file().unwrap()];
                    let mut vra = VecRegAllocator::new(ra);
                    let reg = vra.collect_vector(src_vec);
                    vra.finish(pcopy);
                    for c in 0..src_vec.comps() {
                        let c_reg = ra.free_ssa(src_vec[usize::from(c)]);
                        debug_assert!(c_reg == reg.comp(c).base_idx());
                        ra.assign_reg(dst_vec[usize::from(c)], c_reg);
                    }

                    if matches!(&instr.op, Op::Pin(_)) {
                        self.pin_vector(reg);
                    }
                    self.ra.free_killed(dsts_killed);

                    None
                } else {
                    // Otherwise, turn into a parallel copy
                    //
                    // We can always allocate the destination first in this
                    // case.
                    assert!(dst_vec.comps() > 1 || srcs_killed.is_empty());

                    let dst_ra = &mut self.ra[dst_vec.file().unwrap()];
                    let mut vra = VecRegAllocator::new(dst_ra);
                    let dst_reg = vra.alloc_vector(*dst_vec);
                    vra.finish(pcopy);

                    let mut pin_copy = OpParCopy::new();
                    for c in 0..dst_reg.comps() {
                        let src_reg = self.get_scalar(src_vec[usize::from(c)]);
                        pin_copy.push(dst_reg.comp(c).into(), src_reg.into());
                    }

                    if matches!(&instr.op, Op::Pin(_)) {
                        self.pin_vector(dst_reg);
                    }
                    self.ra.free_killed(srcs_killed);
                    self.ra.free_killed(dsts_killed);

                    Some(Instr::new_boxed(pin_copy))
                }
            }
            Op::ParCopy(pcopy) => {
                for (_, src) in pcopy.dsts_srcs.iter_mut() {
                    if let Some(src_vec) = src_ssa_ref(src) {
                        debug_assert!(src_vec.comps() == 1);
                        let reg = self.get_scalar(src_vec[0]).into();
                        src_set_reg(src, reg);
                    }
                }

                self.ra.free_killed(srcs_killed);

                // Try to coalesce destinations into sources, if possible
                pcopy.dsts_srcs.retain(|dst, src| match dst {
                    Dst::None => false,
                    Dst::SSA(dst_vec) => {
                        debug_assert!(dst_vec.comps() == 1);
                        !self.try_coalesce(dst_vec[0], src)
                    }
                    Dst::Reg(_) => true,
                });

                for (dst, _) in pcopy.dsts_srcs.iter_mut() {
                    if let Dst::SSA(dst_vec) = dst {
                        debug_assert!(dst_vec.comps() == 1);
                        *dst = self.alloc_scalar(ip, sum, dst_vec[0]).into();
                    }
                }

                self.ra.free_killed(dsts_killed);

                pcopy.tmp = self.pcopy_tmp();
                if pcopy.is_empty() {
                    None
                } else {
                    Some(instr)
                }
            }
            Op::RegOut(out) => {
                for src in out.srcs.iter_mut() {
                    if let Some(src_vec) = src_ssa_ref(src) {
                        debug_assert!(src_vec.comps() == 1);
                        let reg = self.get_scalar(src_vec[0]).into();
                        src_set_reg(src, reg);
                    }
                }

                self.ra.free_killed(srcs_killed);
                assert!(dsts_killed.is_empty());

                // This should be the last instruction and its sources should
                // be the last free GPRs.
                debug_assert!(self.ra[RegFile::GPR].num_regs_used() == 0);

                for (i, src) in out.srcs.iter().enumerate() {
                    let reg = u32::try_from(i).unwrap();
                    let dst = RegRef::new(RegFile::GPR, reg, 1);
                    pcopy.push(dst.into(), *src);
                }

                None
            }
            _ => {
                for file in self.ra.values_mut() {
                    instr_assign_regs_file(
                        &mut instr,
                        ip,
                        sum,
                        srcs_killed,
                        pcopy,
                        file,
                    );
                }
                self.ra.free_killed(dsts_killed);
                Some(instr)
            }
        }
    }

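    // First pass: walk the block in order, compute which values die at each
    // instruction (srcs_killed / dsts_killed), and rewrite every instruction
    // from SSA values to registers, inserting parallel copies wherever the
    // vector allocator had to shuffle values around.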
    fn first_pass<BL: BlockLiveness>(
        &mut self,
        b: &mut BasicBlock,
        bl: &BL,
        pred_ra: Option<&PerRegFile<RegAllocator>>,
    ) {
        // Populate live-in from the register file we're handed.  We'll add
        // more live-in values when we process the OpPhiDsts, if any.
        if let Some(pred_ra) = pred_ra {
            for (raf, pred_raf) in self.ra.values_mut().zip(pred_ra.values()) {
                for (ssa, reg) in &pred_raf.ssa_reg {
                    if bl.is_live_in(ssa) {
                        raf.assign_reg(*ssa, *reg);
                        if pred_raf.reg_is_pinned(*reg) {
                            raf.pin_reg(*reg);
                        }
                        self.live_in.push(LiveValue {
                            live_ref: LiveRef::SSA(*ssa),
                            reg_ref: RegRef::new(raf.file(), *reg, 1),
                        });
                    }
                }
            }
        }

        let sum = SSAUseMap::for_block(b);

        let mut instrs = Vec::new();
        let mut srcs_killed = KillSet::new();
        let mut dsts_killed = KillSet::new();

        for (ip, instr) in b.instrs.drain(..).enumerate() {
            // Build up the kill set
            srcs_killed.clear();
            if let PredRef::SSA(ssa) = &instr.pred.pred_ref {
                if !bl.is_live_after_ip(ssa, ip) {
                    srcs_killed.insert(*ssa);
                }
            }
            for src in instr.srcs() {
                for ssa in src.iter_ssa() {
                    if !bl.is_live_after_ip(ssa, ip) {
                        srcs_killed.insert(*ssa);
                    }
                }
            }

            dsts_killed.clear();
            for dst in instr.dsts() {
                if let Dst::SSA(vec) = dst {
                    for ssa in vec.iter() {
                        if !bl.is_live_after_ip(ssa, ip) {
                            dsts_killed.insert(*ssa);
                        }
                    }
                }
            }

            let mut pcopy = OpParCopy::new();
            pcopy.tmp = self.pcopy_tmp();

            let instr = self.assign_regs_instr(
                instr,
                ip,
                &sum,
                &srcs_killed,
                &dsts_killed,
                &mut pcopy,
            );

            if !pcopy.is_empty() {
                if DEBUG.annotate() {
                    instrs.push(Instr::new_boxed(OpAnnotate {
                        annotation: "generated by assign_regs".into(),
                    }));
                }
                if !b.uniform {
                    for dst in pcopy.dsts_as_slice() {
                        if let Dst::Reg(reg) = dst {
                            assert!(!reg.is_uniform());
                        }
                    }
                }
                instrs.push(Instr::new_boxed(pcopy));
            }

            if let Some(instr) = instr {
                instrs.push(instr);
            }
        }

        // Sort live-in to maintain determinism
        self.live_in.sort();

        b.instrs = instrs;
    }

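    // Second pass: for each successor block, emit a parallel copy at the
    // end of this block which moves each of the successor's live-in values
    // (including phi sources) from wherever this block left it into the
    // register the successor expects.  Copies that would be no-ops are
    // skipped, and the branch, if any, stays last in the block.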
    fn second_pass(&self, target: &AssignRegsBlock, b: &mut BasicBlock) {
        let mut pcopy = OpParCopy::new();
        pcopy.tmp = self.pcopy_tmp();

        for lv in &target.live_in {
            let src = match lv.live_ref {
                LiveRef::SSA(ssa) => SrcRef::from(self.get_scalar(ssa)),
                LiveRef::Phi(phi) => *self.phi_out.get(&phi).unwrap(),
            };
            let dst = lv.reg_ref;
            if let SrcRef::Reg(src_reg) = src {
                if dst == src_reg {
                    continue;
                }
            }
            pcopy.push(dst.into(), src.into());
        }

        if DEBUG.annotate() {
            b.instrs.push(Instr::new_boxed(OpAnnotate {
                annotation: "generated by assign_regs".into(),
            }));
        }
        if b.branch().is_some() {
            b.instrs.insert(b.instrs.len() - 1, Instr::new_boxed(pcopy));
        } else {
            b.instrs.push(Instr::new_boxed(pcopy));
        }
    }
}

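// Top-level register assignment.  The overall flow is:
//
//  1. Convert the function to CSSA so phi-related values can share
//     registers without interfering.
//  2. Compute liveness and spill any register file whose maximum pressure
//     exceeds the hardware limit, reserving one or two temporary GPRs for
//     parallel-copy lowering.
//  3. Walk the blocks in order, running first_pass() with the register
//     state inherited from one predecessor.
//  4. Run second_pass() over each CFG edge to patch up the differences
//     with parallel copies.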
impl Shader<'_> {
    pub fn assign_regs(&mut self) {
        assert!(self.functions.len() == 1);
        let f = &mut self.functions[0];

        // Convert to CSSA before we spill or assign registers
        f.to_cssa();

        let mut live = SimpleLiveness::for_function(f);
        let mut max_live = live.calc_max_live(f);

        // We want at least one temporary GPR reserved for parallel copies.
        let mut tmp_gprs = 1_u8;

        let spill_files =
            [RegFile::UPred, RegFile::Pred, RegFile::UGPR, RegFile::Bar];
        for file in spill_files {
            let num_regs = self.sm.num_regs(file);
            if max_live[file] > num_regs {
                f.spill_values(file, num_regs);

                // Re-calculate liveness after we spill
                live = SimpleLiveness::for_function(f);
                max_live = live.calc_max_live(f);

                if file == RegFile::Bar {
                    tmp_gprs = max(tmp_gprs, 2);
                }
            }
        }

        // An instruction can have at most 4 vector sources/destinations.  In
        // order to ensure we always succeed at allocation, regardless of
        // arbitrary choices, we need at least 16 GPRs.
        let mut gpr_limit = max(max_live[RegFile::GPR], 16);
        let mut total_gprs = gpr_limit + u32::from(tmp_gprs);

        let max_gprs = if DEBUG.spill() {
            // We need at least 16 registers to satisfy RA constraints for
            // texture ops and another 2 for parallel copy lowering
            18
        } else {
            self.sm.num_regs(RegFile::GPR)
        };
        if total_gprs > max_gprs {
            // If we're spilling GPRs, we need to reserve 2 GPRs for OpParCopy
            // lowering because it needs to be able to lower Mem copies, which
            // require a temporary
            tmp_gprs = max(tmp_gprs, 2);
            total_gprs = max_gprs;
            gpr_limit = total_gprs - u32::from(tmp_gprs);

            f.spill_values(RegFile::GPR, gpr_limit);

            // Re-calculate liveness one last time
            live = SimpleLiveness::for_function(f);
        }

        self.info.num_gprs = total_gprs.try_into().unwrap();

        let limit = PerRegFile::new_with(|file| {
            if file == RegFile::GPR {
                gpr_limit
            } else {
                self.sm.num_regs(file)
            }
        });

        let mut blocks: Vec<AssignRegsBlock> = Vec::new();
        for b_idx in 0..f.blocks.len() {
            let pred = f.blocks.pred_indices(b_idx);
            let pred_ra = if pred.is_empty() {
                None
            } else {
                // Start with the first predecessor's allocator state.
                Some(&blocks[pred[0]].ra)
            };

            let bl = live.block_live(b_idx);

            let mut arb = AssignRegsBlock::new(&limit, tmp_gprs);
            arb.first_pass(&mut f.blocks[b_idx], bl, pred_ra);

            assert!(blocks.len() == b_idx);
            blocks.push(arb);
        }

        for b_idx in 0..f.blocks.len() {
            let arb = &blocks[b_idx];
            for sb_idx in f.blocks.succ_indices(b_idx).to_vec() {
                arb.second_pass(&blocks[sb_idx], &mut f.blocks[b_idx]);
            }
        }
    }
}