// Copyright © 2022 Collabora, Ltd.
// SPDX-License-Identifier: MIT

use crate::api::{GetDebugFlags, DEBUG};
use crate::ir::*;

use std::cmp::max;
use std::collections::{HashMap, HashSet};
use std::ops::{Index, IndexMut, Range};
use std::slice;

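/// One entry of type `T` per hardware register, across all register files.
/// The GPR, UGPR, and predicate arrays appear one short of their file
/// sizes, presumably because the last register in each (RZ, URZ, PT, UPT)
/// always reads as zero/true and never needs tracking.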
struct RegTracker<T> {
    reg: [T; 255],
    ureg: [T; 63],
    pred: [T; 7],
    upred: [T; 7],
    carry: [T; 1],
}

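/// Builds a `[T; N]` by invoking `f` once per element.  This goes through
/// a `Vec` because `[f(); N]` would require `T: Copy`.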
fn new_array_with<T, const N: usize>(f: &impl Fn() -> T) -> [T; N] {
    let mut v = Vec::new();
    for _ in 0..N {
        v.push(f());
    }
    v.try_into()
        .unwrap_or_else(|_| panic!("Array size mismatch"))
}

impl<T> RegTracker<T> {
    pub fn new_with(f: &impl Fn() -> T) -> Self {
        Self {
            reg: new_array_with(f),
            ureg: new_array_with(f),
            pred: new_array_with(f),
            upred: new_array_with(f),
            carry: new_array_with(f),
        }
    }

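    /// Runs `f` on the entry for `instr`'s execution predicate, if it has
    /// one.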
    pub fn for_each_instr_pred_mut(
        &mut self,
        instr: &Instr,
        mut f: impl FnMut(&mut T),
    ) {
        if let PredRef::Reg(reg) = &instr.pred.pred_ref {
            for i in &mut self[*reg] {
                f(i);
            }
        }
    }

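    /// Runs `f(src_idx, entry)` on the entries for every register read by
    /// `instr`'s sources, including UGPRs referenced by bindless cbufs.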
    pub fn for_each_instr_src_mut(
        &mut self,
        instr: &Instr,
        mut f: impl FnMut(usize, &mut T),
    ) {
        for (i, src) in instr.srcs().iter().enumerate() {
            match &src.src_ref {
                SrcRef::Reg(reg) => {
                    for t in &mut self[*reg] {
                        f(i, t);
                    }
                }
                SrcRef::CBuf(CBufRef {
                    buf: CBuf::BindlessUGPR(reg),
                    ..
                }) => {
                    for t in &mut self[*reg] {
                        f(i, t);
                    }
                }
                _ => (),
            }
        }
    }

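    /// Runs `f(dst_idx, entry)` on the entries for every register written
    /// by `instr`'s destinations.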
    pub fn for_each_instr_dst_mut(
        &mut self,
        instr: &Instr,
        mut f: impl FnMut(usize, &mut T),
    ) {
        for (i, dst) in instr.dsts().iter().enumerate() {
            if let Dst::Reg(reg) = dst {
                for t in &mut self[*reg] {
                    f(i, t);
                }
            }
        }
    }
}

impl<T> Index<RegRef> for RegTracker<T> {
    type Output = [T];

    fn index(&self, reg: RegRef) -> &[T] {
        let range = reg.idx_range();
        let range = Range {
            start: usize::try_from(range.start).unwrap(),
            end: usize::try_from(range.end).unwrap(),
        };

        match reg.file() {
            RegFile::GPR => &self.reg[range],
            RegFile::UGPR => &self.ureg[range],
            RegFile::Pred => &self.pred[range],
            RegFile::UPred => &self.upred[range],
            RegFile::Carry => &self.carry[range],
            RegFile::Bar => &[], // Barriers have a HW scoreboard
            RegFile::Mem => panic!("Not a register"),
        }
    }
}

impl<T> IndexMut<RegRef> for RegTracker<T> {
    fn index_mut(&mut self, reg: RegRef) -> &mut [T] {
        let range = reg.idx_range();
        let range = Range {
            start: usize::try_from(range.start).unwrap(),
            end: usize::try_from(range.end).unwrap(),
        };

        match reg.file() {
            RegFile::GPR => &mut self.reg[range],
            RegFile::UGPR => &mut self.ureg[range],
            RegFile::Pred => &mut self.pred[range],
            RegFile::UPred => &mut self.upred[range],
            RegFile::Carry => &mut self.carry[range],
            RegFile::Bar => &mut [], // Barriers have a HW scoreboard
            RegFile::Mem => panic!("Not a register"),
        }
    }
}

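/// The outstanding use of a register: nothing, a single write, or any
/// number of reads.  Reads don't conflict with each other, so they
/// accumulate; a write conflicts with every other use.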
#[derive(Clone)]
enum RegUse<T: Clone> {
    None,
    Write(T),
    Reads(Vec<T>),
}

impl<T: Clone> RegUse<T> {
    pub fn deps(&self) -> &[T] {
        match self {
            RegUse::None => &[],
            RegUse::Write(dep) => slice::from_ref(dep),
            RegUse::Reads(deps) => &deps[..],
        }
    }

    pub fn clear(&mut self) -> Self {
        std::mem::replace(self, RegUse::None)
    }

    pub fn clear_write(&mut self) -> Self {
        if matches!(self, RegUse::Write(_)) {
            std::mem::replace(self, RegUse::None)
        } else {
            RegUse::None
        }
    }

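    /// Records a read, returning the previous write, if any, which the new
    /// read needs to wait on.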
    pub fn add_read(&mut self, dep: T) -> Self {
        match self {
            RegUse::None => {
                *self = RegUse::Reads(vec![dep]);
                RegUse::None
            }
            RegUse::Write(_) => {
                std::mem::replace(self, RegUse::Reads(vec![dep]))
            }
            RegUse::Reads(reads) => {
                reads.push(dep);
                RegUse::None
            }
        }
    }

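    /// Records a write, returning all previous uses, which the new write
    /// needs to wait on (write-after-read and write-after-write hazards).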
    pub fn set_write(&mut self, dep: T) -> Self {
        std::mem::replace(self, RegUse::Write(dep))
    }
}

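/// A single scoreboard dependency: where (if anywhere) it was first waited
/// on and, for write deps, the matching read dep of the same instruction.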
struct DepNode {
    read_dep: Option<usize>,
    first_wait: Option<(usize, usize)>,
}

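/// A dependency graph over the whole function.  For each instruction,
/// keyed by (block index, IP), it records the read/write deps the
/// instruction signals and the deps it waits on; `active` holds the deps
/// that have been signaled but not yet waited on.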
struct DepGraph {
    deps: Vec<DepNode>,
    instr_deps: HashMap<(usize, usize), (usize, usize)>,
    instr_waits: HashMap<(usize, usize), Vec<usize>>,
    active: HashSet<usize>,
}

impl DepGraph {
    pub fn new() -> Self {
        Self {
            deps: Vec::new(),
            instr_deps: HashMap::new(),
            instr_waits: HashMap::new(),
            active: HashSet::new(),
        }
    }

    fn add_new_dep(&mut self, read_dep: Option<usize>) -> usize {
        let dep = self.deps.len();
        self.deps.push(DepNode {
            read_dep,
            first_wait: None,
        });
        dep
    }

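    /// Allocates the read and write deps for the instruction at
    /// (block_idx, ip) and returns them as (read, write).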
    pub fn add_instr(&mut self, block_idx: usize, ip: usize) -> (usize, usize) {
        let rd = self.add_new_dep(None);
        let wr = self.add_new_dep(Some(rd));
        self.instr_deps.insert((block_idx, ip), (rd, wr));
        (rd, wr)
    }

    pub fn add_signal(&mut self, dep: usize) {
        self.active.insert(dep);
    }

    pub fn add_waits(
        &mut self,
        block_idx: usize,
        ip: usize,
        mut waits: Vec<usize>,
    ) {
        for dep in &waits {
            // A wait on a write automatically waits on the read. By removing
            // it from the active set here we ensure that we don't record any
            // duplicate write/read waits in the retain below.
            if let Some(rd) = &self.deps[*dep].read_dep {
                self.active.remove(rd);
            }
        }

        waits.retain(|dep| {
            let node = &mut self.deps[*dep];
            if let Some(wait) = node.first_wait {
                // Someone has already waited on this dep
                debug_assert!(!self.active.contains(dep));
                debug_assert!((block_idx, ip) >= wait);
                false
            } else if !self.active.contains(dep) {
                // Even if no one has waited on it yet, it may still be
                // deactivated. This can happen if we depend on the
                // destination before any of its sources.
                false
            } else {
                self.deps[*dep].first_wait = Some((block_idx, ip));
                self.active.remove(dep);
                true
            }
        });

        // Sort for stability. The list of waits may come from a HashSet (see
        // add_barrier()) and so it's not guaranteed stable across Rust
        // versions. This also ensures that everything always waits on the
        // oldest dependencies first.
        waits.sort();

        let _old = self.instr_waits.insert((block_idx, ip), waits);
        debug_assert!(_old.is_none());
    }

    pub fn add_barrier(&mut self, block_idx: usize, ip: usize) {
        let waits = self.active.iter().cloned().collect();
        self.add_waits(block_idx, ip, waits);
        debug_assert!(self.active.is_empty());
    }

    pub fn dep_is_waited_after(
        &self,
        dep: usize,
        block_idx: usize,
        ip: usize,
    ) -> bool {
        if let Some(wait) = self.deps[dep].first_wait {
            wait > (block_idx, ip)
        } else {
            false
        }
    }

    pub fn get_instr_deps(
        &self,
        block_idx: usize,
        ip: usize,
    ) -> (usize, usize) {
        *self.instr_deps.get(&(block_idx, ip)).unwrap()
    }

    pub fn get_instr_waits(&self, block_idx: usize, ip: usize) -> &[usize] {
        if let Some(waits) = self.instr_waits.get(&(block_idx, ip)) {
            &waits[..]
        } else {
            &[]
        }
    }
}

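/// Allocator for the six HW scoreboard barriers.  `bar_dep[b]` holds the
/// dependency currently assigned to barrier `b`, or `usize::MAX` if the
/// barrier is free.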
struct BarAlloc {
    num_bars: u8,
    bar_dep: [usize; 6],
}

impl BarAlloc {
    pub fn new() -> BarAlloc {
        BarAlloc {
            num_bars: 6,
            bar_dep: [usize::MAX; 6],
        }
    }

    pub fn bar_is_free(&self, bar: u8) -> bool {
        debug_assert!(bar < self.num_bars);
        self.bar_dep[usize::from(bar)] == usize::MAX
    }

    pub fn set_bar_dep(&mut self, bar: u8, dep: usize) {
        debug_assert!(self.bar_is_free(bar));
        self.bar_dep[usize::from(bar)] = dep;
    }

    pub fn free_bar(&mut self, bar: u8) {
        debug_assert!(!self.bar_is_free(bar));
        self.bar_dep[usize::from(bar)] = usize::MAX;
    }

    pub fn try_find_free_bar(&self) -> Option<u8> {
        for bar in 0..self.num_bars {
            if self.bar_is_free(bar) {
                return Some(bar);
            }
        }
        None
    }

    pub fn free_some_bar(&mut self) -> u8 {
        // Get the oldest by looking for the one with the smallest dep
        let mut bar = 0;
        for b in 1..self.num_bars {
            if self.bar_dep[usize::from(b)] < self.bar_dep[usize::from(bar)] {
                bar = b;
            }
        }
        self.free_bar(bar);
        bar
    }

    pub fn get_bar_for_dep(&self, dep: usize) -> Option<u8> {
        for bar in 0..self.num_bars {
            if self.bar_dep[usize::from(bar)] == dep {
                return Some(bar);
            }
        }
        None
    }
}

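/// Builds the dependency graph for `f` and allocates scoreboard barriers:
/// the first pass records, for every variable-latency instruction, which
/// deps it signals and which it must wait on; the second pass maps those
/// deps onto the six HW barriers, spilling the oldest one when none is
/// free.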
fn assign_barriers(f: &mut Function, sm: &dyn ShaderModel) {
    let mut uses = RegTracker::new_with(&|| RegUse::None);
    let mut deps = DepGraph::new();

    for (bi, b) in f.blocks.iter().enumerate() {
        for (ip, instr) in b.instrs.iter().enumerate() {
            if instr.is_branch() {
                deps.add_barrier(bi, ip);
            } else {
                // Execution predicates are handled immediately and we don't
                // need barriers for them, regardless of whether or not it's a
                // fixed-latency instruction.
                let mut waits = Vec::new();
                uses.for_each_instr_pred_mut(instr, |u| {
                    let u = u.clear_write();
                    waits.extend_from_slice(u.deps());
                });

                if instr.has_fixed_latency(sm.sm()) {
                    // Delays will cover us here. We just need to make sure
                    // that we wait on any uses that we consume.
                    uses.for_each_instr_src_mut(instr, |_, u| {
                        let u = u.clear_write();
                        waits.extend_from_slice(u.deps());
                    });
                    uses.for_each_instr_dst_mut(instr, |_, u| {
                        let u = u.clear();
                        waits.extend_from_slice(u.deps());
                    });
                } else {
                    let (rd, wr) = deps.add_instr(bi, ip);
                    uses.for_each_instr_src_mut(instr, |_, u| {
                        // Only mark a dep as signaled if we actually have
                        // something that shows up in the register file as
                        // needing scoreboarding
                        deps.add_signal(rd);
                        let u = u.add_read(rd);
                        waits.extend_from_slice(u.deps());
                    });
                    uses.for_each_instr_dst_mut(instr, |_, u| {
                        // Only mark a dep as signaled if we actually have
                        // something that shows up in the register file as
                        // needing scoreboarding
                        deps.add_signal(wr);
                        let u = u.set_write(wr);
                        for dep in u.deps() {
                            // Don't wait on ourselves
                            if *dep != rd {
                                waits.push(*dep);
                            }
                        }
                    });
                }
                deps.add_waits(bi, ip, waits);
            }
        }
    }

    let mut bars = BarAlloc::new();

    for (bi, b) in f.blocks.iter_mut().enumerate() {
        for (ip, instr) in b.instrs.iter_mut().enumerate() {
            let mut wait_mask = 0_u8;
            for dep in deps.get_instr_waits(bi, ip) {
                if let Some(bar) = bars.get_bar_for_dep(*dep) {
                    wait_mask |= 1 << bar;
                    bars.free_bar(bar);
                }
            }
            instr.deps.add_wt_bar_mask(wait_mask);

            if instr.needs_yield() {
                instr.deps.set_yield(true);
            }

            if instr.has_fixed_latency(sm.sm()) {
                continue;
            }

            let (rd_dep, wr_dep) = deps.get_instr_deps(bi, ip);
            if deps.dep_is_waited_after(rd_dep, bi, ip) {
                let rd_bar = bars.try_find_free_bar().unwrap_or_else(|| {
                    let bar = bars.free_some_bar();
                    instr.deps.add_wt_bar(bar);
                    bar
                });
                bars.set_bar_dep(rd_bar, rd_dep);
                instr.deps.set_rd_bar(rd_bar);
            }
            if deps.dep_is_waited_after(wr_dep, bi, ip) {
                let wr_bar = bars.try_find_free_bar().unwrap_or_else(|| {
                    let bar = bars.free_some_bar();
                    instr.deps.add_wt_bar(bar);
                    bar
                });
                bars.set_bar_dep(wr_bar, wr_dep);
                instr.deps.set_wr_bar(wr_bar);
            }
        }
    }
}

fn exec_latency(sm: u8, op: &Op) -> u32 {
    if sm >= 70 {
        match op {
            Op::Bar(_) | Op::MemBar(_) => {
                if sm >= 80 {
                    6
                } else {
                    5
                }
            }
            Op::CCtl(_op) => {
                // CCTL.C needs 8, CCTL.I needs 11
                11
            }
            // Op::DepBar(_) => 4,
            _ => 1, // TODO: co-issue
        }
    } else {
        match op {
            Op::CCtl(_)
            | Op::MemBar(_)
            | Op::Bra(_)
            | Op::SSy(_)
            | Op::Sync(_)
            | Op::Brk(_)
            | Op::PBk(_)
            | Op::Cont(_)
            | Op::PCnt(_)
            | Op::Exit(_)
            | Op::Bar(_)
            | Op::Kill(_)
            | Op::OutFinal(_) => 13,
            _ => 1,
        }
    }
}

fn instr_latency(op: &Op, dst_idx: usize) -> u32 {
    let file = match op.dsts_as_slice()[dst_idx] {
        Dst::None => return 0,
        Dst::SSA(vec) => vec.file().unwrap(),
        Dst::Reg(reg) => reg.file(),
    };

    // This is BS and we know it
    match file {
        RegFile::GPR => 6,
        RegFile::UGPR => 12,
        RegFile::Pred => 13,
        RegFile::UPred => 11,
        RegFile::Bar => 0, // Barriers have a HW scoreboard
        RegFile::Carry => 6,
        RegFile::Mem => panic!("Not a register"),
    }
}

/// Read-after-write latency
fn raw_latency(
    _sm: u8,
    write: &Op,
    dst_idx: usize,
    _read: &Op,
    _src_idx: usize,
) -> u32 {
    instr_latency(write, dst_idx)
}

/// Write-after-read latency
fn war_latency(
    _sm: u8,
    _read: &Op,
    _src_idx: usize,
    _write: &Op,
    _dst_idx: usize,
) -> u32 {
    // We assume the source gets read in the first 4 cycles. We don't know how
    // quickly the write will happen. This is all a guess.
    4
}

/// Write-after-write latency
fn waw_latency(
    _sm: u8,
    a: &Op,
    a_dst_idx: usize,
    _b: &Op,
    _b_dst_idx: usize,
) -> u32 {
    // We know our latencies are wrong so assume the write could happen
    // anywhere between 0 and instr_latency(a) cycles
    instr_latency(a, a_dst_idx)
}

/// Predicate read-after-write latency
fn paw_latency(_sm: u8, _write: &Op, _dst_idx: usize) -> u32 {
    13
}

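/// Computes per-instruction delays.  Each block is walked bottom-up,
/// tracking cycles from the end of the block; every instruction's minimum
/// start cycle is derived from the latencies of the later instructions
/// that consume or overwrite its registers, and the resulting gap is
/// recorded via `instr.deps.set_delay()`.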
fn calc_delays(f: &mut Function, sm: &dyn ShaderModel) {
    for b in f.blocks.iter_mut().rev() {
        let mut cycle = 0_u32;

        // Vector mapping IP to start cycle
        let mut instr_cycle = Vec::new();
        instr_cycle.resize(b.instrs.len(), 0_u32);

        // Maps each register to a RegUse of (ip, src/dst index). Predicate
        // reads are represented by src_idx = usize::MAX.
        let mut uses: RegTracker<RegUse<(usize, usize)>> =
            RegTracker::new_with(&|| RegUse::None);

        // Map from barrier to last waited cycle
        let mut bars = [0_u32; 6];

        for ip in (0..b.instrs.len()).rev() {
            let instr = &b.instrs[ip];
            let mut min_start = cycle + exec_latency(sm.sm(), &instr.op);
            if let Some(bar) = instr.deps.rd_bar() {
                min_start = max(min_start, bars[usize::from(bar)] + 2);
            }
            if let Some(bar) = instr.deps.wr_bar() {
                min_start = max(min_start, bars[usize::from(bar)] + 2);
            }
            uses.for_each_instr_dst_mut(instr, |i, u| match u {
                RegUse::None => {
                    // We don't know how it will be used but it may be used
                    // in the next block so we have to assume at least the
                    // maximum destination latency from the end of the block.
                    let s = instr_latency(&instr.op, i);
                    min_start = max(min_start, s);
                }
                RegUse::Write((w_ip, w_dst_idx)) => {
                    let s = instr_cycle[*w_ip]
                        + waw_latency(
                            sm.sm(),
                            &instr.op,
                            i,
                            &b.instrs[*w_ip].op,
                            *w_dst_idx,
                        );
                    min_start = max(min_start, s);
                }
                RegUse::Reads(reads) => {
                    for (r_ip, r_src_idx) in reads {
                        let c = instr_cycle[*r_ip];
                        let s = if *r_src_idx == usize::MAX {
                            c + paw_latency(sm.sm(), &instr.op, i)
                        } else {
                            c + raw_latency(
                                sm.sm(),
                                &instr.op,
                                i,
                                &b.instrs[*r_ip].op,
                                *r_src_idx,
                            )
                        };
                        min_start = max(min_start, s);
                    }
                }
            });
            uses.for_each_instr_src_mut(instr, |i, u| match u {
                RegUse::None => (),
                RegUse::Write((w_ip, w_dst_idx)) => {
                    let s = instr_cycle[*w_ip]
                        + war_latency(
                            sm.sm(),
                            &instr.op,
                            i,
                            &b.instrs[*w_ip].op,
                            *w_dst_idx,
                        );
                    min_start = max(min_start, s);
                }
                RegUse::Reads(_) => (),
            });

            let instr = &mut b.instrs[ip];

            let delay = min_start - cycle;
            let delay = delay
                .clamp(MIN_INSTR_DELAY.into(), MAX_INSTR_DELAY.into())
                .try_into()
                .unwrap();
            instr.deps.set_delay(delay);

            instr_cycle[ip] = min_start;
            uses.for_each_instr_pred_mut(instr, |c| {
                c.add_read((ip, usize::MAX));
            });
            uses.for_each_instr_src_mut(instr, |i, c| {
                c.add_read((ip, i));
            });
            uses.for_each_instr_dst_mut(instr, |i, c| {
                c.set_write((ip, i));
            });
            for (bar, c) in bars.iter_mut().enumerate() {
                if instr.deps.wt_bar_mask & (1 << bar) != 0 {
                    *c = min_start;
                }
            }

            cycle = min_start;
        }
    }

    // It's unclear exactly why, but the blob inserts a Nop with a delay of 2
    // after every instruction which has an exec latency. Perhaps it has
    // something to do with .yld? In any case, saving the extra 2 cycles
    // isn't worth the chance of weird bugs, so we do the same.
    f.map_instrs(|mut instr, _| {
        if matches!(instr.op, Op::SrcBar(_)) {
            instr.op = Op::Nop(OpNop { label: None });
            MappedInstrs::One(instr)
        } else if exec_latency(sm.sm(), &instr.op) > 1 {
            let mut nop = Instr::new_boxed(OpNop { label: None });
            nop.deps.set_delay(2);
            MappedInstrs::Many(vec![instr, nop])
        } else {
            MappedInstrs::One(instr)
        }
    });
}

impl Shader<'_> {
    pub fn assign_deps_serial(&mut self) {
        for f in &mut self.functions {
            for b in f.blocks.iter_mut().rev() {
                let mut wt = 0_u8;
                for instr in &mut b.instrs {
                    if matches!(&instr.op, Op::Bar(_))
                        || matches!(&instr.op, Op::BClear(_))
                        || matches!(&instr.op, Op::BSSy(_))
                        || matches!(&instr.op, Op::BSync(_))
                    {
                        instr.deps.set_yield(true);
                    } else if instr.is_branch() {
                        instr.deps.add_wt_bar_mask(0x3f);
                    } else {
                        instr.deps.add_wt_bar_mask(wt);
                        if instr.dsts().len() > 0 {
                            instr.deps.set_wr_bar(0);
                            wt |= 1 << 0;
                        }
                        if !instr.pred.pred_ref.is_none()
                            || instr.srcs().len() > 0
                        {
                            instr.deps.set_rd_bar(1);
                            wt |= 1 << 1;
                        }
                    }
                }
            }
        }
    }

    pub fn calc_instr_deps(&mut self) {
        if DEBUG.serial() {
            self.assign_deps_serial();
        } else {
            for f in &mut self.functions {
                assign_barriers(f, self.sm);
                calc_delays(f, self.sm);
            }
        }
    }
}

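// Illustrative sanity checks for the file-local helpers above: a minimal
// sketch added for documentation purposes, not part of the upstream test
// suite.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn new_array_with_calls_f_once_per_element() {
        let a: [u32; 4] = new_array_with(&|| 7);
        assert_eq!(a, [7; 4]);
    }

    #[test]
    fn reg_use_returns_the_uses_a_new_use_must_wait_on() {
        let mut u: RegUse<usize> = RegUse::None;

        // The first read conflicts with nothing.
        assert!(u.add_read(1).deps().is_empty());

        // A write supersedes the reads and hands them back to the caller.
        assert_eq!(u.set_write(2).deps(), &[1]);

        // A read after a write replaces the write and returns it.
        assert_eq!(u.add_read(3).deps(), &[2]);
    }
}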