// Copyright © 2022 Collabora, Ltd.
// SPDX-License-Identifier: MIT

use crate::ir::*;

pub trait Builder {
    fn push_instr(&mut self, instr: Box<Instr>) -> &mut Instr;

    fn sm(&self) -> u8;

    fn push_op(&mut self, op: impl Into<Op>) -> &mut Instr {
        self.push_instr(Instr::new_boxed(op))
    }

    fn predicate(&mut self, pred: Pred) -> PredicatedBuilder<'_, Self>
    where
        Self: Sized,
    {
        PredicatedBuilder {
            b: self,
            pred: pred,
        }
    }

    fn lop2_to(&mut self, dst: Dst, op: LogicOp2, mut x: Src, mut y: Src) {
        let is_predicate = match dst {
            Dst::None => panic!("No LOP destination"),
            Dst::SSA(ssa) => ssa.is_predicate(),
            Dst::Reg(reg) => reg.is_predicate(),
        };
        assert!(x.is_predicate() == is_predicate);
        assert!(y.is_predicate() == is_predicate);

        if self.sm() >= 70 {
            let mut op = op.to_lut();
            if x.src_mod.is_bnot() {
                op = LogicOp3::new_lut(&|x, y, _| op.eval(!x, y, 0));
                x.src_mod = SrcMod::None;
            }
            if y.src_mod.is_bnot() {
                op = LogicOp3::new_lut(&|x, y, _| op.eval(x, !y, 0));
                y.src_mod = SrcMod::None;
            }
            if is_predicate {
                self.push_op(OpPLop3 {
                    dsts: [dst, Dst::None],
                    srcs: [x, y, true.into()],
                    ops: [op, LogicOp3::new_const(false)],
                });
            } else {
                self.push_op(OpLop3 {
                    dst: dst,
                    srcs: [x, y, 0.into()],
                    op: op,
                });
            }
        } else {
            if is_predicate {
                let mut x = x;
                let cmp_op = match op {
                    LogicOp2::And => PredSetOp::And,
                    LogicOp2::Or => PredSetOp::Or,
                    LogicOp2::Xor => PredSetOp::Xor,
                    LogicOp2::PassB => {
                        // Pass through B by AND with PT
                        x = true.into();
                        PredSetOp::And
                    }
                };
                self.push_op(OpPSetP {
                    dsts: [dst, Dst::None],
                    ops: [cmp_op, PredSetOp::And],
                    srcs: [x, y, true.into()],
                });
            } else {
                self.push_op(OpLop2 {
                    dst: dst,
                    srcs: [x, y],
                    op: op,
                });
            }
        }
    }

    fn prmt_to(&mut self, dst: Dst, x: Src, y: Src, sel: [u8; 4]) {
        if sel == [0, 1, 2, 3] {
            self.copy_to(dst, x);
        } else if sel == [4, 5, 6, 7] {
            self.copy_to(dst, y);
        } else {
            let mut sel_u32 = 0;
            for i in 0..4 {
                assert!(sel[i] < 16);
                sel_u32 |= u32::from(sel[i]) << (i * 4);
            }

            self.push_op(OpPrmt {
                dst: dst,
                srcs: [x, y],
                sel: sel_u32.into(),
                mode: PrmtMode::Index,
            });
        }
    }

    fn copy_to(&mut self, dst: Dst, src: Src) {
        self.push_op(OpCopy { dst: dst, src: src });
    }

    fn swap(&mut self, x: RegRef, y: RegRef) {
        assert!(x.file() == y.file());
        self.push_op(OpSwap {
            dsts: [x.into(), y.into()],
            srcs: [y.into(), x.into()],
        });
    }
}

pub trait SSABuilder: Builder {
    fn alloc_ssa(&mut self, file: RegFile, comps: u8) -> SSARef;

    fn shl(&mut self, x: Src, shift: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        if self.sm() >= 70 {
            self.push_op(OpShf {
                dst: dst.into(),
                low: x,
                high: 0.into(),
                shift: shift,
                right: false,
                wrap: true,
                data_type: IntType::I32,
                dst_high: false,
            });
        } else {
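            // On Maxwell and earlier, shf.l doesn't work without .high (see
            // the comment in shl64() below), so use the dedicated 32-bit shl
            // instruction instead.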
            self.push_op(OpShl {
                dst: dst.into(),
                src: x,
                shift: shift,
                wrap: true,
            });
        }
        dst
    }

    fn shl64(&mut self, x: Src, shift: Src) -> SSARef {
        let x = x.as_ssa().unwrap();
        debug_assert!(shift.src_mod.is_none());

        let dst = self.alloc_ssa(RegFile::GPR, 2);
        if self.sm() >= 70 {
            self.push_op(OpShf {
                dst: dst[0].into(),
                low: x[0].into(),
                high: 0.into(),
                shift,
                right: false,
                wrap: true,
                data_type: IntType::U64,
                dst_high: false,
            });
        } else {
            // On Maxwell and earlier, shf.l doesn't work without .high so we
            // have to use a regular 32-bit shift here.  The 32-bit shift
            // doesn't have the NIR wrap semantics so we need to wrap
            // manually.
            let shift = if let SrcRef::Imm32(imm) = shift.src_ref {
                (imm & 0x3f).into()
            } else {
                self.lop2(LogicOp2::And, shift, 0x3f.into()).into()
            };
            self.push_op(OpShf {
                dst: dst[0].into(),
                low: 0.into(),
                high: x[0].into(),
                shift,
                right: false,
                wrap: false,
                data_type: IntType::U32,
                dst_high: true,
            });
        }
        self.push_op(OpShf {
            dst: dst[1].into(),
            low: x[0].into(),
            high: x[1].into(),
            shift,
            right: false,
            wrap: true,
            data_type: IntType::U64,
            dst_high: true,
        });
        dst
    }

    fn shr(&mut self, x: Src, shift: Src, signed: bool) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        if self.sm() >= 70 {
            self.push_op(OpShf {
                dst: dst.into(),
                low: 0.into(),
                high: x,
                shift: shift,
                right: true,
                wrap: true,
                data_type: if signed { IntType::I32 } else { IntType::U32 },
                dst_high: true,
            });
        } else {
            self.push_op(OpShr {
                dst: dst.into(),
                src: x,
                shift: shift,
                wrap: true,
                signed,
            });
        }
        dst
    }

    fn shr64(&mut self, x: Src, shift: Src, signed: bool) -> SSARef {
        let x = x.as_ssa().unwrap();
        debug_assert!(shift.src_mod.is_none());

        let dst = self.alloc_ssa(RegFile::GPR, 2);
        self.push_op(OpShf {
            dst: dst[0].into(),
            low: x[0].into(),
            high: x[1].into(),
            shift,
            right: true,
            wrap: true,
            data_type: if signed { IntType::I64 } else { IntType::U64 },
            dst_high: false,
        });
        self.push_op(OpShf {
            dst: dst[1].into(),
            low: 0.into(),
            high: x[1].into(),
            shift,
            right: true,
            wrap: true,
            data_type: if signed { IntType::I64 } else { IntType::U64 },
            dst_high: true,
        });
        dst
    }

    fn fadd(&mut self, x: Src, y: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        self.push_op(OpFAdd {
            dst: dst.into(),
            srcs: [x, y],
            saturate: false,
            rnd_mode: FRndMode::NearestEven,
            ftz: false,
        });
        dst
    }

    fn fmul(&mut self, x: Src, y: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        self.push_op(OpFMul {
            dst: dst.into(),
            srcs: [x, y],
            saturate: false,
            rnd_mode: FRndMode::NearestEven,
            ftz: false,
            dnz: false,
        });
        dst
    }

    fn fset(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        self.push_op(OpFSet {
            dst: dst.into(),
            cmp_op: cmp_op,
            srcs: [x, y],
            ftz: false,
        });
        dst
    }

    fn fsetp(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::Pred, 1);
        self.push_op(OpFSetP {
            dst: dst.into(),
            set_op: PredSetOp::And,
            cmp_op: cmp_op,
            srcs: [x, y],
            accum: SrcRef::True.into(),
            ftz: false,
        });
        dst
    }

    fn hadd2(&mut self, x: Src, y: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        self.push_op(OpHAdd2 {
            dst: dst.into(),
            srcs: [x, y],
            saturate: false,
            ftz: false,
            f32: false,
        });
        dst
    }

    fn hset2(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        self.push_op(OpHSet2 {
            dst: dst.into(),
            set_op: PredSetOp::And,
            cmp_op: cmp_op,
            srcs: [x, y],
            ftz: false,
            accum: SrcRef::True.into(),
        });
        dst
    }

    fn dsetp(&mut self, cmp_op: FloatCmpOp, x: Src, y: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::Pred, 1);
        self.push_op(OpDSetP {
            dst: dst.into(),
            set_op: PredSetOp::And,
            cmp_op: cmp_op,
            srcs: [x, y],
            accum: SrcRef::True.into(),
        });
        dst
    }

    fn iabs(&mut self, i: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        if self.sm() >= 70 {
            self.push_op(OpIAbs {
                dst: dst.into(),
                src: i,
            });
        } else {
            // Pre-Volta parts don't get a dedicated iabs here; use i2i with
            // the abs modifier instead.
            self.push_op(OpI2I {
                dst: dst.into(),
                src: i,
                src_type: IntType::I32,
                dst_type: IntType::I32,
                saturate: false,
                abs: true,
                neg: false,
            });
        }
        dst
    }

    fn iadd(&mut self, x: Src, y: Src, z: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        if self.sm() >= 70 {
            self.push_op(OpIAdd3 {
                dst: dst.into(),
                srcs: [x, y, z],
                overflow: [Dst::None; 2],
            });
        } else {
            assert!(z.is_zero());
            self.push_op(OpIAdd2 {
                dst: dst.into(),
                srcs: [x, y],
                carry_out: Dst::None,
            });
        }
        dst
    }

    fn iadd64(&mut self, x: Src, y: Src, z: Src) -> SSARef {
        fn split_iadd64_src(src: Src) -> [Src; 2] {
            match src.src_ref {
                SrcRef::Zero => [0.into(), 0.into()],
                SrcRef::SSA(ssa) => {
                    if src.src_mod.is_ineg() {
                        // 64-bit negation is ~x + 1: fold the +1 into the low
                        // word with ineg and just bitwise-not the high word;
                        // the carry chain below handles the rest.
                        [Src::from(ssa[0]).ineg(), Src::from(ssa[1]).bnot()]
                    } else {
                        [Src::from(ssa[0]), Src::from(ssa[1])]
                    }
                }
                _ => panic!("Unsupported iadd64 source"),
            }
        }

        let is_3src = !x.is_zero() && !y.is_zero() && !z.is_zero();

        let x = split_iadd64_src(x);
        let y = split_iadd64_src(y);
        let dst = self.alloc_ssa(RegFile::GPR, 2);
        if self.sm() >= 70 {
            let carry1 = self.alloc_ssa(RegFile::Pred, 1);
            let (carry2_dst, carry2_src) = if is_3src {
                let carry2 = self.alloc_ssa(RegFile::Pred, 1);
                (carry2.into(), carry2.into())
            } else {
                // If one of the sources is known to be zero, we only need one
                // carry predicate.
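                //
                // carry2_dst is left as Dst::None and carry2_src as a
                // constant false, so the OpIAdd3X below effectively only
                // consumes carry1.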
                (Dst::None, false.into())
            };

            let z = split_iadd64_src(z);
            self.push_op(OpIAdd3 {
                dst: dst[0].into(),
                overflow: [carry1.into(), carry2_dst],
                srcs: [x[0], y[0], z[0]],
            });
            self.push_op(OpIAdd3X {
                dst: dst[1].into(),
                overflow: [Dst::None, Dst::None],
                srcs: [x[1], y[1], z[1]],
                carry: [carry1.into(), carry2_src],
            });
        } else {
            assert!(z.is_zero());
            let carry = self.alloc_ssa(RegFile::Carry, 1);
            self.push_op(OpIAdd2 {
                dst: dst[0].into(),
                srcs: [x[0], y[0]],
                carry_out: carry.into(),
            });
            self.push_op(OpIAdd2X {
                dst: dst[1].into(),
                srcs: [x[1], y[1]],
                carry_out: Dst::None,
                carry_in: carry.into(),
            });
        }
        dst
    }

    fn imnmx(&mut self, tp: IntCmpType, x: Src, y: Src, min: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        self.push_op(OpIMnMx {
            dst: dst.into(),
            cmp_type: tp,
            srcs: [x, y],
            min: min,
        });
        dst
    }

    fn imul(&mut self, x: Src, y: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        if self.sm() >= 70 {
            self.push_op(OpIMad {
                dst: dst.into(),
                srcs: [x, y, 0.into()],
                signed: false,
            });
        } else {
            self.push_op(OpIMul {
                dst: dst[0].into(),
                srcs: [x, y],
                signed: [false; 2],
                high: false,
            });
        }
        dst
    }

    fn imul_2x32_64(&mut self, x: Src, y: Src, signed: bool) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 2);
        if self.sm() >= 70 {
            self.push_op(OpIMad64 {
                dst: dst.into(),
                srcs: [x, y, 0.into()],
                signed,
            });
        } else {
            self.push_op(OpIMul {
                dst: dst[0].into(),
                srcs: [x, y],
                signed: [signed; 2],
                high: false,
            });
            self.push_op(OpIMul {
                dst: dst[1].into(),
                srcs: [x, y],
                signed: [signed; 2],
                high: true,
            });
        }
        dst
    }

    fn ineg(&mut self, i: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        if self.sm() >= 70 {
            self.push_op(OpIAdd3 {
                dst: dst.into(),
                overflow: [Dst::None; 2],
                srcs: [0.into(), i.ineg(), 0.into()],
            });
        } else {
            self.push_op(OpIAdd2 {
                dst: dst.into(),
                srcs: [0.into(), i.ineg()],
                carry_out: Dst::None,
            });
        }
        dst
    }

    fn ineg64(&mut self, x: Src) -> SSARef {
        self.iadd64(0.into(), x.ineg(), 0.into())
    }

    fn isetp(
        &mut self,
        cmp_type: IntCmpType,
        cmp_op: IntCmpOp,
        x: Src,
        y: Src,
    ) -> SSARef {
        let dst = self.alloc_ssa(RegFile::Pred, 1);
        self.push_op(OpISetP {
            dst: dst.into(),
            set_op: PredSetOp::And,
            cmp_op: cmp_op,
            cmp_type: cmp_type,
            ex: false,
            srcs: [x, y],
            accum: true.into(),
            low_cmp: true.into(),
        });
        dst
    }

    fn isetp64(
        &mut self,
        cmp_type: IntCmpType,
        cmp_op: IntCmpOp,
        x: Src,
        y: Src,
    ) -> SSARef {
        let x = x.as_ssa().unwrap();
        let y = y.as_ssa().unwrap();

        // Low bits are always an unsigned comparison
        let low = self.isetp(IntCmpType::U32, cmp_op, x[0].into(), y[0].into());

        let dst = self.alloc_ssa(RegFile::Pred, 1);
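        // Combine the low and high comparisons.  As a rough sketch of what
        // the code below computes:
        //
        //    x == y  <=>  (x_hi == y_hi) && (x_lo == y_lo)
        //    x != y  <=>  (x_hi != y_hi) || (x_lo != y_lo)
        //    x <  y  <=>  (x_hi < y_hi) || ((x_hi == y_hi) && (x_lo <u y_lo))
        //
        // and similarly for the other orderings, where the high half uses the
        // requested signedness and the low half is always unsigned.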
        match cmp_op {
            IntCmpOp::Eq | IntCmpOp::Ne => {
                self.push_op(OpISetP {
                    dst: dst.into(),
                    set_op: match cmp_op {
                        IntCmpOp::Eq => PredSetOp::And,
                        IntCmpOp::Ne => PredSetOp::Or,
                        _ => panic!("Not an integer equality"),
                    },
                    cmp_op: cmp_op,
                    cmp_type: IntCmpType::U32,
                    ex: false,
                    srcs: [x[1].into(), y[1].into()],
                    accum: low.into(),
                    low_cmp: true.into(),
                });
            }
            IntCmpOp::Ge | IntCmpOp::Gt | IntCmpOp::Le | IntCmpOp::Lt => {
                if self.sm() >= 70 {
                    self.push_op(OpISetP {
                        dst: dst.into(),
                        set_op: PredSetOp::And,
                        cmp_op,
                        cmp_type,
                        ex: true,
                        srcs: [x[1].into(), y[1].into()],
                        accum: true.into(),
                        low_cmp: low.into(),
                    });
                } else {
                    // On Maxwell, iset.ex doesn't do what we want so we need
                    // to do it with 3 comparisons.  Fortunately, we can chain
                    // them together and don't need the extra logic that the
                    // NIR lowering would emit.
                    let low_and_high_eq = self.alloc_ssa(RegFile::Pred, 1);
                    self.push_op(OpISetP {
                        dst: low_and_high_eq.into(),
                        set_op: PredSetOp::And,
                        cmp_op: IntCmpOp::Eq,
                        cmp_type: IntCmpType::U32,
                        ex: false,
                        srcs: [x[1].into(), y[1].into()],
                        accum: low.into(),
                        low_cmp: true.into(),
                    });
                    self.push_op(OpISetP {
                        dst: dst.into(),
                        set_op: PredSetOp::Or,
                        // We always want a strict inequality for the high
                        // part so it's false when the two are equal and safe
                        // to OR with the low part.
                        cmp_op: match cmp_op {
                            IntCmpOp::Lt | IntCmpOp::Le => IntCmpOp::Lt,
                            IntCmpOp::Gt | IntCmpOp::Ge => IntCmpOp::Gt,
                            _ => panic!("Not an integer inequality"),
                        },
                        cmp_type,
                        ex: false,
                        srcs: [x[1].into(), y[1].into()],
                        accum: low_and_high_eq.into(),
                        low_cmp: true.into(),
                    });
                }
            }
        }
        dst
    }

    fn lop2(&mut self, op: LogicOp2, x: Src, y: Src) -> SSARef {
        let dst = if x.is_predicate() {
            self.alloc_ssa(RegFile::Pred, 1)
        } else {
            self.alloc_ssa(RegFile::GPR, 1)
        };
        self.lop2_to(dst.into(), op, x, y);
        dst
    }

    fn brev(&mut self, x: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        if self.sm() >= 70 {
            self.push_op(OpBRev {
                dst: dst.into(),
                src: x,
            });
        } else {
            // No BREV in Maxwell.  Emulate it with a reversed bitfield
            // extract of the full 32-bit range.
            self.push_op(OpBfe {
                dst: dst.into(),
                base: x,
                signed: false,
                range: Src::new_imm_u32(0x2000),
                reverse: true,
            });
        }
        dst
    }

    fn mufu(&mut self, op: MuFuOp, src: Src) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        self.push_op(OpMuFu {
            dst: dst.into(),
            op: op,
            src: src,
        });
        dst
    }

    fn fsin(&mut self, src: Src) -> SSARef {
        // The MUFU sin/cos input needs pre-processing: on Volta+ we scale the
        // angle by 1/(2*pi) ourselves, while older hardware has a dedicated
        // RRO op for this.
        let tmp = if self.sm() >= 70 {
            let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);
            self.fmul(src, frac_1_2pi.into())
        } else {
            let tmp = self.alloc_ssa(RegFile::GPR, 1);
            self.push_op(OpRro {
                dst: tmp.into(),
                op: RroOp::SinCos,
                src,
            });
            tmp
        };
        self.mufu(MuFuOp::Sin, tmp.into())
    }

    fn fcos(&mut self, src: Src) -> SSARef {
        let tmp = if self.sm() >= 70 {
            let frac_1_2pi = 1.0 / (2.0 * std::f32::consts::PI);
            self.fmul(src, frac_1_2pi.into())
        } else {
            let tmp = self.alloc_ssa(RegFile::GPR, 1);
            self.push_op(OpRro {
                dst: tmp.into(),
                op: RroOp::SinCos,
                src,
            });
            tmp
        };
        self.mufu(MuFuOp::Cos, tmp.into())
    }

    fn fexp2(&mut self, src: Src) -> SSARef {
        let tmp = if self.sm() >= 70 {
            src
        } else {
            // Pre-Volta, the MUFU.EX2 input needs a range-reduction (RRO)
            // pass first.
            let tmp = self.alloc_ssa(RegFile::GPR, 1);
            self.push_op(OpRro {
                dst: tmp.into(),
                op: RroOp::Exp2,
                src,
            });
            tmp.into()
        };
        self.mufu(MuFuOp::Exp2, tmp)
    }

    fn prmt(&mut self, x: Src, y: Src, sel: [u8; 4]) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        self.prmt_to(dst.into(), x, y, sel);
        dst
    }

    fn prmt4(&mut self, src: [Src; 4], sel: [u8; 4]) -> SSARef {
        let max_sel = *sel.iter().max().unwrap();
        if max_sel < 8 {
            self.prmt(src[0], src[1], sel)
        } else if max_sel < 12 {
            let mut sel_a = [0_u8; 4];
            let mut sel_b = [0_u8; 4];
            for i in 0..4_u8 {
                if sel[usize::from(i)] < 8 {
                    sel_a[usize::from(i)] = sel[usize::from(i)];
                    sel_b[usize::from(i)] = i;
                } else {
                    sel_b[usize::from(i)] = (sel[usize::from(i)] - 8) + 4;
                }
            }
            let a = self.prmt(src[0], src[1], sel_a);
            self.prmt(a.into(), src[2], sel_b)
        } else if max_sel < 16 {
            let mut sel_a = [0_u8; 4];
            let mut sel_b = [0_u8; 4];
            let mut sel_c = [0_u8; 4];
            for i in 0..4_u8 {
                if sel[usize::from(i)] < 8 {
                    sel_a[usize::from(i)] = sel[usize::from(i)];
                    sel_c[usize::from(i)] = i;
                } else {
                    sel_b[usize::from(i)] = sel[usize::from(i)] - 8;
                    sel_c[usize::from(i)] = 4 + i;
                }
            }
            let a = self.prmt(src[0], src[1], sel_a);
            let b = self.prmt(src[2], src[3], sel_b);
            self.prmt(a.into(), b.into(), sel_c)
        } else {
            panic!("Invalid permute value: {max_sel}");
        }
    }

    fn sel(&mut self, cond: Src, x: Src, y: Src) -> SSARef {
        assert!(cond.src_ref.is_predicate());
        assert!(x.is_predicate() == y.is_predicate());
        if x.is_predicate() {
            let dst = self.alloc_ssa(RegFile::Pred, 1);
            if self.sm() >= 70 {
                self.push_op(OpPLop3 {
                    dsts: [dst.into(), Dst::None],
                    srcs: [cond, x, y],
                    ops: [
                        LogicOp3::new_lut(&|c, x, y| (c & x) | (!c & y)),
                        LogicOp3::new_const(false),
                    ],
                });
            } else {
                // No PLOP3 before Volta; build the select out of two PSETPs:
                // tmp = cond & x, then dst = (!cond & y) | tmp.
                let tmp = self.alloc_ssa(RegFile::Pred, 1);
                self.push_op(OpPSetP {
                    dsts: [tmp.into(), Dst::None],
                    ops: [PredSetOp::And, PredSetOp::And],
                    srcs: [cond, x, true.into()],
                });
                self.push_op(OpPSetP {
                    dsts: [dst.into(), Dst::None],
                    ops: [PredSetOp::And, PredSetOp::Or],
                    srcs: [cond.bnot(), y, tmp.into()],
                });
            }
            dst
        } else {
            let dst = self.alloc_ssa(RegFile::GPR, 1);
            self.push_op(OpSel {
                dst: dst.into(),
                cond: cond,
                srcs: [x, y],
            });
            dst
        }
    }

    fn undef(&mut self) -> SSARef {
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        self.push_op(OpUndef { dst: dst.into() });
        dst
    }

    fn copy(&mut self, src: Src) -> SSARef {
        let dst = if src.is_predicate() {
            self.alloc_ssa(RegFile::Pred, 1)
        } else {
            self.alloc_ssa(RegFile::GPR, 1)
        };
        self.copy_to(dst.into(), src);
        dst
    }

    fn bmov_to_bar(&mut self, src: Src) -> SSARef {
        assert!(src.src_ref.as_ssa().unwrap().file() == Some(RegFile::GPR));
        let dst = self.alloc_ssa(RegFile::Bar, 1);
        self.push_op(OpBMov {
            dst: dst.into(),
            src: src,
            clear: false,
        });
        dst
    }

    fn bmov_to_gpr(&mut self, src: Src) -> SSARef {
        assert!(src.src_ref.as_ssa().unwrap().file() == Some(RegFile::Bar));
        let dst = self.alloc_ssa(RegFile::GPR, 1);
        self.push_op(OpBMov {
            dst: dst.into(),
            src: src,
            clear: false,
        });
        dst
    }
}

pub struct InstrBuilder<'a> {
    instrs: MappedInstrs,
    sm: &'a dyn ShaderModel,
}

impl<'a> InstrBuilder<'a> {
    pub fn new(sm: &'a dyn ShaderModel) -> Self {
        Self {
            instrs: MappedInstrs::None,
            sm,
        }
    }
}

impl InstrBuilder<'_> {
    pub fn as_vec(self) -> Vec<Box<Instr>> {
        match self.instrs {
            MappedInstrs::None => Vec::new(),
            MappedInstrs::One(i) => vec![i],
            MappedInstrs::Many(v) => v,
        }
    }

    pub fn as_mapped_instrs(self) -> MappedInstrs {
        self.instrs
    }
}

impl Builder for InstrBuilder<'_> {
    fn push_instr(&mut self, instr: Box<Instr>) -> &mut Instr {
        self.instrs.push(instr);
        self.instrs.last_mut().unwrap().as_mut()
    }

    fn sm(&self) -> u8 {
        self.sm.sm()
    }
}

pub struct SSAInstrBuilder<'a> {
    b: InstrBuilder<'a>,
    alloc: &'a mut SSAValueAllocator,
}

impl<'a> SSAInstrBuilder<'a> {
    pub fn new(
        sm: &'a dyn ShaderModel,
        alloc: &'a mut SSAValueAllocator,
    ) -> Self {
        Self {
            b: InstrBuilder::new(sm),
            alloc: alloc,
        }
    }
}

impl SSAInstrBuilder<'_> {
    pub fn as_vec(self) -> Vec<Box<Instr>> {
        self.b.as_vec()
    }

    #[allow(dead_code)]
    pub fn as_mapped_instrs(self) -> MappedInstrs {
        self.b.as_mapped_instrs()
    }
}

impl<'a> Builder for SSAInstrBuilder<'a> {
    fn push_instr(&mut self, instr: Box<Instr>) -> &mut Instr {
        self.b.push_instr(instr)
    }

    fn sm(&self) -> u8 {
        self.b.sm()
    }
}

impl<'a> SSABuilder for SSAInstrBuilder<'a> {
    fn alloc_ssa(&mut self, file: RegFile, comps: u8) -> SSARef {
        self.alloc.alloc_vec(file, comps)
    }
}

pub struct PredicatedBuilder<'a, T: Builder> {
    b: &'a mut T,
    pred: Pred,
}

impl<'a, T: Builder> Builder for PredicatedBuilder<'a, T> {
    fn push_instr(&mut self, instr: Box<Instr>) -> &mut Instr {
        let mut instr = instr;
        assert!(instr.pred.is_true());
        instr.pred = self.pred;
        self.b.push_instr(instr)
    }

    fn sm(&self) -> u8 {
        self.b.sm()
    }
}

impl<'a, T: SSABuilder> SSABuilder for PredicatedBuilder<'a, T> {
    fn alloc_ssa(&mut self, file: RegFile, comps: u8) -> SSARef {
        self.b.alloc_ssa(file, comps)
    }
}

pub struct UniformBuilder<'a, T: Builder> {
    b: &'a mut T,
    uniform: bool,
}

impl<'a, T: Builder> UniformBuilder<'a, T> {
    pub fn new(b: &'a mut T, uniform: bool) -> Self {
        Self { b, uniform }
    }
}

impl<'a, T: Builder> Builder for UniformBuilder<'a, T> {
    fn push_instr(&mut self, instr: Box<Instr>) -> &mut Instr {
        self.b.push_instr(instr)
    }

    fn sm(&self) -> u8 {
        self.b.sm()
    }
}

impl<'a, T: SSABuilder> SSABuilder for UniformBuilder<'a, T> {
    fn alloc_ssa(&mut self, file: RegFile, comps: u8) -> SSARef {
        let file = if self.uniform {
            file.to_uniform().unwrap()
        } else {
            file
        };
        self.b.alloc_ssa(file, comps)
    }
}
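
// A minimal usage sketch (not part of the API above): assuming a lowering
// pass holds a `&dyn ShaderModel` and a mutable `SSAValueAllocator`, emitting
// an f32 add followed by a copy looks roughly like:
//
//    let mut b = SSAInstrBuilder::new(sm, &mut alloc);
//    let sum = b.fadd(x.into(), y.into());
//    let out = b.copy(sum.into());
//    let instrs = b.as_vec();
//
// Predication can be layered on top with `b.predicate(pred).push_op(..)`, and
// wrapping a builder in `UniformBuilder::new(&mut b, true)` makes its
// `alloc_ssa()` calls allocate from the uniform register files.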