//===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetLoweringARM32 class, which implements the
/// TargetLowering interface for the ARM 32-bit architecture.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H
#define SUBZERO_SRC_ICETARGETLOWERINGARM32_H

#include "IceAssemblerARM32.h"
#include "IceDefs.h"
#include "IceInstARM32.h"
#include "IceRegistersARM32.h"
#include "IceTargetLowering.h"

#include <utility>

namespace Ice {
namespace ARM32 {

// Class encapsulating ARM cpu features / instruction set.
class TargetARM32Features {
  TargetARM32Features() = delete;
  TargetARM32Features(const TargetARM32Features &) = delete;
  TargetARM32Features &operator=(const TargetARM32Features &) = delete;

public:
  explicit TargetARM32Features(const ClFlags &Flags);

  enum ARM32InstructionSet {
    Begin,
    // Neon is the PNaCl baseline instruction set.
    Neon = Begin,
    HWDivArm, // HW divide in ARM mode (not just Thumb mode).
    End
  };

  bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }

private:
  ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
};

// The target lowering logic for ARM32.
class TargetARM32 : public TargetLowering {
  TargetARM32() = delete;
  TargetARM32(const TargetARM32 &) = delete;
  TargetARM32 &operator=(const TargetARM32 &) = delete;

public:
  static void staticInit(GlobalContext *Ctx);

  static bool shouldBePooled(const Constant *C) {
    if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(C)) {
      return !Utils::isPositiveZero(ConstDouble->getValue());
    }
    if (llvm::isa<ConstantFloat>(C))
      return true;
    return false;
  }

  static ::Ice::Type getPointerType() { return ::Ice::IceType_i32; }
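
  // An illustrative sketch of shouldBePooled's behavior (not an exhaustive
  // contract): a double constant is pooled unless it is +0.0 (which can be
  // materialized directly), every float constant is pooled, and everything
  // else is not:
  //
  //   shouldBePooled(Ctx->getConstantDouble(0.0));  // false (+0.0)
  //   shouldBePooled(Ctx->getConstantDouble(1.5));  // true
  //   shouldBePooled(Ctx->getConstantFloat(0.0f));  // true
  //   shouldBePooled(Ctx->getConstantInt32(42));    // false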
  // TODO(jvoung): return a unique_ptr.
  static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
    return makeUnique<TargetARM32>(Func);
  }

  std::unique_ptr<::Ice::Assembler> createAssembler() const override {
    return makeUnique<ARM32::AssemblerARM32>();
  }

  void initNodeForLowering(CfgNode *Node) override {
    Computations.forgetProducers();
    Computations.recordProducers(Node);
    Computations.dump(Func);
  }

  void translateOm1() override;
  void translateO2() override;
  bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;

  SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }
  Variable *getPhysicalRegister(RegNumT RegNum,
                                Type Ty = IceType_void) override;
  const char *getRegName(RegNumT RegNum, Type Ty) const override;
  SmallBitVector getRegisterSet(RegSetMask Include,
                                RegSetMask Exclude) const override;
  const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    switch (RC) {
    default:
      assert(RC < RC_Target);
      return TypeToRegisterSet[RC];
    case (RegClass)RegARM32::RCARM32_QtoS:
      return TypeToRegisterSet[RC];
    }
  }
  const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);
    return TypeToRegisterSetUnfiltered[RC];
  }
  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
    return RegisterAliases[Reg];
  }

  bool hasFramePointer() const override { return UsesFramePointer; }
  void setHasFramePointer() override { UsesFramePointer = true; }
  RegNumT getStackReg() const override { return RegARM32::Reg_sp; }
  RegNumT getFrameReg() const override { return RegARM32::Reg_fp; }
  RegNumT getFrameOrStackReg() const override {
    return UsesFramePointer ? getFrameReg() : getStackReg();
  }
  RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; }

  size_t typeWidthInBytesOnStack(Type Ty) const override {
    // Round up to the next multiple of 4 bytes. In particular, i1, i8, and
    // i16 are rounded up to 4 bytes; e.g., (1 + 3) & ~3 == 4, while
    // (8 + 3) & ~3 == 8.
    return (typeWidthInBytes(Ty) + 3) & ~3;
  }
  uint32_t getStackAlignment() const override;
  void reserveFixedAllocaArea(size_t Size, size_t Align) override {
    FixedAllocaSizeBytes = Size;
    assert(llvm::isPowerOf2_32(Align));
    FixedAllocaAlignBytes = Align;
    PrologEmitsFixedAllocas = true;
  }
  int32_t getFrameFixedAllocaOffset() const override {
    return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
  }
  uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }

  bool shouldSplitToVariable64On32(Type Ty) const override {
    return Ty == IceType_i64;
  }

  // TODO(ascull): what size is best for ARM?
  SizeT getMinJumpTableSize() const override { return 3; }
  void emitJumpTable(const Cfg *Func,
                     const InstJumpTable *JumpTable) const override;

  void emitVariable(const Variable *Var) const override;

  void emit(const ConstantUndef *C) const final;
  void emit(const ConstantInteger32 *C) const final;
  void emit(const ConstantInteger64 *C) const final;
  void emit(const ConstantFloat *C) const final;
  void emit(const ConstantDouble *C) const final;
  void emit(const ConstantRelocatable *C) const final;

  void lowerArguments() override;
  void addProlog(CfgNode *Node) override;
  void addEpilog(CfgNode *Node) override;

  Operand *loOperand(Operand *Operand);
  Operand *hiOperand(Operand *Operand);
  void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                              size_t BasicFrameOffset, size_t *InArgsSizeBytes);

  bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
    return CPUFeatures.hasFeature(I);
  }

  enum OperandLegalization {
    Legal_Reg = 1 << 0,  /// physical register, not stack location
    Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
                         /// immediates, shifted registers, or modified fp imm.
    Legal_Mem = 1 << 2,  /// includes [r0, r1 lsl #2] as well as [sp, #12]
    Legal_Rematerializable = 1 << 3,
    Legal_Default = ~Legal_Rematerializable,
  };

  using LegalMask = uint32_t;
  Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());
  Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
                    RegNumT RegNum = RegNumT());
  Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());

  OperandARM32ShAmtImm *shAmtImm(uint32_t ShAmtImm) const {
    assert(ShAmtImm < 32);
    return OperandARM32ShAmtImm::create(
        Func,
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F)));
  }

  GlobalContext *getCtx() const { return Ctx; }

protected:
  explicit TargetARM32(Cfg *Func);

  void postLower() override;

  /// SafeBoolChain is returned by the i1 lowering helpers to indicate whether
  /// the lowered chain is known to produce a proper boolean (i.e., 0 or 1)
  /// value.
  enum SafeBoolChain {
    SBC_No,
    SBC_Yes,
  };

  void lowerAlloca(const InstAlloca *Instr) override;
  SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Instr);
  void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
                            Operand *Src0, Operand *Src1);
  void lowerArithmetic(const InstArithmetic *Instr) override;
  void lowerAssign(const InstAssign *Instr) override;
  void lowerBr(const InstBr *Instr) override;
  void lowerCall(const InstCall *Instr) override;
  void lowerCast(const InstCast *Instr) override;
  void lowerExtractElement(const InstExtractElement *Instr) override;

  /// CondWhenTrue is a helper type returned by every method in the lowering
  /// that emits code to set the condition codes.
  class CondWhenTrue {
  public:
    explicit CondWhenTrue(CondARM32::Cond T0,
                          CondARM32::Cond T1 = CondARM32::kNone)
        : WhenTrue0(T0), WhenTrue1(T1) {
      assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone);
      assert(T1 != T0 || T0 == CondARM32::kNone);
    }
    CondARM32::Cond WhenTrue0;
    CondARM32::Cond WhenTrue1;
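
    // For example (an illustrative sketch): CondWhenTrue(CondARM32::EQ)
    // means "the condition holds when EQ is set". Some fcmp conditions need
    // two codes; e.g., an ordered not-equal may be expressed as
    // CondWhenTrue(CondARM32::MI, CondARM32::GT), i.e., true when either MI
    // or GT holds after the compare.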
    /// invert returns a new object with WhenTrue0 and WhenTrue1 inverted.
    CondWhenTrue invert() const {
      switch (WhenTrue0) {
      default:
        if (WhenTrue1 == CondARM32::kNone)
          return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0));
        return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0),
                            InstARM32::getOppositeCondition(WhenTrue1));
      case CondARM32::AL:
        return CondWhenTrue(CondARM32::kNone);
      case CondARM32::kNone:
        return CondWhenTrue(CondARM32::AL);
      }
    }
  };

  CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
  void lowerFcmp(const InstFcmp *Instr) override;
  CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
                                         Operand *Src0, Operand *Src1);
  CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerIcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                             Operand *Src1);
  CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
  void lowerIcmp(const InstIcmp *Instr) override;
  /// Emits the basic sequence for load-linked/store-exclusive loops:
  ///
  /// retry:
  ///        ldrex tmp, [Addr]
  ///        StoreValue = Operation(tmp)
  ///        strexCond success, StoreValue, [Addr]
  ///        cmpCond success, #0
  ///        bne retry
  ///
  /// Operation must return the value that is to be strex'd to Addr; it must
  /// not change the flags if Cond is not AL, and it must not emit any
  /// instructions that could end up writing to memory. Operation also needs
  /// to handle fake-defing for i64 values.
  void
  lowerLoadLinkedStoreExclusive(Type Ty, Operand *Addr,
                                std::function<Variable *(Variable *)> Operation,
                                CondARM32::Cond Cond = CondARM32::AL);
  void lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                           Operand *Val);
  void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                      Operand *Val);
  void lowerBreakpoint(const InstBreakpoint *Instr) override;
  void lowerIntrinsic(const InstIntrinsic *Instr) override;
  void lowerInsertElement(const InstInsertElement *Instr) override;
  void lowerLoad(const InstLoad *Instr) override;
  void lowerPhi(const InstPhi *Instr) override;
  void lowerRet(const InstRet *Instr) override;
  void lowerSelect(const InstSelect *Instr) override;
  void lowerShuffleVector(const InstShuffleVector *Instr) override;
  void lowerStore(const InstStore *Instr) override;
  void lowerSwitch(const InstSwitch *Instr) override;
  void lowerUnreachable(const InstUnreachable *Instr) override;
  void prelowerPhis() override;
  uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
  void genTargetHelperCallFor(Inst *Instr) override;
  void doAddressOptLoad() override;
  void doAddressOptStore() override;

  OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);

  Variable64On32 *makeI64RegPair();
  Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
  static Type stackSlotType();
  Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
  void alignRegisterPow2(Variable *Reg, uint32_t Align,
                         RegNumT TmpRegNum = RegNumT());

  /// Returns a vector in a register with the given constant entries.
  Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());
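
  // A hypothetical sketch of how a caller such as lowerAtomicRMW (declared
  // above) might use lowerLoadLinkedStoreExclusive to emit an atomic add;
  // the lambda is the Operation that produces the value to be strex'd (Addr
  // and Val are illustrative names, not actual code from the .cpp):
  //
  //   lowerLoadLinkedStoreExclusive(
  //       IceType_i32, Addr, [this, Val](Variable *Tmp) {
  //         Variable *Result = makeReg(IceType_i32);
  //         _add(Result, Tmp, Val);
  //         return Result;
  //       });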
  // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP;
  // trap; .LSKIP: <continuation>. If no check is needed, nothing is
  // inserted.
  void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
  using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
                                         CondARM32::Cond);
  using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,
                                         CondARM32::Cond);
  void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
                    ExtInstr ExtFunc, DivInstr DivFunc, bool IsRemainder);

  void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);

  // The following are helpers that insert lowered ARM32 instructions with
  // minimal syntactic overhead, so that the lowering code can look as close
  // to assembly as practical.
  void _add(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred);
  }
  void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred, SetFlags);
    // The FakeUse keeps the flag-setting instruction live even when Dest
    // itself is otherwise unused (the status flags are not modeled as a
    // separate register).
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _adc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Adc>(Dest, Src0, Src1, Pred);
  }
  void _and(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32And>(Dest, Src0, Src1, Pred);
  }
  void _asr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Asr>(Dest, Src0, Src1, Pred);
  }
  void _bic(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Bic>(Dest, Src0, Src1, Pred);
  }
  void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
           CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(TargetTrue, TargetFalse, Condition);
  }
  void _br(CfgNode *Target) { Context.insert<InstARM32Br>(Target); }
  void _br(CfgNode *Target, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Target, Condition);
  }
  void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Label, Condition);
  }
  void _cmn(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmn>(Src0, Src1, Pred);
  }
  void _cmp(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmp>(Src0, Src1, Pred);
  }
  void _clz(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Clz>(Dest, Src0, Pred);
  }
  void _dmb() { Context.insert<InstARM32Dmb>(); }
  void _eor(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Eor>(Dest, Src0, Src1, Pred);
  }
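
  // To illustrate the emitter-helper style described above: a lowering
  // typically reads almost like assembly. A hypothetical sketch (not actual
  // lowering code; Dest, Src0, and Src1 are assumed to be legalized):
  //
  //   Variable *T = makeReg(IceType_i32);
  //   _mov(T, Src0);
  //   _adds(T, T, Src1);
  //   _mov(Dest, T);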
  /// _ldr, for all your memory to Variable data moves. It handles all types
  /// (integer, floating point, and vectors.) Addr needs to be valid for
  /// Dest's type (e.g., no immediates for vector loads, and no index
  /// registers for fp loads.)
  void _ldr(Variable *Dest, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
  }
  InstARM32Ldrex *_ldrex(Variable *Dest, OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    auto *Ldrex = Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
    if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
      Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
      Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
    }
    return Ldrex;
  }
  void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred);
  }
  void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsr>(Dest, Src0, Src1, Pred);
  }
  void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mla>(Dest, Src0, Src1, Acc, Pred);
  }
  void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mls>(Dest, Src0, Src1, Acc, Pred);
  }
  /// _mov, for all your Variable to Variable data movement needs. It handles
  /// all types (integer, floating point, and vectors), as well as moves
  /// between Core and VFP registers. This is not a panacea: you must obey
  /// the (weird, confusing, non-uniform) rules for data moves in ARM.
  void _mov(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    // _mov used to be unique in the sense that it would create a temporary
    // automagically if Dest was nullptr. It won't do that anymore, so we
    // keep an assert around just in case there is some untested code path
    // where Dest is nullptr.
    assert(Dest != nullptr);
    assert(!llvm::isa<OperandARM32Mem>(Src0));
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);

    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _mov_redefined(Variable *Dest, Operand *Src0,
                      CondARM32::Cond Pred = CondARM32::AL) {
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
    Instr->setDestRedefined();
    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _nop() { Context.insert<InstARM32Nop>(); }

  // Generates a vmov instruction to extract the given index from a vector
  // register.
  void _extractelement(Variable *Dest, Variable *Src0, uint32_t Index,
                       CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Extract>(Dest, Src0, Index, Pred);
  }

  // Generates a vmov instruction to insert a value into the given index of a
  // vector register.
  void _insertelement(Variable *Dest, Variable *Src0, uint32_t Index,
                      CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Insert>(Dest, Src0, Index, Pred);
  }

  // --------------------------------------------------------------------------
  // Begin bool folding machinery.
  //
  // There are three types of boolean lowerings handled by this target:
  //
  // 1) Boolean expressions leading to a boolean Variable definition
  // ---------------------------------------------------------------
  //
  // Whenever an i1 Variable is live out (i.e., its live range extends beyond
  // the defining basic block) we do not fold the operation. We instead
  // materialize (i.e., compute) the variable normally, so that it can be
  // used when needed. We also materialize i1 values that are not single use
  // to avoid code duplication. These expressions are not short circuited.
  //
  // 2) Boolean expressions leading to a select
  // ------------------------------------------
  //
  // These include boolean chains leading to a select instruction, as well as
  // i1 Sexts. These boolean expressions are lowered to:
  //
  //   mov T, <false value>
  //   CC <- eval(Boolean Expression)
  //   movCC T, <true value>
  //
  // For Sexts, <false value> is 0, and <true value> is -1.
  //
  // 3) Boolean expressions leading to a br i1
  // -----------------------------------------
  //
  // These are the boolean chains leading to a branch. These chains are
  // short-circuited, i.e.:
  //
  //   A = or i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   jne %T
  //   tst C
  //   jne %T
  //   j %F
  //
  // and
  //
  //   A = and i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   jeq %F
  //   tst C
  //   jeq %F
  //   j %T
  //
  // Arbitrarily long chains are short-circuited, e.g.,
  //
  //   A = or i1 B, C
  //   D = and i1 A, E
  //   F = and i1 G, H
  //   I = or i1 D, F
  //   br i1 I, label %True, label %False
  //
  // becomes
  //
  // Label[A]:
  //   tst B, 1
  //   bne Label[D]
  //   tst C, 1
  //   beq Label[I]
  // Label[D]:
  //   tst E, 1
  //   bne %True
  // Label[I]:
  //   tst G, 1
  //   beq %False
  //   tst H, 1
  //   beq %False (bne %True)

  /// lowerInt1 materializes Boolean to a Variable.
  SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean);

  /// lowerInt1ForSelect generates the following instruction sequence:
  ///
  ///   mov T, FalseValue
  ///   CC <- eval(Boolean)
  ///   movCC T, TrueValue
  ///   mov Dest, T
  ///
  /// It is used for lowering select i1, as well as i1 Sext.
  void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue,
                          Operand *FalseValue);

  /// LowerInt1BranchTarget is used by lowerInt1ForBranch. It wraps a CfgNode,
  /// or an InstARM32Label (but never both) so that, during br i1 lowering, we
  /// can create auxiliary labels for short circuiting the condition
  /// evaluation.
  class LowerInt1BranchTarget {
  public:
    explicit LowerInt1BranchTarget(CfgNode *const Target)
        : NodeTarget(Target) {}
    explicit LowerInt1BranchTarget(InstARM32Label *const Target)
        : LabelTarget(Target) {}

    /// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
    /// is the exact copy of this if Label is nullptr; otherwise, the returned
    /// object will wrap Label instead.
    LowerInt1BranchTarget
    createForLabelOrDuplicate(InstARM32Label *Label) const {
      if (Label != nullptr)
        return LowerInt1BranchTarget(Label);
      if (NodeTarget)
        return LowerInt1BranchTarget(NodeTarget);
      return LowerInt1BranchTarget(LabelTarget);
    }

    CfgNode *const NodeTarget = nullptr;
    InstARM32Label *const LabelTarget = nullptr;
  };

  /// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch
  /// for determining which types of arithmetic are allowed to be short
  /// circuited. This is useful for lowering
  ///
  ///   t1 = and i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
  ///
  /// to
  ///
  ///   tst A, 1
  ///   beq %False
  ///   tst B, 1
  ///   beq %False
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// Without this information, short circuiting would only be able to short
  /// circuit a single high-level instruction. For example:
  ///
  ///   t1 = or i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
  ///
  /// cannot be lowered to
  ///
  ///   tst A, 1
  ///   bne %True
  ///   tst B, 1
  ///   bne %True
  ///   tst C, 1
  ///   beq %False
  ///   b %True
  ///
  /// It needs to be lowered to
  ///
  ///   tst A, 1
  ///   bne Aux
  ///   tst B, 1
  ///   beq %False
  /// Aux:
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// TODO(jpp): evaluate if this kind of short circuiting hurts performance
  /// (it might.)
  enum LowerInt1AllowShortCircuit {
    SC_And = 1,
    SC_Or = 2,
    SC_All = SC_And | SC_Or,
  };

  /// ShortCircuitCondAndLabel wraps the condition codes that should be used
  /// after a lowerInt1ForBranch returns to branch to the
  /// TrueTarget/FalseTarget. If ShortCircuitTarget is not nullptr, then the
  /// called lowerInt1ForBranch created an internal (i.e., short-circuit)
  /// label used for short circuiting.
  class ShortCircuitCondAndLabel {
  public:
    explicit ShortCircuitCondAndLabel(CondWhenTrue &&C,
                                      InstARM32Label *L = nullptr)
        : Cond(std::move(C)), ShortCircuitTarget(L) {}
    const CondWhenTrue Cond;
    InstARM32Label *const ShortCircuitTarget;

    CondWhenTrue assertNoLabelAndReturnCond() const {
      assert(ShortCircuitTarget == nullptr);
      return Cond;
    }
  };

  /// lowerInt1ForBranch expands Boolean, and returns the condition codes that
  /// are to be used for branching to the branch's TrueTarget. It may return a
  /// label that the expansion of Boolean used to short circuit the chain's
  /// evaluation.
  ShortCircuitCondAndLabel
  lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
                     const LowerInt1BranchTarget &TargetFalse,
                     uint32_t ShortCircuitable);
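
  // A hypothetical sketch of the expected call pattern during br i1 lowering
  // (illustrative only; not the actual lowerBr implementation):
  //
  //   LowerInt1BranchTarget TargetT(Instr->getTargetTrue());
  //   LowerInt1BranchTarget TargetF(Instr->getTargetFalse());
  //   ShortCircuitCondAndLabel CondAndLabel =
  //       lowerInt1ForBranch(Boolean, TargetT, TargetF, SC_All);
  //   // ... then insert CondAndLabel.ShortCircuitTarget (if any) and branch
  //   // on CondAndLabel.Cond, e.g., via _br_short_circuit(TargetT,
  //   // CondAndLabel.Cond) below.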
  // _br is a convenience wrapper that emits br instructions to Target.
  void _br(const LowerInt1BranchTarget &BrTarget,
           CondARM32::Cond Cond = CondARM32::AL) {
    assert((BrTarget.NodeTarget == nullptr) !=
           (BrTarget.LabelTarget == nullptr));
    if (BrTarget.NodeTarget != nullptr)
      _br(BrTarget.NodeTarget, Cond);
    else
      _br(BrTarget.LabelTarget, Cond);
  }

  // _br_short_circuit is used when lowering InstArithmetic::And and
  // InstArithmetic::Or and a short circuit branch is needed.
  void _br_short_circuit(const LowerInt1BranchTarget &Target,
                         const CondWhenTrue &Cond) {
    if (Cond.WhenTrue1 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue1);
    }
    if (Cond.WhenTrue0 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue0);
    }
  }
  // End of bool folding machinery
  // --------------------------------------------------------------------------

  /// The Operand can only be a 16-bit immediate or a ConstantRelocatable
  /// (with an upper16 relocation).
  void _movt(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movt>(Dest, Src0, Pred);
  }
  void _movw(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movw>(Dest, Src0, Pred);
  }
  void _mul(Variable *Dest, Variable *Src0, Variable *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mul>(Dest, Src0, Src1, Pred);
  }
  void _mvn(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mvn>(Dest, Src0, Pred);
  }
  void _orr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred);
  }
  void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _push(const VarList &Sources) { Context.insert<InstARM32Push>(Sources); }
  void _pop(const VarList &Dests) {
    Context.insert<InstARM32Pop>(Dests);
    // Mark dests as modified.
    for (Variable *Dest : Dests)
      Context.insert<InstFakeDef>(Dest);
  }
  void _rbit(Variable *Dest, Variable *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rbit>(Dest, Src0, Pred);
  }
  void _rev(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rev>(Dest, Src0, Pred);
  }
  void _ret(Variable *LR, Variable *Src0 = nullptr) {
    Context.insert<InstARM32Ret>(LR, Src0);
  }
  void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred);
  }
  void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred);
  }
  void _sbc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred);
  }
  void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sdiv>(Dest, Src0, Src1, Pred);
  }
  /// _str, for all your Variable to memory transfers. Addr has the same
  /// restrictions that it does in _ldr.
  void _str(Variable *Value, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Str>(Value, Addr, Pred);
  }
  InstARM32Strex *_strex(Variable *Dest, Variable *Value, OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
      Context.insert<InstFakeUse>(Value64->getLo());
      Context.insert<InstFakeUse>(Value64->getHi());
    }
    return Context.insert<InstARM32Strex>(Dest, Value, Addr, Pred);
  }
  void _sub(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred);
  }
  void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sxt>(Dest, Src0, Pred);
  }
  void _tst(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Tst>(Src0, Src1, Pred);
  }
  void _trap() { Context.insert<InstARM32Trap>(); }
  void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Udiv>(Dest, Src0, Src1, Pred);
  }
  void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
              Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
    // umull requires DestLo and DestHi to be assigned to different GPRs. The
    // following lines create overlapping live ranges for both variables. If
    // either one of them is live, then they are both going to be live, and
    // thus assigned to different registers; if they are both dead, then DCE
    // will kick in and delete the following three instructions.
    Context.insert<InstFakeDef>(DestHi);
    Context.insert<InstARM32Umull>(DestLo, DestHi, Src0, Src1, Pred);
    Context.insert<InstFakeDef>(DestHi, DestLo)->setDestRedefined();
    Context.insert<InstFakeUse>(DestHi);
  }
  void _uxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Uxt>(Dest, Src0, Pred);
  }
  void _vabs(Variable *Dest, Variable *Src,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vabs>(Dest, Src, Pred);
  }
  void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vadd>(Dest, Src0, Src1);
  }
  void _vand(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vand>(Dest, Src0, Src1);
  }
  InstARM32Vbsl *_vbsl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vbsl>(Dest, Src0, Src1);
  }
  void _vceq(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vceq>(Dest, Src0, Src1);
  }
  InstARM32Vcge *_vcge(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcge>(Dest, Src0, Src1);
  }
  InstARM32Vcgt *_vcgt(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcgt>(Dest, Src0, Src1);
  }
  void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
  }
  void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vdiv>(Dest, Src0, Src1);
  }
  void _vcmp(Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, Src1, Pred);
  }
  void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, FpZero, Pred);
  }
  void _vdup(Variable *Dest, Variable *Src, int Idx) {
    Context.insert<InstARM32Vdup>(Dest, Src, Idx);
  }
  void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Veor>(Dest, Src0, Src1);
  }
  void _vldr1d(Variable *Dest, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vldr1d>(Dest, Addr, Pred);
  }
  void _vldr1q(Variable *Dest, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vldr1q>(Dest, Addr, Pred);
  }
  void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vmrs>(Pred);
  }
  void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
  }
  void _vmlap(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmlap>(Dest, Src0, Src1);
  }
  void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
  }
  void _vmovl(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovl>(Dest, Src0, Src1);
  }
  void _vmovh(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovh>(Dest, Src0, Src1);
  }
  void _vmovhl(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovhl>(Dest, Src0, Src1);
  }
  void _vmovlh(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovlh>(Dest, Src0, Src1);
  }
  void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
  }
  void _vmulh(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vmulh>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  void _vmvn(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vmvn>(Dest, Src0, CondARM32::AL);
  }
  void _vneg(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL)
        ->setSignType(InstARM32::FS_Signed);
  }
  void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
  }
  void _vqadd(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vqadd>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  void _vqmovn2(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned,
                bool Saturating) {
    Context.insert<InstARM32Vqmovn2>(Dest, Src0, Src1)
        ->setSignType(Saturating ? (Unsigned ? InstARM32::FS_Unsigned
                                             : InstARM32::FS_Signed)
                                 : InstARM32::FS_None);
  }
  void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vqsub>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
  }
  void _vshl(Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) {
    Context.insert<InstARM32Vshl>(Dest, Src0, Src1)
        ->setSignType(InstARM32::FS_Unsigned);
  }
  InstARM32Vshr *_vshr(Variable *Dest, Variable *Src0,
                       ConstantInteger32 *Src1) {
    return Context.insert<InstARM32Vshr>(Dest, Src0, Src1);
  }
  void _vsqrt(Variable *Dest, Variable *Src,
              CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
  }
  void _vstr1d(Variable *Value, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 32);
  }
  void _vstr1q(Variable *Value, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 64);
  }
  void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
  }
  void _vzip(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vzip>(Dest, Src0, Src1);
  }

  // Iterates over the CFG and determines the maximum outgoing stack argument
  // size in bytes. This information is later used during addProlog() to
  // pre-allocate the outargs area.
  // TODO(jpp): This could live in the Parser, if we provided a
  // Target-specific method that the Parser could call.
  void findMaxStackOutArgsSize();

  /// Returns true if the given Offset can be represented in a Load/Store Mem
  /// Operand. (The encodable range depends on the access type; e.g., in A32,
  /// a 32-bit ldr/str immediate offset is limited to +/-4095, ldrh to
  /// +/-255, and vldr to +/-1020.)
  bool isLegalMemOffset(Type Ty, int32_t Offset) const;

  void postLowerLegalization();

  /// Manages the Gotoff relocations created during the function lowering. A
  /// single Gotoff relocation is created for each global variable used by
  /// the function being lowered.
  /// @{
  // TODO(jpp): if the same global G is used in different functions, then this
  // method will emit one G(gotoff) relocation per function.
  GlobalString createGotoffRelocation(const ConstantRelocatable *CR);
  CfgUnorderedSet<GlobalString> KnownGotoffs;
  /// @}

  class PostLoweringLegalizer {
    PostLoweringLegalizer() = delete;
    PostLoweringLegalizer(const PostLoweringLegalizer &) = delete;
    PostLoweringLegalizer &operator=(const PostLoweringLegalizer &) = delete;

  public:
    explicit PostLoweringLegalizer(TargetARM32 *Target)
        : Target(Target), StackOrFrameReg(Target->getPhysicalRegister(
                              Target->getFrameOrStackReg())) {}

    void resetTempBaseIfClobberedBy(const Inst *Instr);

    // Asserts that the TempBase register held by this legalizer (if any) is
    // assigned to IP.
    void assertNoTempOrAssignedToIP() const {
      assert(TempBaseReg == nullptr ||
             TempBaseReg->getRegNum() == Target->getReservedTmpReg());
    }

    // Legalizes Mem. If Mem.Base is a rematerializable variable, Mem.Offset
    // is fixed up.
    OperandARM32Mem *legalizeMemOperand(OperandARM32Mem *Mem,
                                        bool AllowOffsets = true);

    /// Legalizes Mov if its Source (or Destination) is a spilled Variable, or
    /// if its Source is a Rematerializable variable (this form is used in
    /// lieu of lea, which is not available in ARM.)
    ///
    /// Moves to memory become store instructions, and moves from memory,
    /// loads.
    void legalizeMov(InstARM32Mov *Mov);

  private:
    /// Creates a new Base register centered around [Base, +/- Offset].
    Variable *newBaseRegister(Variable *Base, int32_t Offset,
                              RegNumT ScratchRegNum);

    /// Creates a new, legal OperandARM32Mem for accessing Base + Offset.
    /// The returned mem operand is a legal operand for accessing memory that
    /// is of type Ty.
    ///
    /// If [Base, #Offset] is encodable, then the method returns a Mem
    /// operand expressing it. Otherwise,
    ///
    /// if [TempBaseReg, #Offset-TempBaseOffset] is a valid memory operand,
    /// the method will return that. Otherwise,
    ///
    /// a new base register ip=Base+Offset is created, and the method returns
    /// a memory operand expressing [ip, #0].
    OperandARM32Mem *createMemOperand(Type Ty, Variable *Base, int32_t Offset,
                                      bool AllowOffsets = true);
    TargetARM32 *const Target;
    Variable *const StackOrFrameReg;
    Variable *TempBaseReg = nullptr;
    int32_t TempBaseOffset = 0;
  };

  TargetARM32Features CPUFeatures;
  bool UsesFramePointer = false;
  bool NeedsStackAlignment = false;
  bool MaybeLeafFunc = true;
  size_t SpillAreaSizeBytes = 0;
  size_t FixedAllocaSizeBytes = 0;
  size_t FixedAllocaAlignBytes = 0;
  bool PrologEmitsFixedAllocas = false;
  uint32_t MaxOutArgsSizeBytes = 0;
  // TODO(jpp): std::array instead of array.
  static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
  static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
  static SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
  SmallBitVector RegsUsed;
  VarList PhysicalRegisters[IceType_NUM];
  VarList PreservedGPRs;
  VarList PreservedSRegs;

  /// Helper class that understands the Calling Convention and register
  /// assignments. The first few integer type parameters can use r0-r3,
  /// regardless of their position relative to the floating-point/vector
  /// arguments in the argument list. Floating-point and vector arguments
  /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
  /// see the ARM Architecture Procedure Call Standard (AAPCS).
  ///
  /// Technically, arguments that start in registers but extend beyond the
  /// available registers can be split between the registers and the stack.
  /// However, this is typically only needed for passing GPR structs by
  /// value, and PNaCl transforms expand this out.
  ///
  /// At (public) function entry, the stack must be 8-byte aligned.
  class CallingConv {
    CallingConv(const CallingConv &) = delete;
    CallingConv &operator=(const CallingConv &) = delete;

  public:
    CallingConv();
    ~CallingConv() = default;
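
    // For illustration, a sketch of the AAPCS-style assignment this class
    // models, for a hypothetical signature f(i32 a, float b, i64 c,
    // double d):
    //
    //   a -> r0
    //   b -> s0
    //   c -> (r2, r3)   // i64 takes an even/odd GPR pair.
    //   d -> d1         // d0 overlaps s0/s1; s1 stays available for
    //                   // back-filling a later float argument.
    //
    // This is a sketch of the convention, not a normative statement of it.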
    /// argInGPR returns true if there is a GPR available for the requested
    /// type, and false otherwise. If it returns true, Reg is set to the
    /// appropriate register number. Note that, when Ty == IceType_i64, Reg
    /// will be an I64 register pair.
    bool argInGPR(Type Ty, RegNumT *Reg);

    /// argInVFP is to floating-point/vector types what argInGPR is for
    /// integer types.
    bool argInVFP(Type Ty, RegNumT *Reg);

  private:
    void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs);
    SmallBitVector GPRegsUsed;
    CfgVector<RegNumT> GPRArgs;
    CfgVector<RegNumT> I64Args;

    void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs);
    SmallBitVector VFPRegsUsed;
    CfgVector<RegNumT> FP32Args;
    CfgVector<RegNumT> FP64Args;
    CfgVector<RegNumT> Vec128Args;
  };

private:
  ENABLE_MAKE_UNIQUE;

  OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
                                      Operand *Base);

  void postambleCtpop64(const InstCall *Instr);
  void preambleDivRem(const InstCall *Instr);
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPreamble;
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPostamble;

  class ComputationTracker {
  public:
    ComputationTracker() = default;
    ~ComputationTracker() = default;

    void forgetProducers() { KnownComputations.clear(); }
    void recordProducers(CfgNode *Node);

    const Inst *getProducerOf(const Operand *Opnd) const {
      auto *Var = llvm::dyn_cast<Variable>(Opnd);
      if (Var == nullptr) {
        return nullptr;
      }

      auto Iter = KnownComputations.find(Var->getIndex());
      if (Iter == KnownComputations.end()) {
        return nullptr;
      }

      return Iter->second.Instr;
    }

    void dump(const Cfg *Func) const {
      if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
        return;
      OstreamLocker L(Func->getContext());
      Ostream &Str = Func->getContext()->getStrDump();
      Str << "foldable producer:\n";
      for (const auto &Computation : KnownComputations) {
        Str << " ";
        Computation.second.Instr->dump(Func);
        Str << "\n";
      }
      Str << "\n";
    }

  private:
    class ComputationEntry {
    public:
      ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
      Inst *const Instr;
      // Boolean folding is disabled for variables whose live range is
      // multi-block. We conservatively initialize IsLiveOut to true, and set
      // it to false once we find the end of the live range for the variable
      // defined by this instruction. If liveness analysis is not performed
      // (e.g., in Om1 mode) IsLiveOut will never be set to false, and
      // folding will be disabled.
      bool IsLiveOut = true;
      int32_t NumUses = 0;
      Type ComputationType;
    };

    // ComputationMap maps a Variable number to a payload identifying which
    // instruction defined it.
    using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>;
    ComputationMap KnownComputations;
  };

  ComputationTracker Computations;
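
  // A hypothetical sketch of how a lowering might consult the tracker to
  // fold an i1 producer into its consumer (illustrative, not actual code):
  //
  //   if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
  //     // e.g., lower the producing icmp/fcmp straight into condition
  //     // codes instead of first materializing Boolean in a register.
  //   }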
  // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be
  // invoked without specifying a physical register. This is needed for
  // creating unbound temporaries during Ice -> ARM lowering, but before
  // register allocation. It is a safeguard ensuring that no unbound
  // temporaries are created during the legalization post-passes.
  bool AllowTemporaryWithNoReg = true;
  // ForbidTemporaryWithoutReg is a RAII class that manages
  // AllowTemporaryWithNoReg.
  class ForbidTemporaryWithoutReg {
    ForbidTemporaryWithoutReg() = delete;
    ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete;
    ForbidTemporaryWithoutReg &
    operator=(const ForbidTemporaryWithoutReg &) = delete;

  public:
    explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
      Target->AllowTemporaryWithNoReg = false;
    }
    ~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }

  private:
    TargetARM32 *const Target;
  };
};

class TargetDataARM32 final : public TargetDataLowering {
  TargetDataARM32() = delete;
  TargetDataARM32(const TargetDataARM32 &) = delete;
  TargetDataARM32 &operator=(const TargetDataARM32 &) = delete;

public:
  static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx));
  }

  void lowerGlobals(const VariableDeclarationList &Vars,
                    const std::string &SectionSuffix) override;
  void lowerConstants() override;
  void lowerJumpTables() override;

protected:
  explicit TargetDataARM32(GlobalContext *Ctx);

private:
  ~TargetDataARM32() override = default;
};

class TargetHeaderARM32 final : public TargetHeaderLowering {
  TargetHeaderARM32() = delete;
  TargetHeaderARM32(const TargetHeaderARM32 &) = delete;
  TargetHeaderARM32 &operator=(const TargetHeaderARM32 &) = delete;

public:
  static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetHeaderLowering>(new TargetHeaderARM32(Ctx));
  }

  void lower() override;

protected:
  explicit TargetHeaderARM32(GlobalContext *Ctx);

private:
  ~TargetHeaderARM32() = default;

  TargetARM32Features CPUFeatures;
};

} // end of namespace ARM32
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H