//===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetLoweringARM32 class, which implements the
/// TargetLowering interface for the ARM 32-bit architecture.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H
#define SUBZERO_SRC_ICETARGETLOWERINGARM32_H

#include "IceAssemblerARM32.h"
#include "IceDefs.h"
#include "IceInstARM32.h"
#include "IceRegistersARM32.h"
#include "IceTargetLowering.h"

#include <utility>

namespace Ice {
namespace ARM32 {

// Class encapsulating ARM cpu features / instruction set.
class TargetARM32Features {
  TargetARM32Features() = delete;
  TargetARM32Features(const TargetARM32Features &) = delete;
  TargetARM32Features &operator=(const TargetARM32Features &) = delete;

public:
  explicit TargetARM32Features(const ClFlags &Flags);

  enum ARM32InstructionSet {
    Begin,
    // Neon is the PNaCl baseline instruction set.
    Neon = Begin,
    HWDivArm, // HW divide in ARM mode (not just Thumb mode).
    End
  };

  bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }

private:
  ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
};
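
// Illustrative use of the feature query (a sketch; Flags stands for any
// ClFlags instance, and the guarded code is hypothetical):
//
//   TargetARM32Features Features(Flags);
//   if (Features.hasFeature(TargetARM32Features::HWDivArm)) {
//     // sdiv/udiv may be emitted in ARM mode.
//   }
//
// This works because the ARM32InstructionSet enumerators are ordered:
// hasFeature(I) is simply I <= InstructionSet.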

// The target lowering logic for ARM32.
class TargetARM32 : public TargetLowering {
  TargetARM32() = delete;
  TargetARM32(const TargetARM32 &) = delete;
  TargetARM32 &operator=(const TargetARM32 &) = delete;

public:
  static void staticInit(GlobalContext *Ctx);

  static bool shouldBePooled(const Constant *C) {
    if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(C)) {
      return !Utils::isPositiveZero(ConstDouble->getValue());
    }
    if (llvm::isa<ConstantFloat>(C))
      return true;
    return false;
  }

  static ::Ice::Type getPointerType() { return ::Ice::IceType_i32; }

  // TODO(jvoung): return a unique_ptr.
  static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
    return makeUnique<TargetARM32>(Func);
  }

  std::unique_ptr<::Ice::Assembler> createAssembler() const override {
    return makeUnique<ARM32::AssemblerARM32>();
  }

  void initNodeForLowering(CfgNode *Node) override {
    Computations.forgetProducers();
    Computations.recordProducers(Node);
    Computations.dump(Func);
  }

  void translateOm1() override;
  void translateO2() override;
  bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;

  SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }
  Variable *getPhysicalRegister(RegNumT RegNum,
                                Type Ty = IceType_void) override;
  const char *getRegName(RegNumT RegNum, Type Ty) const override;
  SmallBitVector getRegisterSet(RegSetMask Include,
                                RegSetMask Exclude) const override;
  const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    switch (RC) {
    default:
      assert(RC < RC_Target);
      return TypeToRegisterSet[RC];
    case (RegClass)RegARM32::RCARM32_QtoS:
      return TypeToRegisterSet[RC];
    }
  }
  const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);
    return TypeToRegisterSetUnfiltered[RC];
  }
  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
    return RegisterAliases[Reg];
  }
  bool hasFramePointer() const override { return UsesFramePointer; }
  void setHasFramePointer() override { UsesFramePointer = true; }
  RegNumT getStackReg() const override { return RegARM32::Reg_sp; }
  RegNumT getFrameReg() const override { return RegARM32::Reg_fp; }
  RegNumT getFrameOrStackReg() const override {
    return UsesFramePointer ? getFrameReg() : getStackReg();
  }
  RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; }

  size_t typeWidthInBytesOnStack(Type Ty) const override {
    // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
    // are rounded up to 4 bytes.
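    // For example, IceType_i16 is 2 bytes wide, and (2 + 3) & ~3 == 4.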
    return (typeWidthInBytes(Ty) + 3) & ~3;
  }
  uint32_t getStackAlignment() const override;
  void reserveFixedAllocaArea(size_t Size, size_t Align) override {
    FixedAllocaSizeBytes = Size;
    assert(llvm::isPowerOf2_32(Align));
    FixedAllocaAlignBytes = Align;
    PrologEmitsFixedAllocas = true;
  }
  int32_t getFrameFixedAllocaOffset() const override {
    return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
  }
  uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }

  bool shouldSplitToVariable64On32(Type Ty) const override {
    return Ty == IceType_i64;
  }

  // TODO(ascull): what size is best for ARM?
  SizeT getMinJumpTableSize() const override { return 3; }
  void emitJumpTable(const Cfg *Func,
                     const InstJumpTable *JumpTable) const override;

  void emitVariable(const Variable *Var) const override;

  void emit(const ConstantUndef *C) const final;
  void emit(const ConstantInteger32 *C) const final;
  void emit(const ConstantInteger64 *C) const final;
  void emit(const ConstantFloat *C) const final;
  void emit(const ConstantDouble *C) const final;
  void emit(const ConstantRelocatable *C) const final;

  void lowerArguments() override;
  void addProlog(CfgNode *Node) override;
  void addEpilog(CfgNode *Node) override;

  Operand *loOperand(Operand *Operand);
  Operand *hiOperand(Operand *Operand);
  void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                              size_t BasicFrameOffset, size_t *InArgsSizeBytes);

  bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
    return CPUFeatures.hasFeature(I);
  }

  enum OperandLegalization {
    Legal_Reg = 1 << 0,  /// physical register, not stack location
    Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
                         /// immediates, shifted registers, or modified fp imm.
    Legal_Mem = 1 << 2,  /// includes [r0, r1 lsl #2] as well as [sp, #12]
    Legal_Rematerializable = 1 << 3,
    Legal_Default = ~Legal_Rematerializable,
  };

  using LegalMask = uint32_t;
  Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());
  Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
                    RegNumT RegNum = RegNumT());
  Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());
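
  // Illustrative legalization pattern (a hedged sketch of how the lowering
  // code typically uses these helpers; Dest/Src0/Src1 are hypothetical):
  //
  //   Variable *Src0R = legalizeToReg(Src0);
  //   Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
  //   _add(Dest, Src0R, Src1RF);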

  OperandARM32ShAmtImm *shAmtImm(uint32_t ShAmtImm) const {
    assert(ShAmtImm < 32);
    return OperandARM32ShAmtImm::create(
        Func,
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F)));
  }

  GlobalContext *getCtx() const { return Ctx; }

protected:
  explicit TargetARM32(Cfg *Func);

  void postLower() override;

  enum SafeBoolChain {
    SBC_No,
    SBC_Yes,
  };

  void lowerAlloca(const InstAlloca *Instr) override;
  SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Instr);
  void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
                            Operand *Src0, Operand *Src1);
  void lowerArithmetic(const InstArithmetic *Instr) override;
  void lowerAssign(const InstAssign *Instr) override;
  void lowerBr(const InstBr *Instr) override;
  void lowerCall(const InstCall *Instr) override;
  void lowerCast(const InstCast *Instr) override;
  void lowerExtractElement(const InstExtractElement *Instr) override;

  /// CondWhenTrue is a helper type returned by every method in the lowering
  /// that emits code to set the condition codes.
  class CondWhenTrue {
  public:
    explicit CondWhenTrue(CondARM32::Cond T0,
                          CondARM32::Cond T1 = CondARM32::kNone)
        : WhenTrue0(T0), WhenTrue1(T1) {
      assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone);
      assert(T1 != T0 || T0 == CondARM32::kNone);
    }
    CondARM32::Cond WhenTrue0;
    CondARM32::Cond WhenTrue1;

    /// invert returns a new object with WhenTrue0 and WhenTrue1 inverted.
    CondWhenTrue invert() const {
      switch (WhenTrue0) {
      default:
        if (WhenTrue1 == CondARM32::kNone)
          return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0));
        return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0),
                            InstARM32::getOppositeCondition(WhenTrue1));
      case CondARM32::AL:
        return CondWhenTrue(CondARM32::kNone);
      case CondARM32::kNone:
        return CondWhenTrue(CondARM32::AL);
      }
    }
  };
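
  // Illustrative consumption of a CondWhenTrue (a hedged sketch, not a
  // verbatim excerpt): some predicates are testable with one condition code,
  // while others (e.g., an ordered-and-unequal fcmp) need two, so consumers
  // act on each valid field; T and TrueValue are hypothetical:
  //
  //   CondWhenTrue Cond = lowerFcmpCond(Instr);
  //   if (Cond.WhenTrue0 != CondARM32::kNone)
  //     _mov_redefined(T, TrueValue, Cond.WhenTrue0);
  //   if (Cond.WhenTrue1 != CondARM32::kNone)
  //     _mov_redefined(T, TrueValue, Cond.WhenTrue1);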

  CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
  void lowerFcmp(const InstFcmp *Instr) override;
  CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
                                         Operand *Src0, Operand *Src1);
  CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerIcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                             Operand *Src1);
  CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
  void lowerIcmp(const InstIcmp *Instr) override;
  /// Emits the basic sequence for load-linked/store-exclusive loops:
  ///
  /// retry:
  ///        ldrex tmp, [Addr]
  ///        StoreValue = Operation(tmp)
  ///        strexCond success, StoreValue, [Addr]
  ///        cmpCond success, #0
  ///        bne retry
  ///
  /// Operation needs to return the value to be strex'd to Addr; it must not
  /// change the flags if Cond is not AL, and must not emit any instructions
  /// that could end up writing to memory. Operation also needs to handle the
  /// fake-defs needed for i64 handling.
  void
  lowerLoadLinkedStoreExclusive(Type Ty, Operand *Addr,
                                std::function<Variable *(Variable *)> Operation,
                                CondARM32::Cond Cond = CondARM32::AL);
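
  // Illustrative Operation callback (a hedged sketch, not a verbatim excerpt;
  // Val stands for a hypothetical, already-legalized RMW operand): an atomic
  // fetch-and-add could be lowered roughly as
  //
  //   lowerLoadLinkedStoreExclusive(Ty, Addr, [&](Variable *Tmp) {
  //     Variable *Result = makeReg(Ty);
  //     _add(Result, Tmp, Val);
  //     return Result;
  //   });
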
  void lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                           Operand *Val);
  void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                      Operand *Val);
  void lowerBreakpoint(const InstBreakpoint *Instr) override;
  void lowerIntrinsic(const InstIntrinsic *Instr) override;
  void lowerInsertElement(const InstInsertElement *Instr) override;
  void lowerLoad(const InstLoad *Instr) override;
  void lowerPhi(const InstPhi *Instr) override;
  void lowerRet(const InstRet *Instr) override;
  void lowerSelect(const InstSelect *Instr) override;
  void lowerShuffleVector(const InstShuffleVector *Instr) override;
  void lowerStore(const InstStore *Instr) override;
  void lowerSwitch(const InstSwitch *Instr) override;
  void lowerUnreachable(const InstUnreachable *Instr) override;
  void prelowerPhis() override;
  uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
  void genTargetHelperCallFor(Inst *Instr) override;
  void doAddressOptLoad() override;
  void doAddressOptStore() override;

  OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);

  Variable64On32 *makeI64RegPair();
  Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
  static Type stackSlotType();
  Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
  void alignRegisterPow2(Variable *Reg, uint32_t Align,
                         RegNumT TmpRegNum = RegNumT());

  /// Returns a vector in a register with the given constant entries.
  Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());

  // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP;
  // trap; .LSKIP: <continuation>. If no check is needed, nothing is inserted.
  void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
  using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
                                         CondARM32::Cond);
  using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,
                                         CondARM32::Cond);
  void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
                    ExtInstr ExtFunc, DivInstr DivFunc, bool IsRemainder);

  void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);

  // The following are helpers that insert lowered ARM32 instructions with
  // minimal syntactic overhead, so that the lowering code can look as close to
  // assembly as practical.
  void _add(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred);
  }
  void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _adc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Adc>(Dest, Src0, Src1, Pred);
  }
  void _and(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32And>(Dest, Src0, Src1, Pred);
  }
  void _asr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Asr>(Dest, Src0, Src1, Pred);
  }
  void _bic(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Bic>(Dest, Src0, Src1, Pred);
  }
  void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
           CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(TargetTrue, TargetFalse, Condition);
  }
  void _br(CfgNode *Target) { Context.insert<InstARM32Br>(Target); }
  void _br(CfgNode *Target, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Target, Condition);
  }
  void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Label, Condition);
  }
  void _cmn(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmn>(Src0, Src1, Pred);
  }
  void _cmp(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmp>(Src0, Src1, Pred);
  }
  void _clz(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Clz>(Dest, Src0, Pred);
  }
  void _dmb() { Context.insert<InstARM32Dmb>(); }
  void _eor(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Eor>(Dest, Src0, Src1, Pred);
  }
  /// _ldr, for all your memory to Variable data moves. It handles all types
  /// (integer, floating point, and vectors.) Addr needs to be valid for Dest's
  /// type (e.g., no immediates for vector loads, and no index registers for fp
  /// loads.)
  void _ldr(Variable *Dest, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
  }
  InstARM32Ldrex *_ldrex(Variable *Dest, OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    auto *Ldrex = Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
    if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
      Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
      Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
    }
    return Ldrex;
  }
  void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred);
  }
  void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsr>(Dest, Src0, Src1, Pred);
  }
  void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mla>(Dest, Src0, Src1, Acc, Pred);
  }
  void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mls>(Dest, Src0, Src1, Acc, Pred);
  }
  /// _mov, for all your Variable to Variable data movement needs. It handles
  /// all types (integer, floating point, and vectors), as well as moves between
  /// Core and VFP registers. This is not a panacea: you must obey the (weird,
  /// confusing, non-uniform) rules for data moves in ARM.
  void _mov(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    // _mov used to be unique in the sense that it would create a temporary
    // automagically if Dest was nullptr. It won't do that anymore, so we keep
    // an assert around just in case there is some untested code path where Dest
    // is nullptr.
    assert(Dest != nullptr);
    assert(!llvm::isa<OperandARM32Mem>(Src0));
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);

    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _mov_redefined(Variable *Dest, Operand *Src0,
                      CondARM32::Cond Pred = CondARM32::AL) {
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
    Instr->setDestRedefined();
    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _nop() { Context.insert<InstARM32Nop>(); }

  // Generates a vmov instruction to extract the given index from a vector
  // register.
  void _extractelement(Variable *Dest, Variable *Src0, uint32_t Index,
                       CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Extract>(Dest, Src0, Index, Pred);
  }

  // Generates a vmov instruction to insert a value into the given index of a
  // vector register.
  void _insertelement(Variable *Dest, Variable *Src0, uint32_t Index,
                      CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Insert>(Dest, Src0, Index, Pred);
  }

  // --------------------------------------------------------------------------
  // Begin bool folding machinery.
  //
  // There are three types of boolean lowerings handled by this target:
  //
  // 1) Boolean expressions leading to a boolean Variable definition
  // ---------------------------------------------------------------
  //
  // Whenever an i1 Variable is live out (i.e., its live range extends beyond
  // the defining basic block) we do not fold the operation. We instead
  // materialize (i.e., compute) the variable normally, so that it can be used
  // when needed. We also materialize i1 values that are not single use to
  // avoid code duplication. These expressions are not short circuited.
  //
  // 2) Boolean expressions leading to a select
  // ------------------------------------------
  //
  // These include boolean chains leading to a select instruction, as well as
  // i1 Sexts. These boolean expressions are lowered to:
  //
  // mov T, <false value>
  // CC <- eval(Boolean Expression)
  // movCC T, <true value>
  //
  // For Sexts, <false value> is 0, and <true value> is -1.
  //
  // 3) Boolean expressions leading to a br i1
  // -----------------------------------------
  //
  // These are the boolean chains leading to a branch. These chains are
  // short-circuited, i.e.:
  //
  //   A = or i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   jne %T
  //   tst C
  //   jne %T
  //   j %F
  //
  // and
  //
  //   A = and i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   jeq %F
  //   tst C
  //   jeq %F
  //   j %T
  //
  // Arbitrarily long chains are short circuited, e.g.,
  //
  //   A = or  i1 B, C
  //   D = and i1 A, E
  //   F = and i1 G, H
  //   I = or i1 D, F
  //   br i1 I, label %True, label %False
  //
  // becomes
  //
  // Label[A]:
  //   tst B, 1
  //   bne Label[D]
  //   tst C, 1
  //   beq Label[I]
  // Label[D]:
  //   tst E, 1
  //   bne %True
  // Label[I]:
  //   tst G, 1
  //   beq %False
  //   tst H, 1
  //   beq %False (bne %True)

  /// lowerInt1 materializes Boolean to a Variable.
  SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean);

  /// lowerInt1ForSelect generates the following instruction sequence:
  ///
  ///   mov T, FalseValue
  ///   CC <- eval(Boolean)
  ///   movCC T, TrueValue
  ///   mov Dest, T
  ///
  /// It is used for lowering select i1, as well as i1 Sext.
  void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue,
                          Operand *FalseValue);

  /// LowerInt1BranchTarget is used by lowerInt1ForBranch. It wraps a CfgNode,
  /// or an InstARM32Label (but never both) so that, during br i1 lowering, we
  /// can create auxiliary labels for short circuiting the condition
  /// evaluation.
  class LowerInt1BranchTarget {
  public:
    explicit LowerInt1BranchTarget(CfgNode *const Target)
        : NodeTarget(Target) {}
    explicit LowerInt1BranchTarget(InstARM32Label *const Target)
        : LabelTarget(Target) {}

    /// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
    /// is the exact copy of this if Label is nullptr; otherwise, the returned
    /// object will wrap Label instead.
    LowerInt1BranchTarget
    createForLabelOrDuplicate(InstARM32Label *Label) const {
      if (Label != nullptr)
        return LowerInt1BranchTarget(Label);
      if (NodeTarget)
        return LowerInt1BranchTarget(NodeTarget);
      return LowerInt1BranchTarget(LabelTarget);
    }

    CfgNode *const NodeTarget = nullptr;
    InstARM32Label *const LabelTarget = nullptr;
  };

  /// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch
  /// for determining which kinds of arithmetic are allowed to be short
  /// circuited. This is useful for lowering
  ///
  ///   t1 = and i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %False, label %True
  ///
  /// to
  ///
  ///   tst A, 1
  ///   beq %False
  ///   tst B, 1
  ///   beq %False
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// Without this information, short circuiting would only allow short
  /// circuiting a single high-level instruction. For example:
  ///
  ///   t1 = or i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %False, label %True
  ///
  /// cannot be lowered to
  ///
  ///   tst A, 1
  ///   bne %True
  ///   tst B, 1
  ///   bne %True
  ///   tst C, 1
  ///   beq %True
  ///   b %False
  ///
  /// It needs to be lowered to
  ///
  ///   tst A, 1
  ///   bne Aux
  ///   tst B, 1
  ///   beq %False
  /// Aux:
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// TODO(jpp): evaluate if this kind of short circuiting hurts performance
  /// (it might.)
  enum LowerInt1AllowShortCircuit {
    SC_And = 1,
    SC_Or = 2,
    SC_All = SC_And | SC_Or,
  };

  /// ShortCircuitCondAndLabel wraps the condition codes that should be used
  /// after a lowerInt1ForBranch returns to branch to the
  /// TrueTarget/FalseTarget. If ShortCircuitTarget is not nullptr, then the
  /// called lowerInt1ForBranch created an internal (i.e., short-circuit) label
  /// used for short circuiting.
  class ShortCircuitCondAndLabel {
  public:
    explicit ShortCircuitCondAndLabel(CondWhenTrue &&C,
                                      InstARM32Label *L = nullptr)
        : Cond(std::move(C)), ShortCircuitTarget(L) {}
    const CondWhenTrue Cond;
    InstARM32Label *const ShortCircuitTarget;

    CondWhenTrue assertNoLabelAndReturnCond() const {
      assert(ShortCircuitTarget == nullptr);
      return Cond;
    }
  };

  /// lowerInt1ForBranch expands Boolean, and returns the condition codes that
  /// are to be used for branching to the branch's TrueTarget. It may return a
  /// label that the expansion of Boolean used to short circuit the chain's
  /// evaluation.
  ShortCircuitCondAndLabel
  lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
                     const LowerInt1BranchTarget &TargetFalse,
                     uint32_t ShortCircuitable);

  // _br is a convenience wrapper that emits br instructions to Target.
  void _br(const LowerInt1BranchTarget &BrTarget,
           CondARM32::Cond Cond = CondARM32::AL) {
    assert((BrTarget.NodeTarget == nullptr) !=
           (BrTarget.LabelTarget == nullptr));
    if (BrTarget.NodeTarget != nullptr)
      _br(BrTarget.NodeTarget, Cond);
    else
      _br(BrTarget.LabelTarget, Cond);
  }

  // _br_short_circuit is used when lowering InstArithmetic::And and
  // InstArithmetic::Or and a short circuit branch is needed.
  void _br_short_circuit(const LowerInt1BranchTarget &Target,
                         const CondWhenTrue &Cond) {
    if (Cond.WhenTrue1 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue1);
    }
    if (Cond.WhenTrue0 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue0);
    }
  }
  // End of bool folding machinery
  // --------------------------------------------------------------------------

  /// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with
  /// an upper16 relocation).
  void _movt(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movt>(Dest, Src0, Pred);
  }
  void _movw(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movw>(Dest, Src0, Pred);
  }
  void _mul(Variable *Dest, Variable *Src0, Variable *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mul>(Dest, Src0, Src1, Pred);
  }
  void _mvn(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mvn>(Dest, Src0, Pred);
  }
  void _orr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred);
  }
  void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _push(const VarList &Sources) { Context.insert<InstARM32Push>(Sources); }
  void _pop(const VarList &Dests) {
    Context.insert<InstARM32Pop>(Dests);
    // Mark dests as modified.
    for (Variable *Dest : Dests)
      Context.insert<InstFakeDef>(Dest);
  }
  void _rbit(Variable *Dest, Variable *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rbit>(Dest, Src0, Pred);
  }
  void _rev(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rev>(Dest, Src0, Pred);
  }
  void _ret(Variable *LR, Variable *Src0 = nullptr) {
    Context.insert<InstARM32Ret>(LR, Src0);
  }
  void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred);
  }
  void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred);
  }
  void _sbc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred);
  }
  void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sdiv>(Dest, Src0, Src1, Pred);
  }
  /// _str, for all your Variable to memory transfers. Addr has the same
  /// restrictions that it does in _ldr.
  void _str(Variable *Value, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Str>(Value, Addr, Pred);
  }
  InstARM32Strex *_strex(Variable *Dest, Variable *Value, OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
      Context.insert<InstFakeUse>(Value64->getLo());
      Context.insert<InstFakeUse>(Value64->getHi());
    }
    return Context.insert<InstARM32Strex>(Dest, Value, Addr, Pred);
  }
  void _sub(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred);
  }
  void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sxt>(Dest, Src0, Pred);
  }
  void _tst(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Tst>(Src0, Src1, Pred);
  }
  void _trap() { Context.insert<InstARM32Trap>(); }
  void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Udiv>(Dest, Src0, Src1, Pred);
  }
  void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
              Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
    // umull requires DestLo and DestHi to be assigned to different GPRs. The
    // following lines create overlapping liveness ranges for both variables. If
    // either one of them is live, then they are both going to be live, and thus
    // assigned to different registers; if they are both dead, then DCE will
    // kick in and delete the following three instructions.
    Context.insert<InstFakeDef>(DestHi);
    Context.insert<InstARM32Umull>(DestLo, DestHi, Src0, Src1, Pred);
    Context.insert<InstFakeDef>(DestHi, DestLo)->setDestRedefined();
    Context.insert<InstFakeUse>(DestHi);
  }
  void _uxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Uxt>(Dest, Src0, Pred);
  }
  void _vabs(Variable *Dest, Variable *Src,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vabs>(Dest, Src, Pred);
  }
  void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vadd>(Dest, Src0, Src1);
  }
  void _vand(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vand>(Dest, Src0, Src1);
  }
  InstARM32Vbsl *_vbsl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vbsl>(Dest, Src0, Src1);
  }
  void _vceq(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vceq>(Dest, Src0, Src1);
  }
  InstARM32Vcge *_vcge(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcge>(Dest, Src0, Src1);
  }
  InstARM32Vcgt *_vcgt(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcgt>(Dest, Src0, Src1);
  }
  void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
  }
  void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vdiv>(Dest, Src0, Src1);
  }
  void _vcmp(Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, Src1, Pred);
  }
  void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, FpZero, Pred);
  }
  void _vdup(Variable *Dest, Variable *Src, int Idx) {
    Context.insert<InstARM32Vdup>(Dest, Src, Idx);
  }
  void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Veor>(Dest, Src0, Src1);
  }
  void _vldr1d(Variable *Dest, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vldr1d>(Dest, Addr, Pred);
  }
  void _vldr1q(Variable *Dest, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vldr1q>(Dest, Addr, Pred);
  }
  void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vmrs>(Pred);
  }
  void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
  }
  void _vmlap(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmlap>(Dest, Src0, Src1);
  }
  void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
  }
  void _vmovl(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovl>(Dest, Src0, Src1);
  }
  void _vmovh(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovh>(Dest, Src0, Src1);
  }
  void _vmovhl(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovhl>(Dest, Src0, Src1);
  }
  void _vmovlh(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovlh>(Dest, Src0, Src1);
  }
  void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
  }
  void _vmulh(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vmulh>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  void _vmvn(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vmvn>(Dest, Src0, CondARM32::AL);
  }
  void _vneg(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL)
        ->setSignType(InstARM32::FS_Signed);
  }
  void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
  }
  void _vqadd(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vqadd>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  void _vqmovn2(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned,
                bool Saturating) {
    Context.insert<InstARM32Vqmovn2>(Dest, Src0, Src1)
        ->setSignType(Saturating ? (Unsigned ? InstARM32::FS_Unsigned
                                             : InstARM32::FS_Signed)
                                 : InstARM32::FS_None);
  }
  void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vqsub>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
  }
  void _vshl(Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) {
    Context.insert<InstARM32Vshl>(Dest, Src0, Src1)
        ->setSignType(InstARM32::FS_Unsigned);
  }
  InstARM32Vshr *_vshr(Variable *Dest, Variable *Src0,
                       ConstantInteger32 *Src1) {
    return Context.insert<InstARM32Vshr>(Dest, Src0, Src1);
  }
  void _vsqrt(Variable *Dest, Variable *Src,
              CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
  }
  void _vstr1d(Variable *Value, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 32);
  }
  void _vstr1q(Variable *Value, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 64);
  }
  void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
  }
  void _vzip(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vzip>(Dest, Src0, Src1);
  }

  // Iterates over the CFG and determines the maximum outgoing stack argument
  // size, in bytes. This information is later used during addProlog() to
  // pre-allocate the outargs area.
  // TODO(jpp): This could live in the Parser, if we provided a Target-specific
  // method that the Parser could call.
  void findMaxStackOutArgsSize();

  /// Returns true if the given Offset can be represented in a Load/Store Mem
  /// Operand.
  bool isLegalMemOffset(Type Ty, int32_t Offset) const;

  void postLowerLegalization();

  /// Manages the Gotoff relocations created during the function lowering. A
  /// single Gotoff relocation is created for each global variable used by the
  /// function being lowered.
  /// @{
  // TODO(jpp): if the same global G is used in different functions, then this
  // method will emit one G(gotoff) relocation per function.
  GlobalString createGotoffRelocation(const ConstantRelocatable *CR);
  CfgUnorderedSet<GlobalString> KnownGotoffs;
  /// @}

  class PostLoweringLegalizer {
    PostLoweringLegalizer() = delete;
    PostLoweringLegalizer(const PostLoweringLegalizer &) = delete;
    PostLoweringLegalizer &operator=(const PostLoweringLegalizer &) = delete;

  public:
    explicit PostLoweringLegalizer(TargetARM32 *Target)
        : Target(Target), StackOrFrameReg(Target->getPhysicalRegister(
                              Target->getFrameOrStackReg())) {}

    void resetTempBaseIfClobberedBy(const Inst *Instr);

    // Ensures that the TempBase register held by this legalizer (if any) is
    // assigned to IP.
    void assertNoTempOrAssignedToIP() const {
      assert(TempBaseReg == nullptr ||
             TempBaseReg->getRegNum() == Target->getReservedTmpReg());
    }

    // Legalizes Mem. If Mem.Base is a rematerializable variable, Mem.Offset
    // is fixed up.
    OperandARM32Mem *legalizeMemOperand(OperandARM32Mem *Mem,
                                        bool AllowOffsets = true);

    /// Legalizes Mov if its Source (or Destination) is a spilled Variable, or
    /// if its Source is a Rematerializable variable (this form is used in lieu
    /// of lea, which is not available in ARM.)
    ///
    /// Moves to memory become store instructions, and moves from memory, loads.
    void legalizeMov(InstARM32Mov *Mov);

  private:
    /// Creates a new Base register centered around [Base, +/- Offset].
    Variable *newBaseRegister(Variable *Base, int32_t Offset,
                              RegNumT ScratchRegNum);

    /// Creates a new, legal OperandARM32Mem for accessing Base + Offset.
    /// The returned mem operand is a legal operand for accessing memory that is
    /// of type Ty.
    ///
    /// If [Base, #Offset] is encodable, then the method returns a Mem operand
    /// expressing it. Otherwise,
    ///
    /// if [TempBaseReg, #Offset-TempBaseOffset] is a valid memory operand, the
    /// method will return that. Otherwise,
    ///
    /// a new base register ip=Base+Offset is created, and the method returns a
    /// memory operand expressing [ip, #0].
    OperandARM32Mem *createMemOperand(Type Ty, Variable *Base, int32_t Offset,
                                      bool AllowOffsets = true);
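    // Illustrative fallback (hedged; the limits come from the A32 encodings,
    // e.g., ldr/str take a 12-bit immediate, so at most #4095): for an i32
    // access at [fp, #8000], the offset is not encodable, so the legalizer
    // would materialize ip = fp + 8000 and return [ip, #0], or reuse
    // TempBaseReg when the cached offset is close enough.
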
    TargetARM32 *const Target;
    Variable *const StackOrFrameReg;
    Variable *TempBaseReg = nullptr;
    int32_t TempBaseOffset = 0;
  };

  TargetARM32Features CPUFeatures;
  bool UsesFramePointer = false;
  bool NeedsStackAlignment = false;
  bool MaybeLeafFunc = true;
  size_t SpillAreaSizeBytes = 0;
  size_t FixedAllocaSizeBytes = 0;
  size_t FixedAllocaAlignBytes = 0;
  bool PrologEmitsFixedAllocas = false;
  uint32_t MaxOutArgsSizeBytes = 0;
  // TODO(jpp): std::array instead of array.
  static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
  static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
  static SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
  SmallBitVector RegsUsed;
  VarList PhysicalRegisters[IceType_NUM];
  VarList PreservedGPRs;
  VarList PreservedSRegs;

  /// Helper class that understands the Calling Convention and register
  /// assignments. The first few integer type parameters can use r0-r3,
  /// regardless of their position relative to the floating-point/vector
  /// arguments in the argument list. Floating-point and vector arguments
  /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
  /// see the ARM Architecture Procedure Calling Standards (AAPCS).
  ///
  /// Technically, arguments that start in registers but extend beyond the
  /// available registers can be split between the registers and the stack.
  /// However, this is typically for passing GPR structs by value, and PNaCl
  /// transforms expand this out.
  ///
  /// At (public) function entry, the stack must be 8-byte aligned.
  class CallingConv {
    CallingConv(const CallingConv &) = delete;
    CallingConv &operator=(const CallingConv &) = delete;

  public:
    CallingConv();
    ~CallingConv() = default;

    /// argInGPR returns true if there is a GPR available for the requested
    /// type, and false otherwise. If it returns true, Reg is set to the
    /// appropriate register number. Note that, when Ty == IceType_i64, Reg will
    /// be an I64 register pair.
    bool argInGPR(Type Ty, RegNumT *Reg);

    /// argInVFP is to floating-point/vector types what argInGPR is for integer
    /// types.
    bool argInVFP(Type Ty, RegNumT *Reg);
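
    // Illustrative AAPCS-style assignment (a hedged sketch of the rules
    // described above, not a verbatim excerpt): for f(i32 a, i64 b, float c,
    // double d, float e), argInGPR places a in r0 and b in the aligned pair
    // (r2, r3), skipping r1; argInVFP places c in s0, d in d1 (s2/s3), and
    // back-fills e into s1.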

  private:
    void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs);
    SmallBitVector GPRegsUsed;
    CfgVector<RegNumT> GPRArgs;
    CfgVector<RegNumT> I64Args;

    void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs);
    SmallBitVector VFPRegsUsed;
    CfgVector<RegNumT> FP32Args;
    CfgVector<RegNumT> FP64Args;
    CfgVector<RegNumT> Vec128Args;
  };

private:
  ENABLE_MAKE_UNIQUE;

  OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
                                      Operand *Base);

  void postambleCtpop64(const InstCall *Instr);
  void preambleDivRem(const InstCall *Instr);
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPreamble;
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPostamble;

  class ComputationTracker {
  public:
    ComputationTracker() = default;
    ~ComputationTracker() = default;

    void forgetProducers() { KnownComputations.clear(); }
    void recordProducers(CfgNode *Node);

    const Inst *getProducerOf(const Operand *Opnd) const {
      auto *Var = llvm::dyn_cast<Variable>(Opnd);
      if (Var == nullptr) {
        return nullptr;
      }

      auto Iter = KnownComputations.find(Var->getIndex());
      if (Iter == KnownComputations.end()) {
        return nullptr;
      }

      return Iter->second.Instr;
    }

    void dump(const Cfg *Func) const {
      if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
        return;
      OstreamLocker L(Func->getContext());
      Ostream &Str = Func->getContext()->getStrDump();
      Str << "foldable producer:\n";
      for (const auto &Computation : KnownComputations) {
        Str << "    ";
        Computation.second.Instr->dump(Func);
        Str << "\n";
      }
      Str << "\n";
    }

  private:
    class ComputationEntry {
    public:
      ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
      Inst *const Instr;
      // Boolean folding is disabled for variables whose live range is multi
      // block. We conservatively initialize IsLiveOut to true, and set it to
      // false once we find the end of the live range for the variable defined
      // by this instruction. If liveness analysis is not performed (e.g., in
      // Om1 mode) IsLiveOut will never be set to false, and folding will be
      // disabled.
      bool IsLiveOut = true;
      int32_t NumUses = 0;
      Type ComputationType;
    };

    // ComputationMap maps a Variable number to a payload identifying which
    // instruction defined it.
    using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>;
    ComputationMap KnownComputations;
  };

  ComputationTracker Computations;

  // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
  // without specifying a physical register. This is needed for creating unbound
  // temporaries during Ice -> ARM lowering, but before register allocation.
  // This is a safeguard ensuring that no unbound temporaries are created during
  // the legalization post-passes.
  bool AllowTemporaryWithNoReg = true;
  // ForbidTemporaryWithoutReg is a RAII class that manages
  // AllowTemporaryWithNoReg.
  class ForbidTemporaryWithoutReg {
    ForbidTemporaryWithoutReg() = delete;
    ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete;
    ForbidTemporaryWithoutReg &
    operator=(const ForbidTemporaryWithoutReg &) = delete;

  public:
    explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
      Target->AllowTemporaryWithNoReg = false;
    }
    ~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }

  private:
    TargetARM32 *const Target;
  };
};

class TargetDataARM32 final : public TargetDataLowering {
  TargetDataARM32() = delete;
  TargetDataARM32(const TargetDataARM32 &) = delete;
  TargetDataARM32 &operator=(const TargetDataARM32 &) = delete;

public:
  static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx));
  }

  void lowerGlobals(const VariableDeclarationList &Vars,
                    const std::string &SectionSuffix) override;
  void lowerConstants() override;
  void lowerJumpTables() override;

protected:
  explicit TargetDataARM32(GlobalContext *Ctx);

private:
  ~TargetDataARM32() override = default;
};

class TargetHeaderARM32 final : public TargetHeaderLowering {
  TargetHeaderARM32() = delete;
  TargetHeaderARM32(const TargetHeaderARM32 &) = delete;
  TargetHeaderARM32 &operator=(const TargetHeaderARM32 &) = delete;

public:
  static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetHeaderLowering>(new TargetHeaderARM32(Ctx));
  }

  void lower() override;

protected:
  explicit TargetHeaderARM32(GlobalContext *Ctx);

private:
  ~TargetHeaderARM32() = default;

  TargetARM32Features CPUFeatures;
};

} // end of namespace ARM32
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H