//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the TargetLoweringARM32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//
#include "IceTargetLoweringARM32.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstARM32.def"
#include "IceInstARM32.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceRegistersARM32.h"
#include "IceTargetLoweringARM32.def"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"

#include <algorithm>
#include <array>
#include <utility>

namespace ARM32 {
std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
  return ::Ice::ARM32::TargetARM32::create(Func);
}

std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::ARM32::TargetDataARM32::create(Ctx);
}

std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::ARM32::TargetHeaderARM32::create(Ctx);
}

void staticInit(::Ice::GlobalContext *Ctx) {
  ::Ice::ARM32::TargetARM32::staticInit(Ctx);
}

bool shouldBePooled(const ::Ice::Constant *C) {
  return ::Ice::ARM32::TargetARM32::shouldBePooled(C);
}

::Ice::Type getPointerType() {
  return ::Ice::ARM32::TargetARM32::getPointerType();
}

} // end of namespace ARM32

namespace Ice {
namespace ARM32 {

namespace {

/// SizeOf is used to obtain the size of an initializer list as a constexpr
/// expression. This is only needed until our C++ library is updated to
/// C++14, which adds constexpr members to std::initializer_list.
class SizeOf {
  SizeOf(const SizeOf &) = delete;
  SizeOf &operator=(const SizeOf &) = delete;

public:
  constexpr SizeOf() : Size(0) {}
  template <typename... T>
  explicit constexpr SizeOf(T...) : Size(__length<T...>::value) {}
  constexpr SizeT size() const { return Size; }

private:
  template <typename T, typename... U> struct __length {
    static constexpr std::size_t value = 1 + __length<U...>::value;
  };

  template <typename T> struct __length<T> {
    static constexpr std::size_t value = 1;
  };

  const std::size_t Size;
};
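
// Illustrative sketch (not part of the build): SizeOf counts its constructor
// arguments at compile time, so given arbitrary register enumerators,
//   constexpr SizeOf ThreeRegs(Reg_r0, Reg_r1, Reg_r2);
//   static_assert(ThreeRegs.size() == 3, "three aliases");
// would hold. The RegTable initializer below relies on exactly this to
// compute the alias count from the alias_init list.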

} // end of anonymous namespace

// Defines the RegARM32::Table table with register information.
RegARM32::RegTableType RegARM32::RegTable[RegARM32::Reg_NUM] = {
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  {name,      encode,                                                          \
   cc_arg,    scratch,                                                         \
   preserved, stackptr,                                                        \
   frameptr,  isGPR,                                                           \
   isInt,     isI64Pair,                                                       \
   isFP32,    isFP64,                                                          \
   isVec128,  (SizeOf alias_init).size(),                                      \
   alias_init},
    REGARM32_TABLE
#undef X
};

namespace {

// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.

const struct TableIcmp32_ {
  CondARM32::Cond Mapping;
} TableIcmp32[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
  {CondARM32::C_32},
    ICMPARM32_TABLE
#undef X
};

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
// The operands may need to be swapped, and there is a slight difference for
// signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
const struct TableIcmp64_ {
  bool IsSigned;
  bool Swapped;
  CondARM32::Cond C1, C2;
} TableIcmp64[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
  {is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64},
    ICMPARM32_TABLE
#undef X
};
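
// Illustrative sketch only (the actual conditions come from ICMPARM32_TABLE):
// an unsigned i64 compare is built from a low-word cmp followed by a
// high-word sbc, and the result is materialized with the two conditional
// moves C1/C2, roughly:
//   cmp     a.lo, b.lo
//   sbcs    ip, a.hi, b.hi
//   mov<C1> dest, #1
//   mov<C2> dest, #0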

CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
  assert(Cond < llvm::array_lengthof(TableIcmp32));
  return TableIcmp32[Cond].Mapping;
}
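
// E.g. getIcmp32Mapping(InstIcmp::Eq) yields CondARM32::EQ, and a signed
// less-than yields CondARM32::LT (assuming the standard ARM condition-code
// names used throughout ICMPARM32_TABLE).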

// In some cases, there are x-macros tables for both high-level and low-level
// instructions/operands that use the same enum key value. The tables are kept
// separate to maintain a proper separation between abstraction layers. There
// is a risk that the tables could get out of sync if enum values are reordered
// or if entries are added or deleted. The following anonymous namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in ICMPARM32_TABLE.
namespace {
// Define a temporary set of enum values based on low-level table entries.
enum _icmp_ll_enum {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
  _icmp_ll_##val,
  ICMPARM32_TABLE
#undef X
      _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, reverse, str)                                                   \
  static constexpr int _icmp_hl_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
  static_assert(                                                               \
      _icmp_ll_##val == _icmp_hl_##val,                                        \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #val);
ICMPARM32_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table entries in
// case the high-level table has extra entries.
#define X(tag, reverse, str)                                                   \
  static_assert(                                                               \
      _icmp_hl_##tag == _icmp_ll_##tag,                                        \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #tag);
ICEINSTICMP_TABLE
#undef X
} // end of anonymous namespace

// Stack alignment
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment.
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}
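
// E.g. with the 16-byte stack alignment above, applyStackAlignment(20) == 32,
// and applyStackAlignment(16) == 16 (already-aligned values are unchanged).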

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment required for the given type.
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  // Use natural alignment, except that normally (non-NaCl) ARM only aligns
  // vectors to 8 bytes.
  // TODO(jvoung): Check this ...
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  if (isVectorType(Ty))
    typeAlignInBytes = 8;
  return Utils::applyAlignment(Value, typeAlignInBytes);
}
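
// E.g. applyStackAlignmentTy(6, IceType_i32) == 8 (4-byte natural alignment),
// and applyStackAlignmentTy(6, IceType_v4i32) == 8 as well, since vectors are
// capped at 8-byte alignment here rather than their 16-byte natural width.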

// Conservatively check if at compile time we know that the operand is
// definitely a non-zero integer.
bool isGuaranteedNonzeroInt(const Operand *Op) {
  if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
    return Const->getValue() != 0;
  }
  return false;
}

} // end of anonymous namespace

TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
  static_assert(
      (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
          (TargetInstructionSet::ARM32InstructionSet_End -
           TargetInstructionSet::ARM32InstructionSet_Begin),
      "ARM32InstructionSet range different from TargetInstructionSet");
  if (Flags.getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<ARM32InstructionSet>(
        (Flags.getTargetInstructionSet() -
         TargetInstructionSet::ARM32InstructionSet_Begin) +
        ARM32InstructionSet::Begin);
  }
}

namespace {
constexpr SizeT NumGPRArgs =
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  +(((cc_arg) > 0) ? 1 : 0)
    REGARM32_GPR_TABLE
#undef X
    ;
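
// Note on the counting pattern above (repeated for each register class
// below): every X() expansion contributes a "+(((cc_arg) > 0) ? 1 : 0)" term,
// so the whole expansion folds into a single constexpr sum such as
// "+1 +1 +0 +1", counting the registers that participate in argument passing.
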
std::array<RegNumT, NumGPRArgs> GPRArgInitializer;

constexpr SizeT NumI64Args =
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  +(((cc_arg) > 0) ? 1 : 0)
    REGARM32_I64PAIR_TABLE
#undef X
    ;
std::array<RegNumT, NumI64Args> I64ArgInitializer;

constexpr SizeT NumFP32Args =
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  +(((cc_arg) > 0) ? 1 : 0)
    REGARM32_FP32_TABLE
#undef X
    ;
std::array<RegNumT, NumFP32Args> FP32ArgInitializer;

constexpr SizeT NumFP64Args =
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  +(((cc_arg) > 0) ? 1 : 0)
    REGARM32_FP64_TABLE
#undef X
    ;
std::array<RegNumT, NumFP64Args> FP64ArgInitializer;

constexpr SizeT NumVec128Args =
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  +(((cc_arg) > 0) ? 1 : 0)
    REGARM32_VEC128_TABLE
#undef X
    ;
std::array<RegNumT, NumVec128Args> Vec128ArgInitializer;

const char *getRegClassName(RegClass C) {
  auto ClassNum = static_cast<RegARM32::RegClassARM32>(C);
  assert(ClassNum < RegARM32::RCARM32_NUM);
  switch (ClassNum) {
  default:
    assert(C < RC_Target);
    return regClassString(C);
  // Add handling of new register classes below.
  case RegARM32::RCARM32_QtoS:
    return "QtoS";
  }
}

} // end of anonymous namespace

TargetARM32::TargetARM32(Cfg *Func)
    : TargetLowering(Func), CPUFeatures(getFlags()) {}

void TargetARM32::staticInit(GlobalContext *Ctx) {
  RegNumT::setLimit(RegARM32::Reg_NUM);
  // Limit this size (or do all bitsets need to be the same width)???
  SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
  SmallBitVector I64PairRegisters(RegARM32::Reg_NUM);
  SmallBitVector Float32Registers(RegARM32::Reg_NUM);
  SmallBitVector Float64Registers(RegARM32::Reg_NUM);
  SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
  SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);
  SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
  const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding;
  for (int i = 0; i < RegARM32::Reg_NUM; ++i) {
    const auto &Entry = RegARM32::RegTable[i];
    IntegerRegisters[i] = Entry.IsInt;
    I64PairRegisters[i] = Entry.IsI64Pair;
    Float32Registers[i] = Entry.IsFP32;
    Float64Registers[i] = Entry.IsFP64;
    VectorRegisters[i] = Entry.IsVec128;
    RegisterAliases[i].resize(RegARM32::Reg_NUM);
    // TODO(eholk): It would be better to store a QtoS flag in the
    // IceRegistersARM32 table than to compare their encodings here.
    QtoSRegisters[i] = Entry.IsVec128 && Entry.Encoding < EncodedReg_q8;
    for (int j = 0; j < Entry.NumAliases; ++j) {
      assert(i == j || !RegisterAliases[i][Entry.Aliases[j]]);
      RegisterAliases[i].set(Entry.Aliases[j]);
    }
    assert(RegisterAliases[i][i]);
    if (Entry.CCArg <= 0) {
      continue;
    }
    const auto RegNum = RegNumT::fromInt(i);
    if (Entry.IsGPR) {
      GPRArgInitializer[Entry.CCArg - 1] = RegNum;
    } else if (Entry.IsI64Pair) {
      I64ArgInitializer[Entry.CCArg - 1] = RegNum;
    } else if (Entry.IsFP32) {
      FP32ArgInitializer[Entry.CCArg - 1] = RegNum;
    } else if (Entry.IsFP64) {
      FP64ArgInitializer[Entry.CCArg - 1] = RegNum;
    } else if (Entry.IsVec128) {
      Vec128ArgInitializer[Entry.CCArg - 1] = RegNum;
    }
  }
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = I64PairRegisters;
  TypeToRegisterSet[IceType_f32] = Float32Registers;
  TypeToRegisterSet[IceType_f64] = Float64Registers;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
  TypeToRegisterSet[RegARM32::RCARM32_QtoS] = QtoSRegisters;

  for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
    TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];

  filterTypeToRegisterSet(
      Ctx, RegARM32::Reg_NUM, TypeToRegisterSet,
      llvm::array_lengthof(TypeToRegisterSet),
      [](RegNumT RegNum) -> std::string {
        // This function simply removes ", " from the register name.
        std::string Name = RegARM32::getRegName(RegNum);
        constexpr const char RegSeparator[] = ", ";
        constexpr size_t RegSeparatorWidth =
            llvm::array_lengthof(RegSeparator) - 1;
        for (size_t Pos = Name.find(RegSeparator); Pos != std::string::npos;
             Pos = Name.find(RegSeparator)) {
          Name.replace(Pos, RegSeparatorWidth, "");
        }
        return Name;
      },
      getRegClassName);
}

namespace {
void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
  for (Variable *Var : Vars) {
    auto *Var64 = llvm::dyn_cast<Variable64On32>(Var);
    if (!Var64) {
      // This is not the variable we are looking for.
      continue;
    }
    // We only allow infinite-weight i64 temporaries to be register allocated.
    assert(!Var64->hasReg() || Var64->mustHaveReg());
    if (!Var64->hasReg()) {
      continue;
    }
    const auto FirstReg =
        RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum()));
    // This assumes little endian.
    Variable *Lo = Var64->getLo();
    Variable *Hi = Var64->getHi();
    assert(Lo->hasReg() == Hi->hasReg());
    if (Lo->hasReg()) {
      continue;
    }
    Lo->setRegNum(FirstReg);
    Lo->setMustHaveReg();
    Hi->setRegNum(RegNumT::fixme(FirstReg + 1));
    Hi->setMustHaveReg();
  }
}
} // end of anonymous namespace

uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
  TargetARM32::CallingConv CC;
  RegNumT DummyReg;
  size_t OutArgsSizeBytes = 0;
  for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Call->getArg(i));
    const Type Ty = Arg->getType();
    if (isScalarIntegerType(Ty)) {
      if (CC.argInGPR(Ty, &DummyReg)) {
        continue;
      }
    } else {
      if (CC.argInVFP(Ty, &DummyReg)) {
        continue;
      }
    }

    OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
    OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  }

  return applyStackAlignment(OutArgsSizeBytes);
}
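
// Illustrative example, mirroring the CallingConv comments further below: for
// a call foo(i32, i64, i32), arg0 takes r0, arg1 takes the r2:r3 pair (r1 is
// burned as an alias of the discarded r0:r1 pair), and arg2 overflows to the
// stack. OutArgsSizeBytes is then 4, which the final applyStackAlignment call
// rounds up to 16.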

void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
  constexpr bool NoTailCall = false;
  constexpr bool IsTargetHelperCall = true;

  switch (Instr->getKind()) {
  default:
    return;
  case Inst::Arithmetic: {
    Variable *Dest = Instr->getDest();
    const Type DestTy = Dest->getType();
    const InstArithmetic::OpKind Op =
        llvm::cast<InstArithmetic>(Instr)->getOp();
    if (isVectorType(DestTy)) {
      switch (Op) {
      default:
        break;
      case InstArithmetic::Fdiv:
      case InstArithmetic::Frem:
      case InstArithmetic::Sdiv:
      case InstArithmetic::Srem:
      case InstArithmetic::Udiv:
      case InstArithmetic::Urem:
        scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
        Instr->setDeleted();
        return;
      }
    }
    switch (DestTy) {
    default:
      return;
    case IceType_i64: {
      // Technically, ARM has its own aeabi routines, but we can use the
      // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
      // the more standard __moddi3 for rem.
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (Op) {
      default:
        return;
      case InstArithmetic::Udiv:
        HelperID = RuntimeHelper::H_udiv_i64;
        break;
      case InstArithmetic::Sdiv:
        HelperID = RuntimeHelper::H_sdiv_i64;
        break;
      case InstArithmetic::Urem:
        HelperID = RuntimeHelper::H_urem_i64;
        break;
      case InstArithmetic::Srem:
        HelperID = RuntimeHelper::H_srem_i64;
        break;
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
      ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
      constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    case IceType_i32:
    case IceType_i16:
    case IceType_i8: {
      const bool HasHWDiv = hasCPUFeature(TargetARM32Features::HWDivArm);
      InstCast::OpKind CastKind;
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (Op) {
      default:
        return;
      case InstArithmetic::Udiv:
        HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_udiv_i32;
        CastKind = InstCast::Zext;
        break;
      case InstArithmetic::Sdiv:
        HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_sdiv_i32;
        CastKind = InstCast::Sext;
        break;
      case InstArithmetic::Urem:
        HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_urem_i32;
        CastKind = InstCast::Zext;
        break;
      case InstArithmetic::Srem:
        HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_srem_i32;
        CastKind = InstCast::Sext;
        break;
      }
      if (HelperID == RuntimeHelper::H_Num) {
        // HelperID should only ever be undefined when the processor does not
        // have a hardware divider. If any other helpers are ever introduced,
        // the following assert will have to be modified.
        assert(HasHWDiv);
        return;
      }
      Operand *Src0 = Instr->getSrc(0);
      Operand *Src1 = Instr->getSrc(1);
      if (DestTy != IceType_i32) {
        // Src0 and Src1 have to be zero- or sign-extended to i32. For Src0,
        // we just insert an InstCast right before the call to the helper.
        Variable *Src0_32 = Func->makeVariable(IceType_i32);
        Context.insert<InstCast>(CastKind, Src0_32, Src0);
        Src0 = Src0_32;

        // For extending Src1, we will just insert an InstCast if Src1 is not a
        // Constant. If it is, then we extend it here, and not during program
        // runtime. This allows preambleDivRem to optimize out the div-by-0
        // check.
        if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
          const int32_t ShAmt = (DestTy == IceType_i16) ? 16 : 24;
          int32_t NewC = C->getValue();
          if (CastKind == InstCast::Zext) {
            NewC &= ~(0x80000000l >> ShAmt);
          } else {
            NewC = (NewC << ShAmt) >> ShAmt;
          }
          Src1 = Ctx->getConstantInt32(NewC);
        } else {
          Variable *Src1_32 = Func->makeVariable(IceType_i32);
          Context.insert<InstCast>(CastKind, Src1_32, Src1);
          Src1 = Src1_32;
        }
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
      ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
      constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      assert(Src0->getType() == IceType_i32);
      Call->addArg(Src0);
      assert(Src1->getType() == IceType_i32);
      Call->addArg(Src1);
      Instr->setDeleted();
      return;
    }
    case IceType_f64:
    case IceType_f32: {
      if (Op != InstArithmetic::Frem) {
        return;
      }
      constexpr SizeT MaxArgs = 2;
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
          DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
                                : RuntimeHelper::H_frem_f64);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  case Inst::Cast: {
    Variable *Dest = Instr->getDest();
    Operand *Src0 = Instr->getSrc(0);
    const Type DestTy = Dest->getType();
    const Type SrcTy = Src0->getType();
    auto *CastInstr = llvm::cast<InstCast>(Instr);
    const InstCast::OpKind CastKind = CastInstr->getCastKind();

    switch (CastKind) {
    default:
      return;
    case InstCast::Fptosi:
    case InstCast::Fptoui: {
      if (DestTy != IceType_i64) {
        return;
      }
      const bool DestIsSigned = CastKind == InstCast::Fptosi;
      const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
          Src0IsF32 ? (DestIsSigned ? RuntimeHelper::H_fptosi_f32_i64
                                    : RuntimeHelper::H_fptoui_f32_i64)
                    : (DestIsSigned ? RuntimeHelper::H_fptosi_f64_i64
                                    : RuntimeHelper::H_fptoui_f64_i64));
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case InstCast::Sitofp:
    case InstCast::Uitofp: {
      if (SrcTy != IceType_i64) {
        return;
      }
      const bool SourceIsSigned = CastKind == InstCast::Sitofp;
      const bool DestIsF32 = isFloat32Asserting32Or64(Dest->getType());
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
          DestIsF32 ? (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f32
                                      : RuntimeHelper::H_uitofp_i64_f32)
                    : (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f64
                                      : RuntimeHelper::H_uitofp_i64_f64));
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case InstCast::Bitcast: {
      if (DestTy == SrcTy) {
        return;
      }
      Variable *CallDest = Dest;
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (DestTy) {
      default:
        return;
      case IceType_i8:
        assert(SrcTy == IceType_v8i1);
        HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_i16:
        assert(SrcTy == IceType_v16i1);
        HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_v8i1: {
        assert(SrcTy == IceType_i8);
        HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      case IceType_v16i1: {
        assert(SrcTy == IceType_i16);
        HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      }
      constexpr SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
      Call->addArg(Src0);
      Context.insert(Call);
      // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
      // call result to the appropriate type as necessary.
      if (CallDest->getType() != Dest->getType())
        Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
      Instr->setDeleted();
      return;
    }
    case InstCast::Trunc: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(SrcTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(DestTy) == IceType_i1);
      assert(isVectorIntegerType(SrcTy));
      return;
    }
    case InstCast::Sext:
    case InstCast::Zext: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(DestTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(SrcTy) == IceType_i1);
      assert(isVectorIntegerType(DestTy));
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  case Inst::Intrinsic: {
    Variable *Dest = Instr->getDest();
    auto *Intrinsic = llvm::cast<InstIntrinsic>(Instr);
    Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicID();
    switch (ID) {
    default:
      return;
    case Intrinsics::Ctpop: {
      Operand *Src0 = Intrinsic->getArg(0);
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
                                        ? RuntimeHelper::H_call_ctpop_i32
                                        : RuntimeHelper::H_call_ctpop_i64);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      if (Src0->getType() == IceType_i64) {
        ARM32HelpersPostamble[TargetHelper] = &TargetARM32::postambleCtpop64;
      }
      return;
    }
    case Intrinsics::Longjmp: {
      static constexpr SizeT MaxArgs = 2;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(Intrinsic->getArg(1));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memcpy: {
      // In the future, we could potentially emit an inline memcpy/memset, etc.
      // for intrinsic calls with a known length.
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(Intrinsic->getArg(1));
      Call->addArg(Intrinsic->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memmove: {
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(Intrinsic->getArg(1));
      Call->addArg(Intrinsic->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memset: {
      // The value operand needs to be extended to a stack slot size because the
      // PNaCl ABI requires arguments to be at least 32 bits wide.
      Operand *ValOp = Intrinsic->getArg(1);
      assert(ValOp->getType() == IceType_i8);
      Variable *ValExt = Func->makeVariable(stackSlotType());
      Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);

      // Technically, ARM has its own __aeabi_memset, but we can use plain
      // memset too. The value and size argument need to be flipped if we ever
      // decide to use __aeabi_memset.
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Call->addArg(ValExt);
      Call->addArg(Intrinsic->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Setjmp: {
      static constexpr SizeT MaxArgs = 1;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Intrinsic->getArg(0));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  }
}

void TargetARM32::findMaxStackOutArgsSize() {
  // MinNeededOutArgsBytes should be updated if the Target ever creates a
  // high-level InstCall that requires more stack bytes.
  constexpr size_t MinNeededOutArgsBytes = 0;
  MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = iteratorToInst(Context.getCur());
      if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
        SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
        MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
      }
    }
  }
}

GlobalString
TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) {
  GlobalString CRName = CR->getName();
  GlobalString CRGotoffName =
      Ctx->getGlobalString("GOTOFF$" + Func->getFunctionName() + "$" + CRName);
  if (KnownGotoffs.count(CRGotoffName) == 0) {
    constexpr bool SuppressMangling = true;
    auto *Global =
        VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
    Global->setIsConstant(true);
    Global->setName(CRName);
    Func->getGlobalPool()->willNotBeEmitted(Global);

    auto *Gotoff =
        VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
    constexpr auto GotFixup = R_ARM_GOTOFF32;
    Gotoff->setIsConstant(true);
    Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create(
        Func->getGlobalPool(), Global, {RelocOffset::create(Ctx, 0)},
        GotFixup));
    Gotoff->setName(CRGotoffName);
    Func->addGlobal(Gotoff);
    KnownGotoffs.emplace(CRGotoffName);
  }
  return CRGotoffName;
}
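
// E.g. a reference to a global "bar" from a function "foo" produces a pooled
// constant named "GOTOFF$foo$bar"; the KnownGotoffs set ensures it is created
// once and then reused for subsequent references within the function.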

void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  genTargetHelperCalls();
  findMaxStackOutArgsSize();

  // Merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = true;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (!getFlags().getEnablePhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();
  Func->materializeVectorShuffles();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After ARM32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After ARM32 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial ARM32 codegen");
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;

  copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
  Func->dump("After linear scan regalloc");

  if (getFlags().getEnablePhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  ForbidTemporaryWithoutReg _(this);

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  genTargetHelperCalls();
  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool DontSortAndCombineAllocas = false;
  Func->processAllocas(DontSortAndCombineAllocas);
  Func->dump("After Alloca processing");

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial ARM32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;

  copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
  Func->dump("After regalloc of infinite-weight variables");

  ForbidTemporaryWithoutReg _(this);

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");
}

uint32_t TargetARM32::getStackAlignment() const {
  return ARM32_STACK_ALIGNMENT_BYTES;
}

bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (auto *Br = llvm::dyn_cast<InstARM32Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

const char *TargetARM32::getRegName(RegNumT RegNum, Type Ty) const {
  (void)Ty;
  return RegARM32::getRegName(RegNum);
}

Variable *TargetARM32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
  static const Type DefaultType[] = {
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  (isFP32)                                                                     \
      ? IceType_f32                                                            \
      : ((isFP64) ? IceType_f64 : ((isVec128 ? IceType_v4i32 : IceType_i32))),
      REGARM32_TABLE
#undef X
  };

  if (Ty == IceType_void) {
    assert(unsigned(RegNum) < llvm::array_lengthof(DefaultType));
    Ty = DefaultType[RegNum];
  }
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
  assert(unsigned(RegNum) < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark a named physical register as an "argument" so that it is
    // considered live upon function entry. Otherwise it's possible to get
    // liveness validation errors for saving callee-save registers.
    Func->addImplicitArg(Reg);
    // Don't bother tracking the live range of a named physical register.
    Reg->setIgnoreLiveness();
  }
  return Reg;
}

void TargetARM32::emitJumpTable(const Cfg *Func,
                                const InstJumpTable *JumpTable) const {
  (void)Func;
  (void)JumpTable;
  UnimplementedError(getFlags());
}

void TargetARM32::emitVariable(const Variable *Var) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->mustHaveReg()) {
    llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
                             ") has no register assigned - function " +
                             Func->getFunctionName());
  }
  assert(!Var->isRematerializable());
  int32_t Offset = Var->getStackOffset();
  auto BaseRegNum = Var->getBaseRegNum();
  if (BaseRegNum.hasNoValue()) {
    BaseRegNum = getFrameOrStackReg();
  }
  const Type VarTy = Var->getType();
  Str << "[" << getRegName(BaseRegNum, VarTy);
  if (Offset != 0) {
    Str << ", #" << Offset;
  }
  Str << "]";
}
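
// E.g. a register-allocated variable emits as its register name ("r4", "s1",
// ...), while a stack variable at offset 8 from the frame/stack base emits as
// "[fp, #8]" (or "[sp, #8]" when no frame pointer is used).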

TargetARM32::CallingConv::CallingConv()
    : GPRegsUsed(RegARM32::Reg_NUM),
      GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
      I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
      VFPRegsUsed(RegARM32::Reg_NUM),
      FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
      FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()),
      Vec128Args(Vec128ArgInitializer.rbegin(), Vec128ArgInitializer.rend()) {}

bool TargetARM32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    assert(isScalarIntegerType(Ty));
    Source = &GPRArgs;
  } break;
  case IceType_i64: {
    Source = &I64Args;
  } break;
  }

  discardUnavailableGPRsAndTheirAliases(Source);

  if (Source->empty()) {
    GPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  // Note that we intentionally don't Source->pop_back() here: all of Reg's
  // aliases are marked as used below, so for the next argument Source->back()
  // is unavailable and gets implicitly popped by
  // discardUnavailableGPRsAndTheirAliases().
  GPRegsUsed |= RegisterAliases[*Reg];
  return true;
}

// GPRs are not packed when passing parameters. Thus, a function foo(i32, i64,
// i32) will have the first argument in r0, the second in r2-r3, and the third
// on the stack. To model this behavior, whenever we pop a register from Regs,
// we remove all of its aliases from the pool of available GPRs. This has the
// effect of computing the "closure" on the GPR registers.
void TargetARM32::CallingConv::discardUnavailableGPRsAndTheirAliases(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
    GPRegsUsed |= RegisterAliases[Regs->back()];
    Regs->pop_back();
  }
}

bool TargetARM32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    assert(isVectorType(Ty));
    Source = &Vec128Args;
  } break;
  case IceType_f32: {
    Source = &FP32Args;
  } break;
  case IceType_f64: {
    Source = &FP64Args;
  } break;
  }

  discardUnavailableVFPRegs(Source);

  if (Source->empty()) {
    VFPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  VFPRegsUsed |= RegisterAliases[*Reg];
  return true;
}

// Arguments in VFP registers are not packed, so we don't mark the popped
// registers' aliases as unavailable.
void TargetARM32::CallingConv::discardUnavailableVFPRegs(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
    Regs->pop_back();
  }
}

void TargetARM32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetARM32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the home
  // register. Then generate an instruction in the prolog to copy the home
  // register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    RegNumT RegNum;
    if (isScalarIntegerType(Ty)) {
      if (!CC.argInGPR(Ty, &RegNum)) {
        continue;
      }
    } else {
      if (!CC.argInVFP(Ty, &RegNum)) {
        continue;
      }
    }

    Variable *RegisterArg = Func->makeVariable(Ty);
    if (BuildDefs::dump()) {
      RegisterArg->setName(Func, "home_reg:" + Arg->getName());
    }
    RegisterArg->setIsArg();
    Arg->setIsArg(false);
    Args[I] = RegisterArg;
    switch (Ty) {
    default: {
      RegisterArg->setRegNum(RegNum);
    } break;
    case IceType_i64: {
      auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
      RegisterArg64->initHiLo(Func);
      RegisterArg64->getLo()->setRegNum(
          RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(RegNum)));
      RegisterArg64->getHi()->setRegNum(
          RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(RegNum)));
    } break;
    }
    Context.insert<InstAssign>(Arg, RegisterArg);
  }
}

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the frame
// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
// I64 arg that has been split into Lo and Hi components, it calls itself
// recursively on the components, taking care to handle Lo first because of the
// little-endian architecture. Lastly, this function generates an instruction
// to copy Arg into its assigned register if applicable.
void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                                         size_t BasicFrameOffset,
                                         size_t *InArgsSizeBytes) {
  const Type Ty = Arg->getType();
  *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);

  if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
    Variable *const Lo = Arg64On32->getLo();
    Variable *const Hi = Arg64On32->getHi();
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  assert(Ty != IceType_i64);

  const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
  *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

  if (!Arg->hasReg()) {
    Arg->setStackOffset(ArgStackOffset);
    return;
  }

  // If the argument variable has been assigned a register, we need to copy the
  // value from the stack slot.
  Variable *Parameter = Func->makeVariable(Ty);
  Parameter->setMustNotHaveReg();
  Parameter->setStackOffset(ArgStackOffset);
  _mov(Arg, Parameter);
}
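
// E.g. an i64 argument at the start of the on-stack argument area is split
// little-endian style: Lo is assigned [base, #0] and Hi [base, #4], with
// *InArgsSizeBytes advancing by 4 after each component.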
1273 
stackSlotType()1274 Type TargetARM32::stackSlotType() { return IceType_i32; }
1275 
addProlog(CfgNode * Node)1276 void TargetARM32::addProlog(CfgNode *Node) {
1277   // Stack frame layout:
1278   //
1279   // +------------------------+
1280   // | 1. preserved registers |
1281   // +------------------------+
1282   // | 2. padding             |
1283   // +------------------------+ <--- FramePointer (if used)
1284   // | 3. global spill area   |
1285   // +------------------------+
1286   // | 4. padding             |
1287   // +------------------------+
1288   // | 5. local spill area    |
1289   // +------------------------+
1290   // | 6. padding             |
1291   // +------------------------+
1292   // | 7. allocas (variable)  |
1293   // +------------------------+
1294   // | 8. padding             |
1295   // +------------------------+
1296   // | 9. out args            |
1297   // +------------------------+ <--- StackPointer
1298   //
1299   // The following variables record the size in bytes of the given areas:
1300   //  * PreservedRegsSizeBytes: area 1
1301   //  * SpillAreaPaddingBytes:  area 2
1302   //  * GlobalsSize:            area 3
1303   //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
1304   //  * LocalsSpillAreaSize:    area 5
1305   //  * SpillAreaSizeBytes:     areas 2 - 6, and 9
1306   //  * MaxOutArgsSizeBytes:    area 9
1307   //
1308   // Determine stack frame offsets for each Variable without a register
1309   // assignment.  This can be done as one variable per stack slot.  Or, do
1310   // coalescing by running the register allocator again with an infinite set of
1311   // registers (as a side effect, this gives variables a second chance at
1312   // physical register assignment).
1313   //
1314   // A middle ground approach is to leverage sparsity and allocate one block of
1315   // space on the frame for globals (variables with multi-block lifetime), and
1316   // one block to share for locals (single-block lifetime).
1317 
1318   Context.init(Node);
1319   Context.setInsertPoint(Context.getCur());
1320 
1321   SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
1322   RegsUsed = SmallBitVector(CalleeSaves.size());
1323   VarList SortedSpilledVariables;
1324   size_t GlobalsSize = 0;
1325   // If there is a separate locals area, this represents that area. Otherwise
1326   // it counts any variable not counted by GlobalsSize.
1327   SpillAreaSizeBytes = 0;
1328   // If there is a separate locals area, this specifies the alignment for it.
1329   uint32_t LocalsSlotsAlignmentBytes = 0;
1330   // The entire spill locations area gets aligned to largest natural alignment
1331   // of the variables that have a spill slot.
1332   uint32_t SpillAreaAlignmentBytes = 0;
1333   // For now, we don't have target-specific variables that need special
1334   // treatment (no stack-slot-linked SpillVariable type).
1335   std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
1336     static constexpr bool AssignStackSlot = false;
1337     static constexpr bool DontAssignStackSlot = !AssignStackSlot;
1338     if (llvm::isa<Variable64On32>(Var)) {
1339       return DontAssignStackSlot;
1340     }
1341     return AssignStackSlot;
1342   };
1343 
1344   // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1345   getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1346                         &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1347                         &LocalsSlotsAlignmentBytes, TargetVarHook);
1348   uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1349   SpillAreaSizeBytes += GlobalsSize;
1350 
1351   // Add push instructions for preserved registers. On ARM, "push" can push a
1352   // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
1353   // callee-saved float/vector registers.
1354   //
1355   // The "vpush" instruction can handle a whole list of float/vector registers,
1356   // but it only handles contiguous sequences of registers by specifying the
1357   // start and the length.
1358   PreservedGPRs.reserve(CalleeSaves.size());
1359   PreservedSRegs.reserve(CalleeSaves.size());
1360 
1361   // Consider FP and LR as callee-save / used as needed.
1362   if (UsesFramePointer) {
1363     if (RegsUsed[RegARM32::Reg_fp]) {
1364       llvm::report_fatal_error("Frame pointer has been used.");
1365     }
1366     CalleeSaves[RegARM32::Reg_fp] = true;
1367     RegsUsed[RegARM32::Reg_fp] = true;
1368   }
1369   if (!MaybeLeafFunc) {
1370     CalleeSaves[RegARM32::Reg_lr] = true;
1371     RegsUsed[RegARM32::Reg_lr] = true;
1372   }
1373 
1374   // Make two passes over the used registers. The first pass records all the
1375   // used registers -- and their aliases. Then, we figure out which GPRs and
1376   // VFP S registers should be saved. We don't bother saving D/Q registers
1377   // because their uses are recorded as S regs uses.
1378   SmallBitVector ToPreserve(RegARM32::Reg_NUM);
1379   for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1380     if (CalleeSaves[i] && RegsUsed[i]) {
1381       ToPreserve |= RegisterAliases[i];
1382     }
1383   }
1384 
1385   uint32_t NumCallee = 0;
1386   size_t PreservedRegsSizeBytes = 0;
1387 
1388   // Each entry in RegClasses is a tuple of
1389   //
1390   // <First Register in Class, Last Register in Class, Vector of Save Registers>
1391   //
1392   // We use these tuples to figure out which registers we should push/pop
1393   // during the prologue/epilogue.
1394   using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
1395   const RegClassType RegClasses[] = {
1396       RegClassType(RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last,
1397                    &PreservedGPRs),
1398       RegClassType(RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last,
1399                    &PreservedSRegs)};
1400   for (const auto &RegClass : RegClasses) {
1401     const uint32_t FirstRegInClass = std::get<0>(RegClass);
1402     const uint32_t LastRegInClass = std::get<1>(RegClass);
1403     VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1404     for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) {
1405       if (!ToPreserve[Reg]) {
1406         continue;
1407       }
1408       ++NumCallee;
1409       Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
1410       PreservedRegsSizeBytes +=
1411           typeWidthInBytesOnStack(PhysicalRegister->getType());
1412       PreservedRegsInClass->push_back(PhysicalRegister);
1413     }
1414   }
1415 
1416   Ctx->statsUpdateRegistersSaved(NumCallee);
1417   if (!PreservedSRegs.empty())
1418     _push(PreservedSRegs);
1419   if (!PreservedGPRs.empty())
1420     _push(PreservedGPRs);
1421 
1422   // Generate "mov FP, SP" if needed.
1423   if (UsesFramePointer) {
1424     Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1425     Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1426     _mov(FP, SP);
1427     // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1428     Context.insert<InstFakeUse>(FP);
1429   }
1430 
1431   // Align the variables area. SpillAreaPaddingBytes is the size of the region
1432   // after the preserved registers and before the spill areas.
1433   // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1434   // locals area if they are separate.
1435   assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
1436   assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1437   uint32_t SpillAreaPaddingBytes = 0;
1438   uint32_t LocalsSlotsPaddingBytes = 0;
1439   alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1440                        GlobalsSize, LocalsSlotsAlignmentBytes,
1441                        &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1442   SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1443   uint32_t GlobalsAndSubsequentPaddingSize =
1444       GlobalsSize + LocalsSlotsPaddingBytes;
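       // A rough worked example (illustrative): with 20 bytes of preserved
       // registers and SpillAreaAlignmentBytes = 16, the spill area starts at
       // the next 16-byte boundary, 32, so SpillAreaPaddingBytes = 12.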
1445 
1446   // Add the out-args space to the stack, and align SP if necessary.
1447   if (!NeedsStackAlignment) {
1448     SpillAreaSizeBytes += MaxOutArgsSizeBytes;
1449   } else {
1450     uint32_t StackOffset = PreservedRegsSizeBytes;
1451     uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
1452     StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
1453     SpillAreaSizeBytes = StackSize - StackOffset;
1454   }
1455 
1456   // Combine fixed alloca with SpillAreaSize.
1457   SpillAreaSizeBytes += FixedAllocaSizeBytes;
1458 
1459   // Generate "sub sp, SpillAreaSizeBytes"
1460   if (SpillAreaSizeBytes) {
1461     Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1462     // Use the scratch register if needed to legalize the immediate.
1463     Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1464                                   Legal_Reg | Legal_Flex, getReservedTmpReg());
1465     _sub(SP, SP, SubAmount);
1466     if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
1467       alignRegisterPow2(SP, FixedAllocaAlignBytes);
1468     }
1469   }
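       // For example (illustrative): SpillAreaSizeBytes = 0x12345 is not
       // encodable as a rotated 8-bit immediate, so legalize() materializes it
       // in the reserved scratch register, roughly:
       //   movw ip, #0x2345
       //   movt ip, #0x1
       //   sub  sp, sp, ip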
1470 
1471   Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
1472 
1473   // Fill in stack offsets for stack args, and copy args into registers for
1474   // those that were register-allocated. Args are pushed right to left, so
1475   // Arg[0] is closest to the stack/frame pointer.
1476   Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
1477   size_t BasicFrameOffset = PreservedRegsSizeBytes;
1478   if (!UsesFramePointer)
1479     BasicFrameOffset += SpillAreaSizeBytes;
1480 
1481   const VarList &Args = Func->getArgs();
1482   size_t InArgsSizeBytes = 0;
1483   TargetARM32::CallingConv CC;
1484   for (Variable *Arg : Args) {
1485     RegNumT DummyReg;
1486     const Type Ty = Arg->getType();
1487 
1488     // Skip arguments passed in registers.
1489     if (isScalarIntegerType(Ty)) {
1490       if (CC.argInGPR(Ty, &DummyReg)) {
1491         continue;
1492       }
1493     } else {
1494       if (CC.argInVFP(Ty, &DummyReg)) {
1495         continue;
1496       }
1497     }
1498     finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, &InArgsSizeBytes);
1499   }
1500 
1501   // Fill in stack offsets for locals.
1502   assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1503                       SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
1504                       UsesFramePointer);
1505   this->HasComputedFrame = true;
1506 
1507   if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1508     OstreamLocker _(Func->getContext());
1509     Ostream &Str = Func->getContext()->getStrDump();
1510 
1511     Str << "Stack layout:\n";
1512     uint32_t SPAdjustmentPaddingSize =
1513         SpillAreaSizeBytes - LocalsSpillAreaSize -
1514         GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1515         MaxOutArgsSizeBytes;
1516     Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1517         << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1518         << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1519         << " globals spill area = " << GlobalsSize << " bytes\n"
1520         << " globals-locals spill areas intermediate padding = "
1521         << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1522         << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1523         << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
1524 
1525     Str << "Stack details:\n"
1526         << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
1527         << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1528         << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
1529         << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1530         << " bytes\n"
1531         << " is FP based = " << UsesFramePointer << "\n";
1532   }
1533 }
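     // Sketch of the resulting frame layout, from higher to lower addresses
     // (illustrative; padding and the FP-vs-SP split vary per function):
     //
     //   incoming stack args
     //   preserved S registers          <-- pushed first (vpush)
     //   preserved GPRs (fp, lr, ...)   <-- FP points here when UsesFramePointer
     //   globals spill area (+ padding)
     //   locals spill area (+ padding)
     //   fixed allocas
     //   out-args area                  <-- SP points here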
1534 
1535 void TargetARM32::addEpilog(CfgNode *Node) {
1536   InstList &Insts = Node->getInsts();
1537   InstList::reverse_iterator RI, E;
1538   for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1539     if (llvm::isa<InstARM32Ret>(*RI))
1540       break;
1541   }
1542   if (RI == E)
1543     return;
1544 
1545   // Convert the reverse_iterator position into its corresponding (forward)
1546   // iterator position.
1547   InstList::iterator InsertPoint = reverseToForwardIterator(RI);
1548   --InsertPoint;
1549   Context.init(Node);
1550   Context.setInsertPoint(InsertPoint);
1551 
1552   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1553   if (UsesFramePointer) {
1554     Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1555     // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
1556     // use of SP before the assignment of SP=FP keeps previous SP adjustments
1557     // from being dead-code eliminated.
1558     Context.insert<InstFakeUse>(SP);
1559     _mov_redefined(SP, FP);
1560   } else {
1561     // add SP, SpillAreaSizeBytes
1562     if (SpillAreaSizeBytes) {
1563       // Use the scratch register if needed to legalize the immediate.
1564       Operand *AddAmount =
1565           legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1566                    Legal_Reg | Legal_Flex, getReservedTmpReg());
1567       _add(SP, SP, AddAmount);
1568     }
1569   }
1570 
1571   if (!PreservedGPRs.empty())
1572     _pop(PreservedGPRs);
1573   if (!PreservedSRegs.empty())
1574     _pop(PreservedSRegs);
1575 }
1576 
1577 bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const {
1578   constexpr bool ZeroExt = false;
1579   return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset);
1580 }
1581 
1582 Variable *TargetARM32::PostLoweringLegalizer::newBaseRegister(
1583     Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
1584   // Legalizing the offset will likely need a movw/movt combination, but if
1585   // the top 16 bits of the negated offset are all 0, we subtract it instead.
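       // For example (illustrative): for Offset = -0x1234, the negated offset
       // 0x1234 fits in 16 bits, so "movw ip, #0x1234; sub ip, base, ip"
       // suffices, while Offset = 0x12345 needs movw/movt followed by an add.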
1586   const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
1587   Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
1588   if (ShouldSub) {
1589     Operand *OffsetVal =
1590         Target->legalize(Target->Ctx->getConstantInt32(-Offset),
1591                          Legal_Reg | Legal_Flex, ScratchRegNum);
1592     Target->_sub(ScratchReg, Base, OffsetVal);
1593   } else {
1594     Operand *OffsetVal =
1595         Target->legalize(Target->Ctx->getConstantInt32(Offset),
1596                          Legal_Reg | Legal_Flex, ScratchRegNum);
1597     Target->_add(ScratchReg, Base, OffsetVal);
1598   }
1599 
1600   if (ScratchRegNum == Target->getReservedTmpReg()) {
1601     const bool BaseIsStackOrFramePtr =
1602         Base->getRegNum() == Target->getFrameOrStackReg();
1603     // There is currently no code path that would violate this assertion, so we
1604     // leave it here in case that ever changes. This is not a fatal error (thus
1605     // the use of assert() and not llvm::report_fatal_error) as the program
1606     // compiled by Subzero will still work correctly.
1607     assert(BaseIsStackOrFramePtr);
1608     // Side-effect: updates TempBase to reflect the new Temporary.
1609     if (BaseIsStackOrFramePtr) {
1610       TempBaseReg = ScratchReg;
1611       TempBaseOffset = Offset;
1612     } else {
1613       TempBaseReg = nullptr;
1614       TempBaseOffset = 0;
1615     }
1616   }
1617 
1618   return ScratchReg;
1619 }
1620 
1621 OperandARM32Mem *TargetARM32::PostLoweringLegalizer::createMemOperand(
1622     Type Ty, Variable *Base, int32_t Offset, bool AllowOffsets) {
1623   assert(!Base->isRematerializable());
1624   if (Offset == 0 || (AllowOffsets && Target->isLegalMemOffset(Ty, Offset))) {
1625     return OperandARM32Mem::create(
1626         Target->Func, Ty, Base,
1627         llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)),
1628         OperandARM32Mem::Offset);
1629   }
1630 
1631   if (!AllowOffsets || TempBaseReg == nullptr) {
1632     newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1633   }
1634 
1635   int32_t OffsetDiff = Offset - TempBaseOffset;
1636   assert(AllowOffsets || OffsetDiff == 0);
1637 
1638   if (!Target->isLegalMemOffset(Ty, OffsetDiff)) {
1639     newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1640     OffsetDiff = 0;
1641   }
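       // For example (illustrative): once TempBaseReg = fp + 0x1040 has been
       // materialized for a slot at offset 0x1040, a neighboring slot at
       // 0x1044 is reached as [TempBaseReg, #4] without a new base register.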
1642 
1643   assert(!TempBaseReg->isRematerializable());
1644   return OperandARM32Mem::create(
1645       Target->Func, Ty, TempBaseReg,
1646       llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(OffsetDiff)),
1647       OperandARM32Mem::Offset);
1648 }
1649 
1650 void TargetARM32::PostLoweringLegalizer::resetTempBaseIfClobberedBy(
1651     const Inst *Instr) {
1652   bool ClobbersTempBase = false;
1653   if (TempBaseReg != nullptr) {
1654     Variable *Dest = Instr->getDest();
1655     if (llvm::isa<InstARM32Call>(Instr)) {
1656       // The following assertion is an invariant, so we remove it from the if
1657       // test. If the invariant is ever broken/invalidated/changed, remember
1658       // to add it back to the if condition.
1659       assert(TempBaseReg->getRegNum() == Target->getReservedTmpReg());
1660       // The linker may need to clobber IP if the call is too far from PC. Thus,
1661       // we assume IP will be overwritten.
1662       ClobbersTempBase = true;
1663     } else if (Dest != nullptr &&
1664                Dest->getRegNum() == TempBaseReg->getRegNum()) {
1665       // Register redefinition.
1666       ClobbersTempBase = true;
1667     }
1668   }
1669 
1670   if (ClobbersTempBase) {
1671     TempBaseReg = nullptr;
1672     TempBaseOffset = 0;
1673   }
1674 }
1675 
1676 void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
1677   Variable *Dest = MovInstr->getDest();
1678   assert(Dest != nullptr);
1679   Type DestTy = Dest->getType();
1680   assert(DestTy != IceType_i64);
1681 
1682   Operand *Src = MovInstr->getSrc(0);
1683   Type SrcTy = Src->getType();
1684   (void)SrcTy;
1685   assert(SrcTy != IceType_i64);
1686 
1687   if (MovInstr->isMultiDest() || MovInstr->isMultiSource())
1688     return;
1689 
1690   bool Legalized = false;
1691   if (!Dest->hasReg()) {
1692     auto *SrcR = llvm::cast<Variable>(Src);
1693     assert(SrcR->hasReg());
1694     assert(!SrcR->isRematerializable());
1695     const int32_t Offset = Dest->getStackOffset();
1696     // This is a _mov(Mem(), Variable), i.e., a store.
1697     Target->_str(SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset),
1698                  MovInstr->getPredicate());
1699     // _str() does not have a Dest, so we add a fake-def(Dest).
1700     Target->Context.insert<InstFakeDef>(Dest);
1701     Legalized = true;
1702   } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
1703     if (Var->isRematerializable()) {
1704       // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
1705 
1706       // ExtraOffset is only needed for frame-pointer based frames as we have
1707       // to account for spill storage.
1708       const int32_t ExtraOffset = (Var->getRegNum() == Target->getFrameReg())
1709                                       ? Target->getFrameFixedAllocaOffset()
1710                                       : 0;
1711 
1712       const int32_t Offset = Var->getStackOffset() + ExtraOffset;
1713       Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
1714       Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
1715       Target->_mov(Dest, T);
1716       Legalized = true;
1717     } else {
1718       if (!Var->hasReg()) {
1719         // This is a _mov(Variable, Mem()), i.e., a load.
1720         const int32_t Offset = Var->getStackOffset();
1721         Target->_ldr(Dest, createMemOperand(DestTy, StackOrFrameReg, Offset),
1722                      MovInstr->getPredicate());
1723         Legalized = true;
1724       }
1725     }
1726   }
1727 
1728   if (Legalized) {
1729     if (MovInstr->isDestRedefined()) {
1730       Target->_set_dest_redefined();
1731     }
1732     MovInstr->setDeleted();
1733   }
1734 }
1735 
1736 // ARM32 address modes:
1737 //  ld/st i[8|16|32]: [reg], [reg +/- imm12], [pc +/- imm12],
1738 //                    [reg +/- reg << shamt5]
1739 //  ld/st f[32|64]  : [reg], [reg +/- imm8] , [pc +/- imm8]
1740 //  ld/st vectors   : [reg]
1741 //
1742 // For now, we don't handle address modes with Relocatables.
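     //
     // A few illustrative examples of the forms above (the offset range
     // depends on the access type; see MemTraits below):
     //   ldr  r0, [r1]              ; register base only
     //   ldr  r0, [r1, #1020]       ; immediate offset
     //   ldr  r0, [r1, r2, lsl #2]  ; index shifted by a shamt5
     //   vldr s0, [r1, #1020]       ; float load, 8-bit offset scaled by 4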
1743 namespace {
1744 // MemTraits contains per-type valid address mode information.
1745 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits,  \
1746           ubits, rraddr, shaddr)                                               \
1747   static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag);
1748 ICETYPEARM32_TABLE
1749 #undef X
1750 
1751 static const struct {
1752   int32_t ValidImmMask;
1753   bool CanHaveImm;
1754   bool CanHaveIndex;
1755   bool CanHaveShiftedIndex;
1756 } MemTraits[] = {
1757 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits,  \
1758           ubits, rraddr, shaddr)                                               \
1759   {                                                                            \
1760       (1 << ubits) - 1,                                                        \
1761       (ubits) > 0,                                                             \
1762       rraddr,                                                                  \
1763       shaddr,                                                                  \
1764   },
1765     ICETYPEARM32_TABLE
1766 #undef X
1767 };
1768 static constexpr SizeT MemTraitsSize = llvm::array_lengthof(MemTraits);
1769 } // end of anonymous namespace
1770 
1771 OperandARM32Mem *
1772 TargetARM32::PostLoweringLegalizer::legalizeMemOperand(OperandARM32Mem *Mem,
1773                                                        bool AllowOffsets) {
1774   assert(!Mem->isRegReg() || !Mem->getIndex()->isRematerializable());
1775   assert(Mem->isRegReg() || Target->isLegalMemOffset(
1776                                 Mem->getType(), Mem->getOffset()->getValue()));
1777 
1778   bool Legalized = false;
1779   Variable *Base = Mem->getBase();
1780   int32_t Offset = Mem->isRegReg() ? 0 : Mem->getOffset()->getValue();
1781   if (Base->isRematerializable()) {
1782     const int32_t ExtraOffset = (Base->getRegNum() == Target->getFrameReg())
1783                                     ? Target->getFrameFixedAllocaOffset()
1784                                     : 0;
1785     Offset += Base->getStackOffset() + ExtraOffset;
1786     Base = Target->getPhysicalRegister(Base->getRegNum());
1787     assert(!Base->isRematerializable());
1788     Legalized = true;
1789   }
1790 
1791   if (!Legalized) {
1792     return nullptr;
1793   }
1794 
1795   if (!Mem->isRegReg()) {
1796     return createMemOperand(Mem->getType(), Base, Offset, AllowOffsets);
1797   }
1798 
1799   assert(MemTraits[Mem->getType()].CanHaveIndex);
1800 
1801   if (Offset != 0) {
1802     if (TempBaseReg == nullptr) {
1803       Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1804     } else {
1805       uint32_t Imm8, Rotate;
1806       const int32_t OffsetDiff = Offset - TempBaseOffset;
1807       if (OffsetDiff == 0) {
1808         Base = TempBaseReg;
1809       } else if (OperandARM32FlexImm::canHoldImm(OffsetDiff, &Rotate, &Imm8)) {
1810         auto *OffsetDiffF = OperandARM32FlexImm::create(
1811             Target->Func, IceType_i32, Imm8, Rotate);
1812         Target->_add(TempBaseReg, TempBaseReg, OffsetDiffF);
1813         TempBaseOffset += OffsetDiff;
1814         Base = TempBaseReg;
1815       } else if (OperandARM32FlexImm::canHoldImm(-OffsetDiff, &Rotate, &Imm8)) {
1816         auto *OffsetDiffF = OperandARM32FlexImm::create(
1817             Target->Func, IceType_i32, Imm8, Rotate);
1818         Target->_sub(TempBaseReg, TempBaseReg, OffsetDiffF);
1819         TempBaseOffset += OffsetDiff;
1820         Base = TempBaseReg;
1821       } else {
1822         Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1823       }
1824     }
1825   }
1826 
1827   return OperandARM32Mem::create(Target->Func, Mem->getType(), Base,
1828                                  Mem->getIndex(), Mem->getShiftOp(),
1829                                  Mem->getShiftAmt(), Mem->getAddrMode());
1830 }
1831 
1832 void TargetARM32::postLowerLegalization() {
1833   // If a stack variable's frame offset doesn't fit, convert from:
1834   //   ldr X, OFF[SP]
1835   // to:
1836   //   movw/movt TMP, OFF_PART
1837   //   add TMP, TMP, SP
1838   //   ldr X, OFF_MORE[TMP]
1839   //
1840   // This is safe because we have reserved TMP, and add for ARM does not
1841   // clobber the flags register.
1842   Func->dump("Before postLowerLegalization");
1843   assert(hasComputedFrame());
1844   // Do a fairly naive greedy clustering for now. Pick the first stack slot
1845   // that's out of bounds and make a new base reg using the architecture's temp
1846   // register. If that works for the next slot, then great. Otherwise, create a
1847   // new base register, clobbering the previous base register. Never share a
1848   // base reg across different basic blocks. This isn't ideal if local and
1849   // multi-block variables are far apart and their references are interspersed.
1850   // It may help to be more coordinated about assigning stack slot numbers,
1851   // and it may help to assign smaller offsets to higher-weight variables so
1852   // that they don't depend on this legalization.
1853   for (CfgNode *Node : Func->getNodes()) {
1854     Context.init(Node);
1855     // One legalizer per basic block, otherwise we would share the Temporary
1856     // Base Register between basic blocks.
1857     PostLoweringLegalizer Legalizer(this);
1858     while (!Context.atEnd()) {
1859       PostIncrLoweringContext PostIncrement(Context);
1860       Inst *CurInstr = iteratorToInst(Context.getCur());
1861 
1862       // Check if the previous TempBaseReg is clobbered, and reset if needed.
1863       Legalizer.resetTempBaseIfClobberedBy(CurInstr);
1864 
1865       if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) {
1866         Legalizer.legalizeMov(MovInstr);
1867       } else if (auto *LdrInstr = llvm::dyn_cast<InstARM32Ldr>(CurInstr)) {
1868         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
1869                 llvm::cast<OperandARM32Mem>(LdrInstr->getSrc(0)))) {
1870           _ldr(CurInstr->getDest(), LegalMem, LdrInstr->getPredicate());
1871           CurInstr->setDeleted();
1872         }
1873       } else if (auto *LdrexInstr = llvm::dyn_cast<InstARM32Ldrex>(CurInstr)) {
1874         constexpr bool DisallowOffsetsBecauseLdrex = false;
1875         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
1876                 llvm::cast<OperandARM32Mem>(LdrexInstr->getSrc(0)),
1877                 DisallowOffsetsBecauseLdrex)) {
1878           _ldrex(CurInstr->getDest(), LegalMem, LdrexInstr->getPredicate());
1879           CurInstr->setDeleted();
1880         }
1881       } else if (auto *StrInstr = llvm::dyn_cast<InstARM32Str>(CurInstr)) {
1882         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
1883                 llvm::cast<OperandARM32Mem>(StrInstr->getSrc(1)))) {
1884           _str(llvm::cast<Variable>(CurInstr->getSrc(0)), LegalMem,
1885                StrInstr->getPredicate());
1886           CurInstr->setDeleted();
1887         }
1888       } else if (auto *StrexInstr = llvm::dyn_cast<InstARM32Strex>(CurInstr)) {
1889         constexpr bool DisallowOffsetsBecauseStrex = false;
1890         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
1891                 llvm::cast<OperandARM32Mem>(StrexInstr->getSrc(1)),
1892                 DisallowOffsetsBecauseStrex)) {
1893           _strex(CurInstr->getDest(), llvm::cast<Variable>(CurInstr->getSrc(0)),
1894                  LegalMem, StrexInstr->getPredicate());
1895           CurInstr->setDeleted();
1896         }
1897       }
1898 
1899       // Sanity-check: the Legalizer will either have no Temp, or it will be
1900       // bound to IP.
1901       Legalizer.assertNoTempOrAssignedToIP();
1902     }
1903   }
1904 }
1905 
1906 Operand *TargetARM32::loOperand(Operand *Operand) {
1907   assert(Operand->getType() == IceType_i64);
1908   if (Operand->getType() != IceType_i64)
1909     return Operand;
1910   if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
1911     return Var64On32->getLo();
1912   if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand))
1913     return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
1914   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
1915     // Conservatively disallow memory operands with side-effects (pre/post
1916     // increment) in case of duplication.
1917     assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
1918            Mem->getAddrMode() == OperandARM32Mem::NegOffset);
1919     if (Mem->isRegReg()) {
1920       Variable *IndexR = legalizeToReg(Mem->getIndex());
1921       return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), IndexR,
1922                                      Mem->getShiftOp(), Mem->getShiftAmt(),
1923                                      Mem->getAddrMode());
1924     } else {
1925       return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
1926                                      Mem->getOffset(), Mem->getAddrMode());
1927     }
1928   }
1929   llvm::report_fatal_error("Unsupported operand type");
1930   return nullptr;
1931 }
1932 
1933 Operand *TargetARM32::hiOperand(Operand *Operand) {
1934   assert(Operand->getType() == IceType_i64);
1935   if (Operand->getType() != IceType_i64)
1936     return Operand;
1937   if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
1938     return Var64On32->getHi();
1939   if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1940     return Ctx->getConstantInt32(
1941         static_cast<uint32_t>(Const->getValue() >> 32));
1942   }
1943   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
1944     // Conservatively disallow memory operands with side-effects in case of
1945     // duplication.
1946     assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
1947            Mem->getAddrMode() == OperandARM32Mem::NegOffset);
1948     const Type SplitType = IceType_i32;
1949     if (Mem->isRegReg()) {
1950       // We have to make a temp variable T, and add 4 to either Base or Index.
1951       // The Index may be shifted, so adding 4 can mean something else. Thus,
1952       // prefer T := Base + 4, and use T as the new Base.
1953       Variable *Base = Mem->getBase();
1954       Constant *Four = Ctx->getConstantInt32(4);
1955       Variable *NewBase = Func->makeVariable(Base->getType());
1956       lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
1957                                              Base, Four));
1958       Variable *BaseR = legalizeToReg(NewBase);
1959       Variable *IndexR = legalizeToReg(Mem->getIndex());
1960       return OperandARM32Mem::create(Func, SplitType, BaseR, IndexR,
1961                                      Mem->getShiftOp(), Mem->getShiftAmt(),
1962                                      Mem->getAddrMode());
1963     } else {
1964       Variable *Base = Mem->getBase();
1965       ConstantInteger32 *Offset = Mem->getOffset();
1966       assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
1967       int32_t NextOffsetVal = Offset->getValue() + 4;
1968       constexpr bool ZeroExt = false;
1969       if (!OperandARM32Mem::canHoldOffset(SplitType, ZeroExt, NextOffsetVal)) {
1970         // We have to make a temp variable and add 4 to either Base or Offset.
1971         // If we add 4 to Offset, this will convert a non-RegReg addressing
1972         // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
1973         // RegReg addressing modes, prefer adding to base and replacing
1974         // instead. Thus we leave the old offset alone.
1975         Constant *_4 = Ctx->getConstantInt32(4);
1976         Variable *NewBase = Func->makeVariable(Base->getType());
1977         lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
1978                                                NewBase, Base, _4));
1979         Base = NewBase;
1980       } else {
1981         Offset =
1982             llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
1983       }
1984       Variable *BaseR = legalizeToReg(Base);
1985       return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,
1986                                      Mem->getAddrMode());
1987     }
1988   }
1989   llvm::report_fatal_error("Unsupported operand type");
1990   return nullptr;
1991 }
1992 
1993 SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
1994                                            RegSetMask Exclude) const {
1995   SmallBitVector Registers(RegARM32::Reg_NUM);
1996 
1997   for (uint32_t i = 0; i < RegARM32::Reg_NUM; ++i) {
1998     const auto &Entry = RegARM32::RegTable[i];
1999     if (Entry.Scratch && (Include & RegSet_CallerSave))
2000       Registers[i] = true;
2001     if (Entry.Preserved && (Include & RegSet_CalleeSave))
2002       Registers[i] = true;
2003     if (Entry.StackPtr && (Include & RegSet_StackPointer))
2004       Registers[i] = true;
2005     if (Entry.FramePtr && (Include & RegSet_FramePointer))
2006       Registers[i] = true;
2007     if (Entry.Scratch && (Exclude & RegSet_CallerSave))
2008       Registers[i] = false;
2009     if (Entry.Preserved && (Exclude & RegSet_CalleeSave))
2010       Registers[i] = false;
2011     if (Entry.StackPtr && (Exclude & RegSet_StackPointer))
2012       Registers[i] = false;
2013     if (Entry.FramePtr && (Exclude & RegSet_FramePointer))
2014       Registers[i] = false;
2015   }
2016 
2017   return Registers;
2018 }
2019 
2020 void TargetARM32::lowerAlloca(const InstAlloca *Instr) {
2021   // Conservatively require the stack to be aligned. Some stack adjustment
2022   // operations implemented below assume that the stack is aligned before the
2023   // alloca. All the alloca code ensures that the stack alignment is preserved
2024   // after the alloca. The stack alignment restriction can be relaxed in some
2025   // cases.
2026   NeedsStackAlignment = true;
2027 
2028   // For default align=0, set it to the real value 1, to avoid any
2029   // bit-manipulation problems below.
2030   const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
2031 
2032   // LLVM enforces power of 2 alignment.
2033   assert(llvm::isPowerOf2_32(AlignmentParam));
2034   assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
2035 
2036   const uint32_t Alignment =
2037       std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
2038   const bool OverAligned = Alignment > ARM32_STACK_ALIGNMENT_BYTES;
2039   const bool OptM1 = Func->getOptLevel() == Opt_m1;
2040   const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
2041   const bool UseFramePointer =
2042       hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
2043 
2044   if (UseFramePointer)
2045     setHasFramePointer();
2046 
2047   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2048   if (OverAligned) {
2049     alignRegisterPow2(SP, Alignment);
2050   }
2051 
2052   Variable *Dest = Instr->getDest();
2053   Operand *TotalSize = Instr->getSizeInBytes();
2054 
2055   if (const auto *ConstantTotalSize =
2056           llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
2057     const uint32_t Value =
2058         Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
2059     // Constant size alloca.
2060     if (!UseFramePointer) {
2061       // If we don't need a Frame Pointer, this alloca has a known offset to the
2062       // stack pointer. We don't need to adjust the stack pointer, nor assign any
2063       // value to Dest, as Dest is rematerializable.
2064       assert(Dest->isRematerializable());
2065       FixedAllocaSizeBytes += Value;
2066       Context.insert<InstFakeDef>(Dest);
2067       return;
2068     }
2069 
2070     // If a frame pointer is required, then we need to store the alloca'd result
2071     // in Dest.
2072     Operand *SubAmountRF =
2073         legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
2074     _sub(SP, SP, SubAmountRF);
2075   } else {
2076     // Non-constant sizes need to be adjusted to the next highest multiple of
2077     // the required alignment at runtime.
2078     TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
2079     Variable *T = makeReg(IceType_i32);
2080     _mov(T, TotalSize);
2081     Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
2082     _add(T, T, AddAmount);
2083     alignRegisterPow2(T, Alignment);
2084     _sub(SP, SP, T);
2085   }
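       //
       // Illustrative example: "alloca 12, align 16" in a frame-pointer-based
       // function rounds the size up to 16 and emits "sub sp, sp, #16"; the
       // result is then computed below.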
2086 
2087   // Add the out-args size back so the result points above the out-args area.
2088   Variable *T = SP;
2089   if (MaxOutArgsSizeBytes != 0) {
2090     T = makeReg(getPointerType());
2091     Operand *OutArgsSizeRF = legalize(
2092         Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
2093     _add(T, SP, OutArgsSizeRF);
2094   }
2095 
2096   _mov(Dest, T);
2097 }
2098 
2099 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
2100   if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
2101     return;
2102   Variable *SrcLoReg = legalizeToReg(SrcLo);
2103   switch (Ty) {
2104   default:
2105     llvm_unreachable(
2106         ("Unexpected type in div0Check: " + typeStdString(Ty)).c_str());
2107   case IceType_i8:
2108   case IceType_i16: {
2109     Operand *ShAmtImm = shAmtImm(32 - getScalarIntBitWidth(Ty));
2110     Variable *T = makeReg(IceType_i32);
2111     _lsls(T, SrcLoReg, ShAmtImm);
2112     Context.insert<InstFakeUse>(T);
2113   } break;
2114   case IceType_i32: {
2115     _tst(SrcLoReg, SrcLoReg);
2116     break;
2117   }
2118   case IceType_i64: {
2119     Variable *T = makeReg(IceType_i32);
2120     _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
2121     // T isn't going to be used, but we need the side-effect of setting flags
2122     // from this operation.
2123     Context.insert<InstFakeUse>(T);
2124   }
2125   }
2126   auto *Label = InstARM32Label::create(Func, this);
2127   _br(Label, CondARM32::NE);
2128   _trap();
2129   Context.insert(Label);
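       // For an i64 divisor, the sequence above is roughly (illustrative):
       //   orrs t, lo, hi
       //   bne  .Lnonzero
       //   <trap>
       // .Lnonzero: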
2130 }
2131 
2132 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
2133                                Operand *Src1, ExtInstr ExtFunc,
2134                                DivInstr DivFunc, bool IsRemainder) {
2135   div0Check(Dest->getType(), Src1, nullptr);
2136   Variable *Src1R = legalizeToReg(Src1);
2137   Variable *T0R = Src0R;
2138   Variable *T1R = Src1R;
2139   if (Dest->getType() != IceType_i32) {
2140     T0R = makeReg(IceType_i32);
2141     (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
2142     T1R = makeReg(IceType_i32);
2143     (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
2144   }
2145   if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
2146     (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
2147     if (IsRemainder) {
2148       Variable *T2 = makeReg(IceType_i32);
2149       _mls(T2, T, T1R, T0R);
2150       T = T2;
2151     }
2152     _mov(Dest, T);
2153   } else {
2154     llvm::report_fatal_error("div should have already been turned into a call");
2155   }
2156 }
2157 
2158 TargetARM32::SafeBoolChain
2159 TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Instr) {
2160   Variable *Dest = Instr->getDest();
2161   assert(Dest->getType() == IceType_i1);
2162 
2163   // So folding didn't work for Instr. Not a problem: We just need to
2164   // materialize the Sources, and perform the operation. We create regular
2165   // Variables (and not infinite-weight ones) because this call might recurse a
2166   // lot, and we might end up with tons of infinite weight temporaries.
2167   assert(Instr->getSrcSize() == 2);
2168   Variable *Src0 = Func->makeVariable(IceType_i1);
2169   SafeBoolChain Src0Safe = lowerInt1(Src0, Instr->getSrc(0));
2170 
2171   Operand *Src1 = Instr->getSrc(1);
2172   SafeBoolChain Src1Safe = SBC_Yes;
2173 
2174   if (!llvm::isa<Constant>(Src1)) {
2175     Variable *Src1V = Func->makeVariable(IceType_i1);
2176     Src1Safe = lowerInt1(Src1V, Src1);
2177     Src1 = Src1V;
2178   }
2179 
2180   Variable *T = makeReg(IceType_i1);
2181   Src0 = legalizeToReg(Src0);
2182   Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
2183   switch (Instr->getOp()) {
2184   default:
2185     // If this fatal error is ever hit, add the offending operation to the
2186     // list of valid consumers.
2187     llvm::report_fatal_error("Unhandled i1 Op");
2188   case InstArithmetic::And:
2189     _and(T, Src0, Src1RF);
2190     break;
2191   case InstArithmetic::Or:
2192     _orr(T, Src0, Src1RF);
2193     break;
2194   case InstArithmetic::Xor:
2195     _eor(T, Src0, Src1RF);
2196     break;
2197   }
2198   _mov(Dest, T);
2199   return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
2200 }
2201 
2202 namespace {
2203 // NumericOperands is used during arithmetic/icmp lowering for constant folding.
2204 // It holds the two source operands, and maintains some state as to whether
2205 // one of them is a constant. If one of the operands is a constant, then it
2206 // will be stored as the operation's second source, with a bit indicating
2207 // whether the operands were swapped.
2208 //
2209 // The class is split into a base class with operand type-independent methods,
2210 // and a derived, templated class, for each type of operand we want to fold
2211 // constants for:
2212 //
2213 // NumericOperandsBase --> NumericOperands<ConstantFloat>
2214 //                     --> NumericOperands<ConstantDouble>
2215 //                     --> NumericOperands<ConstantInteger32>
2216 //
2217 // NumericOperands<ConstantInteger32> also exposes helper methods for emitting
2218 // inverted/negated immediates.
2219 class NumericOperandsBase {
2220   NumericOperandsBase() = delete;
2221   NumericOperandsBase(const NumericOperandsBase &) = delete;
2222   NumericOperandsBase &operator=(const NumericOperandsBase &) = delete;
2223 
2224 public:
2225   NumericOperandsBase(Operand *S0, Operand *S1)
2226       : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)),
2227         Swapped(Src0 == S1 && S0 != S1) {
2228     assert(Src0 != nullptr);
2229     assert(Src1 != nullptr);
2230     assert(Src0 != Src1 || S0 == S1);
2231   }
2232 
2233   bool hasConstOperand() const {
2234     return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1);
2235   }
2236 
2237   bool swappedOperands() const { return Swapped; }
2238 
2239   Variable *src0R(TargetARM32 *Target) const {
2240     return legalizeToReg(Target, Src0);
2241   }
2242 
2243   Variable *unswappedSrc0R(TargetARM32 *Target) const {
2244     return legalizeToReg(Target, Swapped ? Src1 : Src0);
2245   }
2246 
2247   Operand *src1RF(TargetARM32 *Target) const {
2248     return legalizeToRegOrFlex(Target, Src1);
2249   }
2250 
2251   Variable *unswappedSrc1R(TargetARM32 *Target) const {
2252     return legalizeToReg(Target, Swapped ? Src0 : Src1);
2253   }
2254 
2255   Operand *src1() const { return Src1; }
2256 
2257 protected:
2258   Operand *const Src0;
2259   Operand *const Src1;
2260   const bool Swapped;
2261 
2262   static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) {
2263     return Target->legalizeToReg(Src);
2264   }
2265 
2266   static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) {
2267     return Target->legalize(Src,
2268                             TargetARM32::Legal_Reg | TargetARM32::Legal_Flex);
2269   }
2270 
2271 private:
2272   static Operand *NonConstOperand(Operand *S0, Operand *S1) {
2273     if (!llvm::isa<Constant>(S0))
2274       return S0;
2275     if (!llvm::isa<Constant>(S1))
2276       return S1;
2277     if (llvm::isa<ConstantRelocatable>(S1) &&
2278         !llvm::isa<ConstantRelocatable>(S0))
2279       return S1;
2280     return S0;
2281   }
2282 
2283   static Operand *ConstOperand(Operand *S0, Operand *S1) {
2284     if (!llvm::isa<Constant>(S0))
2285       return S1;
2286     if (!llvm::isa<Constant>(S1))
2287       return S0;
2288     if (llvm::isa<ConstantRelocatable>(S1) &&
2289         !llvm::isa<ConstantRelocatable>(S0))
2290       return S0;
2291     return S1;
2292   }
2293 };
2294 
2295 template <typename C> class NumericOperands : public NumericOperandsBase {
2296   NumericOperands() = delete;
2297   NumericOperands(const NumericOperands &) = delete;
2298   NumericOperands &operator=(const NumericOperands &) = delete;
2299 
2300 public:
2301   NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) {
2302     assert(!hasConstOperand() || llvm::isa<C>(this->Src1));
2303   }
2304 
2305   typename C::PrimType getConstantValue() const {
2306     return llvm::cast<C>(Src1)->getValue();
2307   }
2308 };
2309 
2310 using FloatOperands = NumericOperands<ConstantFloat>;
2311 using DoubleOperands = NumericOperands<ConstantDouble>;
2312 
2313 class Int32Operands : public NumericOperands<ConstantInteger32> {
2314   Int32Operands() = delete;
2315   Int32Operands(const Int32Operands &) = delete;
2316   Int32Operands &operator=(const Int32Operands &) = delete;
2317 
2318 public:
2319   Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {}
2320 
2321   Operand *unswappedSrc1RShAmtImm(TargetARM32 *Target) const {
2322     if (!swappedOperands() && hasConstOperand()) {
2323       return Target->shAmtImm(getConstantValue() & 0x1F);
2324     }
2325     return legalizeToReg(Target, Swapped ? Src0 : Src1);
2326   }
2327 
2328   bool isSrc1ImmediateZero() const {
2329     if (!swappedOperands() && hasConstOperand()) {
2330       return getConstantValue() == 0;
2331     }
2332     return false;
2333   }
2334 
2335   bool immediateIsFlexEncodable() const {
2336     uint32_t Rotate, Imm8;
2337     return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8);
2338   }
2339 
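       // Background note: an ARM flexible immediate is an 8-bit value rotated
       // right by an even amount, so, e.g., 0xFF, 0x3FC, and 0xFF000000 are
       // encodable while 0x101 is not. When Src1 itself is not encodable, its
       // negation or bitwise inverse sometimes is; the helpers below let the
       // lowering exploit that (e.g., turning an add into a sub).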
2340   bool negatedImmediateIsFlexEncodable() const {
2341     uint32_t Rotate, Imm8;
2342     return OperandARM32FlexImm::canHoldImm(
2343         -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8);
2344   }
2345 
2346   Operand *negatedSrc1F(TargetARM32 *Target) const {
2347     return legalizeToRegOrFlex(Target,
2348                                Target->getCtx()->getConstantInt32(
2349                                    -static_cast<int32_t>(getConstantValue())));
2350   }
2351 
2352   bool invertedImmediateIsFlexEncodable() const {
2353     uint32_t Rotate, Imm8;
2354     return OperandARM32FlexImm::canHoldImm(
2355         ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8);
2356   }
2357 
2358   Operand *invertedSrc1F(TargetARM32 *Target) const {
2359     return legalizeToRegOrFlex(Target,
2360                                Target->getCtx()->getConstantInt32(
2361                                    ~static_cast<uint32_t>(getConstantValue())));
2362   }
2363 };
2364 } // end of anonymous namespace
2365 
2366 void TargetARM32::preambleDivRem(const InstCall *Instr) {
2367   Operand *Src1 = Instr->getArg(1);
2368 
2369   switch (Src1->getType()) {
2370   default:
2371     llvm::report_fatal_error("Invalid type for idiv.");
2372   case IceType_i64: {
2373     if (auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2374       if (C->getValue() == 0) {
2375         _trap();
2376         return;
2377       }
2378     }
2379     div0Check(IceType_i64, loOperand(Src1), hiOperand(Src1));
2380     return;
2381   }
2382   case IceType_i32: {
2383     // Src0 and Src1 have already been appropriately extended to an i32, so we
2384     // don't check for i8 and i16.
2385     if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2386       if (C->getValue() == 0) {
2387         _trap();
2388         return;
2389       }
2390     }
2391     div0Check(IceType_i32, Src1, nullptr);
2392     return;
2393   }
2394   }
2395 }
2396 
2397 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
2398                                        Variable *Dest, Operand *Src0,
2399                                        Operand *Src1) {
2400   Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
2401   Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
2402   assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
2403   assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
2404 
2405   auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2406   auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2407   Variable *T_Lo = makeReg(DestLo->getType());
2408   Variable *T_Hi = makeReg(DestHi->getType());
2409 
2410   switch (Op) {
2411   case InstArithmetic::_num:
2412     llvm::report_fatal_error("Unknown arithmetic operator");
2413     return;
2414   case InstArithmetic::Add: {
2415     Variable *Src0LoR = SrcsLo.src0R(this);
2416     Operand *Src1LoRF = SrcsLo.src1RF(this);
2417     Variable *Src0HiR = SrcsHi.src0R(this);
2418     Operand *Src1HiRF = SrcsHi.src1RF(this);
2419     _adds(T_Lo, Src0LoR, Src1LoRF);
2420     _mov(DestLo, T_Lo);
2421     _adc(T_Hi, Src0HiR, Src1HiRF);
2422     _mov(DestHi, T_Hi);
2423     return;
2424   }
2425   case InstArithmetic::And: {
2426     Variable *Src0LoR = SrcsLo.src0R(this);
2427     Operand *Src1LoRF = SrcsLo.src1RF(this);
2428     Variable *Src0HiR = SrcsHi.src0R(this);
2429     Operand *Src1HiRF = SrcsHi.src1RF(this);
2430     _and(T_Lo, Src0LoR, Src1LoRF);
2431     _mov(DestLo, T_Lo);
2432     _and(T_Hi, Src0HiR, Src1HiRF);
2433     _mov(DestHi, T_Hi);
2434     return;
2435   }
2436   case InstArithmetic::Or: {
2437     Variable *Src0LoR = SrcsLo.src0R(this);
2438     Operand *Src1LoRF = SrcsLo.src1RF(this);
2439     Variable *Src0HiR = SrcsHi.src0R(this);
2440     Operand *Src1HiRF = SrcsHi.src1RF(this);
2441     _orr(T_Lo, Src0LoR, Src1LoRF);
2442     _mov(DestLo, T_Lo);
2443     _orr(T_Hi, Src0HiR, Src1HiRF);
2444     _mov(DestHi, T_Hi);
2445     return;
2446   }
2447   case InstArithmetic::Xor: {
2448     Variable *Src0LoR = SrcsLo.src0R(this);
2449     Operand *Src1LoRF = SrcsLo.src1RF(this);
2450     Variable *Src0HiR = SrcsHi.src0R(this);
2451     Operand *Src1HiRF = SrcsHi.src1RF(this);
2452     _eor(T_Lo, Src0LoR, Src1LoRF);
2453     _mov(DestLo, T_Lo);
2454     _eor(T_Hi, Src0HiR, Src1HiRF);
2455     _mov(DestHi, T_Hi);
2456     return;
2457   }
2458   case InstArithmetic::Sub: {
2459     Variable *Src0LoR = SrcsLo.src0R(this);
2460     Operand *Src1LoRF = SrcsLo.src1RF(this);
2461     Variable *Src0HiR = SrcsHi.src0R(this);
2462     Operand *Src1HiRF = SrcsHi.src1RF(this);
2463     if (SrcsLo.swappedOperands()) {
2464       _rsbs(T_Lo, Src0LoR, Src1LoRF);
2465       _mov(DestLo, T_Lo);
2466       _rsc(T_Hi, Src0HiR, Src1HiRF);
2467       _mov(DestHi, T_Hi);
2468     } else {
2469       _subs(T_Lo, Src0LoR, Src1LoRF);
2470       _mov(DestLo, T_Lo);
2471       _sbc(T_Hi, Src0HiR, Src1HiRF);
2472       _mov(DestHi, T_Hi);
2473     }
2474     return;
2475   }
2476   case InstArithmetic::Mul: {
2477     // GCC 4.8 does:
2478     // a=b*c ==>
2479     //   t_acc =(mul) (b.lo * c.hi)
2480     //   t_acc =(mla) (c.lo * b.hi) + t_acc
2481     //   t.hi,t.lo =(umull) b.lo * c.lo
2482     //   t.hi += t_acc
2483     //   a.lo = t.lo
2484     //   a.hi = t.hi
2485     //
2486     // LLVM does:
2487     //   t.hi,t.lo =(umull) b.lo * c.lo
2488     //   t.hi =(mla) (b.lo * c.hi) + t.hi
2489     //   t.hi =(mla) (b.hi * c.lo) + t.hi
2490     //   a.lo = t.lo
2491     //   a.hi = t.hi
2492     //
2493     // LLVM's lowering has fewer instructions, but more register pressure:
2494     // t.lo is live from beginning to end, while GCC delays the two-dest
2495     // instruction till the end, and kills c.hi immediately.
2496     Variable *T_Acc = makeReg(IceType_i32);
2497     Variable *T_Acc1 = makeReg(IceType_i32);
2498     Variable *T_Hi1 = makeReg(IceType_i32);
2499     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2500     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2501     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2502     Variable *Src1RHi = SrcsHi.unswappedSrc1R(this);
2503     _mul(T_Acc, Src0RLo, Src1RHi);
2504     _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
2505     _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
2506     _add(T_Hi, T_Hi1, T_Acc1);
2507     _mov(DestLo, T_Lo);
2508     _mov(DestHi, T_Hi);
2509     return;
2510   }
2511   case InstArithmetic::Shl: {
2512     if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2513       Variable *Src0RLo = SrcsLo.src0R(this);
2514       // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2515       const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
2516       if (ShAmtImm == 0) {
2517         _mov(DestLo, Src0RLo);
2518         _mov(DestHi, SrcsHi.src0R(this));
2519         return;
2520       }
2521 
2522       if (ShAmtImm >= 32) {
2523         if (ShAmtImm == 32) {
2524           _mov(DestHi, Src0RLo);
2525         } else {
2526           Operand *ShAmtOp = shAmtImm(ShAmtImm - 32);
2527           _lsl(T_Hi, Src0RLo, ShAmtOp);
2528           _mov(DestHi, T_Hi);
2529         }
2530 
2531         Operand *_0 =
2532             legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2533         _mov(T_Lo, _0);
2534         _mov(DestLo, T_Lo);
2535         return;
2536       }
2537 
2538       Variable *Src0RHi = SrcsHi.src0R(this);
2539       Operand *ShAmtOp = shAmtImm(ShAmtImm);
2540       Operand *ComplShAmtOp = shAmtImm(32 - ShAmtImm);
2541       _lsl(T_Hi, Src0RHi, ShAmtOp);
2542       _orr(T_Hi, T_Hi,
2543            OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
2544                                        OperandARM32::LSR, ComplShAmtOp));
2545       _mov(DestHi, T_Hi);
2546 
2547       _lsl(T_Lo, Src0RLo, ShAmtOp);
2548       _mov(DestLo, T_Lo);
2549       return;
2550     }
2551 
2552     // a=b<<c ==>
2553     // pnacl-llc does:
2554     // mov     t_b.lo, b.lo
2555     // mov     t_b.hi, b.hi
2556     // mov     t_c.lo, c.lo
2557     // rsb     T0, t_c.lo, #32
2558     // lsr     T1, t_b.lo, T0
2559     // orr     t_a.hi, T1, t_b.hi, lsl t_c.lo
2560     // sub     T2, t_c.lo, #32
2561     // cmp     T2, #0
2562     // lslge   t_a.hi, t_b.lo, T2
2563     // lsl     t_a.lo, t_b.lo, t_c.lo
2564     // mov     a.lo, t_a.lo
2565     // mov     a.hi, t_a.hi
2566     //
2567     // GCC 4.8 does:
2568     // sub t_c1, c.lo, #32
2569     // lsl t_hi, b.hi, c.lo
2570     // orr t_hi, t_hi, b.lo, lsl t_c1
2571     // rsb t_c2, c.lo, #32
2572     // orr t_hi, t_hi, b.lo, lsr t_c2
2573     // lsl t_lo, b.lo, c.lo
2574     // a.lo = t_lo
2575     // a.hi = t_hi
2576     //
2577     // These are incompatible; therefore, we mimic pnacl-llc.
2578     // Can be strength-reduced for constant-shifts, but we don't do that for
2579     // now.
2580     // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
2581     // ARM, shifts only take the lower 8 bits of the shift register, and
2582     // saturate to the range 0-32, so the negative value will saturate to 32.
2583     Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2584     Operand *_0 =
2585         legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2586     Variable *T0 = makeReg(IceType_i32);
2587     Variable *T1 = makeReg(IceType_i32);
2588     Variable *T2 = makeReg(IceType_i32);
2589     Variable *TA_Hi = makeReg(IceType_i32);
2590     Variable *TA_Lo = makeReg(IceType_i32);
2591     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2592     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2593     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2594     _rsb(T0, Src1RLo, _32);
2595     _lsr(T1, Src0RLo, T0);
2596     _orr(TA_Hi, T1,
2597          OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2598                                      OperandARM32::LSL, Src1RLo));
2599     _sub(T2, Src1RLo, _32);
2600     _cmp(T2, _0);
2601     _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
2602     _set_dest_redefined();
2603     _lsl(TA_Lo, Src0RLo, Src1RLo);
2604     _mov(DestLo, TA_Lo);
2605     _mov(DestHi, TA_Hi);
2606     return;
2607   }
2608   case InstArithmetic::Lshr:
2609   case InstArithmetic::Ashr: {
2610     const bool ASR = Op == InstArithmetic::Ashr;
2611     if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2612       Variable *Src0RHi = SrcsHi.src0R(this);
2613       // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2614       const int32_t ShAmt = SrcsLo.getConstantValue() & 0x3F;
2615       if (ShAmt == 0) {
2616         _mov(DestHi, Src0RHi);
2617         _mov(DestLo, SrcsLo.src0R(this));
2618         return;
2619       }
2620 
2621       if (ShAmt >= 32) {
2622         if (ShAmt == 32) {
2623           _mov(DestLo, Src0RHi);
2624         } else {
2625           Operand *ShAmtImm = shAmtImm(ShAmt - 32);
2626           if (ASR) {
2627             _asr(T_Lo, Src0RHi, ShAmtImm);
2628           } else {
2629             _lsr(T_Lo, Src0RHi, ShAmtImm);
2630           }
2631           _mov(DestLo, T_Lo);
2632         }
2633 
2634         if (ASR) {
2635           Operand *_31 = shAmtImm(31);
2636           _asr(T_Hi, Src0RHi, _31);
2637         } else {
2638           Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
2639                                  Legal_Reg | Legal_Flex);
2640           _mov(T_Hi, _0);
2641         }
2642         _mov(DestHi, T_Hi);
2643         return;
2644       }
2645 
2646       Variable *Src0RLo = SrcsLo.src0R(this);
2647       Operand *ShAmtImm = shAmtImm(ShAmt);
2648       Operand *ComplShAmtImm = shAmtImm(32 - ShAmt);
2649       _lsr(T_Lo, Src0RLo, ShAmtImm);
2650       _orr(T_Lo, T_Lo,
2651            OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2652                                        OperandARM32::LSL, ComplShAmtImm));
2653       _mov(DestLo, T_Lo);
2654 
2655       if (ASR) {
2656         _asr(T_Hi, Src0RHi, ShAmtImm);
2657       } else {
2658         _lsr(T_Hi, Src0RHi, ShAmtImm);
2659       }
2660       _mov(DestHi, T_Hi);
2661       return;
2662     }
2663 
2664     // a=b>>c
2665     // pnacl-llc does:
2666     // mov        t_b.lo, b.lo
2667     // mov        t_b.hi, b.hi
2668     // mov        t_c.lo, c.lo
2669     // lsr        T0, t_b.lo, t_c.lo
2670     // rsb        T1, t_c.lo, #32
2671     // orr        t_a.lo, T0, t_b.hi, lsl T1
2672     // sub        T2, t_c.lo, #32
2673     // cmp        T2, #0
2674     // [al]srge   t_a.lo, t_b.hi, T2
2675     // [al]sr     t_a.hi, t_b.hi, t_c.lo
2676     // mov        a.lo, t_a.lo
2677     // mov        a.hi, t_a.hi
2678     //
2679     // GCC 4.8 does (lsr):
2680     // rsb        t_c1, c.lo, #32
2681     // lsr        t_lo, b.lo, c.lo
2682     // orr        t_lo, t_lo, b.hi, lsl t_c1
2683     // sub        t_c2, c.lo, #32
2684     // orr        t_lo, t_lo, b.hi, lsr t_c2
2685     // lsr        t_hi, b.hi, c.lo
2686     // mov        a.lo, t_lo
2687     // mov        a.hi, t_hi
2688     //
2689     // These are incompatible; therefore, we mimic pnacl-llc.
2690     Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2691     Operand *_0 =
2692         legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2693     Variable *T0 = makeReg(IceType_i32);
2694     Variable *T1 = makeReg(IceType_i32);
2695     Variable *T2 = makeReg(IceType_i32);
2696     Variable *TA_Lo = makeReg(IceType_i32);
2697     Variable *TA_Hi = makeReg(IceType_i32);
2698     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2699     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2700     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2701     _lsr(T0, Src0RLo, Src1RLo);
2702     _rsb(T1, Src1RLo, _32);
2703     _orr(TA_Lo, T0,
2704          OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2705                                      OperandARM32::LSL, T1));
2706     _sub(T2, Src1RLo, _32);
2707     _cmp(T2, _0);
2708     if (ASR) {
2709       _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2710       _set_dest_redefined();
2711       _asr(TA_Hi, Src0RHi, Src1RLo);
2712     } else {
2713       _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2714       _set_dest_redefined();
2715       _lsr(TA_Hi, Src0RHi, Src1RLo);
2716     }
2717     _mov(DestLo, TA_Lo);
2718     _mov(DestHi, TA_Hi);
2719     return;
2720   }
2721   case InstArithmetic::Fadd:
2722   case InstArithmetic::Fsub:
2723   case InstArithmetic::Fmul:
2724   case InstArithmetic::Fdiv:
2725   case InstArithmetic::Frem:
2726     llvm::report_fatal_error("FP instruction with i64 type");
2727     return;
2728   case InstArithmetic::Udiv:
2729   case InstArithmetic::Sdiv:
2730   case InstArithmetic::Urem:
2731   case InstArithmetic::Srem:
2732     llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
2733                              "should have already been handled before");
2734     return;
2735   }
2736 }
2737 
2738 namespace {
2739 // StrengthReduction is a namespace with the strength reduction machinery. The
2740 // entry point is the StrengthReduction::tryToOptimize method. It returns true
2741 // if the optimization can be performed, and false otherwise.
2742 //
2743 // If the optimization can be performed, tryToOptimize sets its NumOperations
2744 // parameter to the number of shifts that are needed to perform the
2745 // multiplication; and it sets the Operations parameter with <ShAmt, AddOrSub>
2746 // tuples that describe how to materialize the multiplication.
2747 //
2748 // The algorithm finds contiguous 1s in the Multiplication source, and uses one
2749 // or two shifts to materialize it. A sequence of 1s, e.g.,
2750 //
2751 //                  M           N
2752 //   ...00000000000011111...111110000000...
2753 //
2754 // is materializable with (1 << (M + 1)) - (1 << N):
2755 //
2756 //   ...00000000000100000...000000000000...      [1 << (M + 1)]
2757 //   ...00000000000000000...000010000000... (-)  [1 << N]
2758 //   --------------------------------------
2759 //   ...00000000000011111...111110000000...
2760 //
2761 // A single bit set is materialized with just a left shift.
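// For example (illustrative): Src = 28 = 0b11100 is a run with M = 4, N = 2,
// so x * 28 becomes (x << 5) - (x << 2); Src = 20 = 0b10100 has two single
// bits, so x * 20 becomes (x << 4) + (x << 2).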
2762 namespace StrengthReduction {
2763 enum AggregationOperation {
2764   AO_Invalid,
2765   AO_Add,
2766   AO_Sub,
2767 };
2768 
2769 // AggregationElement is a glorified <ShAmt, AddOrSub> tuple.
2770 class AggregationElement {
2771   AggregationElement(const AggregationElement &) = delete;
2772 
2773 public:
2774   AggregationElement() = default;
2775   AggregationElement &operator=(const AggregationElement &) = default;
2776   AggregationElement(AggregationOperation Op, uint32_t ShAmt)
2777       : Op(Op), ShAmt(ShAmt) {}
2778 
2779   Operand *createShiftedOperand(Cfg *Func, Variable *OpR) const {
2780     assert(OpR->mustHaveReg());
2781     if (ShAmt == 0) {
2782       return OpR;
2783     }
2784     return OperandARM32FlexReg::create(
2785         Func, IceType_i32, OpR, OperandARM32::LSL,
2786         OperandARM32ShAmtImm::create(
2787             Func, llvm::cast<ConstantInteger32>(
2788                       Func->getContext()->getConstantInt32(ShAmt))));
2789   }
2790 
2791   bool aggregateWithAdd() const {
2792     switch (Op) {
2793     case AO_Invalid:
2794       llvm::report_fatal_error("Invalid Strength Reduction Operations.");
2795     case AO_Add:
2796       return true;
2797     case AO_Sub:
2798       return false;
2799     }
2800     llvm_unreachable("(silence g++ warning)");
2801   }
2802 
2803   uint32_t shAmt() const { return ShAmt; }
2804 
2805 private:
2806   AggregationOperation Op = AO_Invalid;
2807   uint32_t ShAmt;
2808 };
2809 
2810 // [RangeStart, RangeEnd] is a range of 1s in Src.
2811 template <std::size_t N>
2812 bool addOperations(uint32_t RangeStart, uint32_t RangeEnd, SizeT *NumOperations,
2813                    std::array<AggregationElement, N> *Operations) {
2814   assert(*NumOperations < N);
2815   if (RangeStart == RangeEnd) {
2816     // Single bit set:
2817     // Src           : 0...00010...
2818     // RangeStart    :        ^
2819     // RangeEnd      :        ^
2820     // NegSrc        : 0...00001...
2821     (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart);
2822     ++(*NumOperations);
2823     return true;
2824   }
2825 
2826   // Sequence of 1s (two operations required):
2827   // Src           : 0...00011...110...
2828   // RangeStart    :        ^
2829   // RangeEnd      :              ^
2830   // NegSrc        : 0...00000...001...
2831   if (*NumOperations + 1 >= N) {
2832     return false;
2833   }
2834   (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart + 1);
2835   ++(*NumOperations);
2836   (*Operations)[*NumOperations] = AggregationElement(AO_Sub, RangeEnd);
2837   ++(*NumOperations);
2838   return true;
2839 }
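// For instance (illustrative), the range [RangeStart = 4, RangeEnd = 2]
// (bits 4..2 set, i.e., the value 28) records <Add, 5> then <Sub, 2>,
// encoding (1 << 5) - (1 << 2) == 28.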
2840 
2841 // tryToOptimize scans Src looking for sequences of 1s (including a unitary bit
2842 // 1 surrounded by zeroes).
2843 template <std::size_t N>
2844 bool tryToOptimize(uint32_t Src, SizeT *NumOperations,
2845                    std::array<AggregationElement, N> *Operations) {
2846   constexpr uint32_t SrcSizeBits = sizeof(Src) * CHAR_BIT;
2847   uint32_t NegSrc = ~Src;
2848 
2849   *NumOperations = 0;
2850   while (Src != 0 && *NumOperations < N) {
2851     // Each step of the algorithm:
2852     //   * finds L, the last bit set in Src;
2853     //   * clears all the upper bits in NegSrc up to bit L;
2854     //   * finds nL, the last bit set in NegSrc;
2855     //   * clears all the upper bits in Src up to bit nL;
2856     //
2857     // If L == nL + 1, then a unitary 1 was found in Src. Otherwise, a sequence
2858     // of 1s starting at L, and ending at nL + 1, was found.
2859     const uint32_t SrcLastBitSet = llvm::findLastSet(Src);
2860     const uint32_t NegSrcClearMask =
2861         (SrcLastBitSet == 0) ? 0
2862                              : (0xFFFFFFFFu) >> (SrcSizeBits - SrcLastBitSet);
2863     NegSrc &= NegSrcClearMask;
2864     if (NegSrc == 0) {
2865       if (addOperations(SrcLastBitSet, 0, NumOperations, Operations)) {
2866         return true;
2867       }
2868       return false;
2869     }
2870     const uint32_t NegSrcLastBitSet = llvm::findLastSet(NegSrc);
2871     assert(NegSrcLastBitSet < SrcLastBitSet);
2872     const uint32_t SrcClearMask =
2873         (NegSrcLastBitSet == 0)
2874             ? 0
2875             : (0xFFFFFFFFu) >> (SrcSizeBits - NegSrcLastBitSet);
2876     Src &= SrcClearMask;
2877     if (!addOperations(SrcLastBitSet, NegSrcLastBitSet + 1, NumOperations,
2878                        Operations)) {
2879       return false;
2880     }
2881   }
2882 
2883   return Src == 0;
2884 }
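// An illustrative trace: Src = 0b01100110 (102) contains the runs [6, 5] and
// [2, 1], so tryToOptimize records <Add, 7>, <Sub, 5>, <Add, 3>, <Sub, 1>
// (128 - 32 + 8 - 2 == 102) and sets *NumOperations to 4.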
2885 } // end of namespace StrengthReduction
2886 } // end of anonymous namespace
2887 
2888 void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
2889   Variable *Dest = Instr->getDest();
2890 
2891   if (Dest->isRematerializable()) {
2892     Context.insert<InstFakeDef>(Dest);
2893     return;
2894   }
2895 
2896   Type DestTy = Dest->getType();
2897   if (DestTy == IceType_i1) {
2898     lowerInt1Arithmetic(Instr);
2899     return;
2900   }
2901 
2902   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
2903   Operand *Src1 = legalizeUndef(Instr->getSrc(1));
2904   if (DestTy == IceType_i64) {
2905     lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1);
2906     return;
2907   }
2908 
2909   if (isVectorType(DestTy)) {
2910     switch (Instr->getOp()) {
2911     default:
2912       UnimplementedLoweringError(this, Instr);
2913       return;
2914     // Explicitly allow vector instructions we have implemented/enabled.
2915     case InstArithmetic::Add:
2916     case InstArithmetic::And:
2917     case InstArithmetic::Ashr:
2918     case InstArithmetic::Fadd:
2919     case InstArithmetic::Fmul:
2920     case InstArithmetic::Fsub:
2921     case InstArithmetic::Lshr:
2922     case InstArithmetic::Mul:
2923     case InstArithmetic::Or:
2924     case InstArithmetic::Shl:
2925     case InstArithmetic::Sub:
2926     case InstArithmetic::Xor:
2927       break;
2928     }
2929   }
2930 
2931   Variable *T = makeReg(DestTy);
2932 
2933   // * Handle div/rem separately. They require a non-legalized Src1 to inspect
2934   // whether or not Src1 is a non-zero constant. Once legalized it is more
2935   // difficult to determine (constant may be moved to a register).
2936   // * Handle floating point arithmetic separately: they require Src1 to be
2937   // legalized to a register.
2938   switch (Instr->getOp()) {
2939   default:
2940     break;
2941   case InstArithmetic::Udiv: {
2942     constexpr bool NotRemainder = false;
2943     Variable *Src0R = legalizeToReg(Src0);
2944     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
2945                  NotRemainder);
2946     return;
2947   }
2948   case InstArithmetic::Sdiv: {
2949     constexpr bool NotRemainder = false;
2950     Variable *Src0R = legalizeToReg(Src0);
2951     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
2952                  NotRemainder);
2953     return;
2954   }
2955   case InstArithmetic::Urem: {
2956     constexpr bool IsRemainder = true;
2957     Variable *Src0R = legalizeToReg(Src0);
2958     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
2959                  IsRemainder);
2960     return;
2961   }
2962   case InstArithmetic::Srem: {
2963     constexpr bool IsRemainder = true;
2964     Variable *Src0R = legalizeToReg(Src0);
2965     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
2966                  IsRemainder);
2967     return;
2968   }
2969   case InstArithmetic::Frem: {
2970     if (!isScalarFloatingType(DestTy)) {
2971       llvm::report_fatal_error("Unexpected type when lowering frem.");
2972     }
2973     llvm::report_fatal_error("Frem should have already been lowered.");
2974   }
2975   case InstArithmetic::Fadd: {
2976     Variable *Src0R = legalizeToReg(Src0);
2977     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2978       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2979       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2980       _vmla(Src0R, Src1R, Src2R);
2981       _mov(Dest, Src0R);
2982       return;
2983     }
2984 
2985     Variable *Src1R = legalizeToReg(Src1);
2986     _vadd(T, Src0R, Src1R);
2987     _mov(Dest, T);
2988     return;
2989   }
2990   case InstArithmetic::Fsub: {
2991     Variable *Src0R = legalizeToReg(Src0);
2992     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2993       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2994       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2995       _vmls(Src0R, Src1R, Src2R);
2996       _mov(Dest, Src0R);
2997       return;
2998     }
2999     Variable *Src1R = legalizeToReg(Src1);
3000     _vsub(T, Src0R, Src1R);
3001     _mov(Dest, T);
3002     return;
3003   }
3004   case InstArithmetic::Fmul: {
3005     Variable *Src0R = legalizeToReg(Src0);
3006     Variable *Src1R = legalizeToReg(Src1);
3007     _vmul(T, Src0R, Src1R);
3008     _mov(Dest, T);
3009     return;
3010   }
3011   case InstArithmetic::Fdiv: {
3012     Variable *Src0R = legalizeToReg(Src0);
3013     Variable *Src1R = legalizeToReg(Src1);
3014     _vdiv(T, Src0R, Src1R);
3015     _mov(Dest, T);
3016     return;
3017   }
3018   }
3019 
3020   // Handle everything else here.
3021   Int32Operands Srcs(Src0, Src1);
3022   switch (Instr->getOp()) {
3023   case InstArithmetic::_num:
3024     llvm::report_fatal_error("Unknown arithmetic operator");
3025     return;
3026   case InstArithmetic::Add: {
3027     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3028       assert(!isVectorType(DestTy));
3029       Variable *Src0R = legalizeToReg(Src0);
3030       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3031       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3032       _mla(T, Src1R, Src2R, Src0R);
3033       _mov(Dest, T);
3034       return;
3035     }
3036 
3037     if (Srcs.hasConstOperand()) {
3038       if (!Srcs.immediateIsFlexEncodable() &&
3039           Srcs.negatedImmediateIsFlexEncodable()) {
3040         assert(!isVectorType(DestTy));
3041         Variable *Src0R = Srcs.src0R(this);
3042         Operand *Src1F = Srcs.negatedSrc1F(this);
3043         if (!Srcs.swappedOperands()) {
3044           _sub(T, Src0R, Src1F);
3045         } else {
3046           _rsb(T, Src0R, Src1F);
3047         }
3048         _mov(Dest, T);
3049         return;
3050       }
3051     }
3052     Variable *Src0R = Srcs.src0R(this);
3053     if (isVectorType(DestTy)) {
3054       Variable *Src1R = legalizeToReg(Src1);
3055       _vadd(T, Src0R, Src1R);
3056     } else {
3057       Operand *Src1RF = Srcs.src1RF(this);
3058       _add(T, Src0R, Src1RF);
3059     }
3060     _mov(Dest, T);
3061     return;
3062   }
3063   case InstArithmetic::And: {
3064     if (Srcs.hasConstOperand()) {
3065       if (!Srcs.immediateIsFlexEncodable() &&
3066           Srcs.invertedImmediateIsFlexEncodable()) {
3067         Variable *Src0R = Srcs.src0R(this);
3068         Operand *Src1F = Srcs.invertedSrc1F(this);
3069         _bic(T, Src0R, Src1F);
3070         _mov(Dest, T);
3071         return;
3072       }
3073     }
3074     assert(isIntegerType(DestTy));
3075     Variable *Src0R = Srcs.src0R(this);
3076     if (isVectorType(DestTy)) {
3077       Variable *Src1R = legalizeToReg(Src1);
3078       _vand(T, Src0R, Src1R);
3079     } else {
3080       Operand *Src1RF = Srcs.src1RF(this);
3081       _and(T, Src0R, Src1RF);
3082     }
3083     _mov(Dest, T);
3084     return;
3085   }
3086   case InstArithmetic::Or: {
3087     Variable *Src0R = Srcs.src0R(this);
3088     assert(isIntegerType(DestTy));
3089     if (isVectorType(DestTy)) {
3090       Variable *Src1R = legalizeToReg(Src1);
3091       _vorr(T, Src0R, Src1R);
3092     } else {
3093       Operand *Src1RF = Srcs.src1RF(this);
3094       _orr(T, Src0R, Src1RF);
3095     }
3096     _mov(Dest, T);
3097     return;
3098   }
3099   case InstArithmetic::Xor: {
3100     Variable *Src0R = Srcs.src0R(this);
3101     assert(isIntegerType(DestTy));
3102     if (isVectorType(DestTy)) {
3103       Variable *Src1R = legalizeToReg(Src1);
3104       _veor(T, Src0R, Src1R);
3105     } else {
3106       Operand *Src1RF = Srcs.src1RF(this);
3107       _eor(T, Src0R, Src1RF);
3108     }
3109     _mov(Dest, T);
3110     return;
3111   }
3112   case InstArithmetic::Sub: {
3113     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3114       assert(!isVectorType(DestTy));
3115       Variable *Src0R = legalizeToReg(Src0);
3116       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3117       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3118       _mls(T, Src1R, Src2R, Src0R);
3119       _mov(Dest, T);
3120       return;
3121     }
3122 
3123     if (Srcs.hasConstOperand()) {
3124       assert(!isVectorType(DestTy));
3125       if (Srcs.immediateIsFlexEncodable()) {
3126         Variable *Src0R = Srcs.src0R(this);
3127         Operand *Src1RF = Srcs.src1RF(this);
3128         if (Srcs.swappedOperands()) {
3129           _rsb(T, Src0R, Src1RF);
3130         } else {
3131           _sub(T, Src0R, Src1RF);
3132         }
3133         _mov(Dest, T);
3134         return;
3135       }
3136       if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
3137         Variable *Src0R = Srcs.src0R(this);
3138         Operand *Src1F = Srcs.negatedSrc1F(this);
3139         _add(T, Src0R, Src1F);
3140         _mov(Dest, T);
3141         return;
3142       }
3143     }
3144     Variable *Src0R = Srcs.unswappedSrc0R(this);
3145     Variable *Src1R = Srcs.unswappedSrc1R(this);
3146     if (isVectorType(DestTy)) {
3147       _vsub(T, Src0R, Src1R);
3148     } else {
3149       _sub(T, Src0R, Src1R);
3150     }
3151     _mov(Dest, T);
3152     return;
3153   }
3154   case InstArithmetic::Mul: {
3155     const bool OptM1 = Func->getOptLevel() == Opt_m1;
3156     if (!OptM1 && Srcs.hasConstOperand()) {
3157       constexpr std::size_t MaxShifts = 4;
3158       std::array<StrengthReduction::AggregationElement, MaxShifts> Shifts;
3159       SizeT NumOperations;
3160       int32_t Const = Srcs.getConstantValue();
3161       const bool Invert = Const < 0;
3162       const bool MultiplyByZero = Const == 0;
3163       Operand *_0 =
3164           legalize(Ctx->getConstantZero(DestTy), Legal_Reg | Legal_Flex);
3165 
3166       if (MultiplyByZero) {
3167         _mov(T, _0);
3168         _mov(Dest, T);
3169         return;
3170       }
3171 
3172       if (Invert) {
3173         Const = -Const;
3174       }
3175 
3176       if (StrengthReduction::tryToOptimize(Const, &NumOperations, &Shifts)) {
3177         assert(NumOperations >= 1);
3178         Variable *Src0R = Srcs.src0R(this);
3179         int32_t Start;
3180         int32_t End;
3181         if (NumOperations == 1 || Shifts[NumOperations - 1].shAmt() != 0) {
3182           // Multiplication by a power of 2 (NumOperations == 1); or
3183           // Multiplication by an even number that is not a power of 2.
3184           Start = 1;
3185           End = NumOperations;
3186           assert(Shifts[0].aggregateWithAdd());
3187           _lsl(T, Src0R, shAmtImm(Shifts[0].shAmt()));
3188         } else {
3189           // Multiplication by an odd number. Put the free barrel shifter to a
3190           // good use.
3191           Start = 0;
3192           End = NumOperations - 2;
3193           const StrengthReduction::AggregationElement &Last =
3194               Shifts[NumOperations - 1];
3195           const StrengthReduction::AggregationElement &SecondToLast =
3196               Shifts[NumOperations - 2];
3197           if (!Last.aggregateWithAdd()) {
3198             assert(SecondToLast.aggregateWithAdd());
3199             _rsb(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3200           } else if (!SecondToLast.aggregateWithAdd()) {
3201             assert(Last.aggregateWithAdd());
3202             _sub(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3203           } else {
3204             _add(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3205           }
3206         }
3207 
3208         // Odd numbers :   S                                 E   I   I
3209         //               +---+---+---+---+---+---+ ... +---+---+---+---+
3210         //     Shifts  = |   |   |   |   |   |   | ... |   |   |   |   |
3211         //               +---+---+---+---+---+---+ ... +---+---+---+---+
3212         // Even numbers:   I   S                                     E
3213         //
3214         // S: Start; E: End; I: Init
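        // E.g. (illustrative): Const = 10 = 0b1010 yields Shifts =
        // {<Add, 3>, <Add, 1>}; the last shAmt is nonzero, so the even path
        // emits lsl T, Src0R, #3 followed by add T, T, Src0R, lsl #1. For
        // Const = 7 = 0b111, Shifts = {<Add, 3>, <Sub, 0>} and the odd path
        // folds both into rsb T, Src0R, Src0R, lsl #3.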
3215         for (int32_t I = Start; I < End; ++I) {
3216           const StrengthReduction::AggregationElement &Current = Shifts[I];
3217           Operand *SrcF = Current.createShiftedOperand(Func, Src0R);
3218           if (Current.aggregateWithAdd()) {
3219             _add(T, T, SrcF);
3220           } else {
3221             _sub(T, T, SrcF);
3222           }
3223         }
3224 
3225         if (Invert) {
3226           // T = 0 - T.
3227           _rsb(T, T, _0);
3228         }
3229 
3230         _mov(Dest, T);
3231         return;
3232       }
3233     }
3234     Variable *Src0R = Srcs.unswappedSrc0R(this);
3235     Variable *Src1R = Srcs.unswappedSrc1R(this);
3236     if (isVectorType(DestTy)) {
3237       _vmul(T, Src0R, Src1R);
3238     } else {
3239       _mul(T, Src0R, Src1R);
3240     }
3241     _mov(Dest, T);
3242     return;
3243   }
3244   case InstArithmetic::Shl: {
3245     Variable *Src0R = Srcs.unswappedSrc0R(this);
3246     if (!isVectorType(T->getType())) {
3247       if (Srcs.isSrc1ImmediateZero()) {
3248         _mov(T, Src0R);
3249       } else {
3250         Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3251         _lsl(T, Src0R, Src1R);
3252       }
3253     } else {
3254       if (Srcs.hasConstOperand()) {
3255         ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3256         _vshl(T, Src0R, ShAmt);
3257       } else {
3258         auto *Src1R = Srcs.unswappedSrc1R(this);
3259         _vshl(T, Src0R, Src1R)->setSignType(InstARM32::FS_Unsigned);
3260       }
3261     }
3262     _mov(Dest, T);
3263     return;
3264   }
3265   case InstArithmetic::Lshr: {
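    // NEON has no right-shift-by-register instruction; vshl shifts right when
    // the per-lane shift amount is negative, so the vector path below negates
    // the shift amount and uses vshl.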
3266     Variable *Src0R = Srcs.unswappedSrc0R(this);
3267     if (!isVectorType(T->getType())) {
3268       if (DestTy != IceType_i32) {
3269         _uxt(Src0R, Src0R);
3270       }
3271       if (Srcs.isSrc1ImmediateZero()) {
3272         _mov(T, Src0R);
3273       } else {
3274         Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3275         _lsr(T, Src0R, Src1R);
3276       }
3277     } else {
3278       if (Srcs.hasConstOperand()) {
3279         ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3280         _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Unsigned);
3281       } else {
3282         auto *Src1R = Srcs.unswappedSrc1R(this);
3283         auto *Src1RNeg = makeReg(Src1R->getType());
3284         _vneg(Src1RNeg, Src1R);
3285         _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Unsigned);
3286       }
3287     }
3288     _mov(Dest, T);
3289     return;
3290   }
3291   case InstArithmetic::Ashr: {
3292     Variable *Src0R = Srcs.unswappedSrc0R(this);
3293     if (!isVectorType(T->getType())) {
3294       if (DestTy != IceType_i32) {
3295         _sxt(Src0R, Src0R);
3296       }
3297       if (Srcs.isSrc1ImmediateZero()) {
3298         _mov(T, Src0R);
3299       } else {
3300         _asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
3301       }
3302     } else {
3303       if (Srcs.hasConstOperand()) {
3304         ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3305         _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Signed);
3306       } else {
3307         auto *Src1R = Srcs.unswappedSrc1R(this);
3308         auto *Src1RNeg = makeReg(Src1R->getType());
3309         _vneg(Src1RNeg, Src1R);
3310         _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Signed);
3311       }
3312     }
3313     _mov(Dest, T);
3314     return;
3315   }
3316   case InstArithmetic::Udiv:
3317   case InstArithmetic::Sdiv:
3318   case InstArithmetic::Urem:
3319   case InstArithmetic::Srem:
3320     llvm::report_fatal_error(
3321         "Integer div/rem should have been handled earlier.");
3322     return;
3323   case InstArithmetic::Fadd:
3324   case InstArithmetic::Fsub:
3325   case InstArithmetic::Fmul:
3326   case InstArithmetic::Fdiv:
3327   case InstArithmetic::Frem:
3328     llvm::report_fatal_error(
3329         "Floating point arith should have been handled earlier.");
3330     return;
3331   }
3332 }
3333 
3334 void TargetARM32::lowerAssign(const InstAssign *Instr) {
3335   Variable *Dest = Instr->getDest();
3336 
3337   if (Dest->isRematerializable()) {
3338     Context.insert<InstFakeDef>(Dest);
3339     return;
3340   }
3341 
3342   Operand *Src0 = Instr->getSrc(0);
3343   assert(Dest->getType() == Src0->getType());
3344   if (Dest->getType() == IceType_i64) {
3345     Src0 = legalizeUndef(Src0);
3346 
3347     Variable *T_Lo = makeReg(IceType_i32);
3348     auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3349     Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
3350     _mov(T_Lo, Src0Lo);
3351     _mov(DestLo, T_Lo);
3352 
3353     Variable *T_Hi = makeReg(IceType_i32);
3354     auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3355     Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
3356     _mov(T_Hi, Src0Hi);
3357     _mov(DestHi, T_Hi);
3358 
3359     return;
3360   }
3361 
3362   Operand *NewSrc;
3363   if (Dest->hasReg()) {
3364     // If Dest already has a physical register, then legalize the Src operand
3365     // into a Variable with the same register assignment. This especially
3366     // helps allow the use of Flex operands.
3367     NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
3368   } else {
3369     // Dest could be a stack operand. Since we could potentially need to do a
3370     // Store (and store can only have Register operands), legalize this to a
3371     // register.
3372     NewSrc = legalize(Src0, Legal_Reg);
3373   }
3374 
3375   if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
3376     NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
3377   }
3378   _mov(Dest, NewSrc);
3379 }
3380 
3381 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
3382     Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
3383     const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
3384   InstARM32Label *NewShortCircuitLabel = nullptr;
3385   Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3386 
3387   const Inst *Producer = Computations.getProducerOf(Boolean);
3388 
3389   if (Producer == nullptr) {
3390     // No producer, no problem: just emit code to perform (Boolean & 1) and
3391     // set the flags register. The branch should be taken if the resulting flags
3392     // indicate a non-zero result.
3393     _tst(legalizeToReg(Boolean), _1);
3394     return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3395   }
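  // Otherwise, walk the producer chain. As an illustrative sketch, for
  //   br (a && b), %target_true, %target_false
  // the And case below emits the code computing `a`, branches to the false
  // target (or a fresh short-circuit label) when `a`'s condition fails, then
  // emits the code computing `b`, whose condition decides the final branch.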
3396 
3397   switch (Producer->getKind()) {
3398   default:
3399     llvm::report_fatal_error("Unexpected producer.");
3400   case Inst::Icmp: {
3401     return ShortCircuitCondAndLabel(
3402         lowerIcmpCond(llvm::cast<InstIcmp>(Producer)));
3403   } break;
3404   case Inst::Fcmp: {
3405     return ShortCircuitCondAndLabel(
3406         lowerFcmpCond(llvm::cast<InstFcmp>(Producer)));
3407   } break;
3408   case Inst::Cast: {
3409     const auto *CastProducer = llvm::cast<InstCast>(Producer);
3410     assert(CastProducer->getCastKind() == InstCast::Trunc);
3411     Operand *Src = CastProducer->getSrc(0);
3412     if (Src->getType() == IceType_i64)
3413       Src = loOperand(Src);
3414     _tst(legalizeToReg(Src), _1);
3415     return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3416   } break;
3417   case Inst::Arithmetic: {
3418     const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
3419     switch (ArithProducer->getOp()) {
3420     default:
3421       llvm::report_fatal_error("Unhandled Arithmetic Producer.");
3422     case InstArithmetic::And: {
3423       if (!(ShortCircuitable & SC_And)) {
3424         NewShortCircuitLabel = InstARM32Label::create(Func, this);
3425       }
3426 
3427       LowerInt1BranchTarget NewTarget =
3428           TargetFalse.createForLabelOrDuplicate(NewShortCircuitLabel);
3429 
3430       ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3431           Producer->getSrc(0), TargetTrue, NewTarget, SC_And);
3432       const CondWhenTrue &Cond = CondAndLabel.Cond;
3433 
3434       _br_short_circuit(NewTarget, Cond.invert());
3435 
3436       InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3437       if (ShortCircuitLabel != nullptr)
3438         Context.insert(ShortCircuitLabel);
3439 
3440       return ShortCircuitCondAndLabel(
3441           lowerInt1ForBranch(Producer->getSrc(1), TargetTrue, NewTarget, SC_All)
3442               .assertNoLabelAndReturnCond(),
3443           NewShortCircuitLabel);
3444     } break;
3445     case InstArithmetic::Or: {
3446       if (!(ShortCircuitable & SC_Or)) {
3447         NewShortCircuitLabel = InstARM32Label::create(Func, this);
3448       }
3449 
3450       LowerInt1BranchTarget NewTarget =
3451           TargetTrue.createForLabelOrDuplicate(NewShortCircuitLabel);
3452 
3453       ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3454           Producer->getSrc(0), NewTarget, TargetFalse, SC_Or);
3455       const CondWhenTrue &Cond = CondAndLabel.Cond;
3456 
3457       _br_short_circuit(NewTarget, Cond);
3458 
3459       InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3460       if (ShortCircuitLabel != nullptr)
3461         Context.insert(ShortCircuitLabel);
3462 
3463       return ShortCircuitCondAndLabel(lowerInt1ForBranch(Producer->getSrc(1),
3464                                                          NewTarget, TargetFalse,
3465                                                          SC_All)
3466                                           .assertNoLabelAndReturnCond(),
3467                                       NewShortCircuitLabel);
3468     } break;
3469     }
3470   }
3471   }
3472 }
3473 
3474 void TargetARM32::lowerBr(const InstBr *Instr) {
3475   if (Instr->isUnconditional()) {
3476     _br(Instr->getTargetUnconditional());
3477     return;
3478   }
3479 
3480   CfgNode *TargetTrue = Instr->getTargetTrue();
3481   CfgNode *TargetFalse = Instr->getTargetFalse();
3482   ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3483       Instr->getCondition(), LowerInt1BranchTarget(TargetTrue),
3484       LowerInt1BranchTarget(TargetFalse), SC_All);
3485   assert(CondAndLabel.ShortCircuitTarget == nullptr);
3486 
3487   const CondWhenTrue &Cond = CondAndLabel.Cond;
3488   if (Cond.WhenTrue1 != CondARM32::kNone) {
3489     assert(Cond.WhenTrue0 != CondARM32::AL);
3490     _br(TargetTrue, Cond.WhenTrue1);
3491   }
3492 
3493   switch (Cond.WhenTrue0) {
3494   default:
3495     _br(TargetTrue, TargetFalse, Cond.WhenTrue0);
3496     break;
3497   case CondARM32::kNone:
3498     _br(TargetFalse);
3499     break;
3500   case CondARM32::AL:
3501     _br(TargetTrue);
3502     break;
3503   }
3504 }
3505 
3506 void TargetARM32::lowerCall(const InstCall *Instr) {
3507   Operand *CallTarget = Instr->getCallTarget();
3508   if (Instr->isTargetHelperCall()) {
3509     auto TargetHelperPreamble = ARM32HelpersPreamble.find(CallTarget);
3510     if (TargetHelperPreamble != ARM32HelpersPreamble.end()) {
3511       (this->*TargetHelperPreamble->second)(Instr);
3512     }
3513   }
3514   MaybeLeafFunc = false;
3515   NeedsStackAlignment = true;
3516 
3517   // Assign arguments to registers and stack. Also reserve stack.
3518   TargetARM32::CallingConv CC;
3519   // Pair of Arg Operand -> GPR number assignments.
3520   llvm::SmallVector<std::pair<Operand *, RegNumT>, NumGPRArgs> GPRArgs;
3521   llvm::SmallVector<std::pair<Operand *, RegNumT>, NumFP32Args> FPArgs;
3522   // Pair of Arg Operand -> stack offset.
3523   llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
3524   size_t ParameterAreaSizeBytes = 0;
3525 
3526   // Classify each argument operand according to the location where the
3527   // argument is passed.
3528   for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
3529     Operand *Arg = legalizeUndef(Instr->getArg(i));
3530     const Type Ty = Arg->getType();
3531     bool InReg = false;
3532     RegNumT Reg;
3533     if (isScalarIntegerType(Ty)) {
3534       InReg = CC.argInGPR(Ty, &Reg);
3535     } else {
3536       InReg = CC.argInVFP(Ty, &Reg);
3537     }
3538 
3539     if (!InReg) {
3540       ParameterAreaSizeBytes =
3541           applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
3542       StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
3543       ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
3544       continue;
3545     }
3546 
3547     if (Ty == IceType_i64) {
3548       Operand *Lo = loOperand(Arg);
3549       Operand *Hi = hiOperand(Arg);
3550       GPRArgs.push_back(std::make_pair(
3551           Lo, RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Reg))));
3552       GPRArgs.push_back(std::make_pair(
3553           Hi, RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(Reg))));
3554     } else if (isScalarIntegerType(Ty)) {
3555       GPRArgs.push_back(std::make_pair(Arg, Reg));
3556     } else {
3557       FPArgs.push_back(std::make_pair(Arg, Reg));
3558     }
3559   }
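  // An illustrative classification (assuming AAPCS-style allocation in
  // CallingConv): for f(i32 a, i64 b, i32 c), `a` lands in r0, `b` in the
  // even-aligned pair r2/r3, and `c`, with no GPRs left, goes to StackArgs.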
3560 
3561   // Adjust the parameter area so that the stack is aligned. It is assumed that
3562   // the stack is already aligned at the start of the calling sequence.
3563   ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
3564 
3565   if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) {
3566     llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max.");
3567   }
3568 
3569   // Copy arguments that are passed on the stack to the appropriate stack
3570   // locations.
3571   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
3572   for (auto &StackArg : StackArgs) {
3573     ConstantInteger32 *Loc =
3574         llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
3575     Type Ty = StackArg.first->getType();
3576     OperandARM32Mem *Addr;
3577     constexpr bool SignExt = false;
3578     if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
3579       Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
3580     } else {
3581       Variable *NewBase = Func->makeVariable(SP->getType());
3582       lowerArithmetic(
3583           InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
3584       Addr = formMemoryOperand(NewBase, Ty);
3585     }
3586     lowerStore(InstStore::create(Func, StackArg.first, Addr));
3587   }
3588 
3589   // Generate the call instruction. Assign its result to a temporary with high
3590   // register allocation weight.
3591   Variable *Dest = Instr->getDest();
3592   // ReturnReg doubles as ReturnRegLo as necessary.
3593   Variable *ReturnReg = nullptr;
3594   Variable *ReturnRegHi = nullptr;
3595   if (Dest) {
3596     switch (Dest->getType()) {
3597     case IceType_NUM:
3598       llvm::report_fatal_error("Invalid Call dest type");
3599       break;
3600     case IceType_void:
3601       break;
3602     case IceType_i1:
3603       assert(Computations.getProducerOf(Dest) == nullptr);
3604     // Fall-through intended.
3605     case IceType_i8:
3606     case IceType_i16:
3607     case IceType_i32:
3608       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
3609       break;
3610     case IceType_i64:
3611       ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
3612       ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
3613       break;
3614     case IceType_f32:
3615       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0);
3616       break;
3617     case IceType_f64:
3618       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0);
3619       break;
3620     case IceType_v4i1:
3621     case IceType_v8i1:
3622     case IceType_v16i1:
3623     case IceType_v16i8:
3624     case IceType_v8i16:
3625     case IceType_v4i32:
3626     case IceType_v4f32:
3627       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
3628       break;
3629     }
3630   }
3631 
3632   // Allow ConstantRelocatable to be left alone as a direct call, but force
3633   // other constants like ConstantInteger32 to be in a register and make it an
3634   // indirect call.
3635   if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
3636     CallTarget = legalize(CallTarget, Legal_Reg);
3637   }
3638 
3639   // Copy arguments to be passed in registers to the appropriate registers.
3640   CfgVector<Variable *> RegArgs;
3641   for (auto &FPArg : FPArgs) {
3642     RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
3643   }
3644   for (auto &GPRArg : GPRArgs) {
3645     RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
3646   }
3647 
3648   // Generate a FakeUse of register arguments so that they do not get dead code
3649   // eliminated as a result of the FakeKill of scratch registers after the call.
3650     // These fake-uses need to be placed here to prevent argument registers from
3651   // being used during the legalizeToReg() calls above.
3652   for (auto *RegArg : RegArgs) {
3653     Context.insert<InstFakeUse>(RegArg);
3654   }
3655 
3656   InstARM32Call *NewCall = Context.insert<InstARM32Call>(ReturnReg, CallTarget);
3657 
3658   if (ReturnRegHi)
3659     Context.insert<InstFakeDef>(ReturnRegHi);
3660 
3661   // Insert a register-kill pseudo instruction.
3662   Context.insert<InstFakeKill>(NewCall);
3663 
3664   // Generate a FakeUse to keep the call live if necessary.
3665   if (Instr->hasSideEffects() && ReturnReg) {
3666     Context.insert<InstFakeUse>(ReturnReg);
3667   }
3668 
3669   if (Dest != nullptr) {
3670     // Assign the result of the call to Dest.
3671     if (ReturnReg != nullptr) {
3672       if (ReturnRegHi) {
3673         auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
3674         Variable *DestLo = Dest64On32->getLo();
3675         Variable *DestHi = Dest64On32->getHi();
3676         _mov(DestLo, ReturnReg);
3677         _mov(DestHi, ReturnRegHi);
3678       } else {
3679         if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
3680           _mov(Dest, ReturnReg);
3681         } else {
3682           assert(isIntegerType(Dest->getType()) &&
3683                  typeWidthInBytes(Dest->getType()) <= 4);
3684           _mov(Dest, ReturnReg);
3685         }
3686       }
3687     }
3688   }
3689 
3690   if (Instr->isTargetHelperCall()) {
3691     auto TargetHelpersPostamble = ARM32HelpersPostamble.find(CallTarget);
3692     if (TargetHelpersPostamble != ARM32HelpersPostamble.end()) {
3693       (this->*TargetHelpersPostamble->second)(Instr);
3694     }
3695   }
3696 }
3697 
3698 namespace {
3699 void configureBitcastTemporary(Variable64On32 *Var) {
3700   Var->setMustNotHaveReg();
3701   Var->getHi()->setMustHaveReg();
3702   Var->getLo()->setMustHaveReg();
3703 }
3704 } // end of anonymous namespace
3705 
3706 void TargetARM32::lowerCast(const InstCast *Instr) {
3707   InstCast::OpKind CastKind = Instr->getCastKind();
3708   Variable *Dest = Instr->getDest();
3709   const Type DestTy = Dest->getType();
3710   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3711   switch (CastKind) {
3712   default:
3713     Func->setError("Cast type not supported");
3714     return;
3715   case InstCast::Sext: {
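    // For vectors, sext replicates each lane's bit 0 across the lane: shift
    // left so the bit becomes the lane's sign bit, then arithmetic-shift back
    // so each lane ends up all-zeros or all-ones. E.g. (illustrative) for
    // v4i32: vshl by #31, then signed vshr by #31.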
3716     if (isVectorType(DestTy)) {
3717       Variable *T0 = makeReg(DestTy);
3718       Variable *T1 = makeReg(DestTy);
3719       ConstantInteger32 *ShAmt = nullptr;
3720       switch (DestTy) {
3721       default:
3722         llvm::report_fatal_error("Unexpected type in vector sext.");
3723       case IceType_v16i8:
3724         ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(7));
3725         break;
3726       case IceType_v8i16:
3727         ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(15));
3728         break;
3729       case IceType_v4i32:
3730         ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(31));
3731         break;
3732       }
3733       auto *Src0R = legalizeToReg(Src0);
3734       _vshl(T0, Src0R, ShAmt);
3735       _vshr(T1, T0, ShAmt)->setSignType(InstARM32::FS_Signed);
3736       _mov(Dest, T1);
3737     } else if (DestTy == IceType_i64) {
3738       // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
3739       Constant *ShiftAmt = Ctx->getConstantInt32(31);
3740       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3741       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3742       Variable *T_Lo = makeReg(DestLo->getType());
3743       if (Src0->getType() == IceType_i32) {
3744         Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
3745         _mov(T_Lo, Src0RF);
3746       } else if (Src0->getType() != IceType_i1) {
3747         Variable *Src0R = legalizeToReg(Src0);
3748         _sxt(T_Lo, Src0R);
3749       } else {
3750         Operand *_0 = Ctx->getConstantZero(IceType_i32);
3751         Operand *_m1 = Ctx->getConstantInt32(-1);
3752         lowerInt1ForSelect(T_Lo, Src0, _m1, _0);
3753       }
3754       _mov(DestLo, T_Lo);
3755       Variable *T_Hi = makeReg(DestHi->getType());
3756       if (Src0->getType() != IceType_i1) {
3757         _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
3758                                                OperandARM32::ASR, ShiftAmt));
3759       } else {
3760         // For i1, the asr instruction is already done above.
3761         _mov(T_Hi, T_Lo);
3762       }
3763       _mov(DestHi, T_Hi);
3764     } else if (Src0->getType() != IceType_i1) {
3765       // t1 = sxt src; dst = t1
3766       Variable *Src0R = legalizeToReg(Src0);
3767       Variable *T = makeReg(DestTy);
3768       _sxt(T, Src0R);
3769       _mov(Dest, T);
3770     } else {
3771       Constant *_0 = Ctx->getConstantZero(IceType_i32);
3772       Operand *_m1 = Ctx->getConstantInt(DestTy, -1);
3773       Variable *T = makeReg(DestTy);
3774       lowerInt1ForSelect(T, Src0, _m1, _0);
3775       _mov(Dest, T);
3776     }
3777     break;
3778   }
3779   case InstCast::Zext: {
3780     if (isVectorType(DestTy)) {
3781       auto *Mask = makeReg(DestTy);
3782       auto *_1 = Ctx->getConstantInt32(1);
3783       auto *T = makeReg(DestTy);
3784       auto *Src0R = legalizeToReg(Src0);
3785       _mov(Mask, _1);
3786       _vand(T, Src0R, Mask);
3787       _mov(Dest, T);
3788     } else if (DestTy == IceType_i64) {
3789       // t1=uxtb src; dst.lo=t1; dst.hi=0
3790       Operand *_0 =
3791           legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
3792       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3793       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3794       Variable *T_Lo = makeReg(DestLo->getType());
3795 
3796       switch (Src0->getType()) {
3797       default: {
3798         assert(Src0->getType() != IceType_i64);
3799         _uxt(T_Lo, legalizeToReg(Src0));
3800       } break;
3801       case IceType_i32: {
3802         _mov(T_Lo, legalize(Src0, Legal_Reg | Legal_Flex));
3803       } break;
3804       case IceType_i1: {
3805         SafeBoolChain Safe = lowerInt1(T_Lo, Src0);
3806         if (Safe == SBC_No) {
3807           Operand *_1 =
3808               legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3809           _and(T_Lo, T_Lo, _1);
3810         }
3811       } break;
3812       }
3813 
3814       _mov(DestLo, T_Lo);
3815 
3816       Variable *T_Hi = makeReg(DestLo->getType());
3817       _mov(T_Hi, _0);
3818       _mov(DestHi, T_Hi);
3819     } else if (Src0->getType() == IceType_i1) {
3820       Variable *T = makeReg(DestTy);
3821 
3822       SafeBoolChain Safe = lowerInt1(T, Src0);
3823       if (Safe == SBC_No) {
3824         Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3825         _and(T, T, _1);
3826       }
3827 
3828       _mov(Dest, T);
3829     } else {
3830       // t1 = uxt src; dst = t1
3831       Variable *Src0R = legalizeToReg(Src0);
3832       Variable *T = makeReg(DestTy);
3833       _uxt(T, Src0R);
3834       _mov(Dest, T);
3835     }
3836     break;
3837   }
3838   case InstCast::Trunc: {
3839     if (isVectorType(DestTy)) {
3840       auto *T = makeReg(DestTy);
3841       auto *Src0R = legalizeToReg(Src0);
3842       _mov(T, Src0R);
3843       _mov(Dest, T);
3844     } else {
3845       if (Src0->getType() == IceType_i64)
3846         Src0 = loOperand(Src0);
3847       Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
3848       // t1 = trunc Src0RF; Dest = t1
3849       Variable *T = makeReg(DestTy);
3850       _mov(T, Src0RF);
3851       if (DestTy == IceType_i1)
3852         _and(T, T, Ctx->getConstantInt1(1));
3853       _mov(Dest, T);
3854     }
3855     break;
3856   }
3857   case InstCast::Fptrunc:
3858   case InstCast::Fpext: {
3859     // fptrunc: dest.f32 = fptrunc src0.fp64
3860     // fpext: dest.f64 = fpext src0.fp32
3861     const bool IsTrunc = CastKind == InstCast::Fptrunc;
3862     assert(!isVectorType(DestTy));
3863     assert(DestTy == (IsTrunc ? IceType_f32 : IceType_f64));
3864     assert(Src0->getType() == (IsTrunc ? IceType_f64 : IceType_f32));
3865     Variable *Src0R = legalizeToReg(Src0);
3866     Variable *T = makeReg(DestTy);
3867     _vcvt(T, Src0R, IsTrunc ? InstARM32Vcvt::D2s : InstARM32Vcvt::S2d);
3868     _mov(Dest, T);
3869     break;
3870   }
3871   case InstCast::Fptosi:
3872   case InstCast::Fptoui: {
3873     const bool DestIsSigned = CastKind == InstCast::Fptosi;
3874     Variable *Src0R = legalizeToReg(Src0);
3875 
3876     if (isVectorType(DestTy)) {
3877       assert(typeElementType(Src0->getType()) == IceType_f32);
3878       auto *T = makeReg(DestTy);
3879       _vcvt(T, Src0R,
3880             DestIsSigned ? InstARM32Vcvt::Vs2si : InstARM32Vcvt::Vs2ui);
3881       _mov(Dest, T);
3882       break;
3883     }
3884 
3885     const bool Src0IsF32 = isFloat32Asserting32Or64(Src0->getType());
3886     if (llvm::isa<Variable64On32>(Dest)) {
3887       llvm::report_fatal_error("fp-to-i64 should have been pre-lowered.");
3888     }
3889     // fptosi:
3890     //     t1.fp = vcvt src0.fp
3891     //     t2.i32 = vmov t1.fp
3892     //     dest.int = conv t2.i32     @ Truncates the result if needed.
3893     // fptoui:
3894     //     t1.fp = vcvt src0.fp
3895     //     t2.u32 = vmov t1.fp
3896     //     dest.uint = conv t2.u32    @ Truncates the result if needed.
3897     Variable *T_fp = makeReg(IceType_f32);
3898     const InstARM32Vcvt::VcvtVariant Conversion =
3899         Src0IsF32 ? (DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui)
3900                   : (DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui);
3901     _vcvt(T_fp, Src0R, Conversion);
3902     Variable *T = makeReg(IceType_i32);
3903     _mov(T, T_fp);
3904     if (DestTy != IceType_i32) {
3905       Variable *T_1 = makeReg(DestTy);
3906       lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T));
3907       T = T_1;
3908     }
3909     _mov(Dest, T);
3910     break;
3911   }
3912   case InstCast::Sitofp:
3913   case InstCast::Uitofp: {
3914     const bool SourceIsSigned = CastKind == InstCast::Sitofp;
3915 
3916     if (isVectorType(DestTy)) {
3917       assert(typeElementType(DestTy) == IceType_f32);
3918       auto *T = makeReg(DestTy);
3919       Variable *Src0R = legalizeToReg(Src0);
3920       _vcvt(T, Src0R,
3921             SourceIsSigned ? InstARM32Vcvt::Vsi2s : InstARM32Vcvt::Vui2s);
3922       _mov(Dest, T);
3923       break;
3924     }
3925 
3926     const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
3927     if (Src0->getType() == IceType_i64) {
3928       llvm::report_fatal_error("i64-to-fp should have been pre-lowered.");
3929     }
3930     // sitofp:
3931     //     t1.i32 = sext src.int    @ sign-extends src0 if needed.
3932     //     t2.fp32 = vmov t1.i32
3933     //     t3.fp = vcvt.{fp}.s32    @ fp is either f32 or f64
3934     // uitofp:
3935     //     t1.i32 = zext src.int    @ zero-extends src0 if needed.
3936     //     t2.fp32 = vmov t1.i32
3937     //     t3.fp = vcvt.{fp}.u32    @ fp is either f32 or f64
3938     if (Src0->getType() != IceType_i32) {
3939       Variable *Src0R_32 = makeReg(IceType_i32);
3940       lowerCast(InstCast::create(
3941           Func, SourceIsSigned ? InstCast::Sext : InstCast::Zext, Src0R_32,
3942           Src0));
3943       Src0 = Src0R_32;
3944     }
3945     Variable *Src0R = legalizeToReg(Src0);
3946     Variable *Src0R_f32 = makeReg(IceType_f32);
3947     _mov(Src0R_f32, Src0R);
3948     Src0R = Src0R_f32;
3949     Variable *T = makeReg(DestTy);
3950     const InstARM32Vcvt::VcvtVariant Conversion =
3951         DestIsF32
3952             ? (SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s)
3953             : (SourceIsSigned ? InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d);
3954     _vcvt(T, Src0R, Conversion);
3955     _mov(Dest, T);
3956     break;
3957   }
3958   case InstCast::Bitcast: {
3959     Operand *Src0 = Instr->getSrc(0);
3960     if (DestTy == Src0->getType()) {
3961       auto *Assign = InstAssign::create(Func, Dest, Src0);
3962       lowerAssign(Assign);
3963       return;
3964     }
3965     switch (DestTy) {
3966     case IceType_NUM:
3967     case IceType_void:
3968       llvm::report_fatal_error("Unexpected bitcast.");
3969     case IceType_i1:
3970       UnimplementedLoweringError(this, Instr);
3971       break;
3972     case IceType_i8:
3973       assert(Src0->getType() == IceType_v8i1);
3974       llvm::report_fatal_error(
3975           "i8 to v8i1 conversion should have been prelowered.");
3976       break;
3977     case IceType_i16:
3978       assert(Src0->getType() == IceType_v16i1);
3979       llvm::report_fatal_error(
3980           "i16 to v16i1 conversion should have been prelowered.");
3981       break;
3982     case IceType_i32:
3983     case IceType_f32: {
3984       Variable *Src0R = legalizeToReg(Src0);
3985       Variable *T = makeReg(DestTy);
3986       _mov(T, Src0R);
3987       lowerAssign(InstAssign::create(Func, Dest, T));
3988       break;
3989     }
3990     case IceType_i64: {
3991       // t0, t1 <- src0
3992       // dest[31..0]  = t0
3993       // dest[63..32] = t1
3994       assert(Src0->getType() == IceType_f64);
3995       auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
3996       T->initHiLo(Func);
3997       configureBitcastTemporary(T);
3998       Variable *Src0R = legalizeToReg(Src0);
3999       _mov(T, Src0R);
4000       Context.insert<InstFakeUse>(T->getHi());
4001       Context.insert<InstFakeUse>(T->getLo());
4002       lowerAssign(InstAssign::create(Func, Dest, T));
4003       break;
4004     }
4005     case IceType_f64: {
4006       // T0 <- lo(src)
4007       // T1 <- hi(src)
4008       // vmov T2, T0, T1
4009       // Dest <- T2
4010       assert(Src0->getType() == IceType_i64);
4011       Variable *T = makeReg(DestTy);
4012       auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4013       Src64->initHiLo(Func);
4014       configureBitcastTemporary(Src64);
4015       lowerAssign(InstAssign::create(Func, Src64, Src0));
4016       _mov(T, Src64);
4017       lowerAssign(InstAssign::create(Func, Dest, T));
4018       break;
4019     }
4020     case IceType_v8i1:
4021       assert(Src0->getType() == IceType_i8);
4022       llvm::report_fatal_error(
4023           "v8i1 to i8 conversion should have been prelowered.");
4024       break;
4025     case IceType_v16i1:
4026       assert(Src0->getType() == IceType_i16);
4027       llvm::report_fatal_error(
4028           "v16i1 to i16 conversion should have been prelowered.");
4029       break;
4030     case IceType_v4i1:
4031     case IceType_v8i16:
4032     case IceType_v16i8:
4033     case IceType_v4f32:
4034     case IceType_v4i32: {
4035       assert(typeWidthInBytes(DestTy) == typeWidthInBytes(Src0->getType()));
4036       assert(isVectorType(DestTy) == isVectorType(Src0->getType()));
4037       Variable *T = makeReg(DestTy);
4038       _mov(T, Src0);
4039       _mov(Dest, T);
4040       break;
4041     }
4042     }
4043     break;
4044   }
4045   }
4046 }
4047 
4048 void TargetARM32::lowerExtractElement(const InstExtractElement *Instr) {
4049   Variable *Dest = Instr->getDest();
4050   Type DestTy = Dest->getType();
4051 
4052   Variable *Src0 = legalizeToReg(Instr->getSrc(0));
4053   Operand *Src1 = Instr->getSrc(1);
4054 
4055   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
4056     const uint32_t Index = Imm->getValue();
4057     Variable *T = makeReg(DestTy);
4058     Variable *TSrc0 = makeReg(Src0->getType());
4059 
4060     if (isFloatingType(DestTy)) {
4061       // We need to make sure the source is in a suitable register.
4062       TSrc0->setRegClass(RegARM32::RCARM32_QtoS);
4063     }
4064 
4065     _mov(TSrc0, Src0);
4066     _extractelement(T, TSrc0, Index);
4067     _mov(Dest, T);
4068     return;
4069   }
4070   assert(false && "extractelement requires a constant index");
4071 }
4072 
4073 namespace {
4074 // Validates FCMPARM32_TABLE's declaration w.r.t. InstFcmp::FCondition ordering
4075 // (and naming).
4076 enum {
4077 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) _fcmp_ll_##val,
4078   FCMPARM32_TABLE
4079 #undef X
4080       _fcmp_ll_NUM
4081 };
4082 
4083 enum {
4084 #define X(tag, str) _fcmp_hl_##tag = InstFcmp::tag,
4085   ICEINSTFCMP_TABLE
4086 #undef X
4087       _fcmp_hl_NUM
4088 };
4089 
4090 static_assert((uint32_t)_fcmp_hl_NUM == (uint32_t)_fcmp_ll_NUM,
4091               "Inconsistency between high-level and low-level fcmp tags.");
4092 #define X(tag, str)                                                            \
4093   static_assert(                                                               \
4094       (uint32_t)_fcmp_hl_##tag == (uint32_t)_fcmp_ll_##tag,                    \
4095       "Inconsistency between high-level and low-level fcmp tag " #tag);
4096 ICEINSTFCMP_TABLE
4097 #undef X
4098 
4099 struct {
4100   CondARM32::Cond CC0;
4101   CondARM32::Cond CC1;
4102 } TableFcmp[] = {
4103 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V)                           \
4104   {CondARM32::CC0, CondARM32::CC1},
4105     FCMPARM32_TABLE
4106 #undef X
4107 };
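// After a vcmp/vmrs pair, some fcmp conditions need two ARM condition codes,
// which is why TableFcmp carries CC0 and CC1: e.g. (illustrative) ueq holds
// when either EQ (equal) or VS (unordered) is set.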
4108 
4109 bool isFloatingPointZero(const Operand *Src) {
4110   if (const auto *F32 = llvm::dyn_cast<const ConstantFloat>(Src)) {
4111     return Utils::isPositiveZero(F32->getValue());
4112   }
4113 
4114   if (const auto *F64 = llvm::dyn_cast<const ConstantDouble>(Src)) {
4115     return Utils::isPositiveZero(F64->getValue());
4116   }
4117 
4118   return false;
4119 }
4120 } // end of anonymous namespace
4121 
4122 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
4123   InstFcmp::FCond Condition = Instr->getCondition();
4124   switch (Condition) {
4125   case InstFcmp::False:
4126     return CondWhenTrue(CondARM32::kNone);
4127   case InstFcmp::True:
4128     return CondWhenTrue(CondARM32::AL);
4129     break;
4130   default: {
4131     Variable *Src0R = legalizeToReg(Instr->getSrc(0));
4132     Operand *Src1 = Instr->getSrc(1);
4133     if (isFloatingPointZero(Src1)) {
4134       _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
4135     } else {
4136       _vcmp(Src0R, legalizeToReg(Src1));
4137     }
4138     _vmrs();
4139     assert(Condition < llvm::array_lengthof(TableFcmp));
4140     return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
4141   }
4142   }
4143 }
4144 
4145 void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
4146   Variable *Dest = Instr->getDest();
4147   const Type DestTy = Dest->getType();
4148 
4149   if (isVectorType(DestTy)) {
4150     if (Instr->getCondition() == InstFcmp::False) {
4151       constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
4152       auto *T = makeReg(SafeTypeForMovingConstant);
4153       _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(0)));
4154       _mov(Dest, T);
4155       return;
4156     }
4157 
4158     if (Instr->getCondition() == InstFcmp::True) {
4159       constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
4160       auto *T = makeReg(SafeTypeForMovingConstant);
4161       _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(1)));
4162       _mov(Dest, T);
4163       return;
4164     }
4165 
4166     Variable *T0;
4167     Variable *T1;
4168     bool Negate = false;
4169     auto *Src0 = legalizeToReg(Instr->getSrc(0));
4170     auto *Src1 = legalizeToReg(Instr->getSrc(1));
4171 
4172     switch (Instr->getCondition()) {
4173     default:
4174       llvm::report_fatal_error("Unhandled fp comparison.");
4175 #define _Vcnone(Tptr, S0, S1)                                                  \
4176   do {                                                                         \
4177     *(Tptr) = nullptr;                                                         \
4178   } while (0)
4179 #define _Vceq(Tptr, S0, S1)                                                    \
4180   do {                                                                         \
4181     *(Tptr) = makeReg(DestTy);                                                 \
4182     _vceq(*(Tptr), S0, S1);                                                    \
4183   } while (0)
4184 #define _Vcge(Tptr, S0, S1)                                                    \
4185   do {                                                                         \
4186     *(Tptr) = makeReg(DestTy);                                                 \
4187     _vcge(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed);                 \
4188   } while (0)
4189 #define _Vcgt(Tptr, S0, S1)                                                    \
4190   do {                                                                         \
4191     *(Tptr) = makeReg(DestTy);                                                 \
4192     _vcgt(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed);                 \
4193   } while (0)
4194 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V)                           \
4195   case InstFcmp::val: {                                                        \
4196     _Vc##CC0_V(&T0, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1);             \
4197     _Vc##CC1_V(&T1, (INV_V) ? Src0 : Src1, (INV_V) ? Src1 : Src0);             \
4198     Negate = NEG_V;                                                            \
4199   } break;
4200       FCMPARM32_TABLE
4201 #undef X
4202 #undef _Vcgt
4203 #undef _Vcge
4204 #undef _Vceq
4205 #undef _Vcnone
4206     }
4207     assert(T0 != nullptr);
4208     Variable *T = T0;
4209     if (T1 != nullptr) {
4210       T = makeReg(DestTy);
4211       _vorr(T, T0, T1);
4212     }
4213 
4214     if (Negate) {
4215       auto *TNeg = makeReg(DestTy);
4216       _vmvn(TNeg, T);
4217       T = TNeg;
4218     }
4219 
4220     _mov(Dest, T);
4221     return;
4222   }
4223 
4224   Variable *T = makeReg(IceType_i1);
4225   Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
4226   Operand *_0 =
4227       legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4228 
4229   CondWhenTrue Cond = lowerFcmpCond(Instr);
4230 
4231   bool RedefineT = false;
4232   if (Cond.WhenTrue0 != CondARM32::AL) {
4233     _mov(T, _0);
4234     RedefineT = true;
4235   }
4236 
4237   if (Cond.WhenTrue0 == CondARM32::kNone) {
4238     _mov(Dest, T);
4239     return;
4240   }
4241 
4242   if (RedefineT) {
4243     _mov_redefined(T, _1, Cond.WhenTrue0);
4244   } else {
4245     _mov(T, _1, Cond.WhenTrue0);
4246   }
4247 
4248   if (Cond.WhenTrue1 != CondARM32::kNone) {
4249     _mov_redefined(T, _1, Cond.WhenTrue1);
4250   }
4251 
4252   _mov(Dest, T);
4253 }
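// For the scalar path above, "b = fcmp olt a, b" lowers to roughly (a sketch;
// the exact condition depends on the mapping in TableFcmp):
//
//   vcmp.f32 s0, s1
//   vmrs
//   mov      t, #0
//   movmi    t, #1       @ MI <=> ordered less-than
//   mov      b, t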
4254 
4255 TargetARM32::CondWhenTrue
4256 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4257                                 Operand *Src1) {
4258   assert(Condition < llvm::array_lengthof(TableIcmp64));
4259 
4260   Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
4261   Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
4262   assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
4263   assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
4264 
4265   if (SrcsLo.hasConstOperand()) {
4266     const uint32_t ValueLo = SrcsLo.getConstantValue();
4267     const uint32_t ValueHi = SrcsHi.getConstantValue();
4268     const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo;
4269     if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
4270         Value == 0) {
4271       Variable *T = makeReg(IceType_i32);
4272       Variable *Src0LoR = SrcsLo.src0R(this);
4273       Variable *Src0HiR = SrcsHi.src0R(this);
4274       _orrs(T, Src0LoR, Src0HiR);
4275       Context.insert<InstFakeUse>(T);
4276       return CondWhenTrue(TableIcmp64[Condition].C1);
4277     }
4278 
4279     Variable *Src0RLo = SrcsLo.src0R(this);
4280     Variable *Src0RHi = SrcsHi.src0R(this);
4281     Operand *Src1RFLo = SrcsLo.src1RF(this);
4282     Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this);
4283 
4284     const bool UseRsb =
4285         TableIcmp64[Condition].Swapped != SrcsLo.swappedOperands();
4286 
4287     if (UseRsb) {
4288       if (TableIcmp64[Condition].IsSigned) {
4289         Variable *T = makeReg(IceType_i32);
4290         _rsbs(T, Src0RLo, Src1RFLo);
4291         Context.insert<InstFakeUse>(T);
4292 
4293         T = makeReg(IceType_i32);
4294         _rscs(T, Src0RHi, Src1RFHi);
4295         // We need to add a FakeUse here because liveness gets mad at us (Def
4296         // without Use.) Note that flag-setting instructions are considered to
4297         // have side effects and, therefore, are not DCE'ed.
4298         Context.insert<InstFakeUse>(T);
4299       } else {
4300         Variable *T = makeReg(IceType_i32);
4301         _rsbs(T, Src0RHi, Src1RFHi);
4302         Context.insert<InstFakeUse>(T);
4303 
4304         T = makeReg(IceType_i32);
4305         _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
4306         Context.insert<InstFakeUse>(T);
4307       }
4308     } else {
4309       if (TableIcmp64[Condition].IsSigned) {
4310         _cmp(Src0RLo, Src1RFLo);
4311         Variable *T = makeReg(IceType_i32);
4312         _sbcs(T, Src0RHi, Src1RFHi);
4313         Context.insert<InstFakeUse>(T);
4314       } else {
4315         _cmp(Src0RHi, Src1RFHi);
4316         _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
4317       }
4318     }
4319 
4320     return CondWhenTrue(TableIcmp64[Condition].C1);
4321   }
4322 
4323   Variable *Src0RLo, *Src0RHi;
4324   Operand *Src1RFLo, *Src1RFHi;
4325   if (TableIcmp64[Condition].Swapped) {
4326     Src0RLo = legalizeToReg(loOperand(Src1));
4327     Src0RHi = legalizeToReg(hiOperand(Src1));
4328     Src1RFLo = legalizeToReg(loOperand(Src0));
4329     Src1RFHi = legalizeToReg(hiOperand(Src0));
4330   } else {
4331     Src0RLo = legalizeToReg(loOperand(Src0));
4332     Src0RHi = legalizeToReg(hiOperand(Src0));
4333     Src1RFLo = legalizeToReg(loOperand(Src1));
4334     Src1RFHi = legalizeToReg(hiOperand(Src1));
4335   }
4336 
4337   // a=icmp cond, b, c ==>
4338   // GCC does:
4339   //   cmp      b.hi, c.hi     or  cmp      b.lo, c.lo
4340   //   cmp.eq   b.lo, c.lo         sbcs t1, b.hi, c.hi
4341   //   mov.<C1> t, #1              mov.<C1> t, #1
4342   //   mov.<C2> t, #0              mov.<C2> t, #0
4343   //   mov      a, t               mov      a, t
4344   // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
4345   // is used for signed compares. In some cases, b and c need to be swapped as
4346   // well.
4347   //
4348   // LLVM does:
4349   // for EQ and NE:
4350   //   eor  t1, b.hi, c.hi
4351   //   eor  t2, b.lo, c.lo
4352   //   orrs t, t1, t2
4353   //   mov.<C> t, #1
4354   //   mov  a, t
4355   //
4356   // that's nice in that it's just as short but has fewer dependencies for
4357   // better ILP at the cost of more registers.
4358   //
4359   // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
4360   // unconditional mov #0, two cmps, two conditional mov #1, and one
4361   // conditional reg mov. That has few dependencies for good ILP, but is a
4362   // longer sequence.
4363   //
4364   // So, we are going with the GCC version since it's usually better (except
4365   // perhaps for eq/ne). We could revisit special-casing eq/ne later.
4366   if (TableIcmp64[Condition].IsSigned) {
4367     Variable *ScratchReg = makeReg(IceType_i32);
4368     _cmp(Src0RLo, Src1RFLo);
4369     _sbcs(ScratchReg, Src0RHi, Src1RFHi);
4370     // ScratchReg isn't going to be used, but we need the side-effect of
4371     // setting flags from this operation.
4372     Context.insert<InstFakeUse>(ScratchReg);
4373   } else {
4374     _cmp(Src0RHi, Src1RFHi);
4375     _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
4376   }
4377   return CondWhenTrue(TableIcmp64[Condition].C1);
4378 }
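// For example, "a = icmp slt b, c" on i64 follows the GCC-style sequence
// (sketch):
//
//   cmp   b.lo, c.lo
//   sbcs  t, b.hi, c.hi   @ t is dead; only the flags matter
//   movlt a, #1           @ materialized by the caller from CondWhenTrue(LT)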
4379 
4380 TargetARM32::CondWhenTrue
4381 TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4382                                 Operand *Src1) {
4383   Int32Operands Srcs(Src0, Src1);
4384   if (!Srcs.hasConstOperand()) {
4385 
4386     Variable *Src0R = Srcs.src0R(this);
4387     Operand *Src1RF = Srcs.src1RF(this);
4388     _cmp(Src0R, Src1RF);
4389     return CondWhenTrue(getIcmp32Mapping(Condition));
4390   }
4391 
4392   Variable *Src0R = Srcs.src0R(this);
4393   const int32_t Value = Srcs.getConstantValue();
4394   if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
4395     _tst(Src0R, Src0R);
4396     return CondWhenTrue(getIcmp32Mapping(Condition));
4397   }
4398 
4399   if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() &&
4400       Srcs.negatedImmediateIsFlexEncodable()) {
4401     Operand *Src1F = Srcs.negatedSrc1F(this);
4402     _cmn(Src0R, Src1F);
4403     return CondWhenTrue(getIcmp32Mapping(Condition));
4404   }
4405 
4406   Operand *Src1RF = Srcs.src1RF(this);
4407   if (!Srcs.swappedOperands()) {
4408     _cmp(Src0R, Src1RF);
4409   } else {
4410     Variable *T = makeReg(IceType_i32);
4411     _rsbs(T, Src0R, Src1RF);
4412     Context.insert<InstFakeUse>(T);
4413   }
4414   return CondWhenTrue(getIcmp32Mapping(Condition));
4415 }
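// The cmn path above handles immediates whose negation is flex-encodable even
// though the immediate itself is not. E.g., "icmp eq a, -1" (sketch):
//
//   cmn r0, #1            @ sets the same flags as "cmp r0, #-1"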
4416 
4417 TargetARM32::CondWhenTrue
4418 TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4419                                        Operand *Src1) {
4420   Int32Operands Srcs(Src0, Src1);
4421   const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
4422   assert(ShAmt >= 0);
4423 
4424   if (!Srcs.hasConstOperand()) {
4425     Variable *Src0R = makeReg(IceType_i32);
4426     Operand *ShAmtImm = shAmtImm(ShAmt);
4427     _lsl(Src0R, legalizeToReg(Src0), ShAmtImm);
4428 
4429     Variable *Src1R = legalizeToReg(Src1);
4430     auto *Src1F = OperandARM32FlexReg::create(Func, IceType_i32, Src1R,
4431                                               OperandARM32::LSL, ShAmtImm);
4432     _cmp(Src0R, Src1F);
4433     return CondWhenTrue(getIcmp32Mapping(Condition));
4434   }
4435 
4436   const int32_t Value = Srcs.getConstantValue();
4437   if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
4438     Operand *ShAmtImm = shAmtImm(ShAmt);
4439     Variable *T = makeReg(IceType_i32);
4440     _lsls(T, Srcs.src0R(this), ShAmtImm);
4441     Context.insert<InstFakeUse>(T);
4442     return CondWhenTrue(getIcmp32Mapping(Condition));
4443   }
4444 
4445   Variable *ConstR = makeReg(IceType_i32);
4446   _mov(ConstR,
4447        legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex));
4448   Operand *NonConstF = OperandARM32FlexReg::create(
4449       Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL,
4450       Ctx->getConstantInt32(ShAmt));
4451 
4452   if (Srcs.swappedOperands()) {
4453     _cmp(ConstR, NonConstF);
4454   } else {
4455     Variable *T = makeReg(IceType_i32);
4456     _rsbs(T, ConstR, NonConstF);
4457     Context.insert<InstFakeUse>(T);
4458   }
4459   return CondWhenTrue(getIcmp32Mapping(Condition));
4460 }
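// For example, an i8 compare shifts both operands into the top byte so only
// the meaningful bits participate in the 32-bit compare (sketch):
//
//   lsl t, a, #24
//   cmp t, b, lsl #24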
4461 
4462 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) {
4463   return lowerIcmpCond(Instr->getCondition(), Instr->getSrc(0),
4464                        Instr->getSrc(1));
4465 }
4466 
4467 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition,
4468                                                      Operand *Src0,
4469                                                      Operand *Src1) {
4470   Src0 = legalizeUndef(Src0);
4471   Src1 = legalizeUndef(Src1);
4472 
4473   // a=icmp cond b, c ==>
4474   // GCC does:
4475   //   <u/s>xtb tb, b
4476   //   <u/s>xtb tc, c
4477   //   cmp      tb, tc
4478   //   mov.C1   t, #0
4479   //   mov.C2   t, #1
4480   //   mov      a, t
4481   // where the unsigned/sign extension is not needed for 32-bit. They also have
4482   // special cases for EQ and NE. E.g., for NE:
4483   //   <extend to tb, tc>
4484   //   subs     t, tb, tc
4485   //   movne    t, #1
4486   //   mov      a, t
4487   //
4488   // LLVM does:
4489   //   lsl     tb, b, #<N>
4490   //   mov     t, #0
4491   //   cmp     tb, c, lsl #<N>
4492   //   mov.<C> t, #1
4493   //   mov     a, t
4494   //
4495   // The left shift is by 0, 16, or 24, which allows the comparison to focus
4496   // on the bits that actually matter (for 16-bit or 8-bit signed/unsigned).
4497   // For the unsigned case, for some reason LLVM does something similar to GCC
4498   // and emits a uxtb first. It's not clear why that special-casing is needed.
4499   //
4500   // We'll go with the LLVM way for now, since it's shorter and has just as few
4501   // dependencies.
4502   switch (Src0->getType()) {
4503   default:
4504     llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
4505   case IceType_i1:
4506   case IceType_i8:
4507   case IceType_i16:
4508     return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1);
4509   case IceType_i32:
4510     return lowerInt32IcmpCond(Condition, Src0, Src1);
4511   case IceType_i64:
4512     return lowerInt64IcmpCond(Condition, Src0, Src1);
4513   }
4514 }
4515 
4516 void TargetARM32::lowerIcmp(const InstIcmp *Instr) {
4517   Variable *Dest = Instr->getDest();
4518   const Type DestTy = Dest->getType();
4519 
4520   if (isVectorType(DestTy)) {
4521     auto *T = makeReg(DestTy);
4522     auto *Src0 = legalizeToReg(Instr->getSrc(0));
4523     auto *Src1 = legalizeToReg(Instr->getSrc(1));
4524     const Type SrcTy = Src0->getType();
4525 
4526     bool NeedsShl = false;
4527     Type NewTypeAfterShl;
4528     SizeT ShAmt;
4529     switch (SrcTy) {
4530     default:
4531       break;
4532     case IceType_v16i1:
4533       NeedsShl = true;
4534       NewTypeAfterShl = IceType_v16i8;
4535       ShAmt = 7;
4536       break;
4537     case IceType_v8i1:
4538       NeedsShl = true;
4539       NewTypeAfterShl = IceType_v8i16;
4540       ShAmt = 15;
4541       break;
4542     case IceType_v4i1:
4543       NeedsShl = true;
4544       NewTypeAfterShl = IceType_v4i32;
4545       ShAmt = 31;
4546       break;
4547     }
4548 
4549     if (NeedsShl) {
4550       auto *Imm = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmt));
4551       auto *Src0T = makeReg(NewTypeAfterShl);
4552       auto *Src0Shl = makeReg(NewTypeAfterShl);
4553       _mov(Src0T, Src0);
4554       _vshl(Src0Shl, Src0T, Imm);
4555       Src0 = Src0Shl;
4556 
4557       auto *Src1T = makeReg(NewTypeAfterShl);
4558       auto *Src1Shl = makeReg(NewTypeAfterShl);
4559       _mov(Src1T, Src1);
4560       _vshl(Src1Shl, Src1T, Imm);
4561       Src1 = Src1Shl;
4562     }
4563 
4564     switch (Instr->getCondition()) {
4565     default:
4566       llvm::report_fatal_error("Unhandled integer comparison.");
4567 #define _Vceq(T, S0, S1, Signed) _vceq(T, S0, S1)
4568 #define _Vcge(T, S0, S1, Signed)                                               \
4569   _vcge(T, S0, S1)->setSignType(Signed ? InstARM32::FS_Signed                  \
4570                                        : InstARM32::FS_Unsigned)
4571 #define _Vcgt(T, S0, S1, Signed)                                               \
4572   _vcgt(T, S0, S1)->setSignType(Signed ? InstARM32::FS_Signed                  \
4573                                        : InstARM32::FS_Unsigned)
4574 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
4575   case InstIcmp::val: {                                                        \
4576     _Vc##C_V(T, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1, is_signed);      \
4577     if (NEG_V) {                                                               \
4578       auto *TInv = makeReg(DestTy);                                            \
4579       _vmvn(TInv, T);                                                          \
4580       T = TInv;                                                                \
4581     }                                                                          \
4582   } break;
4583       ICMPARM32_TABLE
4584 #undef X
4585 #undef _Vcgt
4586 #undef _Vcge
4587 #undef _Vceq
4588     }
4589     _mov(Dest, T);
4590     return;
4591   }
4592 
4593   Operand *_0 =
4594       legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4595   Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
4596   Variable *T = makeReg(IceType_i1);
4597 
4598   _mov(T, _0);
4599   CondWhenTrue Cond = lowerIcmpCond(Instr);
4600   _mov_redefined(T, _1, Cond.WhenTrue0);
4601   _mov(Dest, T);
4602 
4603   assert(Cond.WhenTrue1 == CondARM32::kNone);
4604 
4605   return;
4606 }
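// In the vector path above, i1 elements live in wider lanes, so both sources
// are shifted left until the interesting bit becomes the lane's sign bit
// before the lane-wise compare. E.g., for v4i1 (sketch):
//
//   vshl.i32 q0, q0, #31
//   vshl.i32 q1, q1, #31
//   vcgt.s32 q2, q0, q1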
4607 
4608 void TargetARM32::lowerInsertElement(const InstInsertElement *Instr) {
4609   Variable *Dest = Instr->getDest();
4610   Type DestTy = Dest->getType();
4611 
4612   Variable *Src0 = legalizeToReg(Instr->getSrc(0));
4613   Variable *Src1 = legalizeToReg(Instr->getSrc(1));
4614   Operand *Src2 = Instr->getSrc(2);
4615 
4616   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
4617     const uint32_t Index = Imm->getValue();
4618     Variable *T = makeReg(DestTy);
4619 
4620     if (isFloatingType(DestTy)) {
4621       T->setRegClass(RegARM32::RCARM32_QtoS);
4622     }
4623 
4624     _mov(T, Src0);
4625     _insertelement(T, Src1, Index);
4626     _set_dest_redefined();
4627     _mov(Dest, T);
4628     return;
4629   }
4630   assert(false && "insertelement requires a constant index");
4631 }
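// For example, inserting a scalar into lane 1 of a v4i32 becomes roughly
// "vmov.32 d0[1], r0" once the _insertelement pseudo-instruction is emitted
// (a sketch; the exact register assignment is up to the allocator).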
4632 
4633 namespace {
4634 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
4635   if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
4636     return Integer->getValue();
4637   return Intrinsics::MemoryOrderInvalid;
4638 }
4639 } // end of anonymous namespace
4640 
4641 void TargetARM32::lowerLoadLinkedStoreExclusive(
4642     Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation,
4643     CondARM32::Cond Cond) {
4644 
4645   auto *Retry = Context.insert<InstARM32Label>(this);
4646 
4647   { // scoping for loop highlighting.
4648     Variable *Success = makeReg(IceType_i32);
4649     Variable *Tmp = (Ty == IceType_i64) ? makeI64RegPair() : makeReg(Ty);
4650     auto *_0 = Ctx->getConstantZero(IceType_i32);
4651 
4652     Context.insert<InstFakeDef>(Tmp);
4653     Context.insert<InstFakeUse>(Tmp);
4654     Variable *AddrR = legalizeToReg(Addr);
4655     _ldrex(Tmp, formMemoryOperand(AddrR, Ty))->setDestRedefined();
4656     auto *StoreValue = Operation(Tmp);
4657     assert(StoreValue->mustHaveReg());
4658     // strex requires Dest to be a register other than Value or Addr. This
4659     // restriction is cleanly represented by adding an "early" definition of
4660     // Dest (or a later use of all the sources.)
4661     Context.insert<InstFakeDef>(Success);
4662     if (Cond != CondARM32::AL) {
4663       _mov_redefined(Success, legalize(_0, Legal_Reg | Legal_Flex),
4664                      InstARM32::getOppositeCondition(Cond));
4665     }
4666     _strex(Success, StoreValue, formMemoryOperand(AddrR, Ty), Cond)
4667         ->setDestRedefined();
4668     _cmp(Success, _0);
4669   }
4670 
4671   _br(Retry, CondARM32::NE);
4672 }
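// The emitted skeleton is the classic exclusive-access retry loop (sketch):
//
// retry:
//   ldrex    tmp, [addr]
//   ...                        @ Operation(tmp) computes the value to store
//   strex<c> success, value, [addr]
//   cmp      success, #0
//   bne      retry
//
// When Cond != AL (the cmpxchg case), success is preset to 0 under the
// opposite condition so a skipped strex falls out of the loop.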
4673 
4674 namespace {
4675 InstArithmetic *createArithInst(Cfg *Func, uint32_t Operation, Variable *Dest,
4676                                 Variable *Src0, Operand *Src1) {
4677   InstArithmetic::OpKind Oper;
4678   switch (Operation) {
4679   default:
4680     llvm::report_fatal_error("Unknown AtomicRMW operation");
4681   case Intrinsics::AtomicExchange:
4682     llvm::report_fatal_error("Can't handle Atomic xchg operation");
4683   case Intrinsics::AtomicAdd:
4684     Oper = InstArithmetic::Add;
4685     break;
4686   case Intrinsics::AtomicAnd:
4687     Oper = InstArithmetic::And;
4688     break;
4689   case Intrinsics::AtomicSub:
4690     Oper = InstArithmetic::Sub;
4691     break;
4692   case Intrinsics::AtomicOr:
4693     Oper = InstArithmetic::Or;
4694     break;
4695   case Intrinsics::AtomicXor:
4696     Oper = InstArithmetic::Xor;
4697     break;
4698   }
4699   return InstArithmetic::create(Func, Oper, Dest, Src0, Src1);
4700 }
4701 } // end of anonymous namespace
4702 
4703 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
4704                                  Operand *Addr, Operand *Val) {
4705   // retry:
4706   //     ldrex tmp, [addr]
4707   //     mov contents, tmp
4708   //     op result, contents, Val
4709   //     strex success, result, [addr]
4710   //     cmp success, 0
4711   //     jne retry
4712   //     fake-use(addr, operand)  @ prevents undesirable clobbering.
4713   //     mov dest, contents
4714   auto DestTy = Dest->getType();
4715 
4716   if (DestTy == IceType_i64) {
4717     lowerInt64AtomicRMW(Dest, Operation, Addr, Val);
4718     return;
4719   }
4720 
4721   Operand *ValRF = nullptr;
4722   if (llvm::isa<ConstantInteger32>(Val)) {
4723     ValRF = Val;
4724   } else {
4725     ValRF = legalizeToReg(Val);
4726   }
4727   auto *ContentsR = makeReg(DestTy);
4728   auto *ResultR = makeReg(DestTy);
4729 
4730   _dmb();
4731   lowerLoadLinkedStoreExclusive(
4732       DestTy, Addr,
4733       [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4734         lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4735         if (Operation == Intrinsics::AtomicExchange) {
4736           lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4737         } else {
4738           lowerArithmetic(
4739               createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4740         }
4741         return ResultR;
4742       });
4743   _dmb();
4744   if (auto *ValR = llvm::dyn_cast<Variable>(ValRF)) {
4745     Context.insert<InstFakeUse>(ValR);
4746   }
4747   // Can't dce ContentsR.
4748   Context.insert<InstFakeUse>(ContentsR);
4749   lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4750 }
4751 
4752 void TargetARM32::lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation,
4753                                       Operand *Addr, Operand *Val) {
4754   assert(Dest->getType() == IceType_i64);
4755 
4756   auto *ResultR = makeI64RegPair();
4757 
4758   Context.insert<InstFakeDef>(ResultR);
4759 
4760   Operand *ValRF = nullptr;
4761   if (llvm::dyn_cast<ConstantInteger64>(Val)) {
4762     ValRF = Val;
4763   } else {
4764     auto *ValR64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4765     ValR64->initHiLo(Func);
4766     ValR64->setMustNotHaveReg();
4767     ValR64->getLo()->setMustHaveReg();
4768     ValR64->getHi()->setMustHaveReg();
4769     lowerAssign(InstAssign::create(Func, ValR64, Val));
4770     ValRF = ValR64;
4771   }
4772 
4773   auto *ContentsR = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4774   ContentsR->initHiLo(Func);
4775   ContentsR->setMustNotHaveReg();
4776   ContentsR->getLo()->setMustHaveReg();
4777   ContentsR->getHi()->setMustHaveReg();
4778 
4779   _dmb();
4780   lowerLoadLinkedStoreExclusive(
4781       IceType_i64, Addr,
4782       [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4783         lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4784         Context.insert<InstFakeUse>(Tmp);
4785         if (Operation == Intrinsics::AtomicExchange) {
4786           lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4787         } else {
4788           lowerArithmetic(
4789               createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4790         }
4791         Context.insert<InstFakeUse>(ResultR->getHi());
4792         Context.insert<InstFakeDef>(ResultR, ResultR->getLo())
4793             ->setDestRedefined();
4794         return ResultR;
4795       });
4796   _dmb();
4797   if (auto *ValR64 = llvm::dyn_cast<Variable64On32>(ValRF)) {
4798     Context.insert<InstFakeUse>(ValR64->getLo());
4799     Context.insert<InstFakeUse>(ValR64->getHi());
4800   }
4801   lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4802 }
4803 
4804 void TargetARM32::postambleCtpop64(const InstCall *Instr) {
4805   Operand *Arg0 = Instr->getArg(0);
4806   if (isInt32Asserting32Or64(Arg0->getType())) {
4807     return;
4808   }
4809   // The popcount helpers always return 32-bit values, while the intrinsic's
4810   // signature matches some 64-bit platforms' native instructions and expects
4811   // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in case
4812   // the user doesn't do that in the IR or doesn't toss the bits via truncate.
4813   auto *DestHi = llvm::cast<Variable>(hiOperand(Instr->getDest()));
4814   Variable *T = makeReg(IceType_i32);
4815   Operand *_0 =
4816       legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4817   _mov(T, _0);
4818   _mov(DestHi, T);
4819 }
4820 
4821 void TargetARM32::lowerIntrinsic(const InstIntrinsic *Instr) {
4822   Variable *Dest = Instr->getDest();
4823   Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void;
4824   Intrinsics::IntrinsicID ID = Instr->getIntrinsicID();
4825   switch (ID) {
4826   case Intrinsics::AtomicFence:
4827   case Intrinsics::AtomicFenceAll:
4828     assert(Dest == nullptr);
4829     _dmb();
4830     return;
4831   case Intrinsics::AtomicIsLockFree: {
4832     Operand *ByteSize = Instr->getArg(0);
4833     auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
4834     if (CI == nullptr) {
4835       // The PNaCl ABI requires the byte size to be a compile-time constant.
4836       Func->setError("AtomicIsLockFree byte size should be compile-time const");
4837       return;
4838     }
4839     static constexpr int32_t NotLockFree = 0;
4840     static constexpr int32_t LockFree = 1;
4841     int32_t Result = NotLockFree;
4842     switch (CI->getValue()) {
4843     case 1:
4844     case 2:
4845     case 4:
4846     case 8:
4847       Result = LockFree;
4848       break;
4849     }
4850     _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result)));
4851     return;
4852   }
4853   case Intrinsics::AtomicLoad: {
4854     assert(isScalarIntegerType(DestTy));
4855     // We require the memory address to be naturally aligned. Given that,
4856     // normal loads are atomic.
4857     if (!Intrinsics::isMemoryOrderValid(
4858             ID, getConstantMemoryOrder(Instr->getArg(1)))) {
4859       Func->setError("Unexpected memory ordering for AtomicLoad");
4860       return;
4861     }
4862     Variable *T;
4863 
4864     if (DestTy == IceType_i64) {
4865       // ldrex is the only ARM instruction that is guaranteed to load a 64-bit
4866       // integer atomically. Everything else works with a regular ldr.
4867       T = makeI64RegPair();
4868       _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64));
4869     } else {
4870       T = makeReg(DestTy);
4871       _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy));
4872     }
4873     _dmb();
4874     lowerAssign(InstAssign::create(Func, Dest, T));
4875     // Add a fake-use of T to ensure the atomic load is not removed if Dest
4876     // is unused.
4877     Context.insert<InstFakeUse>(T);
4878     return;
4879   }
4880   case Intrinsics::AtomicStore: {
4881     // We require the memory address to be naturally aligned. Given that,
4882     // normal stores are atomic.
4883     if (!Intrinsics::isMemoryOrderValid(
4884             ID, getConstantMemoryOrder(Instr->getArg(2)))) {
4885       Func->setError("Unexpected memory ordering for AtomicStore");
4886       return;
4887     }
4888 
4889     auto *Value = Instr->getArg(0);
4890     if (Value->getType() == IceType_i64) {
4891       auto *ValueR = makeI64RegPair();
4892       Context.insert<InstFakeDef>(ValueR);
4893       lowerAssign(InstAssign::create(Func, ValueR, Value));
4894       _dmb();
4895       lowerLoadLinkedStoreExclusive(
4896           IceType_i64, Instr->getArg(1), [this, ValueR](Variable *Tmp) {
4897             // The following fake-use prevents the ldrex instruction from being
4898             // dead code eliminated.
4899             Context.insert<InstFakeUse>(llvm::cast<Variable>(loOperand(Tmp)));
4900             Context.insert<InstFakeUse>(llvm::cast<Variable>(hiOperand(Tmp)));
4901             Context.insert<InstFakeUse>(Tmp);
4902             return ValueR;
4903           });
4904       Context.insert<InstFakeUse>(ValueR);
4905       _dmb();
4906       return;
4907     }
4908 
4909     auto *ValueR = legalizeToReg(Instr->getArg(0));
4910     const auto ValueTy = ValueR->getType();
4911     assert(isScalarIntegerType(ValueTy));
4912     auto *Addr = legalizeToReg(Instr->getArg(1));
4913 
4914     // Non-64-bit stores are atomic as long as the address is aligned. This
4915     // is PNaCl, so addresses are aligned.
4916     _dmb();
4917     _str(ValueR, formMemoryOperand(Addr, ValueTy));
4918     _dmb();
4919     return;
4920   }
4921   case Intrinsics::AtomicCmpxchg: {
4922     // retry:
4923     //     ldrex tmp, [addr]
4924     //     cmp tmp, expected
4925     //     mov expected, tmp
4926     //     strexeq success, new, [addr]
4927     //     cmpeq success, #0
4928     //     bne retry
4929     //     mov dest, expected
4930     assert(isScalarIntegerType(DestTy));
4931     // We require the memory address to be naturally aligned. Given that,
4932     // the exclusive load/store sequence below is atomic.
4933     if (!Intrinsics::isMemoryOrderValid(
4934             ID, getConstantMemoryOrder(Instr->getArg(3)),
4935             getConstantMemoryOrder(Instr->getArg(4)))) {
4936       Func->setError("Unexpected memory ordering for AtomicCmpxchg");
4937       return;
4938     }
4939 
4940     if (DestTy == IceType_i64) {
4941       Variable *LoadedValue = nullptr;
4942 
4943       auto *New = makeI64RegPair();
4944       Context.insert<InstFakeDef>(New);
4945       lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
4946 
4947       auto *Expected = makeI64RegPair();
4948       Context.insert<InstFakeDef>(Expected);
4949       lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
4950 
4951       _dmb();
4952       lowerLoadLinkedStoreExclusive(
4953           DestTy, Instr->getArg(0),
4954           [this, Expected, New, &LoadedValue](Variable *Tmp) {
4955             auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected));
4956             auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected));
4957             auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp));
4958             auto *TmpHiR = llvm::cast<Variable>(hiOperand(Tmp));
4959             _cmp(TmpLoR, ExpectedLoR);
4960             _cmp(TmpHiR, ExpectedHiR, CondARM32::EQ);
4961             LoadedValue = Tmp;
4962             return New;
4963           },
4964           CondARM32::EQ);
4965       _dmb();
4966 
4967       Context.insert<InstFakeUse>(LoadedValue);
4968       lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
4969       // The fake-use Expected prevents the assignments to Expected (above)
4970       // from being removed if Dest is not used.
4971       Context.insert<InstFakeUse>(Expected);
4972       // New needs to be alive here, or its live range will end in the
4973       // strex instruction.
4974       Context.insert<InstFakeUse>(New);
4975       return;
4976     }
4977 
4978     auto *New = legalizeToReg(Instr->getArg(2));
4979     auto *Expected = legalizeToReg(Instr->getArg(1));
4980     Variable *LoadedValue = nullptr;
4981 
4982     _dmb();
4983     lowerLoadLinkedStoreExclusive(
4984         DestTy, Instr->getArg(0),
4985         [this, Expected, New, &LoadedValue](Variable *Tmp) {
4986           lowerIcmpCond(InstIcmp::Eq, Tmp, Expected);
4987           LoadedValue = Tmp;
4988           return New;
4989         },
4990         CondARM32::EQ);
4991     _dmb();
4992 
4993     lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
4994     Context.insert<InstFakeUse>(Expected);
4995     Context.insert<InstFakeUse>(New);
4996     return;
4997   }
4998   case Intrinsics::AtomicRMW: {
4999     if (!Intrinsics::isMemoryOrderValid(
5000             ID, getConstantMemoryOrder(Instr->getArg(3)))) {
5001       Func->setError("Unexpected memory ordering for AtomicRMW");
5002       return;
5003     }
5004     lowerAtomicRMW(
5005         Dest,
5006         static_cast<uint32_t>(
5007             llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
5008         Instr->getArg(1), Instr->getArg(2));
5009     return;
5010   }
5011   case Intrinsics::Bswap: {
5012     Operand *Val = Instr->getArg(0);
5013     Type Ty = Val->getType();
5014     if (Ty == IceType_i64) {
5015       Val = legalizeUndef(Val);
5016       Variable *Val_Lo = legalizeToReg(loOperand(Val));
5017       Variable *Val_Hi = legalizeToReg(hiOperand(Val));
5018       Variable *T_Lo = makeReg(IceType_i32);
5019       Variable *T_Hi = makeReg(IceType_i32);
5020       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5021       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5022       _rev(T_Lo, Val_Lo);
5023       _rev(T_Hi, Val_Hi);
5024       _mov(DestLo, T_Hi);
5025       _mov(DestHi, T_Lo);
5026     } else {
5027       assert(Ty == IceType_i32 || Ty == IceType_i16);
5028       Variable *ValR = legalizeToReg(Val);
5029       Variable *T = makeReg(Ty);
5030       _rev(T, ValR);
5031       if (Val->getType() == IceType_i16) {
5032         Operand *_16 = shAmtImm(16);
5033         _lsr(T, T, _16);
5034       }
5035       _mov(Dest, T);
5036     }
5037     return;
5038   }
5039   case Intrinsics::Ctpop: {
5040     llvm::report_fatal_error("Ctpop should have been prelowered.");
5041   }
5042   case Intrinsics::Ctlz: {
5043     // The "is zero undef" parameter is ignored and we always return a
5044     // well-defined value.
5045     Operand *Val = Instr->getArg(0);
5046     Variable *ValLoR;
5047     Variable *ValHiR = nullptr;
5048     if (Val->getType() == IceType_i64) {
5049       Val = legalizeUndef(Val);
5050       ValLoR = legalizeToReg(loOperand(Val));
5051       ValHiR = legalizeToReg(hiOperand(Val));
5052     } else {
5053       ValLoR = legalizeToReg(Val);
5054     }
5055     lowerCLZ(Dest, ValLoR, ValHiR);
5056     return;
5057   }
5058   case Intrinsics::Cttz: {
5059     // Essentially like Clz, but reverse the bits first.
5060     Operand *Val = Instr->getArg(0);
5061     Variable *ValLoR;
5062     Variable *ValHiR = nullptr;
5063     if (Val->getType() == IceType_i64) {
5064       Val = legalizeUndef(Val);
5065       ValLoR = legalizeToReg(loOperand(Val));
5066       ValHiR = legalizeToReg(hiOperand(Val));
5067       Variable *TLo = makeReg(IceType_i32);
5068       Variable *THi = makeReg(IceType_i32);
5069       _rbit(TLo, ValLoR);
5070       _rbit(THi, ValHiR);
5071       ValLoR = THi;
5072       ValHiR = TLo;
5073     } else {
5074       ValLoR = legalizeToReg(Val);
5075       Variable *T = makeReg(IceType_i32);
5076       _rbit(T, ValLoR);
5077       ValLoR = T;
5078     }
5079     lowerCLZ(Dest, ValLoR, ValHiR);
5080     return;
5081   }
5082   case Intrinsics::Fabs: {
5083     Variable *T = makeReg(DestTy);
5084     _vabs(T, legalizeToReg(Instr->getArg(0)));
5085     _mov(Dest, T);
5086     return;
5087   }
5088   case Intrinsics::Longjmp: {
5089     llvm::report_fatal_error("longjmp should have been prelowered.");
5090   }
5091   case Intrinsics::Memcpy: {
5092     llvm::report_fatal_error("memcpy should have been prelowered.");
5093   }
5094   case Intrinsics::Memmove: {
5095     llvm::report_fatal_error("memmove should have been prelowered.");
5096   }
5097   case Intrinsics::Memset: {
5098     llvm::report_fatal_error("memset should have been prelowered.");
5099   }
5100   case Intrinsics::Setjmp: {
5101     llvm::report_fatal_error("setjmp should have been prelowered.");
5102   }
5103   case Intrinsics::Sqrt: {
5104     Variable *Src = legalizeToReg(Instr->getArg(0));
5105     Variable *T = makeReg(DestTy);
5106     _vsqrt(T, Src);
5107     _mov(Dest, T);
5108     return;
5109   }
5110   case Intrinsics::Stacksave: {
5111     Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5112     _mov(Dest, SP);
5113     return;
5114   }
5115   case Intrinsics::Stackrestore: {
5116     Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5117     Variable *Val = legalizeToReg(Instr->getArg(0));
5118     _mov_redefined(SP, Val);
5119     return;
5120   }
5121   case Intrinsics::Trap:
5122     _trap();
5123     return;
5124   case Intrinsics::AddSaturateSigned:
5125   case Intrinsics::AddSaturateUnsigned: {
5126     bool Unsigned = (ID == Intrinsics::AddSaturateUnsigned);
5127     Variable *Src0 = legalizeToReg(Instr->getArg(0));
5128     Variable *Src1 = legalizeToReg(Instr->getArg(1));
5129     Variable *T = makeReg(DestTy);
5130     _vqadd(T, Src0, Src1, Unsigned);
5131     _mov(Dest, T);
5132     return;
5133   }
5134   case Intrinsics::LoadSubVector: {
5135     assert(llvm::isa<ConstantInteger32>(Instr->getArg(1)) &&
5136            "LoadSubVector second argument must be a constant");
5137     Variable *Dest = Instr->getDest();
5138     Type Ty = Dest->getType();
5139     auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(1));
5140     Operand *Addr = Instr->getArg(0);
5141     OperandARM32Mem *Src = formMemoryOperand(Addr, Ty);
5142     doMockBoundsCheck(Src);
5143 
5144     if (Dest->isRematerializable()) {
5145       Context.insert<InstFakeDef>(Dest);
5146       return;
5147     }
5148 
5149     auto *T = makeReg(Ty);
5150     switch (SubVectorSize->getValue()) {
5151     case 4:
5152       _vldr1d(T, Src);
5153       break;
5154     case 8:
5155       _vldr1q(T, Src);
5156       break;
5157     default:
5158       Func->setError("Unexpected size for LoadSubVector");
5159       return;
5160     }
5161     _mov(Dest, T);
5162     return;
5163   }
5164   case Intrinsics::StoreSubVector: {
5165     assert(llvm::isa<ConstantInteger32>(Instr->getArg(2)) &&
5166            "StoreSubVector third argument must be a constant");
5167     auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(2));
5168     Variable *Value = legalizeToReg(Instr->getArg(0));
5169     Operand *Addr = Instr->getArg(1);
5170     OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5171     doMockBoundsCheck(NewAddr);
5172 
5173     Value = legalizeToReg(Value);
5174 
5175     switch (SubVectorSize->getValue()) {
5176     case 4:
5177       _vstr1d(Value, NewAddr);
5178       break;
5179     case 8:
5180       _vstr1q(Value, NewAddr);
5181       break;
5182     default:
5183       Func->setError("Unexpected size for StoreSubVector");
5184       return;
5185     }
5186     return;
5187   }
5188   case Intrinsics::MultiplyAddPairs: {
5189     Variable *Src0 = legalizeToReg(Instr->getArg(0));
5190     Variable *Src1 = legalizeToReg(Instr->getArg(1));
5191     Variable *T = makeReg(DestTy);
5192     _vmlap(T, Src0, Src1);
5193     _mov(Dest, T);
5194     return;
5195   }
5196   case Intrinsics::MultiplyHighSigned:
5197   case Intrinsics::MultiplyHighUnsigned: {
5198     bool Unsigned = (ID == Intrinsics::MultiplyHighUnsigned);
5199     Variable *Src0 = legalizeToReg(Instr->getArg(0));
5200     Variable *Src1 = legalizeToReg(Instr->getArg(1));
5201     Variable *T = makeReg(DestTy);
5202     _vmulh(T, Src0, Src1, Unsigned);
5203     _mov(Dest, T);
5204     return;
5205   }
5206   case Intrinsics::Nearbyint: {
5207     UnimplementedLoweringError(this, Instr);
5208     return;
5209   }
5210   case Intrinsics::Round: {
5211     UnimplementedLoweringError(this, Instr);
5212     return;
5213   }
5214   case Intrinsics::SignMask: {
5215     UnimplementedLoweringError(this, Instr);
5216     return;
5217   }
5218   case Intrinsics::SubtractSaturateSigned:
5219   case Intrinsics::SubtractSaturateUnsigned: {
5220     bool Unsigned = (ID == Intrinsics::SubtractSaturateUnsigned);
5221     Variable *Src0 = legalizeToReg(Instr->getArg(0));
5222     Variable *Src1 = legalizeToReg(Instr->getArg(1));
5223     Variable *T = makeReg(DestTy);
5224     _vqsub(T, Src0, Src1, Unsigned);
5225     _mov(Dest, T);
5226     return;
5227   }
5228   case Intrinsics::VectorPackSigned:
5229   case Intrinsics::VectorPackUnsigned: {
5230     bool Unsigned = (ID == Intrinsics::VectorPackUnsigned);
5231     bool Saturating = true;
5232     Variable *Src0 = legalizeToReg(Instr->getArg(0));
5233     Variable *Src1 = legalizeToReg(Instr->getArg(1));
5234     Variable *T = makeReg(DestTy);
5235     _vqmovn2(T, Src0, Src1, Unsigned, Saturating);
5236     _mov(Dest, T);
5237     return;
5238   }
5239   default: // UnknownIntrinsic
5240     Func->setError("Unexpected intrinsic");
5241     return;
5242   }
5243   return;
5244 }
5245 
5246 void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
5247   Type Ty = Dest->getType();
5248   assert(Ty == IceType_i32 || Ty == IceType_i64);
5249   Variable *T = makeReg(IceType_i32);
5250   _clz(T, ValLoR);
5251   if (Ty == IceType_i64) {
5252     auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5253     auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5254     Operand *Zero =
5255         legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
5256     Operand *ThirtyTwo =
5257         legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
5258     _cmp(ValHiR, Zero);
5259     Variable *T2 = makeReg(IceType_i32);
5260     _add(T2, T, ThirtyTwo);
5261     _clz(T2, ValHiR, CondARM32::NE);
5262     // T2 is actually a source as well when the predicate is not AL (since it
5263     // may leave T2 alone). We use _set_dest_redefined to prolong the liveness
5264     // of T2 as if it was used as a source.
5265     _set_dest_redefined();
5266     _mov(DestLo, T2);
5267     Variable *T3 = makeReg(Zero->getType());
5268     _mov(T3, Zero);
5269     _mov(DestHi, T3);
5270     return;
5271   }
5272   _mov(Dest, T);
5273   return;
5274 }
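// The i64 sequence computes "hi != 0 ? clz(hi) : 32 + clz(lo)" (sketch):
//
//   clz   t, lo
//   cmp   hi, #0
//   add   t2, t, #32
//   clzne t2, hi
//   mov   dest.lo, t2
//   mov   dest.hi, #0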
5275 
5276 void TargetARM32::lowerLoad(const InstLoad *Load) {
5277   // A Load instruction can be treated the same as an Assign instruction, after
5278   // the source operand is transformed into an OperandARM32Mem operand.
5279   Type Ty = Load->getDest()->getType();
5280   Operand *Src0 = formMemoryOperand(Load->getLoadAddress(), Ty);
5281   Variable *DestLoad = Load->getDest();
5282 
5283   // TODO(jvoung): handle folding opportunities. Sign and zero extension can
5284   // be folded into a load.
5285   auto *Assign = InstAssign::create(Func, DestLoad, Src0);
5286   lowerAssign(Assign);
5287 }
5288 
5289 namespace {
5290 void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
5291                     const Variable *OffsetReg, int16_t OffsetRegShAmt,
5292                     const Inst *Reason) {
5293   if (!BuildDefs::dump())
5294     return;
5295   if (!Func->isVerbose(IceV_AddrOpt))
5296     return;
5297   OstreamLocker _(Func->getContext());
5298   Ostream &Str = Func->getContext()->getStrDump();
5299   Str << "Instruction: ";
5300   Reason->dumpDecorated(Func);
5301   Str << "  results in Base=";
5302   if (Base)
5303     Base->dump(Func);
5304   else
5305     Str << "<null>";
5306   Str << ", OffsetReg=";
5307   if (OffsetReg)
5308     OffsetReg->dump(Func);
5309   else
5310     Str << "<null>";
5311   Str << ", Shift=" << OffsetRegShAmt << ", Offset=" << Offset << "\n";
5312 }
5313 
5314 bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
5315                  int32_t *Offset, const Inst **Reason) {
5316   // Var originates from Var=SrcVar ==> set Var:=SrcVar
5317   if (*Var == nullptr)
5318     return false;
5319   const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
5320   if (!VarAssign)
5321     return false;
5322   assert(!VMetadata->isMultiDef(*Var));
5323   if (!llvm::isa<InstAssign>(VarAssign))
5324     return false;
5325 
5326   Operand *SrcOp = VarAssign->getSrc(0);
5327   bool Optimized = false;
5328   if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
5329     if (!VMetadata->isMultiDef(SrcVar) ||
5330         // TODO: ensure SrcVar stays single-BB
5331         false) {
5332       Optimized = true;
5333       *Var = SrcVar;
5334     }
5335   } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5336     // Check for overflow before folding the constant into the offset.
5337     int32_t MoreOffset = Const->getValue();
5338     if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5339       return false;
5340     *Var = nullptr;
5341     *Offset += MoreOffset;
5342     Optimized = true;
5343   }
5344 
5345   if (Optimized) {
5346     *Reason = VarAssign;
5347   }
5348 
5349   return Optimized;
5350 }
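// For example, given "t = b" followed by "x = load [t]", matchAssign rewrites
// the base from t to b, so later iterations of the formation loop can keep
// folding through b.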
5351 
5352 bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
5353   if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5354     switch (Arith->getOp()) {
5355     default:
5356       return false;
5357     case InstArithmetic::Add:
5358     case InstArithmetic::Sub:
5359       *Kind = Arith->getOp();
5360       return true;
5361     }
5362   }
5363   return false;
5364 }
5365 
5366 bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable **Base,
5367                             Variable **OffsetReg, int32_t OffsetRegShamt,
5368                             const Inst **Reason) {
5369   // OffsetReg==nullptr && Base is Base=Var1+Var2 ==>
5370   //   set Base=Var1, OffsetReg=Var2, Shift=0
5371   if (*Base == nullptr)
5372     return false;
5373   if (*OffsetReg != nullptr)
5374     return false;
5375   (void)OffsetRegShamt;
5376   assert(OffsetRegShamt == 0);
5377   const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5378   if (BaseInst == nullptr)
5379     return false;
5380   assert(!VMetadata->isMultiDef(*Base));
5381   if (BaseInst->getSrcSize() < 2)
5382     return false;
5383   auto *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0));
5384   if (!Var1)
5385     return false;
5386   if (VMetadata->isMultiDef(Var1))
5387     return false;
5388   auto *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1));
5389   if (!Var2)
5390     return false;
5391   if (VMetadata->isMultiDef(Var2))
5392     return false;
5393   InstArithmetic::OpKind _;
5394   if (!isAddOrSub(BaseInst, &_) ||
5395       // TODO: ensure Var1 and Var2 stay single-BB
5396       false)
5397     return false;
5398   *Base = Var1;
5399   *OffsetReg = Var2;
5400   // OffsetRegShamt is already 0.
5401   *Reason = BaseInst;
5402   return true;
5403 }
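// For example, "t = a + i" followed by "load [t]" becomes Base=a, OffsetReg=i,
// i.e., the [a, i] register-offset addressing mode.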
5404 
5405 bool matchShiftedOffsetReg(const VariablesMetadata *VMetadata,
5406                            Variable **OffsetReg, OperandARM32::ShiftKind *Kind,
5407                            int32_t *OffsetRegShamt, const Inst **Reason) {
5408   // OffsetReg is OffsetReg=Var*Const && log2(Const)+Shift<=32 ==>
5409   //   OffsetReg=Var, Shift+=log2(Const)
5410   // OffsetReg is OffsetReg=Var<<Const && Const+Shift<=32 ==>
5411   //   OffsetReg=Var, Shift+=Const
5412   // OffsetReg is OffsetReg=Var>>Const && Const-Shift>=-32 ==>
5413   //   OffsetReg=Var, Shift-=Const
5414   OperandARM32::ShiftKind NewShiftKind = OperandARM32::kNoShift;
5415   if (*OffsetReg == nullptr)
5416     return false;
5417   auto *IndexInst = VMetadata->getSingleDefinition(*OffsetReg);
5418   if (IndexInst == nullptr)
5419     return false;
5420   assert(!VMetadata->isMultiDef(*OffsetReg));
5421   if (IndexInst->getSrcSize() < 2)
5422     return false;
5423   auto *ArithInst = llvm::dyn_cast<InstArithmetic>(IndexInst);
5424   if (ArithInst == nullptr)
5425     return false;
5426   auto *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0));
5427   if (Var == nullptr)
5428     return false;
5429   auto *Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
5430   if (Const == nullptr) {
5431     assert(!llvm::isa<ConstantInteger32>(ArithInst->getSrc(0)));
5432     return false;
5433   }
5434   if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32)
5435     return false;
5436 
5437   uint32_t NewShamt = -1;
5438   switch (ArithInst->getOp()) {
5439   default:
5440     return false;
5441   case InstArithmetic::Shl: {
5442     NewShiftKind = OperandARM32::LSL;
5443     NewShamt = Const->getValue();
5444     if (NewShamt > 31)
5445       return false;
5446   } break;
5447   case InstArithmetic::Lshr: {
5448     NewShiftKind = OperandARM32::LSR;
5449     NewShamt = Const->getValue();
5450     if (NewShamt > 31)
5451       return false;
5452   } break;
5453   case InstArithmetic::Ashr: {
5454     NewShiftKind = OperandARM32::ASR;
5455     NewShamt = Const->getValue();
5456     if (NewShamt > 31)
5457       return false;
5458   } break;
5459   case InstArithmetic::Udiv:
5460   case InstArithmetic::Mul: {
5461     const uint32_t UnsignedConst = Const->getValue();
5462     NewShamt = llvm::findFirstSet(UnsignedConst);
5463     if (NewShamt != llvm::findLastSet(UnsignedConst)) {
5464       // First bit set is not the same as the last bit set, so Const is not
5465       // a power of 2.
5466       return false;
5467     }
5468     NewShiftKind = ArithInst->getOp() == InstArithmetic::Udiv
5469                        ? OperandARM32::LSR
5470                        : OperandARM32::LSL;
5471   } break;
5472   }
5473   // Allowed "transitions":
5474   //   kNoShift -> * iff NewShamt <= 31
5475   //   LSL -> LSL    iff NewShamt + OffsetRegShamt <= 31
5476   //   LSR -> LSR    iff NewShamt + OffsetRegShamt <= 31
5477   //   ASR -> ASR    iff NewShamt + OffsetRegShamt <= 31
5478   if (*Kind != OperandARM32::kNoShift && *Kind != NewShiftKind) {
5479     return false;
5480   }
5481   const int32_t NewOffsetRegShamt = *OffsetRegShamt + NewShamt;
5482   if (NewOffsetRegShamt > 31)
5483     return false;
5484   *OffsetReg = Var;
5485   *OffsetRegShamt = NewOffsetRegShamt;
5486   *Kind = NewShiftKind;
5487   *Reason = IndexInst;
5488   return true;
5489 }
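// For example, "i4 = i * 4; t = a + i4; load [t]" folds the power-of-two
// multiply into the shift: Base=a, OffsetReg=i, LSL #2, which can later be
// emitted as "ldr r, [a, i, lsl #2]".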
5490 
5491 bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
5492                      int32_t *Offset, const Inst **Reason) {
5493   // Base is Base=Var+Const || Base is Base=Const+Var ==>
5494   //   set Base=Var, Offset+=Const
5495   // Base is Base=Var-Const ==>
5496   //   set Base=Var, Offset-=Const
5497   if (*Base == nullptr)
5498     return false;
5499   const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5500   if (BaseInst == nullptr) {
5501     return false;
5502   }
5503   assert(!VMetadata->isMultiDef(*Base));
5504 
5505   auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
5506   if (ArithInst == nullptr)
5507     return false;
5508   InstArithmetic::OpKind Kind;
5509   if (!isAddOrSub(ArithInst, &Kind))
5510     return false;
5511   bool IsAdd = Kind == InstArithmetic::Add;
5512   Operand *Src0 = ArithInst->getSrc(0);
5513   Operand *Src1 = ArithInst->getSrc(1);
5514   auto *Var0 = llvm::dyn_cast<Variable>(Src0);
5515   auto *Var1 = llvm::dyn_cast<Variable>(Src1);
5516   auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
5517   auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
5518   Variable *NewBase = nullptr;
5519   int32_t NewOffset = *Offset;
5520 
5521   if (Var0 == nullptr && Const0 == nullptr) {
5522     assert(llvm::isa<ConstantRelocatable>(Src0));
5523     return false;
5524   }
5525 
5526   if (Var1 == nullptr && Const1 == nullptr) {
5527     assert(llvm::isa<ConstantRelocatable>(Src1));
5528     return false;
5529   }
5530 
5531   if (Var0 && Var1)
5532     // TODO(jpp): merge base/index splitting into here.
5533     return false;
5534   if (!IsAdd && Var1)
5535     return false;
5536   if (Var0)
5537     NewBase = Var0;
5538   else if (Var1)
5539     NewBase = Var1;
5540   // Compute the updated constant offset.
5541   if (Const0) {
5542     int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
5543     if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5544       return false;
5545     NewOffset += MoreOffset;
5546   }
5547   if (Const1) {
5548     int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
5549     if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5550       return false;
5551     NewOffset += MoreOffset;
5552   }
5553 
5554   // Update the computed address parameters once we are sure optimization
5555   // is valid.
5556   *Base = NewBase;
5557   *Offset = NewOffset;
5558   *Reason = BaseInst;
5559   return true;
5560 }
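// For example, "t = a + 16" followed by "load [t]" becomes Base=a, Offset=16,
// which formAddressingMode can emit as "ldr r, [a, #16]" when #16 is a legal
// immediate for the access type.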
5561 } // end of anonymous namespace
5562 
5563 OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func,
5564                                                  const Inst *LdSt,
5565                                                  Operand *Base) {
5566   assert(Base != nullptr);
5567   int32_t OffsetImm = 0;
5568   Variable *OffsetReg = nullptr;
5569   int32_t OffsetRegShamt = 0;
5570   OperandARM32::ShiftKind ShiftKind = OperandARM32::kNoShift;
5571 
5572   Func->resetCurrentNode();
5573   if (Func->isVerbose(IceV_AddrOpt)) {
5574     OstreamLocker _(Func->getContext());
5575     Ostream &Str = Func->getContext()->getStrDump();
5576     Str << "\nAddress mode formation:\t";
5577     LdSt->dumpDecorated(Func);
5578   }
5579 
5580   if (isVectorType(Ty))
5581     // vector loads and stores do not allow offsets, and only support the
5582     // "[reg]" addressing mode (the other supported modes are write-back.)
5583     return nullptr;
5584 
5585   auto *BaseVar = llvm::dyn_cast<Variable>(Base);
5586   if (BaseVar == nullptr)
5587     return nullptr;
5588 
5589   (void)MemTraitsSize;
5590   assert(Ty < MemTraitsSize);
5591   auto *TypeTraits = &MemTraits[Ty];
5592   const bool CanHaveIndex = TypeTraits->CanHaveIndex;
5593   const bool CanHaveShiftedIndex = TypeTraits->CanHaveShiftedIndex;
5594   const bool CanHaveImm = TypeTraits->CanHaveImm;
5595   const int32_t ValidImmMask = TypeTraits->ValidImmMask;
5596   (void)ValidImmMask;
5597   assert(!CanHaveImm || ValidImmMask >= 0);
5598 
5599   const VariablesMetadata *VMetadata = Func->getVMetadata();
5600   const Inst *Reason = nullptr;
5601 
5602   do {
5603     if (Reason != nullptr) {
5604       dumpAddressOpt(Func, BaseVar, OffsetImm, OffsetReg, OffsetRegShamt,
5605                      Reason);
5606       Reason = nullptr;
5607     }
5608 
5609     if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5610       continue;
5611     }
5612 
5613     if (CanHaveIndex &&
5614         matchAssign(VMetadata, &OffsetReg, &OffsetImm, &Reason)) {
5615       continue;
5616     }
5617 
5618     if (CanHaveIndex && matchCombinedBaseIndex(VMetadata, &BaseVar, &OffsetReg,
5619                                                OffsetRegShamt, &Reason)) {
5620       continue;
5621     }
5622 
5623     if (CanHaveShiftedIndex) {
5624       if (matchShiftedOffsetReg(VMetadata, &OffsetReg, &ShiftKind,
5625                                 &OffsetRegShamt, &Reason)) {
5626         continue;
5627       }
5628 
5629       if ((OffsetRegShamt == 0) &&
5630           matchShiftedOffsetReg(VMetadata, &BaseVar, &ShiftKind,
5631                                 &OffsetRegShamt, &Reason)) {
5632         std::swap(BaseVar, OffsetReg);
5633         continue;
5634       }
5635     }
5636 
5637     if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5638       continue;
5639     }
5640   } while (Reason);
5641 
5642   if (BaseVar == nullptr) {
5643     // [OffsetReg{, LSL Shamt}{, #OffsetImm}] is not legal in ARM, so we have to
5644     // legalize the addressing mode to [BaseReg, OffsetReg{, LSL Shamt}].
5645     // Instead of a zeroed BaseReg, we initialize it with OffsetImm:
5646     //
5647     // [OffsetReg{, LSL Shamt}{, #OffsetImm}] ->
5648     //     mov BaseReg, #OffsetImm
5649     //     use of [BaseReg, OffsetReg{, LSL Shamt}]
5650     //
5651     const Type PointerType = getPointerType();
5652     BaseVar = makeReg(PointerType);
5653     Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
5654     OffsetImm = 0;
5655   } else if (OffsetImm != 0) {
5656     // ARM ldr/str instructions have a limited-range immediate offset. The
5657     // formation loop above materialized the immediate without range checking,
5658     // so here we ensure the generated offset is encodable.
5659     const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
5660     const InstArithmetic::OpKind Op =
5661         OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
5662 
5663     if (!CanHaveImm || !isLegalMemOffset(Ty, OffsetImm) ||
5664         OffsetReg != nullptr) {
5665       if (OffsetReg == nullptr) {
5666         // We formed a [Base, #const] addressing mode which is not encodable in
5667         // ARM. There is little point in forming an address mode now if we have
5668         // no offset register. Effectively, we would end up with
5669         //
5670         // [Base, #const] -> add T, Base, #const
5671         //                   use of [T]
5672         //
5673         // which is exactly what we already have. So we just bite the bullet
5674         // here and don't form any address mode.
5675         return nullptr;
5676       }
5677       // We formed [Base, Offset {, LSL amount}, #const], which ARM cannot
5678       // encode. Legalize it to
5679       // [Base, Offset {, LSL amount}, #const] ->
5680       //      add T, Base, #const
5681       //      use of [T, Offset {, LSL amount}]
5682       const Type PointerType = getPointerType();
5683       Variable *T = makeReg(PointerType);
5684       Context.insert<InstArithmetic>(Op, T, BaseVar,
5685                                      Ctx->getConstantInt32(PositiveOffset));
5686       BaseVar = T;
5687       OffsetImm = 0;
5688     }
5689   }
5690 
5691   assert(BaseVar != nullptr);
5692   assert(OffsetImm == 0 || OffsetReg == nullptr);
5693   assert(OffsetReg == nullptr || CanHaveIndex);
5694   assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm
5695                        : (ValidImmMask & OffsetImm) == OffsetImm);
5696 
5697   if (OffsetReg != nullptr) {
5698     Variable *OffsetR = makeReg(getPointerType());
5699     Context.insert<InstAssign>(OffsetR, OffsetReg);
5700     return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetR, ShiftKind,
5701                                    OffsetRegShamt);
5702   }
5703 
5704   return OperandARM32Mem::create(
5705       Func, Ty, BaseVar,
5706       llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
5707 }
5708 
5709 void TargetARM32::doAddressOptLoad() {
5710   Inst *Instr = iteratorToInst(Context.getCur());
5711   assert(llvm::isa<InstLoad>(Instr));
5712   Variable *Dest = Instr->getDest();
5713   Operand *Addr = Instr->getSrc(0);
5714   if (OperandARM32Mem *Mem =
5715           formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
5716     Instr->setDeleted();
5717     Context.insert<InstLoad>(Dest, Mem);
5718   }
5719 }
5720 
5721 void TargetARM32::lowerPhi(const InstPhi * /*Instr*/) {
5722   Func->setError("Phi found in regular instruction list");
5723 }
5724 
5725 void TargetARM32::lowerRet(const InstRet *Instr) {
5726   Variable *Reg = nullptr;
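  // Return values are placed per this target's hard-float convention: i64 in
  // r0/r1, f32 in s0, f64 in d0, vectors in q0, other scalars in r0.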
5727   if (Instr->hasRetValue()) {
5728     Operand *Src0 = Instr->getRetValue();
5729     Type Ty = Src0->getType();
5730     if (Ty == IceType_i64) {
5731       Src0 = legalizeUndef(Src0);
5732       Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0);
5733       Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1);
5734       Reg = R0;
5735       Context.insert<InstFakeUse>(R1);
5736     } else if (Ty == IceType_f32) {
5737       Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0);
5738       Reg = S0;
5739     } else if (Ty == IceType_f64) {
5740       Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
5741       Reg = D0;
5742     } else if (isVectorType(Src0->getType())) {
5743       Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
5744       Reg = Q0;
5745     } else {
5746       Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
5747       Reg = makeReg(Src0F->getType(), RegARM32::Reg_r0);
5748       _mov(Reg, Src0F, CondARM32::AL);
5749     }
5750   }
5751   // Add a ret instruction even if sandboxing is enabled, because addEpilog
5752   // explicitly looks for a ret instruction as a marker for where to insert the
5753   // frame removal instructions. addEpilog is responsible for restoring the
5754   // "lr" register as needed prior to this ret instruction.
5755   _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
5756 
5757   // Add a fake use of sp to make sure sp stays alive for the entire function.
5758   // Otherwise post-call sp adjustments get dead-code eliminated.
5759   // TODO: Are there more places where the fake use should be inserted? E.g.
5760   // "void f(int n){while(1) g(n);}" may not have a ret instruction.
5761   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5762   Context.insert<InstFakeUse>(SP);
5763 }
5764 
5765 void TargetARM32::lowerShuffleVector(const InstShuffleVector *Instr) {
5766   auto *Dest = Instr->getDest();
5767   const Type DestTy = Dest->getType();
5768 
5769   auto *T = makeReg(DestTy);
5770   auto *Src0 = Instr->getSrc(0);
5771   auto *Src1 = Instr->getSrc(1);
5772   const SizeT NumElements = typeNumElements(DestTy);
5773   const Type ElementType = typeElementType(DestTy);
5774 
5775   bool Replicate = true;
5776   for (SizeT I = 1; Replicate && I < Instr->getNumIndexes(); ++I) {
5777     if (Instr->getIndexValue(I) != Instr->getIndexValue(0)) {
5778       Replicate = false;
5779     }
5780   }
5781 
5782   if (Replicate) {
5783     Variable *Src0Var = legalizeToReg(Src0);
5784     _vdup(T, Src0Var, Instr->getIndexValue(0));
5785     _mov(Dest, T);
5786     return;
5787   }
5788 
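  // Recognize a few shuffles that map to single NEON instructions. E.g., the
  // index pattern (0, 8, 1, 9, ...) interleaves the low halves of the two
  // sources, matching the low result of a NEON vzip.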
5789   switch (DestTy) {
5790   case IceType_v8i1:
5791   case IceType_v8i16: {
5792     static constexpr SizeT ExpectedNumElements = 8;
5793     assert(ExpectedNumElements == Instr->getNumIndexes());
5794     (void)ExpectedNumElements;
5795 
5796     if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3)) {
5797       Variable *Src0R = legalizeToReg(Src0);
5798       _vzip(T, Src0R, Src0R);
5799       _mov(Dest, T);
5800       return;
5801     }
5802 
5803     if (Instr->indexesAre(0, 8, 1, 9, 2, 10, 3, 11)) {
5804       Variable *Src0R = legalizeToReg(Src0);
5805       Variable *Src1R = legalizeToReg(Src1);
5806       _vzip(T, Src0R, Src1R);
5807       _mov(Dest, T);
5808       return;
5809     }
5810 
5811     if (Instr->indexesAre(0, 2, 4, 6, 0, 2, 4, 6)) {
5812       Variable *Src0R = legalizeToReg(Src0);
5813       _vqmovn2(T, Src0R, Src0R, false, false);
5814       _mov(Dest, T);
5815       return;
5816     }
5817   } break;
5818   case IceType_v16i1:
5819   case IceType_v16i8: {
5820     static constexpr SizeT ExpectedNumElements = 16;
5821     assert(ExpectedNumElements == Instr->getNumIndexes());
5822     (void)ExpectedNumElements;
5823 
5824     if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7)) {
5825       Variable *Src0R = legalizeToReg(Src0);
5826       _vzip(T, Src0R, Src0R);
5827       _mov(Dest, T);
5828       return;
5829     }
5830 
5831     if (Instr->indexesAre(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7,
5832                           23)) {
5833       Variable *Src0R = legalizeToReg(Src0);
5834       Variable *Src1R = legalizeToReg(Src1);
5835       _vzip(T, Src0R, Src1R);
5836       _mov(Dest, T);
5837       return;
5838     }
5839   } break;
5840   case IceType_v4i1:
5841   case IceType_v4i32:
5842   case IceType_v4f32: {
5843     static constexpr SizeT ExpectedNumElements = 4;
5844     assert(ExpectedNumElements == Instr->getNumIndexes());
5845     (void)ExpectedNumElements;
5846 
5847     if (Instr->indexesAre(0, 0, 1, 1)) {
5848       Variable *Src0R = legalizeToReg(Src0);
5849       _vzip(T, Src0R, Src0R);
5850       _mov(Dest, T);
5851       return;
5852     }
5853 
5854     if (Instr->indexesAre(0, 4, 1, 5)) {
5855       Variable *Src0R = legalizeToReg(Src0);
5856       Variable *Src1R = legalizeToReg(Src1);
5857       _vzip(T, Src0R, Src1R);
5858       _mov(Dest, T);
5859       return;
5860     }
5861 
5862     if (Instr->indexesAre(0, 1, 4, 5)) {
5863       Variable *Src0R = legalizeToReg(Src0);
5864       Variable *Src1R = legalizeToReg(Src1);
5865       _vmovlh(T, Src0R, Src1R);
5866       _mov(Dest, T);
5867       return;
5868     }
5869 
5870     if (Instr->indexesAre(2, 3, 2, 3)) {
5871       Variable *Src0R = legalizeToReg(Src0);
5872       _vmovhl(T, Src0R, Src0R);
5873       _mov(Dest, T);
5874       return;
5875     }
5876 
5877     if (Instr->indexesAre(2, 3, 6, 7)) {
5878       Variable *Src0R = legalizeToReg(Src0);
5879       Variable *Src1R = legalizeToReg(Src1);
5880       _vmovhl(T, Src1R, Src0R);
5881       _mov(Dest, T);
5882       return;
5883     }
5884   } break;
5885   default:
5886     break;
5887     // TODO(jpp): figure out how to properly lower this without scalarization.
5888   }
5889 
5890   // Unoptimized shuffle. Perform a series of inserts and extracts.
5891   Context.insert<InstFakeDef>(T);
5892   for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
5893     auto *Index = Instr->getIndex(I);
5894     const SizeT Elem = Index->getValue();
5895     auto *ExtElmt = makeReg(ElementType);
5896     if (Elem < NumElements) {
5897       lowerExtractElement(
5898           InstExtractElement::create(Func, ExtElmt, Src0, Index));
5899     } else {
5900       lowerExtractElement(InstExtractElement::create(
5901           Func, ExtElmt, Src1,
5902           Ctx->getConstantInt32(Index->getValue() - NumElements)));
5903     }
5904     auto *NewT = makeReg(DestTy);
5905     lowerInsertElement(InstInsertElement::create(Func, NewT, T, ExtElmt,
5906                                                  Ctx->getConstantInt32(I)));
5907     T = NewT;
5908   }
5909   _mov(Dest, T);
5910 }
5911 
5912 void TargetARM32::lowerSelect(const InstSelect *Instr) {
5913   Variable *Dest = Instr->getDest();
5914   Type DestTy = Dest->getType();
5915   Operand *SrcT = Instr->getTrueOperand();
5916   Operand *SrcF = Instr->getFalseOperand();
5917   Operand *Condition = Instr->getCondition();
5918 
5919   if (!isVectorType(DestTy)) {
5920     lowerInt1ForSelect(Dest, Condition, legalizeUndef(SrcT),
5921                        legalizeUndef(SrcF));
5922     return;
5923   }
5924 
5925   Type TType = DestTy;
5926   switch (DestTy) {
5927   default:
5928     llvm::report_fatal_error("Unexpected type for vector select.");
5929   case IceType_v4i1:
5930     TType = IceType_v4i32;
5931     break;
5932   case IceType_v8i1:
5933     TType = IceType_v8i16;
5934     break;
5935   case IceType_v16i1:
5936     TType = IceType_v16i8;
5937     break;
5938   case IceType_v4f32:
5939     TType = IceType_v4i32;
5940     break;
5941   case IceType_v4i32:
5942   case IceType_v8i16:
5943   case IceType_v16i8:
5944     break;
5945   }
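  // Sign-extending the i1 condition yields an all-ones/all-zeros mask per
  // lane; vbsl then selects SrcT bits where the mask is 1 and SrcF bits where
  // it is 0.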
5946   auto *T = makeReg(TType);
5947   lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
5948   auto *SrcTR = legalizeToReg(SrcT);
5949   auto *SrcFR = legalizeToReg(SrcF);
5950   _vbsl(T, SrcTR, SrcFR)->setDestRedefined();
5951   _mov(Dest, T);
5952 }
5953 
5954 void TargetARM32::lowerStore(const InstStore *Instr) {
5955   Operand *Value = Instr->getData();
5956   Operand *Addr = Instr->getStoreAddress();
5957   OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5958   Type Ty = NewAddr->getType();
5959 
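  // A 64-bit store is split into two 32-bit strs of the hi and lo halves,
  // since there is no single core-register str for i64 here.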
5960   if (Ty == IceType_i64) {
5961     Value = legalizeUndef(Value);
5962     Variable *ValueHi = legalizeToReg(hiOperand(Value));
5963     Variable *ValueLo = legalizeToReg(loOperand(Value));
5964     _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
5965     _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
5966   } else {
5967     Variable *ValueR = legalizeToReg(Value);
5968     _str(ValueR, NewAddr);
5969   }
5970 }
5971 
5972 void TargetARM32::doAddressOptStore() {
5973   Inst *Instr = iteratorToInst(Context.getCur());
5974   assert(llvm::isa<InstStore>(Instr));
5975   Operand *Src = Instr->getSrc(0);
5976   Operand *Addr = Instr->getSrc(1);
5977   if (OperandARM32Mem *Mem =
5978           formAddressingMode(Src->getType(), Func, Instr, Addr)) {
5979     Instr->setDeleted();
5980     Context.insert<InstStore>(Src, Mem);
5981   }
5982 }
5983 
5984 void TargetARM32::lowerSwitch(const InstSwitch *Instr) {
5985   // This implements the most naive possible lowering.
5986   // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
5987   Operand *Src0 = Instr->getComparison();
5988   SizeT NumCases = Instr->getNumCases();
5989   if (Src0->getType() == IceType_i64) {
5990     Src0 = legalizeUndef(Src0);
5991     Variable *Src0Lo = legalizeToReg(loOperand(Src0));
5992     Variable *Src0Hi = legalizeToReg(hiOperand(Src0));
5993     for (SizeT I = 0; I < NumCases; ++I) {
5994       Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
5995       Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
5996       ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex);
5997       ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex);
5998       _cmp(Src0Lo, ValueLo);
5999       _cmp(Src0Hi, ValueHi, CondARM32::EQ);
6000       _br(Instr->getLabel(I), CondARM32::EQ);
6001     }
6002     _br(Instr->getLabelDefault());
6003     return;
6004   }
6005 
6006   Variable *Src0Var = legalizeToReg(Src0);
6007   // If Src0 is narrower than i32, we left-shift it -- see the icmp lowering
6008   // for the reason.
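  // E.g., an i16 comparison value is shifted left by 16, and each case
  // constant below is shifted by the same amount, so the cmp only observes
  // the significant bits.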
6009   assert(Src0Var->mustHaveReg());
6010   const size_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
6011   assert(ShiftAmt < 32);
6012   if (ShiftAmt > 0) {
6013     Operand *ShAmtImm = shAmtImm(ShiftAmt);
6014     Variable *T = makeReg(IceType_i32);
6015     _lsl(T, Src0Var, ShAmtImm);
6016     Src0Var = T;
6017   }
6018 
6019   for (SizeT I = 0; I < NumCases; ++I) {
6020     Operand *Value = Ctx->getConstantInt32(Instr->getValue(I) << ShiftAmt);
6021     Value = legalize(Value, Legal_Reg | Legal_Flex);
6022     _cmp(Src0Var, Value);
6023     _br(Instr->getLabel(I), CondARM32::EQ);
6024   }
6025   _br(Instr->getLabelDefault());
6026 }
6027 
6028 void TargetARM32::lowerBreakpoint(const InstBreakpoint *Instr) {
6029   UnimplementedLoweringError(this, Instr);
6030 }
6031 
6032 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) {
6033   _trap();
6034 }
6035 
6036 void TargetARM32::prelowerPhis() {
6037   CfgNode *Node = Context.getNode();
6038   PhiLowering::prelowerPhis32Bit(this, Node, Func);
6039 }
6040 
6041 Variable *TargetARM32::makeVectorOfZeros(Type Ty, RegNumT RegNum) {
6042   Variable *Reg = makeReg(Ty, RegNum);
6043   Context.insert<InstFakeDef>(Reg);
6044   assert(isVectorType(Ty));
6045   _veor(Reg, Reg, Reg);
6046   return Reg;
6047 }
6048 
6049 // Helper for legalize() to emit the right code to lower an operand to a
6050 // register of the appropriate type.
6051 Variable *TargetARM32::copyToReg(Operand *Src, RegNumT RegNum) {
6052   Type Ty = Src->getType();
6053   Variable *Reg = makeReg(Ty, RegNum);
6054   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) {
6055     _ldr(Reg, Mem);
6056   } else {
6057     _mov(Reg, Src);
6058   }
6059   return Reg;
6060 }
6061 
6062 // TODO(jpp): remove unneeded else clauses in legalize.
6063 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
6064                                RegNumT RegNum) {
6065   Type Ty = From->getType();
6066   // Assert that a physical register is allowed. To date, all calls to
6067   // legalize() allow a physical register. Legal_Flex converts registers to the
6068   // right type OperandARM32FlexReg as needed.
6069   assert(Allowed & Legal_Reg);
6070 
6071   // Copied ipsis litteris from TargetX86Base<Machine>.
6072   if (RegNum.hasNoValue()) {
6073     if (Variable *Subst = getContext().availabilityGet(From)) {
6074       // At this point we know there is a potential substitution available.
6075       if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
6076           !Subst->hasReg()) {
6077         // At this point we know the substitution will have a register.
6078         if (From->getType() == Subst->getType()) {
6079           // At this point we know the substitution's register is compatible.
6080           return Subst;
6081         }
6082       }
6083     }
6084   }
6085 
6086   // Go through the various types of operands: OperandARM32Mem,
6087   // OperandARM32Flex, Constant, and Variable. Given the above assertion, if
6088   // the operand's type is not legal (e.g., OperandARM32Mem and !Legal_Mem), we
6089   // can always copy to a register.
6090   if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
6091     // Before doing anything with a Mem operand, we need to ensure that the
6092     // Base and Index components are in physical registers.
6093     Variable *Base = Mem->getBase();
6094     Variable *Index = Mem->getIndex();
6095     ConstantInteger32 *Offset = Mem->getOffset();
6096     assert(Index == nullptr || Offset == nullptr);
6097     Variable *RegBase = nullptr;
6098     Variable *RegIndex = nullptr;
6099     assert(Base);
6100     RegBase = llvm::cast<Variable>(
6101         legalize(Base, Legal_Reg | Legal_Rematerializable));
6102     assert(Ty < MemTraitsSize);
6103     if (Index) {
6104       assert(Offset == nullptr);
6105       assert(MemTraits[Ty].CanHaveIndex);
6106       RegIndex = legalizeToReg(Index);
6107     }
6108     if (Offset && Offset->getValue() != 0) {
6109       assert(Index == nullptr);
6110       static constexpr bool ZeroExt = false;
6111       assert(MemTraits[Ty].CanHaveImm);
6112       if (!OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
6113         llvm::report_fatal_error("Invalid memory offset.");
6114       }
6115     }
6116 
6117     // Create a new operand if there was a change.
6118     if (Base != RegBase || Index != RegIndex) {
6119       // There is only a reg +/- reg or reg + imm form.
6120       // Figure out which to re-create.
6121       if (RegIndex) {
6122         Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex,
6123                                       Mem->getShiftOp(), Mem->getShiftAmt(),
6124                                       Mem->getAddrMode());
6125       } else {
6126         Mem = OperandARM32Mem::create(Func, Ty, RegBase, Offset,
6127                                       Mem->getAddrMode());
6128       }
6129     }
6130     if (Allowed & Legal_Mem) {
6131       From = Mem;
6132     } else {
6133       Variable *Reg = makeReg(Ty, RegNum);
6134       _ldr(Reg, Mem);
6135       From = Reg;
6136     }
6137     return From;
6138   }
6139 
6140   if (auto *Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
6141     if (!(Allowed & Legal_Flex)) {
6142       if (auto *FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
6143         if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
6144           From = FlexReg->getReg();
6145           // Fall through and let From be checked as a Variable below, where it
6146           // may or may not need a register.
6147         } else {
6148           return copyToReg(Flex, RegNum);
6149         }
6150       } else {
6151         return copyToReg(Flex, RegNum);
6152       }
6153     } else {
6154       return From;
6155     }
6156   }
6157 
6158   if (llvm::isa<Constant>(From)) {
6159     if (llvm::isa<ConstantUndef>(From)) {
6160       From = legalizeUndef(From, RegNum);
6161       if (isVectorType(Ty))
6162         return From;
6163     }
6164     // There should be no constants of vector type (other than undef).
6165     assert(!isVectorType(Ty));
6166     if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
6167       uint32_t RotateAmt;
6168       uint32_t Immed_8;
6169       uint32_t Value = static_cast<uint32_t>(C32->getValue());
6170       if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
6171         // The immediate can be encoded as a Flex immediate. We may return the
6172         // Flex operand if the caller has Allow'ed it.
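        // A Flex immediate encodes an 8-bit value rotated right by an even
        // amount; e.g., 0xFF000000 is 0xFF rotated right by 8.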
6173         auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
6174         const bool CanBeFlex = Allowed & Legal_Flex;
6175         if (CanBeFlex)
6176           return OpF;
6177         return copyToReg(OpF, RegNum);
6178       } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt,
6179                                                  &Immed_8)) {
6180         // Even though the immediate can't be encoded as a Flex operand, its
6181         // inverted bit pattern can, so we use ARM's mvn to load the 32-bit
6182         // constant in a single instruction.
6183         auto *InvOpF =
6184             OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
6185         Variable *Reg = makeReg(Ty, RegNum);
6186         _mvn(Reg, InvOpF);
6187         return Reg;
6188       } else {
6189         // Do a movw/movt to a register.
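        // movw sets the low 16 bits; movt, when needed, sets the high 16
        // bits. E.g., 0x12345678 is loaded as movw Reg, #0x5678 followed by
        // movt Reg, #0x1234.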
6190         Variable *Reg = makeReg(Ty, RegNum);
6191         uint32_t UpperBits = (Value >> 16) & 0xFFFF;
6192         _movw(Reg,
6193               UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
6194         if (UpperBits != 0) {
6195           _movt(Reg, Ctx->getConstantInt32(UpperBits));
6196         }
6197         return Reg;
6198       }
6199     } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
6200       Variable *Reg = makeReg(Ty, RegNum);
6201       _movw(Reg, C);
6202       _movt(Reg, C);
6203       return Reg;
6204     } else {
6205       assert(isScalarFloatingType(Ty));
6206       uint32_t ModifiedImm;
6207       if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
6208         Variable *T = makeReg(Ty, RegNum);
6209         _mov(T,
6210              OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
6211         return T;
6212       }
6213 
6214       if (Ty == IceType_f64 && isFloatingPointZero(From)) {
6215         // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32
6216         // because ARM does not have a veor instruction with S registers.
6217         Variable *T = makeReg(IceType_f64, RegNum);
6218         Context.insert<InstFakeDef>(T);
6219         _veor(T, T, T);
6220         return T;
6221       }
6222 
6223       // Load floats/doubles from literal pool.
6224       auto *CFrom = llvm::cast<Constant>(From);
6225       assert(CFrom->getShouldBePooled());
6226       Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
6227       Variable *BaseReg = makeReg(getPointerType());
6228       _movw(BaseReg, Offset);
6229       _movt(BaseReg, Offset);
6230       From = formMemoryOperand(BaseReg, Ty);
6231       return copyToReg(From, RegNum);
6232     }
6233   }
6234 
6235   if (auto *Var = llvm::dyn_cast<Variable>(From)) {
6236     if (Var->isRematerializable()) {
6237       if (Allowed & Legal_Rematerializable) {
6238         return From;
6239       }
6240 
6241       Variable *T = makeReg(Var->getType(), RegNum);
6242       _mov(T, Var);
6243       return T;
6244     }
6245     // Check if the variable is guaranteed a physical register. This can happen
6246     // either when the variable is pre-colored or when it is assigned infinite
6247     // weight.
6248     bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
6249     // We need a new physical register for the operand if:
6250     //   Mem is not allowed and Var isn't guaranteed a physical
6251     //   register, or
6252     //   RegNum is required and Var->getRegNum() doesn't match.
6253     if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
6254         (RegNum.hasValue() && (RegNum != Var->getRegNum()))) {
6255       From = copyToReg(From, RegNum);
6256     }
6257     return From;
6258   }
6259   llvm::report_fatal_error("Unhandled operand kind in legalize()");
6260 
6261   return From;
6262 }
6263 
6264 /// Provide a trivial wrapper to legalize() for this common usage.
6265 Variable *TargetARM32::legalizeToReg(Operand *From, RegNumT RegNum) {
6266   return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
6267 }
6268 
6269 /// Legalize undef values to concrete values.
6270 Operand *TargetARM32::legalizeUndef(Operand *From, RegNumT RegNum) {
6271   Type Ty = From->getType();
6272   if (llvm::isa<ConstantUndef>(From)) {
6273     // Lower undefs to zero. Another option is to lower undefs to an
6274     // uninitialized register; however, using an uninitialized register results
6275     // in less predictable code.
6276     //
6277     // If in the future the implementation is changed to lower undef values to
6278     // uninitialized registers, a FakeDef will be needed:
6279     // Context.insert(InstFakeDef::create(Func, Reg)); This is in order to
6280     // ensure that the live range of Reg is not overestimated. If the constant
6281     // being lowered is a 64 bit value, then the result should be split and the
6282     // lo and hi components will need to go in uninitialized registers.
6283     if (isVectorType(Ty))
6284       return makeVectorOfZeros(Ty, RegNum);
6285     return Ctx->getConstantZero(Ty);
6286   }
6287   return From;
6288 }
6289 
6290 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
6291   auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
6292   // Address mode optimization may already have created an OperandARM32Mem,
6293   // in which case no further transformation is needed beyond legalizing its
6294   // components.
6295   if (Mem) {
6296     return llvm::cast<OperandARM32Mem>(legalize(Mem));
6297   }
6298   // If we didn't do address mode optimization, then we only have a
6299   // base/offset to work with. ARM always requires a base register, so
6300   // just use that to hold the operand.
6301   auto *Base = llvm::cast<Variable>(
6302       legalize(Operand, Legal_Reg | Legal_Rematerializable));
6303   return OperandARM32Mem::create(
6304       Func, Ty, Base,
6305       llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
6306 }
6307 
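// The 64-bit container must live in a core-register pair while its lo/hi
// halves are pinned out of registers, so the pair is allocated and encoded as
// a unit (needed, e.g., for ldrexd/strexd operands).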
6308 Variable64On32 *TargetARM32::makeI64RegPair() {
6309   Variable64On32 *Reg =
6310       llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
6311   Reg->setMustHaveReg();
6312   Reg->initHiLo(Func);
6313   Reg->getLo()->setMustNotHaveReg();
6314   Reg->getHi()->setMustNotHaveReg();
6315   return Reg;
6316 }
6317 
6318 Variable *TargetARM32::makeReg(Type Type, RegNumT RegNum) {
6319   // There aren't any 64-bit integer registers for ARM32.
6320   assert(Type != IceType_i64);
6321   assert(AllowTemporaryWithNoReg || RegNum.hasValue());
6322   Variable *Reg = Func->makeVariable(Type);
6323   if (RegNum.hasValue())
6324     Reg->setRegNum(RegNum);
6325   else
6326     Reg->setMustHaveReg();
6327   return Reg;
6328 }
6329 
6330 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align,
6331                                     RegNumT TmpRegNum) {
6332   assert(llvm::isPowerOf2_32(Align));
6333   uint32_t RotateAmt;
6334   uint32_t Immed_8;
6335   Operand *Mask;
6336   // Use AND or BIC to mask off the bits, depending on which immediate fits (if
6337   // it fits at all). Assume Align is usually small, in which case BIC works
6338   // better. Thus, this rounds down to the alignment.
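  // E.g., with Align == 16: bic Reg, Reg, #15 clears the low four bits and
  // rounds Reg down to a 16-byte boundary.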
6339   if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
6340     Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex,
6341                     TmpRegNum);
6342     _bic(Reg, Reg, Mask);
6343   } else {
6344     Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex,
6345                     TmpRegNum);
6346     _and(Reg, Reg, Mask);
6347   }
6348 }
6349 
6350 void TargetARM32::postLower() {
6351   if (Func->getOptLevel() == Opt_m1)
6352     return;
6353   markRedefinitions();
6354   Context.availabilityUpdate();
6355 }
6356 
6357 void TargetARM32::emit(const ConstantInteger32 *C) const {
6358   if (!BuildDefs::dump())
6359     return;
6360   Ostream &Str = Ctx->getStrEmit();
6361   Str << "#" << C->getValue();
6362 }
6363 
6364 void TargetARM32::emit(const ConstantInteger64 *) const {
6365   llvm::report_fatal_error("Not expecting to emit 64-bit integers");
6366 }
6367 
6368 void TargetARM32::emit(const ConstantFloat *C) const {
6369   (void)C;
6370   UnimplementedError(getFlags());
6371 }
6372 
6373 void TargetARM32::emit(const ConstantDouble *C) const {
6374   (void)C;
6375   UnimplementedError(getFlags());
6376 }
6377 
6378 void TargetARM32::emit(const ConstantUndef *) const {
6379   llvm::report_fatal_error("undef value encountered by emitter.");
6380 }
6381 
6382 void TargetARM32::emit(const ConstantRelocatable *C) const {
6383   if (!BuildDefs::dump())
6384     return;
6385   Ostream &Str = Ctx->getStrEmit();
6386   Str << "#";
6387   emitWithoutPrefix(C);
6388 }
6389 
6390 void TargetARM32::lowerInt1ForSelect(Variable *Dest, Operand *Boolean,
6391                                      Operand *TrueValue, Operand *FalseValue) {
6392   Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
6393 
6394   assert(Boolean->getType() == IceType_i1);
6395 
6396   bool NeedsAnd1 = false;
6397   if (TrueValue->getType() == IceType_i1) {
6398     assert(FalseValue->getType() == IceType_i1);
6399 
6400     Variable *TrueValueV = Func->makeVariable(IceType_i1);
6401     SafeBoolChain Src0Safe = lowerInt1(TrueValueV, TrueValue);
6402     TrueValue = TrueValueV;
6403 
6404     Variable *FalseValueV = Func->makeVariable(IceType_i1);
6405     SafeBoolChain Src1Safe = lowerInt1(FalseValueV, FalseValue);
6406     FalseValue = FalseValueV;
6407 
6408     NeedsAnd1 = Src0Safe == SBC_No || Src1Safe == SBC_No;
6409   }
6410 
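  // Lowering strategy: load FalseValue into T unconditionally, then
  // conditionally overwrite T with TrueValue under the condition(s) derived
  // from Boolean, and finally and-mask with 1 if the bool chain is unsafe.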
6411   Variable *DestLo = (Dest->getType() == IceType_i64)
6412                          ? llvm::cast<Variable>(loOperand(Dest))
6413                          : Dest;
6414   Variable *DestHi = (Dest->getType() == IceType_i64)
6415                          ? llvm::cast<Variable>(hiOperand(Dest))
6416                          : nullptr;
6417   Operand *FalseValueLo = (FalseValue->getType() == IceType_i64)
6418                               ? loOperand(FalseValue)
6419                               : FalseValue;
6420   Operand *FalseValueHi =
6421       (FalseValue->getType() == IceType_i64) ? hiOperand(FalseValue) : nullptr;
6422 
6423   Operand *TrueValueLo =
6424       (TrueValue->getType() == IceType_i64) ? loOperand(TrueValue) : TrueValue;
6425   Operand *TrueValueHi =
6426       (TrueValue->getType() == IceType_i64) ? hiOperand(TrueValue) : nullptr;
6427 
6428   Variable *T_Lo = makeReg(DestLo->getType());
6429   Variable *T_Hi = (DestHi == nullptr) ? nullptr : makeReg(DestHi->getType());
6430 
6431   _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex));
6432   if (DestHi) {
6433     _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex));
6434   }
6435 
6436   CondWhenTrue Cond(CondARM32::kNone);
6437   // FlagsWereSet is used to determine whether Boolean was folded or not. If
6438   // not, an explicit _tst instruction is added below.
6439   bool FlagsWereSet = false;
6440   if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
6441     switch (Producer->getKind()) {
6442     default:
6443       llvm::report_fatal_error("Unexpected producer.");
6444     case Inst::Icmp: {
6445       Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
6446       FlagsWereSet = true;
6447     } break;
6448     case Inst::Fcmp: {
6449       Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
6450       FlagsWereSet = true;
6451     } break;
6452     case Inst::Cast: {
6453       const auto *CastProducer = llvm::cast<InstCast>(Producer);
6454       assert(CastProducer->getCastKind() == InstCast::Trunc);
6455       Boolean = CastProducer->getSrc(0);
6456       // No flags were set, so a _tst(Src, 1) will be emitted below. Don't
6457       // bother legalizing Src to a Reg because it will be legalized before
6458       // emitting the tst instruction.
6459       FlagsWereSet = false;
6460     } break;
6461     case Inst::Arithmetic: {
6462       // This is a special case: we eagerly assumed Producer could be folded,
6463       // but in reality, it can't. No reason to panic: we just lower it using
6464       // the regular lowerArithmetic helper.
6465       const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
6466       lowerArithmetic(ArithProducer);
6467       Boolean = ArithProducer->getDest();
6468       // No flags were set, so a _tst(Dest, 1) will be emitted below. Don't
6469       // bother legalizing Dest to a Reg because it will be legalized before
6470       // emitting  the tst instruction.
6471       FlagsWereSet = false;
6472     } break;
6473     }
6474   }
6475 
6476   if (!FlagsWereSet) {
6477     // No flags have been set, so emit a tst Boolean, 1.
6478     Variable *Src = legalizeToReg(Boolean);
6479     _tst(Src, _1);
6480     Cond = CondWhenTrue(CondARM32::NE); // i.e., CondARM32::NotZero.
6481   }
6482 
6483   if (Cond.WhenTrue0 == CondARM32::kNone) {
6484     assert(Cond.WhenTrue1 == CondARM32::kNone);
6485   } else {
6486     _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
6487                    Cond.WhenTrue0);
6488     if (DestHi) {
6489       _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
6490                      Cond.WhenTrue0);
6491     }
6492   }
6493 
6494   if (Cond.WhenTrue1 != CondARM32::kNone) {
6495     _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
6496                    Cond.WhenTrue1);
6497     if (DestHi) {
6498       _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
6499                      Cond.WhenTrue1);
6500     }
6501   }
6502 
6503   if (NeedsAnd1) {
6504     // We lowered something that is unsafe (i.e., can't provably be zero or
6505     // one). Truncate the result.
6506     _and(T_Lo, T_Lo, _1);
6507   }
6508 
6509   _mov(DestLo, T_Lo);
6510   if (DestHi) {
6511     _mov(DestHi, T_Hi);
6512   }
6513 }
6514 
6515 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest,
6516                                                   Operand *Boolean) {
6517   assert(Boolean->getType() == IceType_i1);
6518   Variable *T = makeReg(IceType_i1);
6519   Operand *_0 =
6520       legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex);
6521   Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
6522 
6523   SafeBoolChain Safe = SBC_Yes;
6524   if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
6525     switch (Producer->getKind()) {
6526     default:
6527       llvm::report_fatal_error("Unexpected producer.");
6528     case Inst::Icmp: {
6529       _mov(T, _0);
6530       CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
6531       assert(Cond.WhenTrue0 != CondARM32::AL);
6532       assert(Cond.WhenTrue0 != CondARM32::kNone);
6533       assert(Cond.WhenTrue1 == CondARM32::kNone);
6534       _mov_redefined(T, _1, Cond.WhenTrue0);
6535     } break;
6536     case Inst::Fcmp: {
6537       _mov(T, _0);
6538       Inst *MovZero = Context.getLastInserted();
6539       CondWhenTrue Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
6540       if (Cond.WhenTrue0 == CondARM32::AL) {
6541         assert(Cond.WhenTrue1 == CondARM32::kNone);
6542         MovZero->setDeleted();
6543         _mov(T, _1);
6544       } else if (Cond.WhenTrue0 != CondARM32::kNone) {
6545         _mov_redefined(T, _1, Cond.WhenTrue0);
6546       }
6547       if (Cond.WhenTrue1 != CondARM32::kNone) {
6548         assert(Cond.WhenTrue0 != CondARM32::kNone);
6549         assert(Cond.WhenTrue0 != CondARM32::AL);
6550         _mov_redefined(T, _1, Cond.WhenTrue1);
6551       }
6552     } break;
6553     case Inst::Cast: {
6554       const auto *CastProducer = llvm::cast<InstCast>(Producer);
6555       assert(CastProducer->getCastKind() == InstCast::Trunc);
6556       Operand *Src = CastProducer->getSrc(0);
6557       if (Src->getType() == IceType_i64)
6558         Src = loOperand(Src);
6559       _mov(T, legalize(Src, Legal_Reg | Legal_Flex));
6560       Safe = SBC_No;
6561     } break;
6562     case Inst::Arithmetic: {
6563       const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
6564       Safe = lowerInt1Arithmetic(ArithProducer);
6565       _mov(T, ArithProducer->getDest());
6566     } break;
6567     }
6568   } else {
6569     _mov(T, legalize(Boolean, Legal_Reg | Legal_Flex));
6570   }
6571 
6572   _mov(Dest, T);
6573   return Safe;
6574 }
6575 
6576 namespace {
6577 namespace BoolFolding {
6578 bool shouldTrackProducer(const Inst &Instr) {
6579   switch (Instr.getKind()) {
6580   default:
6581     return false;
6582   case Inst::Icmp:
6583   case Inst::Fcmp:
6584     return true;
6585   case Inst::Cast: {
6586     switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
6587     default:
6588       return false;
6589     case InstCast::Trunc:
6590       return true;
6591     }
6592   }
6593   case Inst::Arithmetic: {
6594     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6595     default:
6596       return false;
6597     case InstArithmetic::And:
6598     case InstArithmetic::Or:
6599       return true;
6600     }
6601   }
6602   }
6603 }
6604 
6605 bool isValidConsumer(const Inst &Instr) {
6606   switch (Instr.getKind()) {
6607   default:
6608     return false;
6609   case Inst::Br:
6610     return true;
6611   case Inst::Select:
6612     return !isVectorType(Instr.getDest()->getType());
6613   case Inst::Cast: {
6614     switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
6615     default:
6616       return false;
6617     case InstCast::Sext:
6618       return !isVectorType(Instr.getDest()->getType());
6619     case InstCast::Zext:
6620       return !isVectorType(Instr.getDest()->getType());
6621     }
6622   }
6623   case Inst::Arithmetic: {
6624     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6625     default:
6626       return false;
6627     case InstArithmetic::And:
6628       return !isVectorType(Instr.getDest()->getType());
6629     case InstArithmetic::Or:
6630       return !isVectorType(Instr.getDest()->getType());
6631     }
6632   }
6633   }
6634 }
6635 } // end of namespace BoolFolding
6636 
6637 namespace FpFolding {
6638 bool shouldTrackProducer(const Inst &Instr) {
6639   switch (Instr.getKind()) {
6640   default:
6641     return false;
6642   case Inst::Arithmetic: {
6643     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6644     default:
6645       return false;
6646     case InstArithmetic::Fmul:
6647       return true;
6648     }
6649   }
6650   }
6651 }
6652 
6653 bool isValidConsumer(const Inst &Instr) {
6654   switch (Instr.getKind()) {
6655   default:
6656     return false;
6657   case Inst::Arithmetic: {
6658     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6659     default:
6660       return false;
6661     case InstArithmetic::Fadd:
6662     case InstArithmetic::Fsub:
6663       return true;
6664     }
6665   }
6666   }
6667 }
6668 } // end of namespace FpFolding
6669 
6670 namespace IntFolding {
6671 bool shouldTrackProducer(const Inst &Instr) {
6672   switch (Instr.getKind()) {
6673   default:
6674     return false;
6675   case Inst::Arithmetic: {
6676     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6677     default:
6678       return false;
6679     case InstArithmetic::Mul:
6680       return true;
6681     }
6682   }
6683   }
6684 }
6685 
6686 bool isValidConsumer(const Inst &Instr) {
6687   switch (Instr.getKind()) {
6688   default:
6689     return false;
6690   case Inst::Arithmetic: {
6691     switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6692     default:
6693       return false;
6694     case InstArithmetic::Add:
6695     case InstArithmetic::Sub:
6696       return true;
6697     }
6698   }
6699   }
6700 }
6701 } // namespace IntFolding
6702 } // end of anonymous namespace
6703 
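// Record producers (icmp/fcmp, trunc, and/or for i1; fmul for fp; mul for
// i32) whose single use is a valid consumer in the same block, so the
// lowering can fold the producer into its consumer (e.g., a mul feeding an
// add can become a multiply-accumulate).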
6704 void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) {
6705   for (Inst &Instr : Node->getInsts()) {
6706     // Check whether Instr is a valid producer.
6707     Variable *Dest = Instr.getDest();
6708     if (!Instr.isDeleted() // only consider non-deleted instructions; and
6709         && Dest            // only instructions with an actual dest var; and
6710         && Dest->getType() == IceType_i1 // only bool-type dest vars; and
6711         && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6712       KnownComputations.emplace(Dest->getIndex(),
6713                                 ComputationEntry(&Instr, IceType_i1));
6714     }
6715     if (!Instr.isDeleted() // only consider non-deleted instructions; and
6716         && Dest            // only instructions with an actual dest var; and
6717         && isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and
6718         && FpFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6719       KnownComputations.emplace(Dest->getIndex(),
6720                                 ComputationEntry(&Instr, Dest->getType()));
6721     }
6722     if (!Instr.isDeleted() // only consider non-deleted instructions; and
6723         && Dest            // only instructions with an actual dest var; and
6724         && Dest->getType() == IceType_i32            // i32 only dest vars; and
6725         && IntFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6726       KnownComputations.emplace(Dest->getIndex(),
6727                                 ComputationEntry(&Instr, IceType_i32));
6728     }
6729     // Check each src variable against the map.
6730     FOREACH_VAR_IN_INST(Var, Instr) {
6731       SizeT VarNum = Var->getIndex();
6732       auto ComputationIter = KnownComputations.find(VarNum);
6733       if (ComputationIter == KnownComputations.end()) {
6734         continue;
6735       }
6736 
6737       ++ComputationIter->second.NumUses;
6738       switch (ComputationIter->second.ComputationType) {
6739       default:
6740         KnownComputations.erase(VarNum);
6741         continue;
6742       case IceType_i1:
6743         if (!BoolFolding::isValidConsumer(Instr)) {
6744           KnownComputations.erase(VarNum);
6745           continue;
6746         }
6747         break;
6748       case IceType_i32:
6749         if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) {
6750           KnownComputations.erase(VarNum);
6751           continue;
6752         }
6753         break;
6754       case IceType_f32:
6755       case IceType_f64:
6756         if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) {
6757           KnownComputations.erase(VarNum);
6758           continue;
6759         }
6760         break;
6761       }
6762 
6763       if (Instr.isLastUse(Var)) {
6764         ComputationIter->second.IsLiveOut = false;
6765       }
6766     }
6767   }
6768 
6769   for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
6770        Iter != End;) {
6771     // Disable the folding if its dest may be live beyond this block.
6772     if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
6773       Iter = KnownComputations.erase(Iter);
6774       continue;
6775     }
6776 
6777     // Mark as "dead" rather than outright deleting. This is so that other
6778     // peephole style optimizations during or before lowering have access to
6779     // this instruction in undeleted form. See for example
6780     // tryOptimizedCmpxchgCmpBr().
6781     Iter->second.Instr->setDead();
6782     ++Iter;
6783   }
6784 }
6785 
6786 TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
6787     : TargetDataLowering(Ctx) {}
6788 
6789 void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
6790                                    const std::string &SectionSuffix) {
6791   const bool IsPIC = false;
6792   switch (getFlags().getOutFileType()) {
6793   case FT_Elf: {
6794     ELFObjectWriter *Writer = Ctx->getObjectWriter();
6795     Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix,
6796                              IsPIC);
6797   } break;
6798   case FT_Asm:
6799   case FT_Iasm: {
6800     OstreamLocker _(Ctx);
6801     for (const VariableDeclaration *Var : Vars) {
6802       if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
6803         emitGlobal(*Var, SectionSuffix);
6804       }
6805     }
6806   } break;
6807   }
6808 }
6809 
6810 namespace {
6811 template <typename T> struct ConstantPoolEmitterTraits;
6812 
6813 static_assert(sizeof(uint64_t) == 8,
6814               "uint64_t is supposed to be 8 bytes wide.");
6815 
6816 // TODO(jpp): implement the following when implementing constant randomization:
6817 //  * template <> struct ConstantPoolEmitterTraits<uint8_t>
6818 //  * template <> struct ConstantPoolEmitterTraits<uint16_t>
6819 //  * template <> struct ConstantPoolEmitterTraits<uint32_t>
6820 template <> struct ConstantPoolEmitterTraits<float> {
6821   using ConstantType = ConstantFloat;
6822   static constexpr Type IceType = IceType_f32;
6823   // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
6824   // about them being constexpr.
6825   static const char AsmTag[];
6826   static const char TypeName[];
6827   static uint64_t bitcastToUint64(float Value) {
6828     static_assert(sizeof(Value) == sizeof(uint32_t),
6829                   "Float should be 4 bytes.");
6830     const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
6831     return static_cast<uint64_t>(IntValue);
6832   }
6833 };
6834 const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".long";
6835 const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";
6836 
6837 template <> struct ConstantPoolEmitterTraits<double> {
6838   using ConstantType = ConstantDouble;
6839   static constexpr Type IceType = IceType_f64;
6840   static const char AsmTag[];
6841   static const char TypeName[];
6842   static uint64_t bitcastToUint64(double Value) {
6843     static_assert(sizeof(double) == sizeof(uint64_t),
6844                   "Double should be 8 bytes.");
6845     return Utils::bitCopy<uint64_t>(Value);
6846   }
6847 };
6848 const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
6849 const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";
6850 
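// Emits one pooled constant as, e.g. (label name illustrative):
//   .Lf32$0:
//           .long   0x3F800000    /* f32 1 */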
6851 template <typename T>
6852 void emitConstant(
6853     Ostream &Str,
6854     const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
6855   using Traits = ConstantPoolEmitterTraits<T>;
6856   Str << Const->getLabelName();
6857   Str << ":\n\t" << Traits::AsmTag << "\t0x";
6858   T Value = Const->getValue();
6859   Str.write_hex(Traits::bitcastToUint64(Value));
6860   Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
6861 }
6862 
6863 template <typename T> void emitConstantPool(GlobalContext *Ctx) {
6864   if (!BuildDefs::dump()) {
6865     return;
6866   }
6867 
6868   using Traits = ConstantPoolEmitterTraits<T>;
6869   static constexpr size_t MinimumAlignment = 4;
6870   SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
6871   assert((Align % 4) == 0 && "Constants should be aligned");
6872   Ostream &Str = Ctx->getStrEmit();
6873   ConstantList Pool = Ctx->getConstantPool(Traits::IceType);
6874 
6875   Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
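  // Pooled constants go to a mergeable read-only section; e.g., f32 constants
  // land in .rodata.cst4 with 4-byte alignment.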
6876       << "\n"
6877       << "\t.align\t" << Align << "\n";
6878 
6879   for (Constant *C : Pool) {
6880     if (!C->getShouldBePooled()) {
6881       continue;
6882     }
6883 
6884     emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
6885   }
6886 }
6887 } // end of anonymous namespace
6888 
6889 void TargetDataARM32::lowerConstants() {
6890   if (getFlags().getDisableTranslation())
6891     return;
6892   switch (getFlags().getOutFileType()) {
6893   case FT_Elf: {
6894     ELFObjectWriter *Writer = Ctx->getObjectWriter();
6895     Writer->writeConstantPool<ConstantFloat>(IceType_f32);
6896     Writer->writeConstantPool<ConstantDouble>(IceType_f64);
6897   } break;
6898   case FT_Asm:
6899   case FT_Iasm: {
6900     OstreamLocker _(Ctx);
6901     emitConstantPool<float>(Ctx);
6902     emitConstantPool<double>(Ctx);
6903     break;
6904   }
6905   }
6906 }
6907 
6908 void TargetDataARM32::lowerJumpTables() {
6909   if (getFlags().getDisableTranslation())
6910     return;
6911   switch (getFlags().getOutFileType()) {
6912   case FT_Elf:
6913     if (!Ctx->getJumpTables().empty()) {
6914       llvm::report_fatal_error("ARM32 does not support jump tables yet.");
6915     }
6916     break;
6917   case FT_Asm:
6918     // Already emitted from Cfg
6919     break;
6920   case FT_Iasm: {
6921     // TODO(kschimpf): Fill this in when we get more information.
6922     break;
6923   }
6924   }
6925 }
6926 
6927 TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
6928     : TargetHeaderLowering(Ctx), CPUFeatures(getFlags()) {}
6929 
6930 void TargetHeaderARM32::lower() {
6931   OstreamLocker _(Ctx);
6932   Ostream &Str = Ctx->getStrEmit();
6933   Str << ".syntax unified\n";
6934   // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
6935   // "Addenda to, and Errata in the ABI for the ARM architecture"
6936   // http://infocenter.arm.com
6937   //                  /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
6938   //
6939   // Tag_conformance should be be emitted first in a file-scope sub-subsection
6940   // of the first public subsection of the attributes.
6941   Str << ".eabi_attribute 67, \"2.09\"      @ Tag_conformance\n";
6942   // Chromebooks are at least Cortex-A15, but we use Cortex-A9 for broader
6943   // compatibility. For some reason, the LLVM ARM asm parser has the .cpu
6944   // directive override the mattr specified on the command line. So to test
6945   // hwdiv, we must set the .cpu directive higher (can't rely on --mattr=...).
6946   if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
6947     Str << ".cpu    cortex-a15\n";
6948   } else {
6949     Str << ".cpu    cortex-a9\n";
6950   }
6951   Str << ".eabi_attribute 6, 10   @ Tag_CPU_arch: ARMv7\n"
6952       << ".eabi_attribute 7, 65   @ Tag_CPU_arch_profile: App profile\n";
6953   Str << ".eabi_attribute 8, 1    @ Tag_ARM_ISA_use: Yes\n"
6954       << ".eabi_attribute 9, 2    @ Tag_THUMB_ISA_use: Thumb-2\n";
6955   Str << ".fpu    neon\n"
6956       << ".eabi_attribute 17, 1   @ Tag_ABI_PCS_GOT_use: permit directly\n"
6957       << ".eabi_attribute 20, 1   @ Tag_ABI_FP_denormal\n"
6958       << ".eabi_attribute 21, 1   @ Tag_ABI_FP_exceptions\n"
6959       << ".eabi_attribute 23, 3   @ Tag_ABI_FP_number_model: IEEE 754\n"
6960       << ".eabi_attribute 34, 1   @ Tag_CPU_unaligned_access\n"
6961       << ".eabi_attribute 24, 1   @ Tag_ABI_align_needed: 8-byte\n"
6962       << ".eabi_attribute 25, 1   @ Tag_ABI_align_preserved: 8-byte\n"
6963       << ".eabi_attribute 28, 1   @ Tag_ABI_VFP_args\n"
6964       << ".eabi_attribute 36, 1   @ Tag_FP_HP_extension\n"
6965       << ".eabi_attribute 38, 1   @ Tag_ABI_FP_16bit_format\n"
6966       << ".eabi_attribute 42, 1   @ Tag_MPextension_use\n"
6967       << ".eabi_attribute 68, 1   @ Tag_Virtualization_use\n";
6968   if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
6969     Str << ".eabi_attribute 44, 2   @ Tag_DIV_use\n";
6970   }
6971   // Technically R9 is used for TLS with Sandboxing, and we reserve it.
6972   // However, for compatibility with current NaCl LLVM, don't claim that.
6973   Str << ".eabi_attribute 14, 3   @ Tag_ABI_PCS_R9_use: Not used\n";
6974 }
6975 
6976 SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
6977 SmallBitVector TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
6978 SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
6979 
6980 } // end of namespace ARM32
6981 } // end of namespace Ice
6982