1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 //
3 // The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the TargetLoweringARM32 class, which consists almost
12 /// entirely of the lowering sequence for each high-level instruction.
13 ///
14 //===----------------------------------------------------------------------===//
15 #include "IceTargetLoweringARM32.h"
16
17 #include "IceCfg.h"
18 #include "IceCfgNode.h"
19 #include "IceClFlags.h"
20 #include "IceDefs.h"
21 #include "IceELFObjectWriter.h"
22 #include "IceGlobalInits.h"
23 #include "IceInstARM32.def"
24 #include "IceInstARM32.h"
25 #include "IceInstVarIter.h"
26 #include "IceLiveness.h"
27 #include "IceOperand.h"
28 #include "IcePhiLoweringImpl.h"
29 #include "IceRegistersARM32.h"
30 #include "IceTargetLoweringARM32.def"
31 #include "IceUtils.h"
32 #include "llvm/Support/MathExtras.h"
33
34 #include <algorithm>
35 #include <array>
36 #include <utility>
37
38 namespace ARM32 {
39 std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
40 return ::Ice::ARM32::TargetARM32::create(Func);
41 }
42
43 std::unique_ptr<::Ice::TargetDataLowering>
44 createTargetDataLowering(::Ice::GlobalContext *Ctx) {
45 return ::Ice::ARM32::TargetDataARM32::create(Ctx);
46 }
47
48 std::unique_ptr<::Ice::TargetHeaderLowering>
49 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
50 return ::Ice::ARM32::TargetHeaderARM32::create(Ctx);
51 }
52
53 void staticInit(::Ice::GlobalContext *Ctx) {
54 ::Ice::ARM32::TargetARM32::staticInit(Ctx);
55 }
56
57 bool shouldBePooled(const ::Ice::Constant *C) {
58 return ::Ice::ARM32::TargetARM32::shouldBePooled(C);
59 }
60
61 ::Ice::Type getPointerType() {
62 return ::Ice::ARM32::TargetARM32::getPointerType();
63 }
64
65 } // end of namespace ARM32
66
67 namespace Ice {
68 namespace ARM32 {
69
70 namespace {
71
72 /// SizeOf is used to obtain the size of an initializer list as a constexpr
73 /// expression. This is only needed until our C++ library is updated to
74 /// C++14, which makes the members of std::initializer_list constexpr.
75 class SizeOf {
76 SizeOf(const SizeOf &) = delete;
77 SizeOf &operator=(const SizeOf &) = delete;
78
79 public:
80 constexpr SizeOf() : Size(0) {}
81 template <typename... T>
82 explicit constexpr SizeOf(T...) : Size(__length<T...>::value) {}
83 constexpr SizeT size() const { return Size; }
84
85 private:
86 template <typename T, typename... U> struct __length {
87 static constexpr std::size_t value = 1 + __length<U...>::value;
88 };
89
90 template <typename T> struct __length<T> {
91 static constexpr std::size_t value = 1;
92 };
93
94 const std::size_t Size;
95 };
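// A minimal usage sketch (hypothetical values): SizeOf simply counts its
// constructor arguments at compile time, e.g.
//
//   static_assert(SizeOf(1, 2, 3).size() == 3, "SizeOf miscounted");
//
// The register table below relies on this to compute the number of aliases in
// each alias_init list via (SizeOf alias_init).size().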
96
97 } // end of anonymous namespace
98
99 // Defines the RegARM32::Table table with register information.
100 RegARM32::RegTableType RegARM32::RegTable[RegARM32::Reg_NUM] = {
101 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
102 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
103 {name, encode, \
104 cc_arg, scratch, \
105 preserved, stackptr, \
106 frameptr, isGPR, \
107 isInt, isI64Pair, \
108 isFP32, isFP64, \
109 isVec128, (SizeOf alias_init).size(), \
110 alias_init},
111 REGARM32_TABLE
112 #undef X
113 };
114
115 namespace {
116
117 // The following table summarizes the logic for lowering the icmp instruction
118 // for i32 and narrower types. Each icmp condition has a clear mapping to an
119 // ARM32 conditional move instruction.
120
121 const struct TableIcmp32_ {
122 CondARM32::Cond Mapping;
123 } TableIcmp32[] = {
124 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
125 {CondARM32::C_32},
126 ICMPARM32_TABLE
127 #undef X
128 };
129
130 // The following table summarizes the logic for lowering the icmp instruction
131 // for the i64 type. Two conditional moves are needed for setting to 1 or 0.
132 // The operands may need to be swapped, and there is a slight difference for
133 // signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
134 const struct TableIcmp64_ {
135 bool IsSigned;
136 bool Swapped;
137 CondARM32::Cond C1, C2;
138 } TableIcmp64[] = {
139 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
140 {is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64},
141 ICMPARM32_TABLE
142 #undef X
143 };
144
145 CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
146 assert(Cond < llvm::array_lengthof(TableIcmp32));
147 return TableIcmp32[Cond].Mapping;
148 }
149
150 // In some cases, there are x-macros tables for both high-level and low-level
151 // instructions/operands that use the same enum key value. The tables are kept
152 // separate to maintain a proper separation between abstraction layers. There
153 // is a risk that the tables could get out of sync if enum values are reordered
154 // or if entries are added or deleted. The following anonymous namespaces use
155 // static_asserts to ensure everything is kept in sync.
156
157 // Validate the enum values in ICMPARM32_TABLE.
158 namespace {
159 // Define a temporary set of enum values based on low-level table entries.
160 enum _icmp_ll_enum {
161 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
162 _icmp_ll_##val,
163 ICMPARM32_TABLE
164 #undef X
165 _num
166 };
167 // Define a set of constants based on high-level table entries.
168 #define X(tag, reverse, str) \
169 static constexpr int _icmp_hl_##tag = InstIcmp::tag;
170 ICEINSTICMP_TABLE
171 #undef X
172 // Define a set of constants based on low-level table entries, and ensure the
173 // table entry keys are consistent.
174 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
175 static_assert( \
176 _icmp_ll_##val == _icmp_hl_##val, \
177 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #val);
178 ICMPARM32_TABLE
179 #undef X
180 // Repeat the static asserts with respect to the high-level table entries in
181 // case the high-level table has extra entries.
182 #define X(tag, reverse, str) \
183 static_assert( \
184 _icmp_hl_##tag == _icmp_ll_##tag, \
185 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #tag);
186 ICEINSTICMP_TABLE
187 #undef X
188 } // end of anonymous namespace
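// As an illustration of what these checks catch: if ICMPARM32_TABLE listed,
// say, Ult before Ugt while ICEINSTICMP_TABLE kept the opposite order, the
// corresponding _icmp_ll_/_icmp_hl_ constants would no longer match and the
// static_asserts above would fail at compile time.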
189
190 // Stack alignment
191 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;
192
193 // Value is in bytes. Return Value adjusted to the next highest multiple of the
194 // stack alignment.
195 uint32_t applyStackAlignment(uint32_t Value) {
196 return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
197 }
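// For illustration (values chosen arbitrarily), and assuming
// Utils::applyAlignment rounds up to the next multiple of the alignment as the
// callers here require: applyStackAlignment(20) yields 32, while an already
// aligned value such as 32 is returned unchanged.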
198
199 // Value is in bytes. Return Value adjusted to the next highest multiple of the
200 // stack alignment required for the given type.
201 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
202 // Use natural alignment, except that normally (non-NaCl) ARM only aligns
203 // vectors to 8 bytes.
204 // TODO(jvoung): Check this ...
205 size_t typeAlignInBytes = typeWidthInBytes(Ty);
206 if (isVectorType(Ty))
207 typeAlignInBytes = 8;
208 return Utils::applyAlignment(Value, typeAlignInBytes);
209 }
210
211 // Conservatively check if at compile time we know that the operand is
212 // definitely a non-zero integer.
213 bool isGuaranteedNonzeroInt(const Operand *Op) {
214 if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
215 return Const->getValue() != 0;
216 }
217 return false;
218 }
219
220 } // end of anonymous namespace
221
222 TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
223 static_assert(
224 (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
225 (TargetInstructionSet::ARM32InstructionSet_End -
226 TargetInstructionSet::ARM32InstructionSet_Begin),
227 "ARM32InstructionSet range different from TargetInstructionSet");
228 if (Flags.getTargetInstructionSet() !=
229 TargetInstructionSet::BaseInstructionSet) {
230 InstructionSet = static_cast<ARM32InstructionSet>(
231 (Flags.getTargetInstructionSet() -
232 TargetInstructionSet::ARM32InstructionSet_Begin) +
233 ARM32InstructionSet::Begin);
234 }
235 }
236
237 namespace {
238 constexpr SizeT NumGPRArgs =
239 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
240 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
241 +(((cc_arg) > 0) ? 1 : 0)
242 REGARM32_GPR_TABLE
243 #undef X
244 ;
245 std::array<RegNumT, NumGPRArgs> GPRArgInitializer;
246
247 constexpr SizeT NumI64Args =
248 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
249 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
250 +(((cc_arg) > 0) ? 1 : 0)
251 REGARM32_I64PAIR_TABLE
252 #undef X
253 ;
254 std::array<RegNumT, NumI64Args> I64ArgInitializer;
255
256 constexpr SizeT NumFP32Args =
257 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
258 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
259 +(((cc_arg) > 0) ? 1 : 0)
260 REGARM32_FP32_TABLE
261 #undef X
262 ;
263 std::array<RegNumT, NumFP32Args> FP32ArgInitializer;
264
265 constexpr SizeT NumFP64Args =
266 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
267 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
268 +(((cc_arg) > 0) ? 1 : 0)
269 REGARM32_FP64_TABLE
270 #undef X
271 ;
272 std::array<RegNumT, NumFP64Args> FP64ArgInitializer;
273
274 constexpr SizeT NumVec128Args =
275 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
276 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
277 +(((cc_arg) > 0) ? 1 : 0)
278 REGARM32_VEC128_TABLE
279 #undef X
280 ;
281 std::array<RegNumT, NumVec128Args> Vec128ArgInitializer;
282
283 const char *getRegClassName(RegClass C) {
284 auto ClassNum = static_cast<RegARM32::RegClassARM32>(C);
285 assert(ClassNum < RegARM32::RCARM32_NUM);
286 switch (ClassNum) {
287 default:
288 assert(C < RC_Target);
289 return regClassString(C);
290 // Add handling of new register classes below.
291 case RegARM32::RCARM32_QtoS:
292 return "QtoS";
293 }
294 }
295
296 } // end of anonymous namespace
297
298 TargetARM32::TargetARM32(Cfg *Func)
299 : TargetLowering(Func), CPUFeatures(getFlags()) {}
300
301 void TargetARM32::staticInit(GlobalContext *Ctx) {
302 RegNumT::setLimit(RegARM32::Reg_NUM);
303 // Limit this size (or do all bitsets need to be the same width)???
304 SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
305 SmallBitVector I64PairRegisters(RegARM32::Reg_NUM);
306 SmallBitVector Float32Registers(RegARM32::Reg_NUM);
307 SmallBitVector Float64Registers(RegARM32::Reg_NUM);
308 SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
309 SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);
310 SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
311 const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding;
312 for (int i = 0; i < RegARM32::Reg_NUM; ++i) {
313 const auto &Entry = RegARM32::RegTable[i];
314 IntegerRegisters[i] = Entry.IsInt;
315 I64PairRegisters[i] = Entry.IsI64Pair;
316 Float32Registers[i] = Entry.IsFP32;
317 Float64Registers[i] = Entry.IsFP64;
318 VectorRegisters[i] = Entry.IsVec128;
319 RegisterAliases[i].resize(RegARM32::Reg_NUM);
320 // TODO(eholk): It would be better to store a QtoS flag in the
321 // IceRegistersARM32 table than to compare their encodings here.
322 QtoSRegisters[i] = Entry.IsVec128 && Entry.Encoding < EncodedReg_q8;
323 for (int j = 0; j < Entry.NumAliases; ++j) {
324 assert(i == j || !RegisterAliases[i][Entry.Aliases[j]]);
325 RegisterAliases[i].set(Entry.Aliases[j]);
326 }
327 assert(RegisterAliases[i][i]);
328 if (Entry.CCArg <= 0) {
329 continue;
330 }
331 const auto RegNum = RegNumT::fromInt(i);
332 if (Entry.IsGPR) {
333 GPRArgInitializer[Entry.CCArg - 1] = RegNum;
334 } else if (Entry.IsI64Pair) {
335 I64ArgInitializer[Entry.CCArg - 1] = RegNum;
336 } else if (Entry.IsFP32) {
337 FP32ArgInitializer[Entry.CCArg - 1] = RegNum;
338 } else if (Entry.IsFP64) {
339 FP64ArgInitializer[Entry.CCArg - 1] = RegNum;
340 } else if (Entry.IsVec128) {
341 Vec128ArgInitializer[Entry.CCArg - 1] = RegNum;
342 }
343 }
344 TypeToRegisterSet[IceType_void] = InvalidRegisters;
345 TypeToRegisterSet[IceType_i1] = IntegerRegisters;
346 TypeToRegisterSet[IceType_i8] = IntegerRegisters;
347 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
348 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
349 TypeToRegisterSet[IceType_i64] = I64PairRegisters;
350 TypeToRegisterSet[IceType_f32] = Float32Registers;
351 TypeToRegisterSet[IceType_f64] = Float64Registers;
352 TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
353 TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
354 TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
355 TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
356 TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
357 TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
358 TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
359 TypeToRegisterSet[RegARM32::RCARM32_QtoS] = QtoSRegisters;
360
361 for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
362 TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
363
364 filterTypeToRegisterSet(
365 Ctx, RegARM32::Reg_NUM, TypeToRegisterSet,
366 llvm::array_lengthof(TypeToRegisterSet),
367 [](RegNumT RegNum) -> std::string {
368 // This function simply removes ", " from the
369 // register name.
370 std::string Name = RegARM32::getRegName(RegNum);
371 constexpr const char RegSeparator[] = ", ";
372 constexpr size_t RegSeparatorWidth =
373 llvm::array_lengthof(RegSeparator) - 1;
374 for (size_t Pos = Name.find(RegSeparator); Pos != std::string::npos;
375 Pos = Name.find(RegSeparator)) {
376 Name.replace(Pos, RegSeparatorWidth, "");
377 }
378 return Name;
379 },
380 getRegClassName);
381 }
382
383 namespace {
384 void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
385 for (Variable *Var : Vars) {
386 auto *Var64 = llvm::dyn_cast<Variable64On32>(Var);
387 if (!Var64) {
388 // This is not the variable we are looking for.
389 continue;
390 }
391 // Only allow infinite-weight i64 temporaries to be register allocated.
392 assert(!Var64->hasReg() || Var64->mustHaveReg());
393 if (!Var64->hasReg()) {
394 continue;
395 }
396 const auto FirstReg =
397 RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum()));
398 // This assumes little endian.
399 Variable *Lo = Var64->getLo();
400 Variable *Hi = Var64->getHi();
401 assert(Lo->hasReg() == Hi->hasReg());
402 if (Lo->hasReg()) {
403 continue;
404 }
405 Lo->setRegNum(FirstReg);
406 Lo->setMustHaveReg();
407 Hi->setRegNum(RegNumT::fixme(FirstReg + 1));
408 Hi->setMustHaveReg();
409 }
410 }
411 } // end of anonymous namespace
412
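// Worked example (hypothetical call, assuming the AAPCS-style register
// assignment implemented by CallingConv below): for a call that takes five i32
// arguments, the first four are assigned to r0-r3, so only the fifth needs
// stack space. The loop below accumulates 4 bytes for it, and the final
// applyStackAlignment call rounds the out-args area up to 16 bytes.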
413 uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
414 TargetARM32::CallingConv CC;
415 RegNumT DummyReg;
416 size_t OutArgsSizeBytes = 0;
417 for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
418 Operand *Arg = legalizeUndef(Call->getArg(i));
419 const Type Ty = Arg->getType();
420 if (isScalarIntegerType(Ty)) {
421 if (CC.argInGPR(Ty, &DummyReg)) {
422 continue;
423 }
424 } else {
425 if (CC.argInVFP(Ty, &DummyReg)) {
426 continue;
427 }
428 }
429
430 OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
431 OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
432 }
433
434 return applyStackAlignment(OutArgsSizeBytes);
435 }
436
437 void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
438 constexpr bool NoTailCall = false;
439 constexpr bool IsTargetHelperCall = true;
440
441 switch (Instr->getKind()) {
442 default:
443 return;
444 case Inst::Arithmetic: {
445 Variable *Dest = Instr->getDest();
446 const Type DestTy = Dest->getType();
447 const InstArithmetic::OpKind Op =
448 llvm::cast<InstArithmetic>(Instr)->getOp();
449 if (isVectorType(DestTy)) {
450 switch (Op) {
451 default:
452 break;
453 case InstArithmetic::Fdiv:
454 case InstArithmetic::Frem:
455 case InstArithmetic::Sdiv:
456 case InstArithmetic::Srem:
457 case InstArithmetic::Udiv:
458 case InstArithmetic::Urem:
459 scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
460 Instr->setDeleted();
461 return;
462 }
463 }
464 switch (DestTy) {
465 default:
466 return;
467 case IceType_i64: {
468 // Technically, ARM has its own aeabi routines, but we can use the
469 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
470 // the more standard __moddi3 for rem.
471 RuntimeHelper HelperID = RuntimeHelper::H_Num;
472 switch (Op) {
473 default:
474 return;
475 case InstArithmetic::Udiv:
476 HelperID = RuntimeHelper::H_udiv_i64;
477 break;
478 case InstArithmetic::Sdiv:
479 HelperID = RuntimeHelper::H_sdiv_i64;
480 break;
481 case InstArithmetic::Urem:
482 HelperID = RuntimeHelper::H_urem_i64;
483 break;
484 case InstArithmetic::Srem:
485 HelperID = RuntimeHelper::H_srem_i64;
486 break;
487 }
488 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
489 ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
490 constexpr SizeT MaxArgs = 2;
491 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
492 NoTailCall, IsTargetHelperCall);
493 Call->addArg(Instr->getSrc(0));
494 Call->addArg(Instr->getSrc(1));
495 Instr->setDeleted();
496 return;
497 }
498 case IceType_i32:
499 case IceType_i16:
500 case IceType_i8: {
501 const bool HasHWDiv = hasCPUFeature(TargetARM32Features::HWDivArm);
502 InstCast::OpKind CastKind;
503 RuntimeHelper HelperID = RuntimeHelper::H_Num;
504 switch (Op) {
505 default:
506 return;
507 case InstArithmetic::Udiv:
508 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_udiv_i32;
509 CastKind = InstCast::Zext;
510 break;
511 case InstArithmetic::Sdiv:
512 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_sdiv_i32;
513 CastKind = InstCast::Sext;
514 break;
515 case InstArithmetic::Urem:
516 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_urem_i32;
517 CastKind = InstCast::Zext;
518 break;
519 case InstArithmetic::Srem:
520 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_srem_i32;
521 CastKind = InstCast::Sext;
522 break;
523 }
524 if (HelperID == RuntimeHelper::H_Num) {
525 // HelperID is left undefined only when the processor has a hardware
526 // divider, in which case no helper call is needed. If any other helpers
527 // are ever introduced, the following assert will have to be modified.
528 assert(HasHWDiv);
529 return;
530 }
531 Operand *Src0 = Instr->getSrc(0);
532 Operand *Src1 = Instr->getSrc(1);
533 if (DestTy != IceType_i32) {
534 // Src0 and Src1 have to be zero-, or signed-extended to i32. For Src0,
535 // we just insert an InstCast right before the call to the helper.
536 Variable *Src0_32 = Func->makeVariable(IceType_i32);
537 Context.insert<InstCast>(CastKind, Src0_32, Src0);
538 Src0 = Src0_32;
539
540 // For extending Src1, we will just insert an InstCast if Src1 is not a
541 // Constant. If it is, then we extend it here, and not during program
542 // runtime. This allows preambleDivRem to optimize-out the div-by-0
543 // check.
544 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
545 const int32_t ShAmt = (DestTy == IceType_i16) ? 16 : 24;
546 int32_t NewC = C->getValue();
547 if (CastKind == InstCast::Zext) {
548 NewC &= ~(0x80000000l >> ShAmt);
549 } else {
550 NewC = (NewC << ShAmt) >> ShAmt;
551 }
552 Src1 = Ctx->getConstantInt32(NewC);
553 } else {
554 Variable *Src1_32 = Func->makeVariable(IceType_i32);
555 Context.insert<InstCast>(CastKind, Src1_32, Src1);
556 Src1 = Src1_32;
557 }
558 }
559 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
560 ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
561 constexpr SizeT MaxArgs = 2;
562 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
563 NoTailCall, IsTargetHelperCall);
564 assert(Src0->getType() == IceType_i32);
565 Call->addArg(Src0);
566 assert(Src1->getType() == IceType_i32);
567 Call->addArg(Src1);
568 Instr->setDeleted();
569 return;
570 }
571 case IceType_f64:
572 case IceType_f32: {
573 if (Op != InstArithmetic::Frem) {
574 return;
575 }
576 constexpr SizeT MaxArgs = 2;
577 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
578 DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
579 : RuntimeHelper::H_frem_f64);
580 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
581 NoTailCall, IsTargetHelperCall);
582 Call->addArg(Instr->getSrc(0));
583 Call->addArg(Instr->getSrc(1));
584 Instr->setDeleted();
585 return;
586 }
587 }
588 llvm::report_fatal_error("Control flow should never have reached here.");
589 }
590 case Inst::Cast: {
591 Variable *Dest = Instr->getDest();
592 Operand *Src0 = Instr->getSrc(0);
593 const Type DestTy = Dest->getType();
594 const Type SrcTy = Src0->getType();
595 auto *CastInstr = llvm::cast<InstCast>(Instr);
596 const InstCast::OpKind CastKind = CastInstr->getCastKind();
597
598 switch (CastKind) {
599 default:
600 return;
601 case InstCast::Fptosi:
602 case InstCast::Fptoui: {
603 if (DestTy != IceType_i64) {
604 return;
605 }
606 const bool DestIsSigned = CastKind == InstCast::Fptosi;
607 const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
608 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
609 Src0IsF32 ? (DestIsSigned ? RuntimeHelper::H_fptosi_f32_i64
610 : RuntimeHelper::H_fptoui_f32_i64)
611 : (DestIsSigned ? RuntimeHelper::H_fptosi_f64_i64
612 : RuntimeHelper::H_fptoui_f64_i64));
613 static constexpr SizeT MaxArgs = 1;
614 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
615 NoTailCall, IsTargetHelperCall);
616 Call->addArg(Src0);
617 Instr->setDeleted();
618 return;
619 }
620 case InstCast::Sitofp:
621 case InstCast::Uitofp: {
622 if (SrcTy != IceType_i64) {
623 return;
624 }
625 const bool SourceIsSigned = CastKind == InstCast::Sitofp;
626 const bool DestIsF32 = isFloat32Asserting32Or64(Dest->getType());
627 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
628 DestIsF32 ? (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f32
629 : RuntimeHelper::H_uitofp_i64_f32)
630 : (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f64
631 : RuntimeHelper::H_uitofp_i64_f64));
632 static constexpr SizeT MaxArgs = 1;
633 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
634 NoTailCall, IsTargetHelperCall);
635 Call->addArg(Src0);
636 Instr->setDeleted();
637 return;
638 }
639 case InstCast::Bitcast: {
640 if (DestTy == SrcTy) {
641 return;
642 }
643 Variable *CallDest = Dest;
644 RuntimeHelper HelperID = RuntimeHelper::H_Num;
645 switch (DestTy) {
646 default:
647 return;
648 case IceType_i8:
649 assert(SrcTy == IceType_v8i1);
650 HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
651 CallDest = Func->makeVariable(IceType_i32);
652 break;
653 case IceType_i16:
654 assert(SrcTy == IceType_v16i1);
655 HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
656 CallDest = Func->makeVariable(IceType_i32);
657 break;
658 case IceType_v8i1: {
659 assert(SrcTy == IceType_i8);
660 HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
661 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
662 // Arguments to functions are required to be at least 32 bits wide.
663 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
664 Src0 = Src0AsI32;
665 } break;
666 case IceType_v16i1: {
667 assert(SrcTy == IceType_i16);
668 HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
669 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
670 // Arguments to functions are required to be at least 32 bits wide.
671 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
672 Src0 = Src0AsI32;
673 } break;
674 }
675 constexpr SizeT MaxSrcs = 1;
676 InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
677 Call->addArg(Src0);
678 Context.insert(Call);
679 // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
680 // call result to the appropriate type as necessary.
681 if (CallDest->getType() != Dest->getType())
682 Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
683 Instr->setDeleted();
684 return;
685 }
686 case InstCast::Trunc: {
687 if (DestTy == SrcTy) {
688 return;
689 }
690 if (!isVectorType(SrcTy)) {
691 return;
692 }
693 assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
694 assert(typeElementType(DestTy) == IceType_i1);
695 assert(isVectorIntegerType(SrcTy));
696 return;
697 }
698 case InstCast::Sext:
699 case InstCast::Zext: {
700 if (DestTy == SrcTy) {
701 return;
702 }
703 if (!isVectorType(DestTy)) {
704 return;
705 }
706 assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
707 assert(typeElementType(SrcTy) == IceType_i1);
708 assert(isVectorIntegerType(DestTy));
709 return;
710 }
711 }
712 llvm::report_fatal_error("Control flow should never have reached here.");
713 }
714 case Inst::Intrinsic: {
715 Variable *Dest = Instr->getDest();
716 auto *Intrinsic = llvm::cast<InstIntrinsic>(Instr);
717 Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicID();
718 switch (ID) {
719 default:
720 return;
721 case Intrinsics::Ctpop: {
722 Operand *Src0 = Intrinsic->getArg(0);
723 Operand *TargetHelper =
724 Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
725 ? RuntimeHelper::H_call_ctpop_i32
726 : RuntimeHelper::H_call_ctpop_i64);
727 static constexpr SizeT MaxArgs = 1;
728 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
729 NoTailCall, IsTargetHelperCall);
730 Call->addArg(Src0);
731 Instr->setDeleted();
732 if (Src0->getType() == IceType_i64) {
733 ARM32HelpersPostamble[TargetHelper] = &TargetARM32::postambleCtpop64;
734 }
735 return;
736 }
737 case Intrinsics::Longjmp: {
738 static constexpr SizeT MaxArgs = 2;
739 static constexpr Variable *NoDest = nullptr;
740 Operand *TargetHelper =
741 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
742 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
743 NoTailCall, IsTargetHelperCall);
744 Call->addArg(Intrinsic->getArg(0));
745 Call->addArg(Intrinsic->getArg(1));
746 Instr->setDeleted();
747 return;
748 }
749 case Intrinsics::Memcpy: {
750 // In the future, we could potentially emit an inline memcpy/memset, etc.
751 // for intrinsic calls w/ a known length.
752 static constexpr SizeT MaxArgs = 3;
753 static constexpr Variable *NoDest = nullptr;
754 Operand *TargetHelper =
755 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
756 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
757 NoTailCall, IsTargetHelperCall);
758 Call->addArg(Intrinsic->getArg(0));
759 Call->addArg(Intrinsic->getArg(1));
760 Call->addArg(Intrinsic->getArg(2));
761 Instr->setDeleted();
762 return;
763 }
764 case Intrinsics::Memmove: {
765 static constexpr SizeT MaxArgs = 3;
766 static constexpr Variable *NoDest = nullptr;
767 Operand *TargetHelper =
768 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
769 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
770 NoTailCall, IsTargetHelperCall);
771 Call->addArg(Intrinsic->getArg(0));
772 Call->addArg(Intrinsic->getArg(1));
773 Call->addArg(Intrinsic->getArg(2));
774 Instr->setDeleted();
775 return;
776 }
777 case Intrinsics::Memset: {
778 // The value operand needs to be extended to a stack slot size because the
779 // PNaCl ABI requires arguments to be at least 32 bits wide.
780 Operand *ValOp = Intrinsic->getArg(1);
781 assert(ValOp->getType() == IceType_i8);
782 Variable *ValExt = Func->makeVariable(stackSlotType());
783 Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);
784
785 // Technically, ARM has its own __aeabi_memset, but we can use plain
786 // memset too. The value and size argument need to be flipped if we ever
787 // decide to use __aeabi_memset.
788 static constexpr SizeT MaxArgs = 3;
789 static constexpr Variable *NoDest = nullptr;
790 Operand *TargetHelper =
791 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
792 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
793 NoTailCall, IsTargetHelperCall);
794 Call->addArg(Intrinsic->getArg(0));
795 Call->addArg(ValExt);
796 Call->addArg(Intrinsic->getArg(2));
797 Instr->setDeleted();
798 return;
799 }
800 case Intrinsics::Setjmp: {
801 static constexpr SizeT MaxArgs = 1;
802 Operand *TargetHelper =
803 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
804 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
805 NoTailCall, IsTargetHelperCall);
806 Call->addArg(Intrinsic->getArg(0));
807 Instr->setDeleted();
808 return;
809 }
810 }
811 llvm::report_fatal_error("Control flow should never have reached here.");
812 }
813 }
814 }
815
816 void TargetARM32::findMaxStackOutArgsSize() {
817 // MinNeededOutArgsBytes should be updated if the Target ever creates a
818 // high-level InstCall that requires more stack bytes.
819 constexpr size_t MinNeededOutArgsBytes = 0;
820 MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
821 for (CfgNode *Node : Func->getNodes()) {
822 Context.init(Node);
823 while (!Context.atEnd()) {
824 PostIncrLoweringContext PostIncrement(Context);
825 Inst *CurInstr = iteratorToInst(Context.getCur());
826 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
827 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
828 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
829 }
830 }
831 }
832 }
833
834 GlobalString
835 TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) {
836 GlobalString CRName = CR->getName();
837 GlobalString CRGotoffName =
838 Ctx->getGlobalString("GOTOFF$" + Func->getFunctionName() + "$" + CRName);
839 if (KnownGotoffs.count(CRGotoffName) == 0) {
840 constexpr bool SuppressMangling = true;
841 auto *Global =
842 VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
843 Global->setIsConstant(true);
844 Global->setName(CRName);
845 Func->getGlobalPool()->willNotBeEmitted(Global);
846
847 auto *Gotoff =
848 VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
849 constexpr auto GotFixup = R_ARM_GOTOFF32;
850 Gotoff->setIsConstant(true);
851 Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create(
852 Func->getGlobalPool(), Global, {RelocOffset::create(Ctx, 0)},
853 GotFixup));
854 Gotoff->setName(CRGotoffName);
855 Func->addGlobal(Gotoff);
856 KnownGotoffs.emplace(CRGotoffName);
857 }
858 return CRGotoffName;
859 }
860
861 void TargetARM32::translateO2() {
862 TimerMarker T(TimerStack::TT_O2, Func);
863
864 genTargetHelperCalls();
865 findMaxStackOutArgsSize();
866
867 // Merge Alloca instructions, and lay out the stack.
868 static constexpr bool SortAndCombineAllocas = true;
869 Func->processAllocas(SortAndCombineAllocas);
870 Func->dump("After Alloca processing");
871
872 if (!getFlags().getEnablePhiEdgeSplit()) {
873 // Lower Phi instructions.
874 Func->placePhiLoads();
875 if (Func->hasError())
876 return;
877 Func->placePhiStores();
878 if (Func->hasError())
879 return;
880 Func->deletePhis();
881 if (Func->hasError())
882 return;
883 Func->dump("After Phi lowering");
884 }
885
886 // Address mode optimization.
887 Func->getVMetadata()->init(VMK_SingleDefs);
888 Func->doAddressOpt();
889 Func->materializeVectorShuffles();
890
891 // Argument lowering
892 Func->doArgLowering();
893
894 // Target lowering. This requires liveness analysis for some parts of the
895 // lowering decisions, such as compare/branch fusing. If non-lightweight
896 // liveness analysis is used, the instructions need to be renumbered first.
897 // TODO: This renumbering should only be necessary if we're actually
898 // calculating live intervals, which we only do for register allocation.
899 Func->renumberInstructions();
900 if (Func->hasError())
901 return;
902
903 // TODO: It should be sufficient to use the fastest liveness calculation,
904 // i.e. livenessLightweight(). However, for some reason that slows down the
905 // rest of the translation. Investigate.
906 Func->liveness(Liveness_Basic);
907 if (Func->hasError())
908 return;
909 Func->dump("After ARM32 address mode opt");
910
911 Func->genCode();
912 if (Func->hasError())
913 return;
914 Func->dump("After ARM32 codegen");
915
916 // Register allocation. This requires instruction renumbering and full
917 // liveness analysis.
918 Func->renumberInstructions();
919 if (Func->hasError())
920 return;
921 Func->liveness(Liveness_Intervals);
922 if (Func->hasError())
923 return;
924 // The post-codegen dump is done here, after liveness analysis and associated
925 // cleanup, to make the dump cleaner and more useful.
926 Func->dump("After initial ARM32 codegen");
927 // Validate the live range computations. The expensive validation call is
928 // deliberately only made when assertions are enabled.
929 assert(Func->validateLiveness());
930 Func->getVMetadata()->init(VMK_All);
931 regAlloc(RAK_Global);
932 if (Func->hasError())
933 return;
934
935 copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
936 Func->dump("After linear scan regalloc");
937
938 if (getFlags().getEnablePhiEdgeSplit()) {
939 Func->advancedPhiLowering();
940 Func->dump("After advanced Phi lowering");
941 }
942
943 ForbidTemporaryWithoutReg _(this);
944
945 // Stack frame mapping.
946 Func->genFrame();
947 if (Func->hasError())
948 return;
949 Func->dump("After stack frame mapping");
950
951 postLowerLegalization();
952 if (Func->hasError())
953 return;
954 Func->dump("After postLowerLegalization");
955
956 Func->contractEmptyNodes();
957 Func->reorderNodes();
958
959 // Branch optimization. This needs to be done just before code emission. In
960 // particular, no transformations that insert or reorder CfgNodes should be
961 // done after branch optimization. We go ahead and do it before nop insertion
962 // to reduce the amount of work needed for searching for opportunities.
963 Func->doBranchOpt();
964 Func->dump("After branch optimization");
965 }
966
967 void TargetARM32::translateOm1() {
968 TimerMarker T(TimerStack::TT_Om1, Func);
969
970 genTargetHelperCalls();
971 findMaxStackOutArgsSize();
972
973 // Do not merge Alloca instructions, and lay out the stack.
974 static constexpr bool DontSortAndCombineAllocas = false;
975 Func->processAllocas(DontSortAndCombineAllocas);
976 Func->dump("After Alloca processing");
977
978 Func->placePhiLoads();
979 if (Func->hasError())
980 return;
981 Func->placePhiStores();
982 if (Func->hasError())
983 return;
984 Func->deletePhis();
985 if (Func->hasError())
986 return;
987 Func->dump("After Phi lowering");
988
989 Func->doArgLowering();
990
991 Func->genCode();
992 if (Func->hasError())
993 return;
994 Func->dump("After initial ARM32 codegen");
995
996 regAlloc(RAK_InfOnly);
997 if (Func->hasError())
998 return;
999
1000 copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
1001 Func->dump("After regalloc of infinite-weight variables");
1002
1003 ForbidTemporaryWithoutReg _(this);
1004
1005 Func->genFrame();
1006 if (Func->hasError())
1007 return;
1008 Func->dump("After stack frame mapping");
1009
1010 postLowerLegalization();
1011 if (Func->hasError())
1012 return;
1013 Func->dump("After postLowerLegalization");
1014 }
1015
1016 uint32_t TargetARM32::getStackAlignment() const {
1017 return ARM32_STACK_ALIGNMENT_BYTES;
1018 }
1019
1020 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
1021 if (auto *Br = llvm::dyn_cast<InstARM32Br>(I)) {
1022 return Br->optimizeBranch(NextNode);
1023 }
1024 return false;
1025 }
1026
1027 const char *TargetARM32::getRegName(RegNumT RegNum, Type Ty) const {
1028 (void)Ty;
1029 return RegARM32::getRegName(RegNum);
1030 }
1031
1032 Variable *TargetARM32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
1033 static const Type DefaultType[] = {
1034 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
1035 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
1036 (isFP32) \
1037 ? IceType_f32 \
1038 : ((isFP64) ? IceType_f64 : ((isVec128 ? IceType_v4i32 : IceType_i32))),
1039 REGARM32_TABLE
1040 #undef X
1041 };
1042
1043 if (Ty == IceType_void) {
1044 assert(unsigned(RegNum) < llvm::array_lengthof(DefaultType));
1045 Ty = DefaultType[RegNum];
1046 }
1047 if (PhysicalRegisters[Ty].empty())
1048 PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
1049 assert(unsigned(RegNum) < PhysicalRegisters[Ty].size());
1050 Variable *Reg = PhysicalRegisters[Ty][RegNum];
1051 if (Reg == nullptr) {
1052 Reg = Func->makeVariable(Ty);
1053 Reg->setRegNum(RegNum);
1054 PhysicalRegisters[Ty][RegNum] = Reg;
1055 // Specially mark a named physical register as an "argument" so that it is
1056 // considered live upon function entry. Otherwise it's possible to get
1057 // liveness validation errors for saving callee-save registers.
1058 Func->addImplicitArg(Reg);
1059 // Don't bother tracking the live range of a named physical register.
1060 Reg->setIgnoreLiveness();
1061 }
1062 return Reg;
1063 }
1064
1065 void TargetARM32::emitJumpTable(const Cfg *Func,
1066 const InstJumpTable *JumpTable) const {
1067 (void)Func;
1068 (void)JumpTable;
1069 UnimplementedError(getFlags());
1070 }
1071
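// Emission sketch (register names and offsets below are illustrative only): a
// register-allocated variable is printed as its register name, e.g. "r4",
// while a stack variable is printed as an ARM memory operand such as
// "[sp, #8]" or "[fp, #-4]", defaulting to the frame or stack pointer when no
// explicit base register was recorded.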
1072 void TargetARM32::emitVariable(const Variable *Var) const {
1073 if (!BuildDefs::dump())
1074 return;
1075 Ostream &Str = Ctx->getStrEmit();
1076 if (Var->hasReg()) {
1077 Str << getRegName(Var->getRegNum(), Var->getType());
1078 return;
1079 }
1080 if (Var->mustHaveReg()) {
1081 llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
1082 ") has no register assigned - function " +
1083 Func->getFunctionName());
1084 }
1085 assert(!Var->isRematerializable());
1086 int32_t Offset = Var->getStackOffset();
1087 auto BaseRegNum = Var->getBaseRegNum();
1088 if (BaseRegNum.hasNoValue()) {
1089 BaseRegNum = getFrameOrStackReg();
1090 }
1091 const Type VarTy = Var->getType();
1092 Str << "[" << getRegName(BaseRegNum, VarTy);
1093 if (Offset != 0) {
1094 Str << ", #" << Offset;
1095 }
1096 Str << "]";
1097 }
1098
1099 TargetARM32::CallingConv::CallingConv()
1100 : GPRegsUsed(RegARM32::Reg_NUM),
1101 GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
1102 I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
1103 VFPRegsUsed(RegARM32::Reg_NUM),
1104 FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
1105 FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()),
1106 Vec128Args(Vec128ArgInitializer.rbegin(), Vec128ArgInitializer.rend()) {}
1107
1108 bool TargetARM32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
1109 CfgVector<RegNumT> *Source;
1110
1111 switch (Ty) {
1112 default: {
1113 assert(isScalarIntegerType(Ty));
1114 Source = &GPRArgs;
1115 } break;
1116 case IceType_i64: {
1117 Source = &I64Args;
1118 } break;
1119 }
1120
1121 discardUnavailableGPRsAndTheirAliases(Source);
1122
1123 if (Source->empty()) {
1124 GPRegsUsed.set();
1125 return false;
1126 }
1127
1128 *Reg = Source->back();
1129 // Note that we don't Source->pop_back() here. This is intentional. Notice how
1130 // we mark all of Reg's aliases as Used. So, for the next argument,
1131 // Source->back() is marked as unavailable, and it is thus implicitly popped
1132 // from the stack.
1133 GPRegsUsed |= RegisterAliases[*Reg];
1134 return true;
1135 }
1136
1137 // GPRs are not packed when passing parameters. Thus, a function foo(i32, i64,
1138 // i32) will have the first argument in r0, the second in r1-r2, and the third
1139 // on the stack. To model this behavior, whenever we pop a register from Regs,
1140 // we remove all of its aliases from the pool of available GPRs. This has the
1141 // effect of computing the "closure" on the GPR registers.
1142 void TargetARM32::CallingConv::discardUnavailableGPRsAndTheirAliases(
1143 CfgVector<RegNumT> *Regs) {
1144 while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
1145 GPRegsUsed |= RegisterAliases[Regs->back()];
1146 Regs->pop_back();
1147 }
1148 }
1149
1150 bool TargetARM32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
1151 CfgVector<RegNumT> *Source;
1152
1153 switch (Ty) {
1154 default: {
1155 assert(isVectorType(Ty));
1156 Source = &Vec128Args;
1157 } break;
1158 case IceType_f32: {
1159 Source = &FP32Args;
1160 } break;
1161 case IceType_f64: {
1162 Source = &FP64Args;
1163 } break;
1164 }
1165
1166 discardUnavailableVFPRegs(Source);
1167
1168 if (Source->empty()) {
1169 VFPRegsUsed.set();
1170 return false;
1171 }
1172
1173 *Reg = Source->back();
1174 VFPRegsUsed |= RegisterAliases[*Reg];
1175 return true;
1176 }
1177
1178 // Arguments in VFP registers are not packed, so we don't mark the popped
1179 // registers' aliases as unavailable.
1180 void TargetARM32::CallingConv::discardUnavailableVFPRegs(
1181 CfgVector<RegNumT> *Regs) {
1182 while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
1183 Regs->pop_back();
1184 }
1185 }
1186
1187 void TargetARM32::lowerArguments() {
1188 VarList &Args = Func->getArgs();
1189 TargetARM32::CallingConv CC;
1190
1191 // For each register argument, replace Arg in the argument list with the home
1192 // register. Then generate an instruction in the prolog to copy the home
1193 // register to the assigned location of Arg.
1194 Context.init(Func->getEntryNode());
1195 Context.setInsertPoint(Context.getCur());
1196
1197 for (SizeT I = 0, E = Args.size(); I < E; ++I) {
1198 Variable *Arg = Args[I];
1199 Type Ty = Arg->getType();
1200 RegNumT RegNum;
1201 if (isScalarIntegerType(Ty)) {
1202 if (!CC.argInGPR(Ty, &RegNum)) {
1203 continue;
1204 }
1205 } else {
1206 if (!CC.argInVFP(Ty, &RegNum)) {
1207 continue;
1208 }
1209 }
1210
1211 Variable *RegisterArg = Func->makeVariable(Ty);
1212 if (BuildDefs::dump()) {
1213 RegisterArg->setName(Func, "home_reg:" + Arg->getName());
1214 }
1215 RegisterArg->setIsArg();
1216 Arg->setIsArg(false);
1217 Args[I] = RegisterArg;
1218 switch (Ty) {
1219 default: {
1220 RegisterArg->setRegNum(RegNum);
1221 } break;
1222 case IceType_i64: {
1223 auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
1224 RegisterArg64->initHiLo(Func);
1225 RegisterArg64->getLo()->setRegNum(
1226 RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(RegNum)));
1227 RegisterArg64->getHi()->setRegNum(
1228 RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(RegNum)));
1229 } break;
1230 }
1231 Context.insert<InstAssign>(Arg, RegisterArg);
1232 }
1233 }
1234
1235 // Helper function for addProlog().
1236 //
1237 // This assumes Arg is an argument passed on the stack. This sets the frame
1238 // offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
1239 // I64 arg that has been split into Lo and Hi components, it calls itself
1240 // recursively on the components, taking care to handle Lo first because of the
1241 // little-endian architecture. Lastly, this function generates an instruction
1242 // to copy Arg into its assigned register if applicable.
1243 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
1244 size_t BasicFrameOffset,
1245 size_t *InArgsSizeBytes) {
1246 const Type Ty = Arg->getType();
1247 *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);
1248
1249 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
1250 Variable *const Lo = Arg64On32->getLo();
1251 Variable *const Hi = Arg64On32->getHi();
1252 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1253 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1254 return;
1255 }
1256 assert(Ty != IceType_i64);
1257
1258 const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
1259 *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
1260
1261 if (!Arg->hasReg()) {
1262 Arg->setStackOffset(ArgStackOffset);
1263 return;
1264 }
1265
1266 // If the argument variable has been assigned a register, we need to copy the
1267 // value from the stack slot.
1268 Variable *Parameter = Func->makeVariable(Ty);
1269 Parameter->setMustNotHaveReg();
1270 Parameter->setStackOffset(ArgStackOffset);
1271 _mov(Arg, Parameter);
1272 }
1273
1274 Type TargetARM32::stackSlotType() { return IceType_i32; }
1275
1276 void TargetARM32::addProlog(CfgNode *Node) {
1277 // Stack frame layout:
1278 //
1279 // +------------------------+
1280 // | 1. preserved registers |
1281 // +------------------------+
1282 // | 2. padding |
1283 // +------------------------+ <--- FramePointer (if used)
1284 // | 3. global spill area |
1285 // +------------------------+
1286 // | 4. padding |
1287 // +------------------------+
1288 // | 5. local spill area |
1289 // +------------------------+
1290 // | 6. padding |
1291 // +------------------------+
1292 // | 7. allocas (variable) |
1293 // +------------------------+
1294 // | 8. padding |
1295 // +------------------------+
1296 // | 9. out args |
1297 // +------------------------+ <--- StackPointer
1298 //
1299 // The following variables record the size in bytes of the given areas:
1300 // * PreservedRegsSizeBytes: area 1
1301 // * SpillAreaPaddingBytes: area 2
1302 // * GlobalsSize: area 3
1303 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
1304 // * LocalsSpillAreaSize: area 5
1305 // * SpillAreaSizeBytes: areas 2 - 6, and 9
1306 // * MaxOutArgsSizeBytes: area 9
1307 //
1308 // Determine stack frame offsets for each Variable without a register
1309 // assignment. This can be done as one variable per stack slot. Or, do
1310 // coalescing by running the register allocator again with an infinite set of
1311 // registers (as a side effect, this gives variables a second chance at
1312 // physical register assignment).
1313 //
1314 // A middle ground approach is to leverage sparsity and allocate one block of
1315 // space on the frame for globals (variables with multi-block lifetime), and
1316 // one block to share for locals (single-block lifetime).
1317
1318 Context.init(Node);
1319 Context.setInsertPoint(Context.getCur());
1320
1321 SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
1322 RegsUsed = SmallBitVector(CalleeSaves.size());
1323 VarList SortedSpilledVariables;
1324 size_t GlobalsSize = 0;
1325 // If there is a separate locals area, this represents that area. Otherwise
1326 // it counts any variable not counted by GlobalsSize.
1327 SpillAreaSizeBytes = 0;
1328 // If there is a separate locals area, this specifies the alignment for it.
1329 uint32_t LocalsSlotsAlignmentBytes = 0;
1330 // The entire spill locations area gets aligned to largest natural alignment
1331 // of the variables that have a spill slot.
1332 uint32_t SpillAreaAlignmentBytes = 0;
1333 // For now, we don't have target-specific variables that need special
1334 // treatment (no stack-slot-linked SpillVariable type).
1335 std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
1336 static constexpr bool AssignStackSlot = false;
1337 static constexpr bool DontAssignStackSlot = !AssignStackSlot;
1338 if (llvm::isa<Variable64On32>(Var)) {
1339 return DontAssignStackSlot;
1340 }
1341 return AssignStackSlot;
1342 };
1343
1344 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1345 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1346 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1347 &LocalsSlotsAlignmentBytes, TargetVarHook);
1348 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1349 SpillAreaSizeBytes += GlobalsSize;
1350
1351 // Add push instructions for preserved registers. On ARM, "push" can push a
1352 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
1353 // callee-saved float/vector registers.
1354 //
1355 // The "vpush" instruction can handle a whole list of float/vector registers,
1356 // but it only handles contiguous sequences of registers by specifying the
1357 // start and the length.
1358 PreservedGPRs.reserve(CalleeSaves.size());
1359 PreservedSRegs.reserve(CalleeSaves.size());
1360
1361 // Consider FP and LR as callee-save / used as needed.
1362 if (UsesFramePointer) {
1363 if (RegsUsed[RegARM32::Reg_fp]) {
1364 llvm::report_fatal_error("Frame pointer has been used.");
1365 }
1366 CalleeSaves[RegARM32::Reg_fp] = true;
1367 RegsUsed[RegARM32::Reg_fp] = true;
1368 }
1369 if (!MaybeLeafFunc) {
1370 CalleeSaves[RegARM32::Reg_lr] = true;
1371 RegsUsed[RegARM32::Reg_lr] = true;
1372 }
1373
1374 // Make two passes over the used registers. The first pass records all the
1375 // used registers -- and their aliases. Then, we figure out which GPRs and
1376 // VFP S registers should be saved. We don't bother saving D/Q registers
1377 // because their uses are recorded as S regs uses.
1378 SmallBitVector ToPreserve(RegARM32::Reg_NUM);
1379 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1380 if (CalleeSaves[i] && RegsUsed[i]) {
1381 ToPreserve |= RegisterAliases[i];
1382 }
1383 }
1384
1385 uint32_t NumCallee = 0;
1386 size_t PreservedRegsSizeBytes = 0;
1387
1388 // RegClasses is a tuple of
1389 //
1390 // <First Register in Class, Last Register in Class, Vector of Save Registers>
1391 //
1392 // We use this tuple to figure out which register we should push/pop during
1393 // prolog/epilog.
1394 using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
1395 const RegClassType RegClasses[] = {
1396 RegClassType(RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last,
1397 &PreservedGPRs),
1398 RegClassType(RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last,
1399 &PreservedSRegs)};
1400 for (const auto &RegClass : RegClasses) {
1401 const uint32_t FirstRegInClass = std::get<0>(RegClass);
1402 const uint32_t LastRegInClass = std::get<1>(RegClass);
1403 VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1404 for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) {
1405 if (!ToPreserve[Reg]) {
1406 continue;
1407 }
1408 ++NumCallee;
1409 Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
1410 PreservedRegsSizeBytes +=
1411 typeWidthInBytesOnStack(PhysicalRegister->getType());
1412 PreservedRegsInClass->push_back(PhysicalRegister);
1413 }
1414 }
1415
1416 Ctx->statsUpdateRegistersSaved(NumCallee);
1417 if (!PreservedSRegs.empty())
1418 _push(PreservedSRegs);
1419 if (!PreservedGPRs.empty())
1420 _push(PreservedGPRs);
1421
1422 // Generate "mov FP, SP" if needed.
1423 if (UsesFramePointer) {
1424 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1425 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1426 _mov(FP, SP);
1427 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1428 Context.insert<InstFakeUse>(FP);
1429 }
1430
1431 // Align the variables area. SpillAreaPaddingBytes is the size of the region
1432 // after the preserved registers and before the spill areas.
1433 // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1434 // locals area if they are separate.
1435 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
1436 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1437 uint32_t SpillAreaPaddingBytes = 0;
1438 uint32_t LocalsSlotsPaddingBytes = 0;
1439 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1440 GlobalsSize, LocalsSlotsAlignmentBytes,
1441 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1442 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1443 uint32_t GlobalsAndSubsequentPaddingSize =
1444 GlobalsSize + LocalsSlotsPaddingBytes;
1445
1446 // Adds the out args space to the stack, and align SP if necessary.
1447 if (!NeedsStackAlignment) {
1448 SpillAreaSizeBytes += MaxOutArgsSizeBytes;
1449 } else {
1450 uint32_t StackOffset = PreservedRegsSizeBytes;
1451 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
1452 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
1453 SpillAreaSizeBytes = StackSize - StackOffset;
1454 }
1455
1456 // Combine fixed alloca with SpillAreaSize.
1457 SpillAreaSizeBytes += FixedAllocaSizeBytes;
1458
1459 // Generate "sub sp, SpillAreaSizeBytes"
1460 if (SpillAreaSizeBytes) {
1461 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1462 // Use the scratch register if needed to legalize the immediate.
1463 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1464 Legal_Reg | Legal_Flex, getReservedTmpReg());
1465 _sub(SP, SP, SubAmount);
1466 if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
1467 alignRegisterPow2(SP, FixedAllocaAlignBytes);
1468 }
1469 }
1470
1471 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
1472
1473 // Fill in stack offsets for stack args, and copy args into registers for
1474 // those that were register-allocated. Args are pushed right to left, so
1475 // Arg[0] is closest to the stack/frame pointer.
1476 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
1477 size_t BasicFrameOffset = PreservedRegsSizeBytes;
1478 if (!UsesFramePointer)
1479 BasicFrameOffset += SpillAreaSizeBytes;
1480
1481 const VarList &Args = Func->getArgs();
1482 size_t InArgsSizeBytes = 0;
1483 TargetARM32::CallingConv CC;
1484 for (Variable *Arg : Args) {
1485 RegNumT DummyReg;
1486 const Type Ty = Arg->getType();
1487
1488 // Skip arguments passed in registers.
1489 if (isScalarIntegerType(Ty)) {
1490 if (CC.argInGPR(Ty, &DummyReg)) {
1491 continue;
1492 }
1493 } else {
1494 if (CC.argInVFP(Ty, &DummyReg)) {
1495 continue;
1496 }
1497 }
1498 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, &InArgsSizeBytes);
1499 }
1500
1501 // Fill in stack offsets for locals.
1502 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1503 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
1504 UsesFramePointer);
1505 this->HasComputedFrame = true;
1506
1507 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1508 OstreamLocker _(Func->getContext());
1509 Ostream &Str = Func->getContext()->getStrDump();
1510
1511 Str << "Stack layout:\n";
1512 uint32_t SPAdjustmentPaddingSize =
1513 SpillAreaSizeBytes - LocalsSpillAreaSize -
1514 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1515 MaxOutArgsSizeBytes;
1516 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1517 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1518 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1519 << " globals spill area = " << GlobalsSize << " bytes\n"
1520 << " globals-locals spill areas intermediate padding = "
1521 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1522 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1523 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
1524
1525 Str << "Stack details:\n"
1526 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
1527 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1528 << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
1529 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1530 << " bytes\n"
1531 << " is FP based = " << UsesFramePointer << "\n";
1532 }
1533 }
1534
1535 void TargetARM32::addEpilog(CfgNode *Node) {
1536 InstList &Insts = Node->getInsts();
1537 InstList::reverse_iterator RI, E;
1538 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1539 if (llvm::isa<InstARM32Ret>(*RI))
1540 break;
1541 }
1542 if (RI == E)
1543 return;
1544
1545 // Convert the reverse_iterator position into its corresponding (forward)
1546 // iterator position.
1547 InstList::iterator InsertPoint = reverseToForwardIterator(RI);
1548 --InsertPoint;
1549 Context.init(Node);
1550 Context.setInsertPoint(InsertPoint);
1551
1552 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1553 if (UsesFramePointer) {
1554 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1555 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
1556 // use of SP before the assignment of SP=FP keeps previous SP adjustments
1557 // from being dead-code eliminated.
1558 Context.insert<InstFakeUse>(SP);
1559 _mov_redefined(SP, FP);
1560 } else {
1561 // add SP, SpillAreaSizeBytes
1562 if (SpillAreaSizeBytes) {
1563 // Use the scratch register if needed to legalize the immediate.
1564 Operand *AddAmount =
1565 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1566 Legal_Reg | Legal_Flex, getReservedTmpReg());
1567 _add(SP, SP, AddAmount);
1568 }
1569 }
1570
1571 if (!PreservedGPRs.empty())
1572 _pop(PreservedGPRs);
1573 if (!PreservedSRegs.empty())
1574 _pop(PreservedSRegs);
1575 }
1576
1577 bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const {
1578 constexpr bool ZeroExt = false;
1579 return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset);
1580 }
1581
1582 Variable *TargetARM32::PostLoweringLegalizer::newBaseRegister(
1583 Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
1584 // Legalize will likely need a movw/movt combination. However, if negating the
1585 // offset leaves the top 16 bits all 0, we can materialize -Offset and subtract instead.
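// Illustrative sketch (hypothetical offset; not emitted verbatim): for
// Offset = -0x8000, -Offset = 0x8000 has a zero upper half, so
//   movw ip, #0x8000
//   sub  ip, base, ip
// suffices, whereas materializing -0x8000 (0xFFFF8000) directly would need
//   movw ip, #0x8000 ; movt ip, #0xFFFF
//   add  ip, base, ip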
1586 const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
1587 Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
1588 if (ShouldSub) {
1589 Operand *OffsetVal =
1590 Target->legalize(Target->Ctx->getConstantInt32(-Offset),
1591 Legal_Reg | Legal_Flex, ScratchRegNum);
1592 Target->_sub(ScratchReg, Base, OffsetVal);
1593 } else {
1594 Operand *OffsetVal =
1595 Target->legalize(Target->Ctx->getConstantInt32(Offset),
1596 Legal_Reg | Legal_Flex, ScratchRegNum);
1597 Target->_add(ScratchReg, Base, OffsetVal);
1598 }
1599
1600 if (ScratchRegNum == Target->getReservedTmpReg()) {
1601 const bool BaseIsStackOrFramePtr =
1602 Base->getRegNum() == Target->getFrameOrStackReg();
1603 // No code path currently violates this assertion, but we keep it in case that
1604 // ever changes. A violation is not a fatal error (thus the use of assert() and
1605 // not llvm::report_fatal_error()), as the program compiled by Subzero would
1606 // still work correctly.
1607 assert(BaseIsStackOrFramePtr);
1608 // Side-effect: updates TempBaseReg/TempBaseOffset to reflect the new temporary.
1609 if (BaseIsStackOrFramePtr) {
1610 TempBaseReg = ScratchReg;
1611 TempBaseOffset = Offset;
1612 } else {
1613 TempBaseReg = nullptr;
1614 TempBaseOffset = 0;
1615 }
1616 }
1617
1618 return ScratchReg;
1619 }
1620
1621 OperandARM32Mem *TargetARM32::PostLoweringLegalizer::createMemOperand(
1622 Type Ty, Variable *Base, int32_t Offset, bool AllowOffsets) {
1623 assert(!Base->isRematerializable());
1624 if (Offset == 0 || (AllowOffsets && Target->isLegalMemOffset(Ty, Offset))) {
1625 return OperandARM32Mem::create(
1626 Target->Func, Ty, Base,
1627 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)),
1628 OperandARM32Mem::Offset);
1629 }
1630
1631 if (!AllowOffsets || TempBaseReg == nullptr) {
1632 newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1633 }
1634
1635 int32_t OffsetDiff = Offset - TempBaseOffset;
1636 assert(AllowOffsets || OffsetDiff == 0);
1637
1638 if (!Target->isLegalMemOffset(Ty, OffsetDiff)) {
1639 newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1640 OffsetDiff = 0;
1641 }
1642
1643 assert(!TempBaseReg->isRematerializable());
1644 return OperandARM32Mem::create(
1645 Target->Func, Ty, TempBaseReg,
1646 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(OffsetDiff)),
1647 OperandARM32Mem::Offset);
1648 }
1649
1650 void TargetARM32::PostLoweringLegalizer::resetTempBaseIfClobberedBy(
1651 const Inst *Instr) {
1652 bool ClobbersTempBase = false;
1653 if (TempBaseReg != nullptr) {
1654 Variable *Dest = Instr->getDest();
1655 if (llvm::isa<InstARM32Call>(Instr)) {
1656 // The following assertion is an invariant, so we remove it from the if
1657 // test. If the invariant is ever broken/invalidated/changed, remember
1658 // to add it back to the if condition.
1659 assert(TempBaseReg->getRegNum() == Target->getReservedTmpReg());
1660 // The linker may need to clobber IP if the call is too far from PC. Thus,
1661 // we assume IP will be overwritten.
1662 ClobbersTempBase = true;
1663 } else if (Dest != nullptr &&
1664 Dest->getRegNum() == TempBaseReg->getRegNum()) {
1665 // Register redefinition.
1666 ClobbersTempBase = true;
1667 }
1668 }
1669
1670 if (ClobbersTempBase) {
1671 TempBaseReg = nullptr;
1672 TempBaseOffset = 0;
1673 }
1674 }
1675
1676 void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
1677 Variable *Dest = MovInstr->getDest();
1678 assert(Dest != nullptr);
1679 Type DestTy = Dest->getType();
1680 assert(DestTy != IceType_i64);
1681
1682 Operand *Src = MovInstr->getSrc(0);
1683 Type SrcTy = Src->getType();
1684 (void)SrcTy;
1685 assert(SrcTy != IceType_i64);
1686
1687 if (MovInstr->isMultiDest() || MovInstr->isMultiSource())
1688 return;
1689
1690 bool Legalized = false;
1691 if (!Dest->hasReg()) {
1692 auto *SrcR = llvm::cast<Variable>(Src);
1693 assert(SrcR->hasReg());
1694 assert(!SrcR->isRematerializable());
1695 const int32_t Offset = Dest->getStackOffset();
1696 // This is a _mov(Mem(), Variable), i.e., a store.
1697 Target->_str(SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset),
1698 MovInstr->getPredicate());
1699 // _str() does not have a Dest, so we add a fake-def(Dest).
1700 Target->Context.insert<InstFakeDef>(Dest);
1701 Legalized = true;
1702 } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
1703 if (Var->isRematerializable()) {
1704 // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
1705
1706 // ExtraOffset is only needed for frame-pointer based frames as we have
1707 // to account for spill storage.
1708 const int32_t ExtraOffset = (Var->getRegNum() == Target->getFrameReg())
1709 ? Target->getFrameFixedAllocaOffset()
1710 : 0;
1711
1712 const int32_t Offset = Var->getStackOffset() + ExtraOffset;
1713 Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
1714 Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
1715 Target->_mov(Dest, T);
1716 Legalized = true;
1717 } else {
1718 if (!Var->hasReg()) {
1719 // This is a _mov(Variable, Mem()), i.e., a load.
1720 const int32_t Offset = Var->getStackOffset();
1721 Target->_ldr(Dest, createMemOperand(DestTy, StackOrFrameReg, Offset),
1722 MovInstr->getPredicate());
1723 Legalized = true;
1724 }
1725 }
1726 }
1727
1728 if (Legalized) {
1729 if (MovInstr->isDestRedefined()) {
1730 Target->_set_dest_redefined();
1731 }
1732 MovInstr->setDeleted();
1733 }
1734 }
1735
1736 // ARM32 address modes:
1737 // ld/st i[8|16|32]: [reg], [reg +/- imm12], [pc +/- imm12],
1738 // [reg +/- reg << shamt5]
1739 // ld/st f[32|64] : [reg], [reg +/- imm8] , [pc +/- imm8]
1740 // ld/st vectors : [reg]
1741 //
1742 // For now, we don't handle address modes with Relocatables.
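// As a hedged illustration only (assembly sketches; register names are
// arbitrary), the forms above correspond to:
//   ldr     r0, [r1]              ; [reg]
//   ldr     r0, [r1, #-40]        ; [reg +/- imm12]
//   ldr     r0, [r1, r2, lsl #2]  ; [reg +/- reg << shamt5]
//   vldr    s0, [r1, #16]         ; f32/f64: [reg +/- imm8]
//   vld1.32 {d0, d1}, [r1]        ; vectors: [reg] only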
1743 namespace {
1744 // MemTraits contains per-type valid address mode information.
1745 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
1746 ubits, rraddr, shaddr) \
1747 static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag);
1748 ICETYPEARM32_TABLE
1749 #undef X
1750
1751 static const struct {
1752 int32_t ValidImmMask;
1753 bool CanHaveImm;
1754 bool CanHaveIndex;
1755 bool CanHaveShiftedIndex;
1756 } MemTraits[] = {
1757 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
1758 ubits, rraddr, shaddr) \
1759 { \
1760 (1 << ubits) - 1, \
1761 (ubits) > 0, \
1762 rraddr, \
1763 shaddr, \
1764 },
1765 ICETYPEARM32_TABLE
1766 #undef X
1767 };
1768 static constexpr SizeT MemTraitsSize = llvm::array_lengthof(MemTraits);
1769 } // end of anonymous namespace
1770
1771 OperandARM32Mem *
1772 TargetARM32::PostLoweringLegalizer::legalizeMemOperand(OperandARM32Mem *Mem,
1773 bool AllowOffsets) {
1774 assert(!Mem->isRegReg() || !Mem->getIndex()->isRematerializable());
1775 assert(Mem->isRegReg() || Target->isLegalMemOffset(
1776 Mem->getType(), Mem->getOffset()->getValue()));
1777
1778 bool Legalized = false;
1779 Variable *Base = Mem->getBase();
1780 int32_t Offset = Mem->isRegReg() ? 0 : Mem->getOffset()->getValue();
1781 if (Base->isRematerializable()) {
1782 const int32_t ExtraOffset = (Base->getRegNum() == Target->getFrameReg())
1783 ? Target->getFrameFixedAllocaOffset()
1784 : 0;
1785 Offset += Base->getStackOffset() + ExtraOffset;
1786 Base = Target->getPhysicalRegister(Base->getRegNum());
1787 assert(!Base->isRematerializable());
1788 Legalized = true;
1789 }
1790
1791 if (!Legalized) {
1792 return nullptr;
1793 }
1794
1795 if (!Mem->isRegReg()) {
1796 return createMemOperand(Mem->getType(), Base, Offset, AllowOffsets);
1797 }
1798
1799 assert(MemTraits[Mem->getType()].CanHaveIndex);
1800
1801 if (Offset != 0) {
1802 if (TempBaseReg == nullptr) {
1803 Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1804 } else {
1805 uint32_t Imm8, Rotate;
1806 const int32_t OffsetDiff = Offset - TempBaseOffset;
1807 if (OffsetDiff == 0) {
1808 Base = TempBaseReg;
1809 } else if (OperandARM32FlexImm::canHoldImm(OffsetDiff, &Rotate, &Imm8)) {
1810 auto *OffsetDiffF = OperandARM32FlexImm::create(
1811 Target->Func, IceType_i32, Imm8, Rotate);
1812 Target->_add(TempBaseReg, TempBaseReg, OffsetDiffF);
1813 TempBaseOffset += OffsetDiff;
1814 Base = TempBaseReg;
1815 } else if (OperandARM32FlexImm::canHoldImm(-OffsetDiff, &Rotate, &Imm8)) {
1816 auto *OffsetDiffF = OperandARM32FlexImm::create(
1817 Target->Func, IceType_i32, Imm8, Rotate);
1818 Target->_sub(TempBaseReg, TempBaseReg, OffsetDiffF);
1819 TempBaseOffset += OffsetDiff;
1820 Base = TempBaseReg;
1821 } else {
1822 Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1823 }
1824 }
1825 }
1826
1827 return OperandARM32Mem::create(Target->Func, Mem->getType(), Base,
1828 Mem->getIndex(), Mem->getShiftOp(),
1829 Mem->getShiftAmt(), Mem->getAddrMode());
1830 }
1831
1832 void TargetARM32::postLowerLegalization() {
1833 // If a stack variable's frame offset doesn't fit, convert from:
1834 // ldr X, OFF[SP]
1835 // to:
1836 // movw/movt TMP, OFF_PART
1837 // add TMP, TMP, SP
1838 // ldr X, OFF_MORE[TMP]
1839 //
1840 // This is safe because we have reserved TMP, and add for ARM does not
1841 // clobber the flags register.
1842 Func->dump("Before postLowerLegalization");
1843 assert(hasComputedFrame());
1844 // Do a fairly naive greedy clustering for now. Pick the first stack slot
1845 // that's out of bounds and make a new base reg using the architecture's temp
1846 // register. If that works for the next slot, then great. Otherwise, create a
1847 // new base register, clobbering the previous base register. Never share a
1848 // base reg across different basic blocks. This isn't ideal if local and
1849 // multi-block variables are far apart and their references are interspersed.
1850 // It may help to be more coordinated about assigning stack slot numbers, and
1851 // to assign smaller offsets to higher-weight variables so that they don't
1852 // depend on this legalization.
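// A minimal sketch of the clustering, assuming two hypothetical slots at
// offsets 4096 and 4100 from SP (both beyond the reach of the immediate
// offset for the access type):
//   movw ip, #4096
//   add  ip, ip, sp    ; new temporary base, TempBaseOffset = 4096
//   ldr  r0, [ip]      ; slot at 4096 -> delta 0
//   ldr  r1, [ip, #4]  ; slot at 4100 -> reuses ip with delta 4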
1853 for (CfgNode *Node : Func->getNodes()) {
1854 Context.init(Node);
1855 // One legalizer per basic block, otherwise we would share the Temporary
1856 // Base Register between basic blocks.
1857 PostLoweringLegalizer Legalizer(this);
1858 while (!Context.atEnd()) {
1859 PostIncrLoweringContext PostIncrement(Context);
1860 Inst *CurInstr = iteratorToInst(Context.getCur());
1861
1862 // Check if the previous TempBaseReg is clobbered, and reset if needed.
1863 Legalizer.resetTempBaseIfClobberedBy(CurInstr);
1864
1865 if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) {
1866 Legalizer.legalizeMov(MovInstr);
1867 } else if (auto *LdrInstr = llvm::dyn_cast<InstARM32Ldr>(CurInstr)) {
1868 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
1869 llvm::cast<OperandARM32Mem>(LdrInstr->getSrc(0)))) {
1870 _ldr(CurInstr->getDest(), LegalMem, LdrInstr->getPredicate());
1871 CurInstr->setDeleted();
1872 }
1873 } else if (auto *LdrexInstr = llvm::dyn_cast<InstARM32Ldrex>(CurInstr)) {
1874 constexpr bool DisallowOffsetsBecauseLdrex = false;
1875 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
1876 llvm::cast<OperandARM32Mem>(LdrexInstr->getSrc(0)),
1877 DisallowOffsetsBecauseLdrex)) {
1878 _ldrex(CurInstr->getDest(), LegalMem, LdrexInstr->getPredicate());
1879 CurInstr->setDeleted();
1880 }
1881 } else if (auto *StrInstr = llvm::dyn_cast<InstARM32Str>(CurInstr)) {
1882 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
1883 llvm::cast<OperandARM32Mem>(StrInstr->getSrc(1)))) {
1884 _str(llvm::cast<Variable>(CurInstr->getSrc(0)), LegalMem,
1885 StrInstr->getPredicate());
1886 CurInstr->setDeleted();
1887 }
1888 } else if (auto *StrexInstr = llvm::dyn_cast<InstARM32Strex>(CurInstr)) {
1889 constexpr bool DisallowOffsetsBecauseStrex = false;
1890 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
1891 llvm::cast<OperandARM32Mem>(StrexInstr->getSrc(1)),
1892 DisallowOffsetsBecauseStrex)) {
1893 _strex(CurInstr->getDest(), llvm::cast<Variable>(CurInstr->getSrc(0)),
1894 LegalMem, StrexInstr->getPredicate());
1895 CurInstr->setDeleted();
1896 }
1897 }
1898
1899 // Sanity-check: the Legalizer will either have no Temp, or it will be
1900 // bound to IP.
1901 Legalizer.assertNoTempOrAssignedToIP();
1902 }
1903 }
1904 }
1905
1906 Operand *TargetARM32::loOperand(Operand *Operand) {
1907 assert(Operand->getType() == IceType_i64);
1908 if (Operand->getType() != IceType_i64)
1909 return Operand;
1910 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
1911 return Var64On32->getLo();
1912 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand))
1913 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
1914 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
1915 // Conservatively disallow memory operands with side-effects (pre/post
1916 // increment) in case of duplication.
1917 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
1918 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
1919 if (Mem->isRegReg()) {
1920 Variable *IndexR = legalizeToReg(Mem->getIndex());
1921 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), IndexR,
1922 Mem->getShiftOp(), Mem->getShiftAmt(),
1923 Mem->getAddrMode());
1924 } else {
1925 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
1926 Mem->getOffset(), Mem->getAddrMode());
1927 }
1928 }
1929 llvm::report_fatal_error("Unsupported operand type");
1930 return nullptr;
1931 }
1932
1933 Operand *TargetARM32::hiOperand(Operand *Operand) {
1934 assert(Operand->getType() == IceType_i64);
1935 if (Operand->getType() != IceType_i64)
1936 return Operand;
1937 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
1938 return Var64On32->getHi();
1939 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1940 return Ctx->getConstantInt32(
1941 static_cast<uint32_t>(Const->getValue() >> 32));
1942 }
1943 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
1944 // Conservatively disallow memory operands with side-effects in case of
1945 // duplication.
1946 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
1947 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
1948 const Type SplitType = IceType_i32;
1949 if (Mem->isRegReg()) {
1950 // We have to make a temp variable T, and add 4 to either Base or Index.
1951 // The Index may be shifted, so adding 4 to it would be scaled by the shift
1952 // amount. Thus, prefer T := Base + 4, and use T as the new Base.
1953 Variable *Base = Mem->getBase();
1954 Constant *Four = Ctx->getConstantInt32(4);
1955 Variable *NewBase = Func->makeVariable(Base->getType());
1956 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
1957 Base, Four));
1958 Variable *BaseR = legalizeToReg(NewBase);
1959 Variable *IndexR = legalizeToReg(Mem->getIndex());
1960 return OperandARM32Mem::create(Func, SplitType, BaseR, IndexR,
1961 Mem->getShiftOp(), Mem->getShiftAmt(),
1962 Mem->getAddrMode());
1963 } else {
1964 Variable *Base = Mem->getBase();
1965 ConstantInteger32 *Offset = Mem->getOffset();
1966 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
1967 int32_t NextOffsetVal = Offset->getValue() + 4;
1968 constexpr bool ZeroExt = false;
1969 if (!OperandARM32Mem::canHoldOffset(SplitType, ZeroExt, NextOffsetVal)) {
1970 // We have to make a temp variable and add 4 to either Base or Offset.
1971 // If we add 4 to Offset, this will convert a non-RegReg addressing
1972 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
1973 // RegReg addressing modes, prefer adding to base and replacing
1974 // instead. Thus we leave the old offset alone.
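// Sketch (hypothetical operands): for the hi half of an i64 at [r1, #4092],
// the offset #4096 no longer fits in an imm12, so we emit roughly:
//   add r2, r1, #4
//   ldr hi, [r2, #4092]
// keeping the original, still-encodable offset rather than growing it.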
1975 Constant *_4 = Ctx->getConstantInt32(4);
1976 Variable *NewBase = Func->makeVariable(Base->getType());
1977 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
1978 NewBase, Base, _4));
1979 Base = NewBase;
1980 } else {
1981 Offset =
1982 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
1983 }
1984 Variable *BaseR = legalizeToReg(Base);
1985 return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,
1986 Mem->getAddrMode());
1987 }
1988 }
1989 llvm::report_fatal_error("Unsupported operand type");
1990 return nullptr;
1991 }
1992
1993 SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
1994 RegSetMask Exclude) const {
1995 SmallBitVector Registers(RegARM32::Reg_NUM);
1996
1997 for (uint32_t i = 0; i < RegARM32::Reg_NUM; ++i) {
1998 const auto &Entry = RegARM32::RegTable[i];
1999 if (Entry.Scratch && (Include & RegSet_CallerSave))
2000 Registers[i] = true;
2001 if (Entry.Preserved && (Include & RegSet_CalleeSave))
2002 Registers[i] = true;
2003 if (Entry.StackPtr && (Include & RegSet_StackPointer))
2004 Registers[i] = true;
2005 if (Entry.FramePtr && (Include & RegSet_FramePointer))
2006 Registers[i] = true;
2007 if (Entry.Scratch && (Exclude & RegSet_CallerSave))
2008 Registers[i] = false;
2009 if (Entry.Preserved && (Exclude & RegSet_CalleeSave))
2010 Registers[i] = false;
2011 if (Entry.StackPtr && (Exclude & RegSet_StackPointer))
2012 Registers[i] = false;
2013 if (Entry.FramePtr && (Exclude & RegSet_FramePointer))
2014 Registers[i] = false;
2015 }
2016
2017 return Registers;
2018 }
2019
2020 void TargetARM32::lowerAlloca(const InstAlloca *Instr) {
2021 // Conservatively require the stack to be aligned. Some stack adjustment
2022 // operations implemented below assume that the stack is aligned before the
2023 // alloca. All the alloca code ensures that the stack alignment is preserved
2024 // after the alloca. The stack alignment restriction can be relaxed in some
2025 // cases.
2026 NeedsStackAlignment = true;
2027
2028 // For default align=0, set it to the real value 1, to avoid any
2029 // bit-manipulation problems below.
2030 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
2031
2032 // LLVM enforces power of 2 alignment.
2033 assert(llvm::isPowerOf2_32(AlignmentParam));
2034 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
2035
2036 const uint32_t Alignment =
2037 std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
2038 const bool OverAligned = Alignment > ARM32_STACK_ALIGNMENT_BYTES;
2039 const bool OptM1 = Func->getOptLevel() == Opt_m1;
2040 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
2041 const bool UseFramePointer =
2042 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
2043
2044 if (UseFramePointer)
2045 setHasFramePointer();
2046
2047 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2048 if (OverAligned) {
2049 alignRegisterPow2(SP, Alignment);
2050 }
2051
2052 Variable *Dest = Instr->getDest();
2053 Operand *TotalSize = Instr->getSizeInBytes();
2054
2055 if (const auto *ConstantTotalSize =
2056 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
2057 const uint32_t Value =
2058 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
2059 // Constant size alloca.
2060 if (!UseFramePointer) {
2061 // If we don't need a Frame Pointer, this alloca has a known offset to the
2062 // stack pointer. We don't need to adjust the stack pointer, nor assign any
2063 // value to Dest, as Dest is rematerializable.
2064 assert(Dest->isRematerializable());
2065 FixedAllocaSizeBytes += Value;
2066 Context.insert<InstFakeDef>(Dest);
2067 return;
2068 }
2069
2070 // If a frame pointer is required, then we need to store the alloca'd result
2071 // in Dest.
2072 Operand *SubAmountRF =
2073 legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
2074 _sub(SP, SP, SubAmountRF);
2075 } else {
2076 // Non-constant sizes need to be adjusted to the next highest multiple of
2077 // the required alignment at runtime.
2078 TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
2079 Variable *T = makeReg(IceType_i32);
2080 _mov(T, TotalSize);
2081 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
2082 _add(T, T, AddAmount);
2083 alignRegisterPow2(T, Alignment);
2084 _sub(SP, SP, T);
2085 }
2086
2087 // Adds back a few bytes to SP to account for the out args area.
2088 Variable *T = SP;
2089 if (MaxOutArgsSizeBytes != 0) {
2090 T = makeReg(getPointerType());
2091 Operand *OutArgsSizeRF = legalize(
2092 Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
2093 _add(T, SP, OutArgsSizeRF);
2094 }
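// For reference, a dynamically-sized alloca therefore expands to roughly the
// following (a sketch with made-up register names; alignRegisterPow2 is
// assumed to clear the low bits):
//   mov  t, size
//   add  t, t, #Alignment-1
//   bic  t, t, #Alignment-1
//   sub  sp, sp, t
//   add  dest, sp, #MaxOutArgsSizeBytes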
2095
2096 _mov(Dest, T);
2097 }
2098
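// div0Check conservatively emits a zero test followed by a conditional trap.
// For an i64 divisor the emitted sequence is roughly (a sketch; the label and
// register names are illustrative only):
//   orrs t, srcLo, srcHi   ; Z is set iff the full 64-bit value is zero
//   bne  .Lnonzero
//   <trap>
// .Lnonzero: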
2099 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
2100 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
2101 return;
2102 Variable *SrcLoReg = legalizeToReg(SrcLo);
2103 switch (Ty) {
2104 default:
2105 llvm_unreachable(
2106 ("Unexpected type in div0Check: " + typeStdString(Ty)).c_str());
2107 case IceType_i8:
2108 case IceType_i16: {
2109 Operand *ShAmtImm = shAmtImm(32 - getScalarIntBitWidth(Ty));
2110 Variable *T = makeReg(IceType_i32);
2111 _lsls(T, SrcLoReg, ShAmtImm);
2112 Context.insert<InstFakeUse>(T);
2113 } break;
2114 case IceType_i32: {
2115 _tst(SrcLoReg, SrcLoReg);
2116 break;
2117 }
2118 case IceType_i64: {
2119 Variable *T = makeReg(IceType_i32);
2120 _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
2121 // T isn't going to be used, but we need the side-effect of setting flags
2122 // from this operation.
2123 Context.insert<InstFakeUse>(T);
2124 }
2125 }
2126 auto *Label = InstARM32Label::create(Func, this);
2127 _br(Label, CondARM32::NE);
2128 _trap();
2129 Context.insert(Label);
2130 }
2131
2132 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
2133 Operand *Src1, ExtInstr ExtFunc,
2134 DivInstr DivFunc, bool IsRemainder) {
2135 div0Check(Dest->getType(), Src1, nullptr);
2136 Variable *Src1R = legalizeToReg(Src1);
2137 Variable *T0R = Src0R;
2138 Variable *T1R = Src1R;
2139 if (Dest->getType() != IceType_i32) {
2140 T0R = makeReg(IceType_i32);
2141 (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
2142 T1R = makeReg(IceType_i32);
2143 (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
2144 }
2145 if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
2146 (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
2147 if (IsRemainder) {
2148 Variable *T2 = makeReg(IceType_i32);
2149 _mls(T2, T, T1R, T0R);
2150 T = T2;
2151 }
2152 _mov(Dest, T);
2153 } else {
2154 llvm::report_fatal_error("div should have already been turned into a call");
2155 }
2156 }
2157
2158 TargetARM32::SafeBoolChain
2159 TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Instr) {
2160 Variable *Dest = Instr->getDest();
2161 assert(Dest->getType() == IceType_i1);
2162
2163 // So folding didn't work for Instr. Not a problem: We just need to
2164 // materialize the Sources, and perform the operation. We create regular
2165 // Variables (and not infinite-weight ones) because this call might recurse a
2166 // lot, and we might end up with tons of infinite weight temporaries.
2167 assert(Instr->getSrcSize() == 2);
2168 Variable *Src0 = Func->makeVariable(IceType_i1);
2169 SafeBoolChain Src0Safe = lowerInt1(Src0, Instr->getSrc(0));
2170
2171 Operand *Src1 = Instr->getSrc(1);
2172 SafeBoolChain Src1Safe = SBC_Yes;
2173
2174 if (!llvm::isa<Constant>(Src1)) {
2175 Variable *Src1V = Func->makeVariable(IceType_i1);
2176 Src1Safe = lowerInt1(Src1V, Src1);
2177 Src1 = Src1V;
2178 }
2179
2180 Variable *T = makeReg(IceType_i1);
2181 Src0 = legalizeToReg(Src0);
2182 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
2183 switch (Instr->getOp()) {
2184 default:
2185 // If this Unreachable is ever executed, add the offending operation to
2186 // the list of valid consumers.
2187 llvm::report_fatal_error("Unhandled i1 Op");
2188 case InstArithmetic::And:
2189 _and(T, Src0, Src1RF);
2190 break;
2191 case InstArithmetic::Or:
2192 _orr(T, Src0, Src1RF);
2193 break;
2194 case InstArithmetic::Xor:
2195 _eor(T, Src0, Src1RF);
2196 break;
2197 }
2198 _mov(Dest, T);
2199 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
2200 }
2201
2202 namespace {
2203 // NumericOperands is used during arithmetic/icmp lowering for constant folding.
2204 // It holds the two source operands, and maintains some state as to whether one
2205 // of them is a constant. If one of the operands is a constant, then it will
2206 // be stored as the operation's second source, with a bit indicating whether the
2207 // operands were swapped.
2208 //
2209 // The class is split into a base class with operand type-independent methods,
2210 // and a derived, templated class, for each type of operand we want to fold
2211 // constants for:
2212 //
2213 // NumericOperandsBase --> NumericOperands<ConstantFloat>
2214 // --> NumericOperands<ConstantDouble>
2215 // --> NumericOperands<ConstantInteger32>
2216 //
2217 // Int32Operands (derived from NumericOperands<ConstantInteger32>) also exposes
2218 // helper methods for emitting inverted/negated immediates.
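// For instance (a sketch of the intent, not literal emitted code), these
// helpers let "a & 0xFFFFFF00" be lowered as "bic a, a, #0xFF" when only the
// inverted immediate is flex-encodable, and "a + (-8)" as "sub a, a, #8" when
// only the negated immediate is.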
2219 class NumericOperandsBase {
2220 NumericOperandsBase() = delete;
2221 NumericOperandsBase(const NumericOperandsBase &) = delete;
2222 NumericOperandsBase &operator=(const NumericOperandsBase &) = delete;
2223
2224 public:
2225 NumericOperandsBase(Operand *S0, Operand *S1)
2226 : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)),
2227 Swapped(Src0 == S1 && S0 != S1) {
2228 assert(Src0 != nullptr);
2229 assert(Src1 != nullptr);
2230 assert(Src0 != Src1 || S0 == S1);
2231 }
2232
2233 bool hasConstOperand() const {
2234 return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1);
2235 }
2236
2237 bool swappedOperands() const { return Swapped; }
2238
2239 Variable *src0R(TargetARM32 *Target) const {
2240 return legalizeToReg(Target, Src0);
2241 }
2242
2243 Variable *unswappedSrc0R(TargetARM32 *Target) const {
2244 return legalizeToReg(Target, Swapped ? Src1 : Src0);
2245 }
2246
2247 Operand *src1RF(TargetARM32 *Target) const {
2248 return legalizeToRegOrFlex(Target, Src1);
2249 }
2250
2251 Variable *unswappedSrc1R(TargetARM32 *Target) const {
2252 return legalizeToReg(Target, Swapped ? Src0 : Src1);
2253 }
2254
2255 Operand *src1() const { return Src1; }
2256
2257 protected:
2258 Operand *const Src0;
2259 Operand *const Src1;
2260 const bool Swapped;
2261
2262 static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) {
2263 return Target->legalizeToReg(Src);
2264 }
2265
2266 static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) {
2267 return Target->legalize(Src,
2268 TargetARM32::Legal_Reg | TargetARM32::Legal_Flex);
2269 }
2270
2271 private:
2272 static Operand *NonConstOperand(Operand *S0, Operand *S1) {
2273 if (!llvm::isa<Constant>(S0))
2274 return S0;
2275 if (!llvm::isa<Constant>(S1))
2276 return S1;
2277 if (llvm::isa<ConstantRelocatable>(S1) &&
2278 !llvm::isa<ConstantRelocatable>(S0))
2279 return S1;
2280 return S0;
2281 }
2282
2283 static Operand *ConstOperand(Operand *S0, Operand *S1) {
2284 if (!llvm::isa<Constant>(S0))
2285 return S1;
2286 if (!llvm::isa<Constant>(S1))
2287 return S0;
2288 if (llvm::isa<ConstantRelocatable>(S1) &&
2289 !llvm::isa<ConstantRelocatable>(S0))
2290 return S0;
2291 return S1;
2292 }
2293 };
2294
2295 template <typename C> class NumericOperands : public NumericOperandsBase {
2296 NumericOperands() = delete;
2297 NumericOperands(const NumericOperands &) = delete;
2298 NumericOperands &operator=(const NumericOperands &) = delete;
2299
2300 public:
2301 NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) {
2302 assert(!hasConstOperand() || llvm::isa<C>(this->Src1));
2303 }
2304
2305 typename C::PrimType getConstantValue() const {
2306 return llvm::cast<C>(Src1)->getValue();
2307 }
2308 };
2309
2310 using FloatOperands = NumericOperands<ConstantFloat>;
2311 using DoubleOperands = NumericOperands<ConstantDouble>;
2312
2313 class Int32Operands : public NumericOperands<ConstantInteger32> {
2314 Int32Operands() = delete;
2315 Int32Operands(const Int32Operands &) = delete;
2316 Int32Operands &operator=(const Int32Operands &) = delete;
2317
2318 public:
2319 Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {}
2320
2321 Operand *unswappedSrc1RShAmtImm(TargetARM32 *Target) const {
2322 if (!swappedOperands() && hasConstOperand()) {
2323 return Target->shAmtImm(getConstantValue() & 0x1F);
2324 }
2325 return legalizeToReg(Target, Swapped ? Src0 : Src1);
2326 }
2327
2328 bool isSrc1ImmediateZero() const {
2329 if (!swappedOperands() && hasConstOperand()) {
2330 return getConstantValue() == 0;
2331 }
2332 return false;
2333 }
2334
2335 bool immediateIsFlexEncodable() const {
2336 uint32_t Rotate, Imm8;
2337 return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8);
2338 }
2339
2340 bool negatedImmediateIsFlexEncodable() const {
2341 uint32_t Rotate, Imm8;
2342 return OperandARM32FlexImm::canHoldImm(
2343 -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8);
2344 }
2345
2346 Operand *negatedSrc1F(TargetARM32 *Target) const {
2347 return legalizeToRegOrFlex(Target,
2348 Target->getCtx()->getConstantInt32(
2349 -static_cast<int32_t>(getConstantValue())));
2350 }
2351
2352 bool invertedImmediateIsFlexEncodable() const {
2353 uint32_t Rotate, Imm8;
2354 return OperandARM32FlexImm::canHoldImm(
2355 ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8);
2356 }
2357
2358 Operand *invertedSrc1F(TargetARM32 *Target) const {
2359 return legalizeToRegOrFlex(Target,
2360 Target->getCtx()->getConstantInt32(
2361 ~static_cast<uint32_t>(getConstantValue())));
2362 }
2363 };
2364 } // end of anonymous namespace
2365
2366 void TargetARM32::preambleDivRem(const InstCall *Instr) {
2367 Operand *Src1 = Instr->getArg(1);
2368
2369 switch (Src1->getType()) {
2370 default:
2371 llvm::report_fatal_error("Invalid type for idiv.");
2372 case IceType_i64: {
2373 if (auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2374 if (C->getValue() == 0) {
2375 _trap();
2376 return;
2377 }
2378 }
2379 div0Check(IceType_i64, loOperand(Src1), hiOperand(Src1));
2380 return;
2381 }
2382 case IceType_i32: {
2383 // Src0 and Src1 have already been appropriately extended to an i32, so we
2384 // don't check for i8 and i16.
2385 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2386 if (C->getValue() == 0) {
2387 _trap();
2388 return;
2389 }
2390 }
2391 div0Check(IceType_i32, Src1, nullptr);
2392 return;
2393 }
2394 }
2395 }
2396
2397 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
2398 Variable *Dest, Operand *Src0,
2399 Operand *Src1) {
2400 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
2401 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
2402 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
2403 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
2404
2405 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2406 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2407 Variable *T_Lo = makeReg(DestLo->getType());
2408 Variable *T_Hi = makeReg(DestHi->getType());
2409
2410 switch (Op) {
2411 case InstArithmetic::_num:
2412 llvm::report_fatal_error("Unknown arithmetic operator");
2413 return;
2414 case InstArithmetic::Add: {
2415 Variable *Src0LoR = SrcsLo.src0R(this);
2416 Operand *Src1LoRF = SrcsLo.src1RF(this);
2417 Variable *Src0HiR = SrcsHi.src0R(this);
2418 Operand *Src1HiRF = SrcsHi.src1RF(this);
2419 _adds(T_Lo, Src0LoR, Src1LoRF);
2420 _mov(DestLo, T_Lo);
2421 _adc(T_Hi, Src0HiR, Src1HiRF);
2422 _mov(DestHi, T_Hi);
2423 return;
2424 }
2425 case InstArithmetic::And: {
2426 Variable *Src0LoR = SrcsLo.src0R(this);
2427 Operand *Src1LoRF = SrcsLo.src1RF(this);
2428 Variable *Src0HiR = SrcsHi.src0R(this);
2429 Operand *Src1HiRF = SrcsHi.src1RF(this);
2430 _and(T_Lo, Src0LoR, Src1LoRF);
2431 _mov(DestLo, T_Lo);
2432 _and(T_Hi, Src0HiR, Src1HiRF);
2433 _mov(DestHi, T_Hi);
2434 return;
2435 }
2436 case InstArithmetic::Or: {
2437 Variable *Src0LoR = SrcsLo.src0R(this);
2438 Operand *Src1LoRF = SrcsLo.src1RF(this);
2439 Variable *Src0HiR = SrcsHi.src0R(this);
2440 Operand *Src1HiRF = SrcsHi.src1RF(this);
2441 _orr(T_Lo, Src0LoR, Src1LoRF);
2442 _mov(DestLo, T_Lo);
2443 _orr(T_Hi, Src0HiR, Src1HiRF);
2444 _mov(DestHi, T_Hi);
2445 return;
2446 }
2447 case InstArithmetic::Xor: {
2448 Variable *Src0LoR = SrcsLo.src0R(this);
2449 Operand *Src1LoRF = SrcsLo.src1RF(this);
2450 Variable *Src0HiR = SrcsHi.src0R(this);
2451 Operand *Src1HiRF = SrcsHi.src1RF(this);
2452 _eor(T_Lo, Src0LoR, Src1LoRF);
2453 _mov(DestLo, T_Lo);
2454 _eor(T_Hi, Src0HiR, Src1HiRF);
2455 _mov(DestHi, T_Hi);
2456 return;
2457 }
2458 case InstArithmetic::Sub: {
2459 Variable *Src0LoR = SrcsLo.src0R(this);
2460 Operand *Src1LoRF = SrcsLo.src1RF(this);
2461 Variable *Src0HiR = SrcsHi.src0R(this);
2462 Operand *Src1HiRF = SrcsHi.src1RF(this);
2463 if (SrcsLo.swappedOperands()) {
2464 _rsbs(T_Lo, Src0LoR, Src1LoRF);
2465 _mov(DestLo, T_Lo);
2466 _rsc(T_Hi, Src0HiR, Src1HiRF);
2467 _mov(DestHi, T_Hi);
2468 } else {
2469 _subs(T_Lo, Src0LoR, Src1LoRF);
2470 _mov(DestLo, T_Lo);
2471 _sbc(T_Hi, Src0HiR, Src1HiRF);
2472 _mov(DestHi, T_Hi);
2473 }
2474 return;
2475 }
2476 case InstArithmetic::Mul: {
2477 // GCC 4.8 does:
2478 // a=b*c ==>
2479 // t_acc =(mul) (b.lo * c.hi)
2480 // t_acc =(mla) (c.lo * b.hi) + t_acc
2481 // t.hi,t.lo =(umull) b.lo * c.lo
2482 // t.hi += t_acc
2483 // a.lo = t.lo
2484 // a.hi = t.hi
2485 //
2486 // LLVM does:
2487 // t.hi,t.lo =(umull) b.lo * c.lo
2488 // t.hi =(mla) (b.lo * c.hi) + t.hi
2489 // t.hi =(mla) (b.hi * c.lo) + t.hi
2490 // a.lo = t.lo
2491 // a.hi = t.hi
2492 //
2493 // LLVM's lowering has fewer instructions, but more register pressure:
2494 // t.lo is live from beginning to end, while GCC delays the two-dest
2495 // instruction till the end, and kills c.hi immediately.
2496 Variable *T_Acc = makeReg(IceType_i32);
2497 Variable *T_Acc1 = makeReg(IceType_i32);
2498 Variable *T_Hi1 = makeReg(IceType_i32);
2499 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2500 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2501 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2502 Variable *Src1RHi = SrcsHi.unswappedSrc1R(this);
2503 _mul(T_Acc, Src0RLo, Src1RHi);
2504 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
2505 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
2506 _add(T_Hi, T_Hi1, T_Acc1);
2507 _mov(DestLo, T_Lo);
2508 _mov(DestHi, T_Hi);
2509 return;
2510 }
2511 case InstArithmetic::Shl: {
2512 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2513 Variable *Src0RLo = SrcsLo.src0R(this);
2514 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2515 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
2516 if (ShAmtImm == 0) {
2517 _mov(DestLo, Src0RLo);
2518 _mov(DestHi, SrcsHi.src0R(this));
2519 return;
2520 }
2521
2522 if (ShAmtImm >= 32) {
2523 if (ShAmtImm == 32) {
2524 _mov(DestHi, Src0RLo);
2525 } else {
2526 Operand *ShAmtOp = shAmtImm(ShAmtImm - 32);
2527 _lsl(T_Hi, Src0RLo, ShAmtOp);
2528 _mov(DestHi, T_Hi);
2529 }
2530
2531 Operand *_0 =
2532 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2533 _mov(T_Lo, _0);
2534 _mov(DestLo, T_Lo);
2535 return;
2536 }
2537
2538 Variable *Src0RHi = SrcsHi.src0R(this);
2539 Operand *ShAmtOp = shAmtImm(ShAmtImm);
2540 Operand *ComplShAmtOp = shAmtImm(32 - ShAmtImm);
2541 _lsl(T_Hi, Src0RHi, ShAmtOp);
2542 _orr(T_Hi, T_Hi,
2543 OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
2544 OperandARM32::LSR, ComplShAmtOp));
2545 _mov(DestHi, T_Hi);
2546
2547 _lsl(T_Lo, Src0RLo, ShAmtOp);
2548 _mov(DestLo, T_Lo);
2549 return;
2550 }
2551
2552 // a=b<<c ==>
2553 // pnacl-llc does:
2554 // mov t_b.lo, b.lo
2555 // mov t_b.hi, b.hi
2556 // mov t_c.lo, c.lo
2557 // rsb T0, t_c.lo, #32
2558 // lsr T1, t_b.lo, T0
2559 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
2560 // sub T2, t_c.lo, #32
2561 // cmp T2, #0
2562 // lslge t_a.hi, t_b.lo, T2
2563 // lsl t_a.lo, t_b.lo, t_c.lo
2564 // mov a.lo, t_a.lo
2565 // mov a.hi, t_a.hi
2566 //
2567 // GCC 4.8 does:
2568 // sub t_c1, c.lo, #32
2569 // lsl t_hi, b.hi, c.lo
2570 // orr t_hi, t_hi, b.lo, lsl t_c1
2571 // rsb t_c2, c.lo, #32
2572 // orr t_hi, t_hi, b.lo, lsr t_c2
2573 // lsl t_lo, b.lo, c.lo
2574 // a.lo = t_lo
2575 // a.hi = t_hi
2576 //
2577 // These are incompatible, therefore we mimic pnacl-llc.
2578 // Can be strength-reduced for constant-shifts, but we don't do that for
2579 // now.
2580 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
2581 // ARM, shifts only take the lower 8 bits of the shift register, and
2582 // saturate to the range 0-32, so the negative value will saturate to 32.
2583 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2584 Operand *_0 =
2585 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2586 Variable *T0 = makeReg(IceType_i32);
2587 Variable *T1 = makeReg(IceType_i32);
2588 Variable *T2 = makeReg(IceType_i32);
2589 Variable *TA_Hi = makeReg(IceType_i32);
2590 Variable *TA_Lo = makeReg(IceType_i32);
2591 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2592 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2593 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2594 _rsb(T0, Src1RLo, _32);
2595 _lsr(T1, Src0RLo, T0);
2596 _orr(TA_Hi, T1,
2597 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2598 OperandARM32::LSL, Src1RLo));
2599 _sub(T2, Src1RLo, _32);
2600 _cmp(T2, _0);
2601 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
2602 _set_dest_redefined();
2603 _lsl(TA_Lo, Src0RLo, Src1RLo);
2604 _mov(DestLo, TA_Lo);
2605 _mov(DestHi, TA_Hi);
2606 return;
2607 }
2608 case InstArithmetic::Lshr:
2609 case InstArithmetic::Ashr: {
2610 const bool ASR = Op == InstArithmetic::Ashr;
2611 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2612 Variable *Src0RHi = SrcsHi.src0R(this);
2613 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2614 const int32_t ShAmt = SrcsLo.getConstantValue() & 0x3F;
2615 if (ShAmt == 0) {
2616 _mov(DestHi, Src0RHi);
2617 _mov(DestLo, SrcsLo.src0R(this));
2618 return;
2619 }
2620
2621 if (ShAmt >= 32) {
2622 if (ShAmt == 32) {
2623 _mov(DestLo, Src0RHi);
2624 } else {
2625 Operand *ShAmtImm = shAmtImm(ShAmt - 32);
2626 if (ASR) {
2627 _asr(T_Lo, Src0RHi, ShAmtImm);
2628 } else {
2629 _lsr(T_Lo, Src0RHi, ShAmtImm);
2630 }
2631 _mov(DestLo, T_Lo);
2632 }
2633
2634 if (ASR) {
2635 Operand *_31 = shAmtImm(31);
2636 _asr(T_Hi, Src0RHi, _31);
2637 } else {
2638 Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
2639 Legal_Reg | Legal_Flex);
2640 _mov(T_Hi, _0);
2641 }
2642 _mov(DestHi, T_Hi);
2643 return;
2644 }
2645
2646 Variable *Src0RLo = SrcsLo.src0R(this);
2647 Operand *ShAmtImm = shAmtImm(ShAmt);
2648 Operand *ComplShAmtImm = shAmtImm(32 - ShAmt);
2649 _lsr(T_Lo, Src0RLo, ShAmtImm);
2650 _orr(T_Lo, T_Lo,
2651 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2652 OperandARM32::LSL, ComplShAmtImm));
2653 _mov(DestLo, T_Lo);
2654
2655 if (ASR) {
2656 _asr(T_Hi, Src0RHi, ShAmtImm);
2657 } else {
2658 _lsr(T_Hi, Src0RHi, ShAmtImm);
2659 }
2660 _mov(DestHi, T_Hi);
2661 return;
2662 }
2663
2664 // a=b>>c
2665 // pnacl-llc does:
2666 // mov t_b.lo, b.lo
2667 // mov t_b.hi, b.hi
2668 // mov t_c.lo, c.lo
2669 // lsr T0, t_b.lo, t_c.lo
2670 // rsb T1, t_c.lo, #32
2671 // orr t_a.lo, T0, t_b.hi, lsl T1
2672 // sub T2, t_c.lo, #32
2673 // cmp T2, #0
2674 // [al]srge t_a.lo, t_b.hi, T2
2675 // [al]sr t_a.hi, t_b.hi, t_c.lo
2676 // mov a.lo, t_a.lo
2677 // mov a.hi, t_a.hi
2678 //
2679 // GCC 4.8 does (lsr):
2680 // rsb t_c1, c.lo, #32
2681 // lsr t_lo, b.lo, c.lo
2682 // orr t_lo, t_lo, b.hi, lsl t_c1
2683 // sub t_c2, c.lo, #32
2684 // orr t_lo, t_lo, b.hi, lsr t_c2
2685 // lsr t_hi, b.hi, c.lo
2686 // mov a.lo, t_lo
2687 // mov a.hi, t_hi
2688 //
2689 // These are incompatible, therefore we mimic pnacl-llc.
2690 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2691 Operand *_0 =
2692 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2693 Variable *T0 = makeReg(IceType_i32);
2694 Variable *T1 = makeReg(IceType_i32);
2695 Variable *T2 = makeReg(IceType_i32);
2696 Variable *TA_Lo = makeReg(IceType_i32);
2697 Variable *TA_Hi = makeReg(IceType_i32);
2698 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2699 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2700 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2701 _lsr(T0, Src0RLo, Src1RLo);
2702 _rsb(T1, Src1RLo, _32);
2703 _orr(TA_Lo, T0,
2704 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2705 OperandARM32::LSL, T1));
2706 _sub(T2, Src1RLo, _32);
2707 _cmp(T2, _0);
2708 if (ASR) {
2709 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2710 _set_dest_redefined();
2711 _asr(TA_Hi, Src0RHi, Src1RLo);
2712 } else {
2713 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2714 _set_dest_redefined();
2715 _lsr(TA_Hi, Src0RHi, Src1RLo);
2716 }
2717 _mov(DestLo, TA_Lo);
2718 _mov(DestHi, TA_Hi);
2719 return;
2720 }
2721 case InstArithmetic::Fadd:
2722 case InstArithmetic::Fsub:
2723 case InstArithmetic::Fmul:
2724 case InstArithmetic::Fdiv:
2725 case InstArithmetic::Frem:
2726 llvm::report_fatal_error("FP instruction with i64 type");
2727 return;
2728 case InstArithmetic::Udiv:
2729 case InstArithmetic::Sdiv:
2730 case InstArithmetic::Urem:
2731 case InstArithmetic::Srem:
2732 llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
2733 "should have already been handled before");
2734 return;
2735 }
2736 }
2737
2738 namespace {
2739 // StrengthReduction is a namespace with the strength reduction machinery. The
2740 // entry point is the StrengthReduction::tryToOptimize method. It returns true
2741 // if the optimization can be performed, and false otherwise.
2742 //
2743 // If the optimization can be performed, tryToOptimize sets its NumOperations
2744 // parameter to the number of shifts that are needed to perform the
2745 // multiplication; and it sets the Operations parameter with <ShAmt, AddOrSub>
2746 // tuples that describe how to materialize the multiplication.
2747 //
2748 // The algorithm finds contiguous 1s in the Multiplication source, and uses one
2749 // or two shifts to materialize it. A sequence of 1s, e.g.,
2750 //
2751 // M N
2752 // ...00000000000011111...111110000000...
2753 //
2754 // is materializable with (1 << (M + 1)) - (1 << N):
2755 //
2756 // ...00000000000100000...000000000000... [1 << (M + 1)]
2757 // ...00000000000000000...000010000000... (-) [1 << N]
2758 // --------------------------------------
2759 // ...00000000000011111...111110000000...
2760 //
2761 // A single set bit is materialized with just a left shift.
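// A concrete instance of the identity above (illustrative only): the run of 1s
// in 0xF0 spans bits N = 4 through M = 7, and indeed
// (1 << (M + 1)) - (1 << N) == 0xF0.
static_assert((1u << 8) - (1u << 4) == 0xF0u,
              "strength-reduction identity sanity check");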
2762 namespace StrengthReduction {
2763 enum AggregationOperation {
2764 AO_Invalid,
2765 AO_Add,
2766 AO_Sub,
2767 };
2768
2769 // AggregationElement is a glorified <ShAmt, AddOrSub> tuple.
2770 class AggregationElement {
2771 AggregationElement(const AggregationElement &) = delete;
2772
2773 public:
2774 AggregationElement() = default;
2775 AggregationElement &operator=(const AggregationElement &) = default;
2776 AggregationElement(AggregationOperation Op, uint32_t ShAmt)
2777 : Op(Op), ShAmt(ShAmt) {}
2778
2779 Operand *createShiftedOperand(Cfg *Func, Variable *OpR) const {
2780 assert(OpR->mustHaveReg());
2781 if (ShAmt == 0) {
2782 return OpR;
2783 }
2784 return OperandARM32FlexReg::create(
2785 Func, IceType_i32, OpR, OperandARM32::LSL,
2786 OperandARM32ShAmtImm::create(
2787 Func, llvm::cast<ConstantInteger32>(
2788 Func->getContext()->getConstantInt32(ShAmt))));
2789 }
2790
2791 bool aggregateWithAdd() const {
2792 switch (Op) {
2793 case AO_Invalid:
2794 llvm::report_fatal_error("Invalid Strength Reduction Operations.");
2795 case AO_Add:
2796 return true;
2797 case AO_Sub:
2798 return false;
2799 }
2800 llvm_unreachable("(silence g++ warning)");
2801 }
2802
2803 uint32_t shAmt() const { return ShAmt; }
2804
2805 private:
2806 AggregationOperation Op = AO_Invalid;
2807 uint32_t ShAmt;
2808 };
2809
2810 // [RangeStart, RangeEnd] is a range of 1s in Src.
2811 template <std::size_t N>
2812 bool addOperations(uint32_t RangeStart, uint32_t RangeEnd, SizeT *NumOperations,
2813 std::array<AggregationElement, N> *Operations) {
2814 assert(*NumOperations < N);
2815 if (RangeStart == RangeEnd) {
2816 // Single bit set:
2817 // Src : 0...00010...
2818 // RangeStart : ^
2819 // RangeEnd : ^
2820 // NegSrc : 0...00001...
2821 (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart);
2822 ++(*NumOperations);
2823 return true;
2824 }
2825
2826 // Sequence of 1s: (two operations required.)
2827 // Src : 0...00011...110...
2828 // RangeStart : ^
2829 // RangeEnd : ^
2830 // NegSrc : 0...00000...001...
2831 if (*NumOperations + 1 >= N) {
2832 return false;
2833 }
2834 (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart + 1);
2835 ++(*NumOperations);
2836 (*Operations)[*NumOperations] = AggregationElement(AO_Sub, RangeEnd);
2837 ++(*NumOperations);
2838 return true;
2839 }
2840
2841 // tryToOptimize scans Src looking for sequences of 1s (including the unitary
2842 // bit 1 surrounded by zeroes).
2843 template <std::size_t N>
2844 bool tryToOptimize(uint32_t Src, SizeT *NumOperations,
2845 std::array<AggregationElement, N> *Operations) {
2846 constexpr uint32_t SrcSizeBits = sizeof(Src) * CHAR_BIT;
2847 uint32_t NegSrc = ~Src;
2848
2849 *NumOperations = 0;
2850 while (Src != 0 && *NumOperations < N) {
2851 // Each step of the algorithm:
2852 // * finds L, the last bit set in Src;
2853 // * clears all the upper bits in NegSrc up to bit L;
2854 // * finds nL, the last bit set in NegSrc;
2855 // * clears all the upper bits in Src up to bit nL;
2856 //
2857 // if L == nL + 1, then a unitary 1 was found in Src. Otherwise, a sequence
2858 // of 1s starting at L, and ending at nL + 1, was found.
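// Example trace (illustrative), Src = 0b0110:
//   L = 2; NegSrc masked down to 0b01, so nL = 0; Src is cleared to 0; the
//   run [2, 1] is recorded as (1 << 3) - (1 << 1) = 8 - 2 = 6.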
2859 const uint32_t SrcLastBitSet = llvm::findLastSet(Src);
2860 const uint32_t NegSrcClearMask =
2861 (SrcLastBitSet == 0) ? 0
2862 : (0xFFFFFFFFu) >> (SrcSizeBits - SrcLastBitSet);
2863 NegSrc &= NegSrcClearMask;
2864 if (NegSrc == 0) {
2865 if (addOperations(SrcLastBitSet, 0, NumOperations, Operations)) {
2866 return true;
2867 }
2868 return false;
2869 }
2870 const uint32_t NegSrcLastBitSet = llvm::findLastSet(NegSrc);
2871 assert(NegSrcLastBitSet < SrcLastBitSet);
2872 const uint32_t SrcClearMask =
2873 (NegSrcLastBitSet == 0)
2874 ? 0
2875 : (0xFFFFFFFFu) >> (SrcSizeBits - NegSrcLastBitSet);
2876 Src &= SrcClearMask;
2877 if (!addOperations(SrcLastBitSet, NegSrcLastBitSet + 1, NumOperations,
2878 Operations)) {
2879 return false;
2880 }
2881 }
2882
2883 return Src == 0;
2884 }
2885 } // end of namespace StrengthReduction
2886 } // end of anonymous namespace
2887
2888 void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
2889 Variable *Dest = Instr->getDest();
2890
2891 if (Dest->isRematerializable()) {
2892 Context.insert<InstFakeDef>(Dest);
2893 return;
2894 }
2895
2896 Type DestTy = Dest->getType();
2897 if (DestTy == IceType_i1) {
2898 lowerInt1Arithmetic(Instr);
2899 return;
2900 }
2901
2902 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
2903 Operand *Src1 = legalizeUndef(Instr->getSrc(1));
2904 if (DestTy == IceType_i64) {
2905 lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1);
2906 return;
2907 }
2908
2909 if (isVectorType(DestTy)) {
2910 switch (Instr->getOp()) {
2911 default:
2912 UnimplementedLoweringError(this, Instr);
2913 return;
2914 // Explicitly allow vector instructions we have implemented/enabled.
2915 case InstArithmetic::Add:
2916 case InstArithmetic::And:
2917 case InstArithmetic::Ashr:
2918 case InstArithmetic::Fadd:
2919 case InstArithmetic::Fmul:
2920 case InstArithmetic::Fsub:
2921 case InstArithmetic::Lshr:
2922 case InstArithmetic::Mul:
2923 case InstArithmetic::Or:
2924 case InstArithmetic::Shl:
2925 case InstArithmetic::Sub:
2926 case InstArithmetic::Xor:
2927 break;
2928 }
2929 }
2930
2931 Variable *T = makeReg(DestTy);
2932
2933 // * Handle div/rem separately. They require a non-legalized Src1 to inspect
2934 // whether or not Src1 is a non-zero constant. Once legalized it is more
2935 // difficult to determine (constant may be moved to a register).
2936 // * Handle floating point arithmetic separately: they require Src1 to be
2937 // legalized to a register.
2938 switch (Instr->getOp()) {
2939 default:
2940 break;
2941 case InstArithmetic::Udiv: {
2942 constexpr bool NotRemainder = false;
2943 Variable *Src0R = legalizeToReg(Src0);
2944 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
2945 NotRemainder);
2946 return;
2947 }
2948 case InstArithmetic::Sdiv: {
2949 constexpr bool NotRemainder = false;
2950 Variable *Src0R = legalizeToReg(Src0);
2951 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
2952 NotRemainder);
2953 return;
2954 }
2955 case InstArithmetic::Urem: {
2956 constexpr bool IsRemainder = true;
2957 Variable *Src0R = legalizeToReg(Src0);
2958 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
2959 IsRemainder);
2960 return;
2961 }
2962 case InstArithmetic::Srem: {
2963 constexpr bool IsRemainder = true;
2964 Variable *Src0R = legalizeToReg(Src0);
2965 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
2966 IsRemainder);
2967 return;
2968 }
2969 case InstArithmetic::Frem: {
2970 if (!isScalarFloatingType(DestTy)) {
2971 llvm::report_fatal_error("Unexpected type when lowering frem.");
2972 }
2973 llvm::report_fatal_error("Frem should have already been lowered.");
2974 }
2975 case InstArithmetic::Fadd: {
2976 Variable *Src0R = legalizeToReg(Src0);
2977 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2978 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2979 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2980 _vmla(Src0R, Src1R, Src2R);
2981 _mov(Dest, Src0R);
2982 return;
2983 }
2984
2985 Variable *Src1R = legalizeToReg(Src1);
2986 _vadd(T, Src0R, Src1R);
2987 _mov(Dest, T);
2988 return;
2989 }
2990 case InstArithmetic::Fsub: {
2991 Variable *Src0R = legalizeToReg(Src0);
2992 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
2993 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
2994 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
2995 _vmls(Src0R, Src1R, Src2R);
2996 _mov(Dest, Src0R);
2997 return;
2998 }
2999 Variable *Src1R = legalizeToReg(Src1);
3000 _vsub(T, Src0R, Src1R);
3001 _mov(Dest, T);
3002 return;
3003 }
3004 case InstArithmetic::Fmul: {
3005 Variable *Src0R = legalizeToReg(Src0);
3006 Variable *Src1R = legalizeToReg(Src1);
3007 _vmul(T, Src0R, Src1R);
3008 _mov(Dest, T);
3009 return;
3010 }
3011 case InstArithmetic::Fdiv: {
3012 Variable *Src0R = legalizeToReg(Src0);
3013 Variable *Src1R = legalizeToReg(Src1);
3014 _vdiv(T, Src0R, Src1R);
3015 _mov(Dest, T);
3016 return;
3017 }
3018 }
3019
3020 // Handle everything else here.
3021 Int32Operands Srcs(Src0, Src1);
3022 switch (Instr->getOp()) {
3023 case InstArithmetic::_num:
3024 llvm::report_fatal_error("Unknown arithmetic operator");
3025 return;
3026 case InstArithmetic::Add: {
3027 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3028 assert(!isVectorType(DestTy));
3029 Variable *Src0R = legalizeToReg(Src0);
3030 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3031 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3032 _mla(T, Src1R, Src2R, Src0R);
3033 _mov(Dest, T);
3034 return;
3035 }
3036
3037 if (Srcs.hasConstOperand()) {
3038 if (!Srcs.immediateIsFlexEncodable() &&
3039 Srcs.negatedImmediateIsFlexEncodable()) {
3040 assert(!isVectorType(DestTy));
3041 Variable *Src0R = Srcs.src0R(this);
3042 Operand *Src1F = Srcs.negatedSrc1F(this);
3043 if (!Srcs.swappedOperands()) {
3044 _sub(T, Src0R, Src1F);
3045 } else {
3046 _rsb(T, Src0R, Src1F);
3047 }
3048 _mov(Dest, T);
3049 return;
3050 }
3051 }
3052 Variable *Src0R = Srcs.src0R(this);
3053 if (isVectorType(DestTy)) {
3054 Variable *Src1R = legalizeToReg(Src1);
3055 _vadd(T, Src0R, Src1R);
3056 } else {
3057 Operand *Src1RF = Srcs.src1RF(this);
3058 _add(T, Src0R, Src1RF);
3059 }
3060 _mov(Dest, T);
3061 return;
3062 }
3063 case InstArithmetic::And: {
3064 if (Srcs.hasConstOperand()) {
3065 if (!Srcs.immediateIsFlexEncodable() &&
3066 Srcs.invertedImmediateIsFlexEncodable()) {
3067 Variable *Src0R = Srcs.src0R(this);
3068 Operand *Src1F = Srcs.invertedSrc1F(this);
3069 _bic(T, Src0R, Src1F);
3070 _mov(Dest, T);
3071 return;
3072 }
3073 }
3074 assert(isIntegerType(DestTy));
3075 Variable *Src0R = Srcs.src0R(this);
3076 if (isVectorType(DestTy)) {
3077 Variable *Src1R = legalizeToReg(Src1);
3078 _vand(T, Src0R, Src1R);
3079 } else {
3080 Operand *Src1RF = Srcs.src1RF(this);
3081 _and(T, Src0R, Src1RF);
3082 }
3083 _mov(Dest, T);
3084 return;
3085 }
3086 case InstArithmetic::Or: {
3087 Variable *Src0R = Srcs.src0R(this);
3088 assert(isIntegerType(DestTy));
3089 if (isVectorType(DestTy)) {
3090 Variable *Src1R = legalizeToReg(Src1);
3091 _vorr(T, Src0R, Src1R);
3092 } else {
3093 Operand *Src1RF = Srcs.src1RF(this);
3094 _orr(T, Src0R, Src1RF);
3095 }
3096 _mov(Dest, T);
3097 return;
3098 }
3099 case InstArithmetic::Xor: {
3100 Variable *Src0R = Srcs.src0R(this);
3101 assert(isIntegerType(DestTy));
3102 if (isVectorType(DestTy)) {
3103 Variable *Src1R = legalizeToReg(Src1);
3104 _veor(T, Src0R, Src1R);
3105 } else {
3106 Operand *Src1RF = Srcs.src1RF(this);
3107 _eor(T, Src0R, Src1RF);
3108 }
3109 _mov(Dest, T);
3110 return;
3111 }
3112 case InstArithmetic::Sub: {
3113 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3114 assert(!isVectorType(DestTy));
3115 Variable *Src0R = legalizeToReg(Src0);
3116 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3117 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3118 _mls(T, Src1R, Src2R, Src0R);
3119 _mov(Dest, T);
3120 return;
3121 }
3122
3123 if (Srcs.hasConstOperand()) {
3124 assert(!isVectorType(DestTy));
3125 if (Srcs.immediateIsFlexEncodable()) {
3126 Variable *Src0R = Srcs.src0R(this);
3127 Operand *Src1RF = Srcs.src1RF(this);
3128 if (Srcs.swappedOperands()) {
3129 _rsb(T, Src0R, Src1RF);
3130 } else {
3131 _sub(T, Src0R, Src1RF);
3132 }
3133 _mov(Dest, T);
3134 return;
3135 }
3136 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
3137 Variable *Src0R = Srcs.src0R(this);
3138 Operand *Src1F = Srcs.negatedSrc1F(this);
3139 _add(T, Src0R, Src1F);
3140 _mov(Dest, T);
3141 return;
3142 }
3143 }
3144 Variable *Src0R = Srcs.unswappedSrc0R(this);
3145 Variable *Src1R = Srcs.unswappedSrc1R(this);
3146 if (isVectorType(DestTy)) {
3147 _vsub(T, Src0R, Src1R);
3148 } else {
3149 _sub(T, Src0R, Src1R);
3150 }
3151 _mov(Dest, T);
3152 return;
3153 }
3154 case InstArithmetic::Mul: {
3155 const bool OptM1 = Func->getOptLevel() == Opt_m1;
3156 if (!OptM1 && Srcs.hasConstOperand()) {
3157 constexpr std::size_t MaxShifts = 4;
3158 std::array<StrengthReduction::AggregationElement, MaxShifts> Shifts;
3159 SizeT NumOperations;
3160 int32_t Const = Srcs.getConstantValue();
3161 const bool Invert = Const < 0;
3162 const bool MultiplyByZero = Const == 0;
3163 Operand *_0 =
3164 legalize(Ctx->getConstantZero(DestTy), Legal_Reg | Legal_Flex);
3165
3166 if (MultiplyByZero) {
3167 _mov(T, _0);
3168 _mov(Dest, T);
3169 return;
3170 }
3171
3172 if (Invert) {
3173 Const = -Const;
3174 }
3175
3176 if (StrengthReduction::tryToOptimize(Const, &NumOperations, &Shifts)) {
3177 assert(NumOperations >= 1);
3178 Variable *Src0R = Srcs.src0R(this);
3179 int32_t Start;
3180 int32_t End;
3181 if (NumOperations == 1 || Shifts[NumOperations - 1].shAmt() != 0) {
3182 // Multiplication by a power of 2 (NumOperations == 1); or
3183           // Multiplication by an even number that is not a power of 2.
3184 Start = 1;
3185 End = NumOperations;
3186 assert(Shifts[0].aggregateWithAdd());
3187 _lsl(T, Src0R, shAmtImm(Shifts[0].shAmt()));
3188 } else {
3189           // Multiplication by an odd number. Put the free barrel shifter to
3190           // good use.
3191 Start = 0;
3192 End = NumOperations - 2;
3193 const StrengthReduction::AggregationElement &Last =
3194 Shifts[NumOperations - 1];
3195 const StrengthReduction::AggregationElement &SecondToLast =
3196 Shifts[NumOperations - 2];
3197 if (!Last.aggregateWithAdd()) {
3198 assert(SecondToLast.aggregateWithAdd());
3199 _rsb(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3200 } else if (!SecondToLast.aggregateWithAdd()) {
3201 assert(Last.aggregateWithAdd());
3202 _sub(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3203 } else {
3204 _add(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3205 }
3206 }
3207
3208 // Odd numbers : S E I I
3209 // +---+---+---+---+---+---+ ... +---+---+---+---+
3210 // Shifts = | | | | | | | ... | | | | |
3211 // +---+---+---+---+---+---+ ... +---+---+---+---+
3212 // Even numbers: I S E
3213 //
3214 // S: Start; E: End; I: Init
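        // Illustrative example (an assumed decomposition; the exact aggregation
        // order is whatever StrengthReduction::tryToOptimize produces): x * 5
        // (0b101) is odd, so it can start with a single barrel-shifted add,
        //   add T, x, x, lsl #2
        // while x * 10 (0b1010) is even and folds the trailing zero into an
        // initial shift, e.g.
        //   lsl T, x, #1
        //   add T, T, x, lsl #3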
3215 for (int32_t I = Start; I < End; ++I) {
3216 const StrengthReduction::AggregationElement &Current = Shifts[I];
3217 Operand *SrcF = Current.createShiftedOperand(Func, Src0R);
3218 if (Current.aggregateWithAdd()) {
3219 _add(T, T, SrcF);
3220 } else {
3221 _sub(T, T, SrcF);
3222 }
3223 }
3224
3225 if (Invert) {
3226 // T = 0 - T.
3227 _rsb(T, T, _0);
3228 }
3229
3230 _mov(Dest, T);
3231 return;
3232 }
3233 }
3234 Variable *Src0R = Srcs.unswappedSrc0R(this);
3235 Variable *Src1R = Srcs.unswappedSrc1R(this);
3236 if (isVectorType(DestTy)) {
3237 _vmul(T, Src0R, Src1R);
3238 } else {
3239 _mul(T, Src0R, Src1R);
3240 }
3241 _mov(Dest, T);
3242 return;
3243 }
3244 case InstArithmetic::Shl: {
3245 Variable *Src0R = Srcs.unswappedSrc0R(this);
3246 if (!isVectorType(T->getType())) {
3247 if (Srcs.isSrc1ImmediateZero()) {
3248 _mov(T, Src0R);
3249 } else {
3250 Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3251 _lsl(T, Src0R, Src1R);
3252 }
3253 } else {
3254 if (Srcs.hasConstOperand()) {
3255 ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3256 _vshl(T, Src0R, ShAmt);
3257 } else {
3258 auto *Src1R = Srcs.unswappedSrc1R(this);
3259 _vshl(T, Src0R, Src1R)->setSignType(InstARM32::FS_Unsigned);
3260 }
3261 }
3262 _mov(Dest, T);
3263 return;
3264 }
3265 case InstArithmetic::Lshr: {
3266 Variable *Src0R = Srcs.unswappedSrc0R(this);
3267 if (!isVectorType(T->getType())) {
3268 if (DestTy != IceType_i32) {
3269 _uxt(Src0R, Src0R);
3270 }
3271 if (Srcs.isSrc1ImmediateZero()) {
3272 _mov(T, Src0R);
3273 } else {
3274 Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3275 _lsr(T, Src0R, Src1R);
3276 }
3277 } else {
3278 if (Srcs.hasConstOperand()) {
3279 ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3280 _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Unsigned);
3281 } else {
3282 auto *Src1R = Srcs.unswappedSrc1R(this);
3283 auto *Src1RNeg = makeReg(Src1R->getType());
3284 _vneg(Src1RNeg, Src1R);
3285 _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Unsigned);
3286 }
3287 }
3288 _mov(Dest, T);
3289 return;
3290 }
3291 case InstArithmetic::Ashr: {
3292 Variable *Src0R = Srcs.unswappedSrc0R(this);
3293 if (!isVectorType(T->getType())) {
3294 if (DestTy != IceType_i32) {
3295 _sxt(Src0R, Src0R);
3296 }
3297 if (Srcs.isSrc1ImmediateZero()) {
3298 _mov(T, Src0R);
3299 } else {
3300 _asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
3301 }
3302 } else {
3303 if (Srcs.hasConstOperand()) {
3304 ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3305 _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Signed);
3306 } else {
3307 auto *Src1R = Srcs.unswappedSrc1R(this);
3308 auto *Src1RNeg = makeReg(Src1R->getType());
3309 _vneg(Src1RNeg, Src1R);
3310 _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Signed);
3311 }
3312 }
3313 _mov(Dest, T);
3314 return;
3315 }
3316 case InstArithmetic::Udiv:
3317 case InstArithmetic::Sdiv:
3318 case InstArithmetic::Urem:
3319 case InstArithmetic::Srem:
3320 llvm::report_fatal_error(
3321 "Integer div/rem should have been handled earlier.");
3322 return;
3323 case InstArithmetic::Fadd:
3324 case InstArithmetic::Fsub:
3325 case InstArithmetic::Fmul:
3326 case InstArithmetic::Fdiv:
3327 case InstArithmetic::Frem:
3328 llvm::report_fatal_error(
3329 "Floating point arith should have been handled earlier.");
3330 return;
3331 }
3332 }
3333
3334 void TargetARM32::lowerAssign(const InstAssign *Instr) {
3335 Variable *Dest = Instr->getDest();
3336
3337 if (Dest->isRematerializable()) {
3338 Context.insert<InstFakeDef>(Dest);
3339 return;
3340 }
3341
3342 Operand *Src0 = Instr->getSrc(0);
3343 assert(Dest->getType() == Src0->getType());
3344 if (Dest->getType() == IceType_i64) {
3345 Src0 = legalizeUndef(Src0);
3346
3347 Variable *T_Lo = makeReg(IceType_i32);
3348 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3349 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
3350 _mov(T_Lo, Src0Lo);
3351 _mov(DestLo, T_Lo);
3352
3353 Variable *T_Hi = makeReg(IceType_i32);
3354 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3355 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
3356 _mov(T_Hi, Src0Hi);
3357 _mov(DestHi, T_Hi);
3358
3359 return;
3360 }
3361
3362 Operand *NewSrc;
3363 if (Dest->hasReg()) {
3364 // If Dest already has a physical register, then legalize the Src operand
3365 // into a Variable with the same register assignment. This especially
3366 // helps allow the use of Flex operands.
3367 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
3368 } else {
3369 // Dest could be a stack operand. Since we could potentially need to do a
3370 // Store (and store can only have Register operands), legalize this to a
3371 // register.
3372 NewSrc = legalize(Src0, Legal_Reg);
3373 }
3374
3375 if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
3376 NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
3377 }
3378 _mov(Dest, NewSrc);
3379 }
3380
3381 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
3382 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
3383 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
3384 InstARM32Label *NewShortCircuitLabel = nullptr;
3385 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3386
3387 const Inst *Producer = Computations.getProducerOf(Boolean);
3388
3389 if (Producer == nullptr) {
3390     // No producer, no problem: just emit code to perform (Boolean & 1) and
3391 // set the flags register. The branch should be taken if the resulting flags
3392 // indicate a non-zero result.
3393 _tst(legalizeToReg(Boolean), _1);
3394 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3395 }
3396
3397 switch (Producer->getKind()) {
3398 default:
3399 llvm::report_fatal_error("Unexpected producer.");
3400 case Inst::Icmp: {
3401 return ShortCircuitCondAndLabel(
3402 lowerIcmpCond(llvm::cast<InstIcmp>(Producer)));
3403 } break;
3404 case Inst::Fcmp: {
3405 return ShortCircuitCondAndLabel(
3406 lowerFcmpCond(llvm::cast<InstFcmp>(Producer)));
3407 } break;
3408 case Inst::Cast: {
3409 const auto *CastProducer = llvm::cast<InstCast>(Producer);
3410 assert(CastProducer->getCastKind() == InstCast::Trunc);
3411 Operand *Src = CastProducer->getSrc(0);
3412 if (Src->getType() == IceType_i64)
3413 Src = loOperand(Src);
3414 _tst(legalizeToReg(Src), _1);
3415 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3416 } break;
3417 case Inst::Arithmetic: {
3418 const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
3419 switch (ArithProducer->getOp()) {
3420 default:
3421 llvm::report_fatal_error("Unhandled Arithmetic Producer.");
3422 case InstArithmetic::And: {
3423 if (!(ShortCircuitable & SC_And)) {
3424 NewShortCircuitLabel = InstARM32Label::create(Func, this);
3425 }
3426
3427 LowerInt1BranchTarget NewTarget =
3428 TargetFalse.createForLabelOrDuplicate(NewShortCircuitLabel);
3429
3430 ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3431 Producer->getSrc(0), TargetTrue, NewTarget, SC_And);
3432 const CondWhenTrue &Cond = CondAndLabel.Cond;
3433
3434 _br_short_circuit(NewTarget, Cond.invert());
3435
3436 InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3437 if (ShortCircuitLabel != nullptr)
3438 Context.insert(ShortCircuitLabel);
3439
3440 return ShortCircuitCondAndLabel(
3441 lowerInt1ForBranch(Producer->getSrc(1), TargetTrue, NewTarget, SC_All)
3442 .assertNoLabelAndReturnCond(),
3443 NewShortCircuitLabel);
3444 } break;
3445 case InstArithmetic::Or: {
3446 if (!(ShortCircuitable & SC_Or)) {
3447 NewShortCircuitLabel = InstARM32Label::create(Func, this);
3448 }
3449
3450 LowerInt1BranchTarget NewTarget =
3451 TargetTrue.createForLabelOrDuplicate(NewShortCircuitLabel);
3452
3453 ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3454 Producer->getSrc(0), NewTarget, TargetFalse, SC_Or);
3455 const CondWhenTrue &Cond = CondAndLabel.Cond;
3456
3457 _br_short_circuit(NewTarget, Cond);
3458
3459 InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3460 if (ShortCircuitLabel != nullptr)
3461 Context.insert(ShortCircuitLabel);
3462
3463 return ShortCircuitCondAndLabel(lowerInt1ForBranch(Producer->getSrc(1),
3464 NewTarget, TargetFalse,
3465 SC_All)
3466 .assertNoLabelAndReturnCond(),
3467 NewShortCircuitLabel);
3468 } break;
3469 }
3470 }
3471 }
3472 }
3473
3474 void TargetARM32::lowerBr(const InstBr *Instr) {
3475 if (Instr->isUnconditional()) {
3476 _br(Instr->getTargetUnconditional());
3477 return;
3478 }
3479
3480 CfgNode *TargetTrue = Instr->getTargetTrue();
3481 CfgNode *TargetFalse = Instr->getTargetFalse();
3482 ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3483 Instr->getCondition(), LowerInt1BranchTarget(TargetTrue),
3484 LowerInt1BranchTarget(TargetFalse), SC_All);
3485 assert(CondAndLabel.ShortCircuitTarget == nullptr);
3486
3487 const CondWhenTrue &Cond = CondAndLabel.Cond;
3488 if (Cond.WhenTrue1 != CondARM32::kNone) {
3489 assert(Cond.WhenTrue0 != CondARM32::AL);
3490 _br(TargetTrue, Cond.WhenTrue1);
3491 }
3492
3493 switch (Cond.WhenTrue0) {
3494 default:
3495 _br(TargetTrue, TargetFalse, Cond.WhenTrue0);
3496 break;
3497 case CondARM32::kNone:
3498 _br(TargetFalse);
3499 break;
3500 case CondARM32::AL:
3501 _br(TargetTrue);
3502 break;
3503 }
3504 }
3505
3506 void TargetARM32::lowerCall(const InstCall *Instr) {
3507 Operand *CallTarget = Instr->getCallTarget();
3508 if (Instr->isTargetHelperCall()) {
3509 auto TargetHelperPreamble = ARM32HelpersPreamble.find(CallTarget);
3510 if (TargetHelperPreamble != ARM32HelpersPreamble.end()) {
3511 (this->*TargetHelperPreamble->second)(Instr);
3512 }
3513 }
3514 MaybeLeafFunc = false;
3515 NeedsStackAlignment = true;
3516
3517 // Assign arguments to registers and stack. Also reserve stack.
3518 TargetARM32::CallingConv CC;
3519 // Pair of Arg Operand -> GPR number assignments.
3520 llvm::SmallVector<std::pair<Operand *, RegNumT>, NumGPRArgs> GPRArgs;
3521 llvm::SmallVector<std::pair<Operand *, RegNumT>, NumFP32Args> FPArgs;
3522 // Pair of Arg Operand -> stack offset.
3523 llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
3524 size_t ParameterAreaSizeBytes = 0;
3525
3526 // Classify each argument operand according to the location where the
3527 // argument is passed.
3528 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
3529 Operand *Arg = legalizeUndef(Instr->getArg(i));
3530 const Type Ty = Arg->getType();
3531 bool InReg = false;
3532 RegNumT Reg;
3533 if (isScalarIntegerType(Ty)) {
3534 InReg = CC.argInGPR(Ty, &Reg);
3535 } else {
3536 InReg = CC.argInVFP(Ty, &Reg);
3537 }
3538
3539 if (!InReg) {
3540 ParameterAreaSizeBytes =
3541 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
3542 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
3543 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
3544 continue;
3545 }
3546
3547 if (Ty == IceType_i64) {
3548 Operand *Lo = loOperand(Arg);
3549 Operand *Hi = hiOperand(Arg);
3550 GPRArgs.push_back(std::make_pair(
3551 Lo, RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Reg))));
3552 GPRArgs.push_back(std::make_pair(
3553 Hi, RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(Reg))));
3554 } else if (isScalarIntegerType(Ty)) {
3555 GPRArgs.push_back(std::make_pair(Arg, Reg));
3556 } else {
3557 FPArgs.push_back(std::make_pair(Arg, Reg));
3558 }
3559 }
3560
3561 // Adjust the parameter area so that the stack is aligned. It is assumed that
3562 // the stack is already aligned at the start of the calling sequence.
3563 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
3564
3565 if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) {
3566 llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max.");
3567 }
3568
3569 // Copy arguments that are passed on the stack to the appropriate stack
3570 // locations.
3571 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
3572 for (auto &StackArg : StackArgs) {
3573 ConstantInteger32 *Loc =
3574 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
3575 Type Ty = StackArg.first->getType();
3576 OperandARM32Mem *Addr;
3577 constexpr bool SignExt = false;
3578 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
3579 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
3580 } else {
3581 Variable *NewBase = Func->makeVariable(SP->getType());
3582 lowerArithmetic(
3583 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
3584 Addr = formMemoryOperand(NewBase, Ty);
3585 }
3586 lowerStore(InstStore::create(Func, StackArg.first, Addr));
3587 }
3588
3589 // Generate the call instruction. Assign its result to a temporary with high
3590 // register allocation weight.
3591 Variable *Dest = Instr->getDest();
3592 // ReturnReg doubles as ReturnRegLo as necessary.
3593 Variable *ReturnReg = nullptr;
3594 Variable *ReturnRegHi = nullptr;
3595 if (Dest) {
3596 switch (Dest->getType()) {
3597 case IceType_NUM:
3598 llvm::report_fatal_error("Invalid Call dest type");
3599 break;
3600 case IceType_void:
3601 break;
3602 case IceType_i1:
3603 assert(Computations.getProducerOf(Dest) == nullptr);
3604 // Fall-through intended.
3605 case IceType_i8:
3606 case IceType_i16:
3607 case IceType_i32:
3608 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
3609 break;
3610 case IceType_i64:
3611 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
3612 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
3613 break;
3614 case IceType_f32:
3615 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0);
3616 break;
3617 case IceType_f64:
3618 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0);
3619 break;
3620 case IceType_v4i1:
3621 case IceType_v8i1:
3622 case IceType_v16i1:
3623 case IceType_v16i8:
3624 case IceType_v8i16:
3625 case IceType_v4i32:
3626 case IceType_v4f32:
3627 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
3628 break;
3629 }
3630 }
3631
3632 // Allow ConstantRelocatable to be left alone as a direct call, but force
3633 // other constants like ConstantInteger32 to be in a register and make it an
3634 // indirect call.
3635 if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
3636 CallTarget = legalize(CallTarget, Legal_Reg);
3637 }
3638
3639 // Copy arguments to be passed in registers to the appropriate registers.
3640 CfgVector<Variable *> RegArgs;
3641 for (auto &FPArg : FPArgs) {
3642 RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
3643 }
3644 for (auto &GPRArg : GPRArgs) {
3645 RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
3646 }
3647
3648 // Generate a FakeUse of register arguments so that they do not get dead code
3649 // eliminated as a result of the FakeKill of scratch registers after the call.
3650   // These fake-uses need to be placed here to keep the argument registers
3651   // from being used during the legalizeToReg() calls above.
3652 for (auto *RegArg : RegArgs) {
3653 Context.insert<InstFakeUse>(RegArg);
3654 }
3655
3656 InstARM32Call *NewCall = Context.insert<InstARM32Call>(ReturnReg, CallTarget);
3657
3658 if (ReturnRegHi)
3659 Context.insert<InstFakeDef>(ReturnRegHi);
3660
3661 // Insert a register-kill pseudo instruction.
3662 Context.insert<InstFakeKill>(NewCall);
3663
3664 // Generate a FakeUse to keep the call live if necessary.
3665 if (Instr->hasSideEffects() && ReturnReg) {
3666 Context.insert<InstFakeUse>(ReturnReg);
3667 }
3668
3669 if (Dest != nullptr) {
3670 // Assign the result of the call to Dest.
3671 if (ReturnReg != nullptr) {
3672 if (ReturnRegHi) {
3673 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
3674 Variable *DestLo = Dest64On32->getLo();
3675 Variable *DestHi = Dest64On32->getHi();
3676 _mov(DestLo, ReturnReg);
3677 _mov(DestHi, ReturnRegHi);
3678 } else {
3679 if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
3680 _mov(Dest, ReturnReg);
3681 } else {
3682 assert(isIntegerType(Dest->getType()) &&
3683 typeWidthInBytes(Dest->getType()) <= 4);
3684 _mov(Dest, ReturnReg);
3685 }
3686 }
3687 }
3688 }
3689
3690 if (Instr->isTargetHelperCall()) {
3691 auto TargetHelpersPostamble = ARM32HelpersPostamble.find(CallTarget);
3692 if (TargetHelpersPostamble != ARM32HelpersPostamble.end()) {
3693 (this->*TargetHelpersPostamble->second)(Instr);
3694 }
3695 }
3696 }
3697
3698 namespace {
3699 void configureBitcastTemporary(Variable64On32 *Var) {
3700 Var->setMustNotHaveReg();
3701 Var->getHi()->setMustHaveReg();
3702 Var->getLo()->setMustHaveReg();
3703 }
3704 } // end of anonymous namespace
3705
3706 void TargetARM32::lowerCast(const InstCast *Instr) {
3707 InstCast::OpKind CastKind = Instr->getCastKind();
3708 Variable *Dest = Instr->getDest();
3709 const Type DestTy = Dest->getType();
3710 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3711 switch (CastKind) {
3712 default:
3713 Func->setError("Cast type not supported");
3714 return;
3715 case InstCast::Sext: {
3716 if (isVectorType(DestTy)) {
3717 Variable *T0 = makeReg(DestTy);
3718 Variable *T1 = makeReg(DestTy);
3719 ConstantInteger32 *ShAmt = nullptr;
3720 switch (DestTy) {
3721 default:
3722 llvm::report_fatal_error("Unexpected type in vector sext.");
3723 case IceType_v16i8:
3724 ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(7));
3725 break;
3726 case IceType_v8i16:
3727 ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(15));
3728 break;
3729 case IceType_v4i32:
3730 ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(31));
3731 break;
3732 }
3733 auto *Src0R = legalizeToReg(Src0);
3734 _vshl(T0, Src0R, ShAmt);
3735 _vshr(T1, T0, ShAmt)->setSignType(InstARM32::FS_Signed);
3736 _mov(Dest, T1);
3737 } else if (DestTy == IceType_i64) {
3738       // t1 = sxtb src; t2 = mov t1, asr #31; dst.lo = t1; dst.hi = t2
3739 Constant *ShiftAmt = Ctx->getConstantInt32(31);
3740 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3741 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3742 Variable *T_Lo = makeReg(DestLo->getType());
3743 if (Src0->getType() == IceType_i32) {
3744 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
3745 _mov(T_Lo, Src0RF);
3746 } else if (Src0->getType() != IceType_i1) {
3747 Variable *Src0R = legalizeToReg(Src0);
3748 _sxt(T_Lo, Src0R);
3749 } else {
3750 Operand *_0 = Ctx->getConstantZero(IceType_i32);
3751 Operand *_m1 = Ctx->getConstantInt32(-1);
3752 lowerInt1ForSelect(T_Lo, Src0, _m1, _0);
3753 }
3754 _mov(DestLo, T_Lo);
3755 Variable *T_Hi = makeReg(DestHi->getType());
3756 if (Src0->getType() != IceType_i1) {
3757 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
3758 OperandARM32::ASR, ShiftAmt));
3759 } else {
3760 // For i1, the asr instruction is already done above.
3761 _mov(T_Hi, T_Lo);
3762 }
3763 _mov(DestHi, T_Hi);
3764 } else if (Src0->getType() != IceType_i1) {
3765 // t1 = sxt src; dst = t1
3766 Variable *Src0R = legalizeToReg(Src0);
3767 Variable *T = makeReg(DestTy);
3768 _sxt(T, Src0R);
3769 _mov(Dest, T);
3770 } else {
3771 Constant *_0 = Ctx->getConstantZero(IceType_i32);
3772 Operand *_m1 = Ctx->getConstantInt(DestTy, -1);
3773 Variable *T = makeReg(DestTy);
3774 lowerInt1ForSelect(T, Src0, _m1, _0);
3775 _mov(Dest, T);
3776 }
3777 break;
3778 }
3779 case InstCast::Zext: {
3780 if (isVectorType(DestTy)) {
3781 auto *Mask = makeReg(DestTy);
3782 auto *_1 = Ctx->getConstantInt32(1);
3783 auto *T = makeReg(DestTy);
3784 auto *Src0R = legalizeToReg(Src0);
3785 _mov(Mask, _1);
3786 _vand(T, Src0R, Mask);
3787 _mov(Dest, T);
3788 } else if (DestTy == IceType_i64) {
3789       // t1 = uxtb src; dst.lo = t1; dst.hi = 0
3790 Operand *_0 =
3791 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
3792 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3793 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3794 Variable *T_Lo = makeReg(DestLo->getType());
3795
3796 switch (Src0->getType()) {
3797 default: {
3798 assert(Src0->getType() != IceType_i64);
3799 _uxt(T_Lo, legalizeToReg(Src0));
3800 } break;
3801 case IceType_i32: {
3802 _mov(T_Lo, legalize(Src0, Legal_Reg | Legal_Flex));
3803 } break;
3804 case IceType_i1: {
3805 SafeBoolChain Safe = lowerInt1(T_Lo, Src0);
3806 if (Safe == SBC_No) {
3807 Operand *_1 =
3808 legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3809 _and(T_Lo, T_Lo, _1);
3810 }
3811 } break;
3812 }
3813
3814 _mov(DestLo, T_Lo);
3815
3816 Variable *T_Hi = makeReg(DestLo->getType());
3817 _mov(T_Hi, _0);
3818 _mov(DestHi, T_Hi);
3819 } else if (Src0->getType() == IceType_i1) {
3820 Variable *T = makeReg(DestTy);
3821
3822 SafeBoolChain Safe = lowerInt1(T, Src0);
3823 if (Safe == SBC_No) {
3824 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3825 _and(T, T, _1);
3826 }
3827
3828 _mov(Dest, T);
3829 } else {
3830 // t1 = uxt src; dst = t1
3831 Variable *Src0R = legalizeToReg(Src0);
3832 Variable *T = makeReg(DestTy);
3833 _uxt(T, Src0R);
3834 _mov(Dest, T);
3835 }
3836 break;
3837 }
3838 case InstCast::Trunc: {
3839 if (isVectorType(DestTy)) {
3840 auto *T = makeReg(DestTy);
3841 auto *Src0R = legalizeToReg(Src0);
3842 _mov(T, Src0R);
3843 _mov(Dest, T);
3844 } else {
3845 if (Src0->getType() == IceType_i64)
3846 Src0 = loOperand(Src0);
3847 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
3848 // t1 = trunc Src0RF; Dest = t1
3849 Variable *T = makeReg(DestTy);
3850 _mov(T, Src0RF);
3851 if (DestTy == IceType_i1)
3852 _and(T, T, Ctx->getConstantInt1(1));
3853 _mov(Dest, T);
3854 }
3855 break;
3856 }
3857 case InstCast::Fptrunc:
3858 case InstCast::Fpext: {
3859     // fptrunc: dest.f32 = fptrunc src0.f64
3860     // fpext: dest.f64 = fpext src0.f32
3861 const bool IsTrunc = CastKind == InstCast::Fptrunc;
3862 assert(!isVectorType(DestTy));
3863 assert(DestTy == (IsTrunc ? IceType_f32 : IceType_f64));
3864 assert(Src0->getType() == (IsTrunc ? IceType_f64 : IceType_f32));
3865 Variable *Src0R = legalizeToReg(Src0);
3866 Variable *T = makeReg(DestTy);
3867 _vcvt(T, Src0R, IsTrunc ? InstARM32Vcvt::D2s : InstARM32Vcvt::S2d);
3868 _mov(Dest, T);
3869 break;
3870 }
3871 case InstCast::Fptosi:
3872 case InstCast::Fptoui: {
3873 const bool DestIsSigned = CastKind == InstCast::Fptosi;
3874 Variable *Src0R = legalizeToReg(Src0);
3875
3876 if (isVectorType(DestTy)) {
3877 assert(typeElementType(Src0->getType()) == IceType_f32);
3878 auto *T = makeReg(DestTy);
3879 _vcvt(T, Src0R,
3880 DestIsSigned ? InstARM32Vcvt::Vs2si : InstARM32Vcvt::Vs2ui);
3881 _mov(Dest, T);
3882 break;
3883 }
3884
3885 const bool Src0IsF32 = isFloat32Asserting32Or64(Src0->getType());
3886 if (llvm::isa<Variable64On32>(Dest)) {
3887 llvm::report_fatal_error("fp-to-i64 should have been pre-lowered.");
3888 }
3889 // fptosi:
3890 // t1.fp = vcvt src0.fp
3891 // t2.i32 = vmov t1.fp
3892 // dest.int = conv t2.i32 @ Truncates the result if needed.
3893 // fptoui:
3894 // t1.fp = vcvt src0.fp
3895 // t2.u32 = vmov t1.fp
3896 // dest.uint = conv t2.u32 @ Truncates the result if needed.
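    // Note: T_fp is created as f32 because the converted 32-bit integer result
    // always fits in (and, for these vcvt variants, is written to) a single S
    // register, even when the source operand is an f64 value.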
3897 Variable *T_fp = makeReg(IceType_f32);
3898 const InstARM32Vcvt::VcvtVariant Conversion =
3899 Src0IsF32 ? (DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui)
3900 : (DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui);
3901 _vcvt(T_fp, Src0R, Conversion);
3902 Variable *T = makeReg(IceType_i32);
3903 _mov(T, T_fp);
3904 if (DestTy != IceType_i32) {
3905 Variable *T_1 = makeReg(DestTy);
3906 lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T));
3907 T = T_1;
3908 }
3909 _mov(Dest, T);
3910 break;
3911 }
3912 case InstCast::Sitofp:
3913 case InstCast::Uitofp: {
3914 const bool SourceIsSigned = CastKind == InstCast::Sitofp;
3915
3916 if (isVectorType(DestTy)) {
3917 assert(typeElementType(DestTy) == IceType_f32);
3918 auto *T = makeReg(DestTy);
3919 Variable *Src0R = legalizeToReg(Src0);
3920 _vcvt(T, Src0R,
3921 SourceIsSigned ? InstARM32Vcvt::Vsi2s : InstARM32Vcvt::Vui2s);
3922 _mov(Dest, T);
3923 break;
3924 }
3925
3926 const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
3927 if (Src0->getType() == IceType_i64) {
3928 llvm::report_fatal_error("i64-to-fp should have been pre-lowered.");
3929 }
3930 // sitofp:
3931 // t1.i32 = sext src.int @ sign-extends src0 if needed.
3932 // t2.fp32 = vmov t1.i32
3933 // t3.fp = vcvt.{fp}.s32 @ fp is either f32 or f64
3934 // uitofp:
3935 // t1.i32 = zext src.int @ zero-extends src0 if needed.
3936 // t2.fp32 = vmov t1.i32
3937     // t3.fp = vcvt.{fp}.u32    @ fp is either f32 or f64
3938 if (Src0->getType() != IceType_i32) {
3939 Variable *Src0R_32 = makeReg(IceType_i32);
3940 lowerCast(InstCast::create(
3941 Func, SourceIsSigned ? InstCast::Sext : InstCast::Zext, Src0R_32,
3942 Src0));
3943 Src0 = Src0R_32;
3944 }
3945 Variable *Src0R = legalizeToReg(Src0);
3946 Variable *Src0R_f32 = makeReg(IceType_f32);
3947 _mov(Src0R_f32, Src0R);
3948 Src0R = Src0R_f32;
3949 Variable *T = makeReg(DestTy);
3950 const InstARM32Vcvt::VcvtVariant Conversion =
3951 DestIsF32
3952 ? (SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s)
3953 : (SourceIsSigned ? InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d);
3954 _vcvt(T, Src0R, Conversion);
3955 _mov(Dest, T);
3956 break;
3957 }
3958 case InstCast::Bitcast: {
3959 Operand *Src0 = Instr->getSrc(0);
3960 if (DestTy == Src0->getType()) {
3961 auto *Assign = InstAssign::create(Func, Dest, Src0);
3962 lowerAssign(Assign);
3963 return;
3964 }
3965 switch (DestTy) {
3966 case IceType_NUM:
3967 case IceType_void:
3968 llvm::report_fatal_error("Unexpected bitcast.");
3969 case IceType_i1:
3970 UnimplementedLoweringError(this, Instr);
3971 break;
3972 case IceType_i8:
3973 assert(Src0->getType() == IceType_v8i1);
3974 llvm::report_fatal_error(
3975 "i8 to v8i1 conversion should have been prelowered.");
3976 break;
3977 case IceType_i16:
3978 assert(Src0->getType() == IceType_v16i1);
3979 llvm::report_fatal_error(
3980 "i16 to v16i1 conversion should have been prelowered.");
3981 break;
3982 case IceType_i32:
3983 case IceType_f32: {
3984 Variable *Src0R = legalizeToReg(Src0);
3985 Variable *T = makeReg(DestTy);
3986 _mov(T, Src0R);
3987 lowerAssign(InstAssign::create(Func, Dest, T));
3988 break;
3989 }
3990 case IceType_i64: {
3991 // t0, t1 <- src0
3992 // dest[31..0] = t0
3993 // dest[63..32] = t1
3994 assert(Src0->getType() == IceType_f64);
3995 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
3996 T->initHiLo(Func);
3997 configureBitcastTemporary(T);
3998 Variable *Src0R = legalizeToReg(Src0);
3999 _mov(T, Src0R);
4000 Context.insert<InstFakeUse>(T->getHi());
4001 Context.insert<InstFakeUse>(T->getLo());
4002 lowerAssign(InstAssign::create(Func, Dest, T));
4003 break;
4004 }
4005 case IceType_f64: {
4006 // T0 <- lo(src)
4007 // T1 <- hi(src)
4008 // vmov T2, T0, T1
4009 // Dest <- T2
4010 assert(Src0->getType() == IceType_i64);
4011 Variable *T = makeReg(DestTy);
4012 auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4013 Src64->initHiLo(Func);
4014 configureBitcastTemporary(Src64);
4015 lowerAssign(InstAssign::create(Func, Src64, Src0));
4016 _mov(T, Src64);
4017 lowerAssign(InstAssign::create(Func, Dest, T));
4018 break;
4019 }
4020 case IceType_v8i1:
4021 assert(Src0->getType() == IceType_i8);
4022 llvm::report_fatal_error(
4023 "v8i1 to i8 conversion should have been prelowered.");
4024 break;
4025 case IceType_v16i1:
4026 assert(Src0->getType() == IceType_i16);
4027 llvm::report_fatal_error(
4028 "v16i1 to i16 conversion should have been prelowered.");
4029 break;
4030 case IceType_v4i1:
4031 case IceType_v8i16:
4032 case IceType_v16i8:
4033 case IceType_v4f32:
4034 case IceType_v4i32: {
4035 assert(typeWidthInBytes(DestTy) == typeWidthInBytes(Src0->getType()));
4036 assert(isVectorType(DestTy) == isVectorType(Src0->getType()));
4037 Variable *T = makeReg(DestTy);
4038 _mov(T, Src0);
4039 _mov(Dest, T);
4040 break;
4041 }
4042 }
4043 break;
4044 }
4045 }
4046 }
4047
4048 void TargetARM32::lowerExtractElement(const InstExtractElement *Instr) {
4049 Variable *Dest = Instr->getDest();
4050 Type DestTy = Dest->getType();
4051
4052 Variable *Src0 = legalizeToReg(Instr->getSrc(0));
4053 Operand *Src1 = Instr->getSrc(1);
4054
4055 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
4056 const uint32_t Index = Imm->getValue();
4057 Variable *T = makeReg(DestTy);
4058 Variable *TSrc0 = makeReg(Src0->getType());
4059
4060 if (isFloatingType(DestTy)) {
4061 // We need to make sure the source is in a suitable register.
4062 TSrc0->setRegClass(RegARM32::RCARM32_QtoS);
4063 }
4064
4065 _mov(TSrc0, Src0);
4066 _extractelement(T, TSrc0, Index);
4067 _mov(Dest, T);
4068 return;
4069 }
4070 assert(false && "extractelement requires a constant index");
4071 }
4072
4073 namespace {
4074 // Validates FCMPARM32_TABLE's declaration w.r.t. InstFcmp::FCondition ordering
4075 // (and naming).
4076 enum {
4077 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) _fcmp_ll_##val,
4078 FCMPARM32_TABLE
4079 #undef X
4080 _fcmp_ll_NUM
4081 };
4082
4083 enum {
4084 #define X(tag, str) _fcmp_hl_##tag = InstFcmp::tag,
4085 ICEINSTFCMP_TABLE
4086 #undef X
4087 _fcmp_hl_NUM
4088 };
4089
4090 static_assert((uint32_t)_fcmp_hl_NUM == (uint32_t)_fcmp_ll_NUM,
4091 "Inconsistency between high-level and low-level fcmp tags.");
4092 #define X(tag, str) \
4093 static_assert( \
4094 (uint32_t)_fcmp_hl_##tag == (uint32_t)_fcmp_ll_##tag, \
4095 "Inconsistency between high-level and low-level fcmp tag " #tag);
4096 ICEINSTFCMP_TABLE
4097 #undef X
4098
4099 struct {
4100 CondARM32::Cond CC0;
4101 CondARM32::Cond CC1;
4102 } TableFcmp[] = {
4103 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) \
4104 {CondARM32::CC0, CondARM32::CC1},
4105 FCMPARM32_TABLE
4106 #undef X
4107 };
4108
4109 bool isFloatingPointZero(const Operand *Src) {
4110 if (const auto *F32 = llvm::dyn_cast<const ConstantFloat>(Src)) {
4111 return Utils::isPositiveZero(F32->getValue());
4112 }
4113
4114 if (const auto *F64 = llvm::dyn_cast<const ConstantDouble>(Src)) {
4115 return Utils::isPositiveZero(F64->getValue());
4116 }
4117
4118 return false;
4119 }
4120 } // end of anonymous namespace
4121
4122 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
4123 InstFcmp::FCond Condition = Instr->getCondition();
4124 switch (Condition) {
4125 case InstFcmp::False:
4126 return CondWhenTrue(CondARM32::kNone);
4127 case InstFcmp::True:
4128 return CondWhenTrue(CondARM32::AL);
4129 break;
4130 default: {
4131 Variable *Src0R = legalizeToReg(Instr->getSrc(0));
4132 Operand *Src1 = Instr->getSrc(1);
4133 if (isFloatingPointZero(Src1)) {
4134 _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
4135 } else {
4136 _vcmp(Src0R, legalizeToReg(Src1));
4137 }
4138 _vmrs();
4139 assert(Condition < llvm::array_lengthof(TableFcmp));
4140 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
4141 }
4142 }
4143 }
4144
4145 void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
4146 Variable *Dest = Instr->getDest();
4147 const Type DestTy = Dest->getType();
4148
4149 if (isVectorType(DestTy)) {
4150 if (Instr->getCondition() == InstFcmp::False) {
4151 constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
4152 auto *T = makeReg(SafeTypeForMovingConstant);
4153 _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(0)));
4154 _mov(Dest, T);
4155 return;
4156 }
4157
4158 if (Instr->getCondition() == InstFcmp::True) {
4159 constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
4160 auto *T = makeReg(SafeTypeForMovingConstant);
4161 _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(1)));
4162 _mov(Dest, T);
4163 return;
4164 }
4165
4166 Variable *T0;
4167 Variable *T1;
4168 bool Negate = false;
4169 auto *Src0 = legalizeToReg(Instr->getSrc(0));
4170 auto *Src1 = legalizeToReg(Instr->getSrc(1));
4171
4172 switch (Instr->getCondition()) {
4173 default:
4174 llvm::report_fatal_error("Unhandled fp comparison.");
4175 #define _Vcnone(Tptr, S0, S1) \
4176 do { \
4177 *(Tptr) = nullptr; \
4178 } while (0)
4179 #define _Vceq(Tptr, S0, S1) \
4180 do { \
4181 *(Tptr) = makeReg(DestTy); \
4182 _vceq(*(Tptr), S0, S1); \
4183 } while (0)
4184 #define _Vcge(Tptr, S0, S1) \
4185 do { \
4186 *(Tptr) = makeReg(DestTy); \
4187 _vcge(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed); \
4188 } while (0)
4189 #define _Vcgt(Tptr, S0, S1) \
4190 do { \
4191 *(Tptr) = makeReg(DestTy); \
4192 _vcgt(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed); \
4193 } while (0)
4194 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) \
4195 case InstFcmp::val: { \
4196 _Vc##CC0_V(&T0, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1); \
4197 _Vc##CC1_V(&T1, (INV_V) ? Src0 : Src1, (INV_V) ? Src1 : Src0); \
4198 Negate = NEG_V; \
4199 } break;
4200 FCMPARM32_TABLE
4201 #undef X
4202 #undef _Vcgt
4203 #undef _Vcge
4204 #undef _Vceq
4205 #undef _Vcnone
4206 }
4207 assert(T0 != nullptr);
4208 Variable *T = T0;
4209 if (T1 != nullptr) {
4210 T = makeReg(DestTy);
4211 _vorr(T, T0, T1);
4212 }
4213
4214 if (Negate) {
4215 auto *TNeg = makeReg(DestTy);
4216 _vmvn(TNeg, T);
4217 T = TNeg;
4218 }
4219
4220 _mov(Dest, T);
4221 return;
4222 }
4223
4224 Variable *T = makeReg(IceType_i1);
4225 Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
4226 Operand *_0 =
4227 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4228
4229 CondWhenTrue Cond = lowerFcmpCond(Instr);
4230
4231 bool RedefineT = false;
4232 if (Cond.WhenTrue0 != CondARM32::AL) {
4233 _mov(T, _0);
4234 RedefineT = true;
4235 }
4236
4237 if (Cond.WhenTrue0 == CondARM32::kNone) {
4238 _mov(Dest, T);
4239 return;
4240 }
4241
4242 if (RedefineT) {
4243 _mov_redefined(T, _1, Cond.WhenTrue0);
4244 } else {
4245 _mov(T, _1, Cond.WhenTrue0);
4246 }
4247
4248 if (Cond.WhenTrue1 != CondARM32::kNone) {
4249 _mov_redefined(T, _1, Cond.WhenTrue1);
4250 }
4251
4252 _mov(Dest, T);
4253 }
4254
4255 TargetARM32::CondWhenTrue
4256 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4257 Operand *Src1) {
4258 assert(Condition < llvm::array_lengthof(TableIcmp64));
4259
4260 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
4261 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
4262 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
4263 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
4264
4265 if (SrcsLo.hasConstOperand()) {
4266 const uint32_t ValueLo = SrcsLo.getConstantValue();
4267 const uint32_t ValueHi = SrcsHi.getConstantValue();
4268 const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo;
4269 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
4270 Value == 0) {
4271 Variable *T = makeReg(IceType_i32);
4272 Variable *Src0LoR = SrcsLo.src0R(this);
4273 Variable *Src0HiR = SrcsHi.src0R(this);
4274 _orrs(T, Src0LoR, Src0HiR);
4275 Context.insert<InstFakeUse>(T);
4276 return CondWhenTrue(TableIcmp64[Condition].C1);
4277 }
4278
4279 Variable *Src0RLo = SrcsLo.src0R(this);
4280 Variable *Src0RHi = SrcsHi.src0R(this);
4281 Operand *Src1RFLo = SrcsLo.src1RF(this);
4282 Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this);
4283
4284 const bool UseRsb =
4285 TableIcmp64[Condition].Swapped != SrcsLo.swappedOperands();
4286
4287 if (UseRsb) {
4288 if (TableIcmp64[Condition].IsSigned) {
4289 Variable *T = makeReg(IceType_i32);
4290 _rsbs(T, Src0RLo, Src1RFLo);
4291 Context.insert<InstFakeUse>(T);
4292
4293 T = makeReg(IceType_i32);
4294 _rscs(T, Src0RHi, Src1RFHi);
4295 // We need to add a FakeUse here because liveness gets mad at us (Def
4296 // without Use.) Note that flag-setting instructions are considered to
4297 // have side effects and, therefore, are not DCE'ed.
4298 Context.insert<InstFakeUse>(T);
4299 } else {
4300 Variable *T = makeReg(IceType_i32);
4301 _rsbs(T, Src0RHi, Src1RFHi);
4302 Context.insert<InstFakeUse>(T);
4303
4304 T = makeReg(IceType_i32);
4305 _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
4306 Context.insert<InstFakeUse>(T);
4307 }
4308 } else {
4309 if (TableIcmp64[Condition].IsSigned) {
4310 _cmp(Src0RLo, Src1RFLo);
4311 Variable *T = makeReg(IceType_i32);
4312 _sbcs(T, Src0RHi, Src1RFHi);
4313 Context.insert<InstFakeUse>(T);
4314 } else {
4315 _cmp(Src0RHi, Src1RFHi);
4316 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
4317 }
4318 }
4319
4320 return CondWhenTrue(TableIcmp64[Condition].C1);
4321 }
4322
4323 Variable *Src0RLo, *Src0RHi;
4324 Operand *Src1RFLo, *Src1RFHi;
4325 if (TableIcmp64[Condition].Swapped) {
4326 Src0RLo = legalizeToReg(loOperand(Src1));
4327 Src0RHi = legalizeToReg(hiOperand(Src1));
4328 Src1RFLo = legalizeToReg(loOperand(Src0));
4329 Src1RFHi = legalizeToReg(hiOperand(Src0));
4330 } else {
4331 Src0RLo = legalizeToReg(loOperand(Src0));
4332 Src0RHi = legalizeToReg(hiOperand(Src0));
4333 Src1RFLo = legalizeToReg(loOperand(Src1));
4334 Src1RFHi = legalizeToReg(hiOperand(Src1));
4335 }
4336
4337 // a=icmp cond, b, c ==>
4338 // GCC does:
4339   //   cmp      b.hi, c.hi        or   cmp      b.lo, c.lo
4340   //   cmp.eq   b.lo, c.lo             sbcs t1, b.hi, c.hi
4341   //   mov.<C1> t, #1                  mov.<C1> t, #1
4342   //   mov.<C2> t, #0                  mov.<C2> t, #0
4343   //   mov      a, t                   mov      a, t
4344 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
4345 // is used for signed compares. In some cases, b and c need to be swapped as
4346 // well.
4347 //
4348 // LLVM does:
4349 // for EQ and NE:
4350 // eor t1, b.hi, c.hi
4351   //   eor t2, b.lo, c.lo
4352 // orrs t, t1, t2
4353 // mov.<C> t, #1
4354 // mov a, t
4355 //
4356 // that's nice in that it's just as short but has fewer dependencies for
4357 // better ILP at the cost of more registers.
4358 //
4359 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
4360 // unconditional mov #0, two cmps, two conditional mov #1, and one
4361 // conditional reg mov. That has few dependencies for good ILP, but is a
4362 // longer sequence.
4363 //
4364 // So, we are going with the GCC version since it's usually better (except
4365 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
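  // Illustrative example (assuming slt maps to the LT condition in
  // TableIcmp64): for a signed "a = icmp slt b, c" on i64, the consumer in
  // lowerIcmp() together with the sequence below ends up emitting roughly
  //   mov   a, #0
  //   cmp   b.lo, c.lo
  //   sbcs  t, b.hi, c.hi   @ t is unused; only the flag side effects matter
  //   movlt a, #1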
4366 if (TableIcmp64[Condition].IsSigned) {
4367 Variable *ScratchReg = makeReg(IceType_i32);
4368 _cmp(Src0RLo, Src1RFLo);
4369 _sbcs(ScratchReg, Src0RHi, Src1RFHi);
4370 // ScratchReg isn't going to be used, but we need the side-effect of
4371 // setting flags from this operation.
4372 Context.insert<InstFakeUse>(ScratchReg);
4373 } else {
4374 _cmp(Src0RHi, Src1RFHi);
4375 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
4376 }
4377 return CondWhenTrue(TableIcmp64[Condition].C1);
4378 }
4379
4380 TargetARM32::CondWhenTrue
4381 TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4382 Operand *Src1) {
4383 Int32Operands Srcs(Src0, Src1);
4384 if (!Srcs.hasConstOperand()) {
4385
4386 Variable *Src0R = Srcs.src0R(this);
4387 Operand *Src1RF = Srcs.src1RF(this);
4388 _cmp(Src0R, Src1RF);
4389 return CondWhenTrue(getIcmp32Mapping(Condition));
4390 }
4391
4392 Variable *Src0R = Srcs.src0R(this);
4393 const int32_t Value = Srcs.getConstantValue();
4394 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
4395 _tst(Src0R, Src0R);
4396 return CondWhenTrue(getIcmp32Mapping(Condition));
4397 }
4398
4399 if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() &&
4400 Srcs.negatedImmediateIsFlexEncodable()) {
4401 Operand *Src1F = Srcs.negatedSrc1F(this);
4402 _cmn(Src0R, Src1F);
4403 return CondWhenTrue(getIcmp32Mapping(Condition));
4404 }
4405
4406 Operand *Src1RF = Srcs.src1RF(this);
4407 if (!Srcs.swappedOperands()) {
4408 _cmp(Src0R, Src1RF);
4409 } else {
4410 Variable *T = makeReg(IceType_i32);
4411 _rsbs(T, Src0R, Src1RF);
4412 Context.insert<InstFakeUse>(T);
4413 }
4414 return CondWhenTrue(getIcmp32Mapping(Condition));
4415 }
4416
4417 TargetARM32::CondWhenTrue
4418 TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4419 Operand *Src1) {
4420 Int32Operands Srcs(Src0, Src1);
4421 const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
4422 assert(ShAmt >= 0);
4423
4424 if (!Srcs.hasConstOperand()) {
4425 Variable *Src0R = makeReg(IceType_i32);
4426 Operand *ShAmtImm = shAmtImm(ShAmt);
4427 _lsl(Src0R, legalizeToReg(Src0), ShAmtImm);
4428
4429 Variable *Src1R = legalizeToReg(Src1);
4430 auto *Src1F = OperandARM32FlexReg::create(Func, IceType_i32, Src1R,
4431 OperandARM32::LSL, ShAmtImm);
4432 _cmp(Src0R, Src1F);
4433 return CondWhenTrue(getIcmp32Mapping(Condition));
4434 }
4435
4436 const int32_t Value = Srcs.getConstantValue();
4437 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
4438 Operand *ShAmtImm = shAmtImm(ShAmt);
4439 Variable *T = makeReg(IceType_i32);
4440 _lsls(T, Srcs.src0R(this), ShAmtImm);
4441 Context.insert<InstFakeUse>(T);
4442 return CondWhenTrue(getIcmp32Mapping(Condition));
4443 }
4444
4445 Variable *ConstR = makeReg(IceType_i32);
4446 _mov(ConstR,
4447 legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex));
4448 Operand *NonConstF = OperandARM32FlexReg::create(
4449 Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL,
4450 Ctx->getConstantInt32(ShAmt));
4451
4452 if (Srcs.swappedOperands()) {
4453 _cmp(ConstR, NonConstF);
4454 } else {
4455 Variable *T = makeReg(IceType_i32);
4456 _rsbs(T, ConstR, NonConstF);
4457 Context.insert<InstFakeUse>(T);
4458 }
4459 return CondWhenTrue(getIcmp32Mapping(Condition));
4460 }
4461
4462 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) {
4463 return lowerIcmpCond(Instr->getCondition(), Instr->getSrc(0),
4464 Instr->getSrc(1));
4465 }
4466
4467 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition,
4468 Operand *Src0,
4469 Operand *Src1) {
4470 Src0 = legalizeUndef(Src0);
4471 Src1 = legalizeUndef(Src1);
4472
4473 // a=icmp cond b, c ==>
4474 // GCC does:
4475 // <u/s>xtb tb, b
4476 // <u/s>xtb tc, c
4477 // cmp tb, tc
4478 // mov.C1 t, #0
4479 // mov.C2 t, #1
4480 // mov a, t
4481 // where the unsigned/sign extension is not needed for 32-bit. They also have
4482 // special cases for EQ and NE. E.g., for NE:
4483 // <extend to tb, tc>
4484 // subs t, tb, tc
4485 // movne t, #1
4486 // mov a, t
4487 //
4488 // LLVM does:
4489 // lsl tb, b, #<N>
4490 // mov t, #0
4491 // cmp tb, c, lsl #<N>
4492 // mov.<C> t, #1
4493 // mov a, t
4494 //
4495 // the left shift is by 0, 16, or 24, which allows the comparison to focus on
4496   // the bits that actually matter (for 16-bit or 8-bit signed/unsigned). For
4497   // the unsigned case, for some reason it does something similar to GCC and
4498   // does a uxtb first. It's not clear to me why that special-casing is needed.
4499 //
4500 // We'll go with the LLVM way for now, since it's shorter and has just as few
4501 // dependencies.
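  // Illustrative example: for an i8 compare, ShAmt is 32 - 8 = 24, so both
  // operands are shifted until the meaningful byte occupies bits [31..24]:
  //   lsl tb, b, #24
  //   cmp tb, c, lsl #24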
4502 switch (Src0->getType()) {
4503 default:
4504 llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
4505 case IceType_i1:
4506 case IceType_i8:
4507 case IceType_i16:
4508 return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1);
4509 case IceType_i32:
4510 return lowerInt32IcmpCond(Condition, Src0, Src1);
4511 case IceType_i64:
4512 return lowerInt64IcmpCond(Condition, Src0, Src1);
4513 }
4514 }
4515
4516 void TargetARM32::lowerIcmp(const InstIcmp *Instr) {
4517 Variable *Dest = Instr->getDest();
4518 const Type DestTy = Dest->getType();
4519
4520 if (isVectorType(DestTy)) {
4521 auto *T = makeReg(DestTy);
4522 auto *Src0 = legalizeToReg(Instr->getSrc(0));
4523 auto *Src1 = legalizeToReg(Instr->getSrc(1));
4524 const Type SrcTy = Src0->getType();
4525
4526 bool NeedsShl = false;
4527 Type NewTypeAfterShl;
4528 SizeT ShAmt;
4529 switch (SrcTy) {
4530 default:
4531 break;
4532 case IceType_v16i1:
4533 NeedsShl = true;
4534 NewTypeAfterShl = IceType_v16i8;
4535 ShAmt = 7;
4536 break;
4537 case IceType_v8i1:
4538 NeedsShl = true;
4539 NewTypeAfterShl = IceType_v8i16;
4540 ShAmt = 15;
4541 break;
4542 case IceType_v4i1:
4543 NeedsShl = true;
4544 NewTypeAfterShl = IceType_v4i32;
4545 ShAmt = 31;
4546 break;
4547 }
4548
4549 if (NeedsShl) {
4550 auto *Imm = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmt));
4551 auto *Src0T = makeReg(NewTypeAfterShl);
4552 auto *Src0Shl = makeReg(NewTypeAfterShl);
4553 _mov(Src0T, Src0);
4554 _vshl(Src0Shl, Src0T, Imm);
4555 Src0 = Src0Shl;
4556
4557 auto *Src1T = makeReg(NewTypeAfterShl);
4558 auto *Src1Shl = makeReg(NewTypeAfterShl);
4559 _mov(Src1T, Src1);
4560 _vshl(Src1Shl, Src1T, Imm);
4561 Src1 = Src1Shl;
4562 }
4563
4564 switch (Instr->getCondition()) {
4565 default:
4566 llvm::report_fatal_error("Unhandled integer comparison.");
4567 #define _Vceq(T, S0, S1, Signed) _vceq(T, S0, S1)
4568 #define _Vcge(T, S0, S1, Signed) \
4569 _vcge(T, S0, S1)->setSignType(Signed ? InstARM32::FS_Signed \
4570 : InstARM32::FS_Unsigned)
4571 #define _Vcgt(T, S0, S1, Signed) \
4572 _vcgt(T, S0, S1)->setSignType(Signed ? InstARM32::FS_Signed \
4573 : InstARM32::FS_Unsigned)
4574 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
4575 case InstIcmp::val: { \
4576 _Vc##C_V(T, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1, is_signed); \
4577 if (NEG_V) { \
4578 auto *TInv = makeReg(DestTy); \
4579 _vmvn(TInv, T); \
4580 T = TInv; \
4581 } \
4582 } break;
4583 ICMPARM32_TABLE
4584 #undef X
4585 #undef _Vcgt
4586 #undef _Vcge
4587 #undef _Vceq
4588 }
4589 _mov(Dest, T);
4590 return;
4591 }
4592
4593 Operand *_0 =
4594 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4595 Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
4596 Variable *T = makeReg(IceType_i1);
4597
4598 _mov(T, _0);
4599 CondWhenTrue Cond = lowerIcmpCond(Instr);
4600 _mov_redefined(T, _1, Cond.WhenTrue0);
4601 _mov(Dest, T);
4602
4603 assert(Cond.WhenTrue1 == CondARM32::kNone);
4604
4605 return;
4606 }
4607
4608 void TargetARM32::lowerInsertElement(const InstInsertElement *Instr) {
4609 Variable *Dest = Instr->getDest();
4610 Type DestTy = Dest->getType();
4611
4612 Variable *Src0 = legalizeToReg(Instr->getSrc(0));
4613 Variable *Src1 = legalizeToReg(Instr->getSrc(1));
4614 Operand *Src2 = Instr->getSrc(2);
4615
4616 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
4617 const uint32_t Index = Imm->getValue();
4618 Variable *T = makeReg(DestTy);
4619
4620 if (isFloatingType(DestTy)) {
4621 T->setRegClass(RegARM32::RCARM32_QtoS);
4622 }
4623
4624 _mov(T, Src0);
4625 _insertelement(T, Src1, Index);
4626 _set_dest_redefined();
4627 _mov(Dest, T);
4628 return;
4629 }
4630 assert(false && "insertelement requires a constant index");
4631 }
4632
4633 namespace {
4634 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
4635 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
4636 return Integer->getValue();
4637 return Intrinsics::MemoryOrderInvalid;
4638 }
4639 } // end of anonymous namespace
4640
4641 void TargetARM32::lowerLoadLinkedStoreExclusive(
4642 Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation,
4643 CondARM32::Cond Cond) {
4644
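  // Rough sketch of the pattern emitted below (assuming Cond == AL), matching
  // the pseudo code in the lowerAtomicRMW comment further down:
  //   retry:
  //     ldrex  tmp, [addr]
  //     <store-value = Operation(tmp)>
  //     strex  success, store-value, [addr]
  //     cmp    success, #0
  //     bne    retry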
4645 auto *Retry = Context.insert<InstARM32Label>(this);
4646
4647 { // scoping for loop highlighting.
4648 Variable *Success = makeReg(IceType_i32);
4649 Variable *Tmp = (Ty == IceType_i64) ? makeI64RegPair() : makeReg(Ty);
4650 auto *_0 = Ctx->getConstantZero(IceType_i32);
4651
4652 Context.insert<InstFakeDef>(Tmp);
4653 Context.insert<InstFakeUse>(Tmp);
4654 Variable *AddrR = legalizeToReg(Addr);
4655 _ldrex(Tmp, formMemoryOperand(AddrR, Ty))->setDestRedefined();
4656 auto *StoreValue = Operation(Tmp);
4657 assert(StoreValue->mustHaveReg());
4658 // strex requires Dest to be a register other than Value or Addr. This
4659 // restriction is cleanly represented by adding an "early" definition of
4660     // Dest (or a later use of all the sources).
4661 Context.insert<InstFakeDef>(Success);
4662 if (Cond != CondARM32::AL) {
4663 _mov_redefined(Success, legalize(_0, Legal_Reg | Legal_Flex),
4664 InstARM32::getOppositeCondition(Cond));
4665 }
4666 _strex(Success, StoreValue, formMemoryOperand(AddrR, Ty), Cond)
4667 ->setDestRedefined();
4668 _cmp(Success, _0);
4669 }
4670
4671 _br(Retry, CondARM32::NE);
4672 }
4673
4674 namespace {
4675 InstArithmetic *createArithInst(Cfg *Func, uint32_t Operation, Variable *Dest,
4676 Variable *Src0, Operand *Src1) {
4677 InstArithmetic::OpKind Oper;
4678 switch (Operation) {
4679 default:
4680 llvm::report_fatal_error("Unknown AtomicRMW operation");
4681 case Intrinsics::AtomicExchange:
4682 llvm::report_fatal_error("Can't handle Atomic xchg operation");
4683 case Intrinsics::AtomicAdd:
4684 Oper = InstArithmetic::Add;
4685 break;
4686 case Intrinsics::AtomicAnd:
4687 Oper = InstArithmetic::And;
4688 break;
4689 case Intrinsics::AtomicSub:
4690 Oper = InstArithmetic::Sub;
4691 break;
4692 case Intrinsics::AtomicOr:
4693 Oper = InstArithmetic::Or;
4694 break;
4695 case Intrinsics::AtomicXor:
4696 Oper = InstArithmetic::Xor;
4697 break;
4698 }
4699 return InstArithmetic::create(Func, Oper, Dest, Src0, Src1);
4700 }
4701 } // end of anonymous namespace
4702
4703 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
4704 Operand *Addr, Operand *Val) {
4705 // retry:
4706 // ldrex tmp, [addr]
4707 // mov contents, tmp
4708 // op result, contents, Val
4709 // strex success, result, [addr]
4710 // cmp success, 0
4711 // jne retry
4712 // fake-use(addr, operand) @ prevents undesirable clobbering.
4713 // mov dest, contents
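  // (AtomicRMW returns the value the memory location held *before* the
  // operation, which is why dest is assigned from contents rather than from
  // result.)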
4714 auto DestTy = Dest->getType();
4715
4716 if (DestTy == IceType_i64) {
4717 lowerInt64AtomicRMW(Dest, Operation, Addr, Val);
4718 return;
4719 }
4720
4721 Operand *ValRF = nullptr;
4722 if (llvm::isa<ConstantInteger32>(Val)) {
4723 ValRF = Val;
4724 } else {
4725 ValRF = legalizeToReg(Val);
4726 }
4727 auto *ContentsR = makeReg(DestTy);
4728 auto *ResultR = makeReg(DestTy);
4729
4730 _dmb();
4731 lowerLoadLinkedStoreExclusive(
4732 DestTy, Addr,
4733 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4734 lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4735 if (Operation == Intrinsics::AtomicExchange) {
4736 lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4737 } else {
4738 lowerArithmetic(
4739 createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4740 }
4741 return ResultR;
4742 });
4743 _dmb();
4744 if (auto *ValR = llvm::dyn_cast<Variable>(ValRF)) {
4745 Context.insert<InstFakeUse>(ValR);
4746 }
4747 // Can't dce ContentsR.
4748 Context.insert<InstFakeUse>(ContentsR);
4749 lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4750 }
4751
4752 void TargetARM32::lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation,
4753 Operand *Addr, Operand *Val) {
4754 assert(Dest->getType() == IceType_i64);
4755
4756 auto *ResultR = makeI64RegPair();
4757
4758 Context.insert<InstFakeDef>(ResultR);
4759
4760 Operand *ValRF = nullptr;
4761 if (llvm::dyn_cast<ConstantInteger64>(Val)) {
4762 ValRF = Val;
4763 } else {
4764 auto *ValR64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4765 ValR64->initHiLo(Func);
4766 ValR64->setMustNotHaveReg();
4767 ValR64->getLo()->setMustHaveReg();
4768 ValR64->getHi()->setMustHaveReg();
4769 lowerAssign(InstAssign::create(Func, ValR64, Val));
4770 ValRF = ValR64;
4771 }
4772
4773 auto *ContentsR = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4774 ContentsR->initHiLo(Func);
4775 ContentsR->setMustNotHaveReg();
4776 ContentsR->getLo()->setMustHaveReg();
4777 ContentsR->getHi()->setMustHaveReg();
4778
4779 _dmb();
4780 lowerLoadLinkedStoreExclusive(
4781 IceType_i64, Addr,
4782 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4783 lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4784 Context.insert<InstFakeUse>(Tmp);
4785 if (Operation == Intrinsics::AtomicExchange) {
4786 lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4787 } else {
4788 lowerArithmetic(
4789 createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4790 }
4791 Context.insert<InstFakeUse>(ResultR->getHi());
4792 Context.insert<InstFakeDef>(ResultR, ResultR->getLo())
4793 ->setDestRedefined();
4794 return ResultR;
4795 });
4796 _dmb();
4797 if (auto *ValR64 = llvm::dyn_cast<Variable64On32>(ValRF)) {
4798 Context.insert<InstFakeUse>(ValR64->getLo());
4799 Context.insert<InstFakeUse>(ValR64->getHi());
4800 }
4801 lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4802 }
4803
4804 void TargetARM32::postambleCtpop64(const InstCall *Instr) {
4805 Operand *Arg0 = Instr->getArg(0);
4806 if (isInt32Asserting32Or64(Arg0->getType())) {
4807 return;
4808 }
4809 // The popcount helpers always return 32-bit values, while the intrinsic's
4810   // signature matches some 64-bit platforms' native instructions and expects
4811   // to fill a 64-bit reg. Thus, clear the upper bits of the dest in case the
4812   // user doesn't zero them in the IR or discard them via truncate.
4813 auto *DestHi = llvm::cast<Variable>(hiOperand(Instr->getDest()));
4814 Variable *T = makeReg(IceType_i32);
4815 Operand *_0 =
4816 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4817 _mov(T, _0);
4818 _mov(DestHi, T);
4819 }
4820
lowerIntrinsic(const InstIntrinsic * Instr)4821 void TargetARM32::lowerIntrinsic(const InstIntrinsic *Instr) {
4822 Variable *Dest = Instr->getDest();
4823 Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void;
4824 Intrinsics::IntrinsicID ID = Instr->getIntrinsicID();
4825 switch (ID) {
4826 case Intrinsics::AtomicFence:
4827 case Intrinsics::AtomicFenceAll:
4828 assert(Dest == nullptr);
4829 _dmb();
4830 return;
4831 case Intrinsics::AtomicIsLockFree: {
4832 Operand *ByteSize = Instr->getArg(0);
4833 auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
4834 if (CI == nullptr) {
4835 // The PNaCl ABI requires the byte size to be a compile-time constant.
4836 Func->setError("AtomicIsLockFree byte size should be compile-time const");
4837 return;
4838 }
4839 static constexpr int32_t NotLockFree = 0;
4840 static constexpr int32_t LockFree = 1;
4841 int32_t Result = NotLockFree;
4842 switch (CI->getValue()) {
4843 case 1:
4844 case 2:
4845 case 4:
4846 case 8:
4847 Result = LockFree;
4848 break;
4849 }
4850 _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result)));
4851 return;
4852 }
4853 case Intrinsics::AtomicLoad: {
4854 assert(isScalarIntegerType(DestTy));
4855 // We require the memory address to be naturally aligned. Given that is the
4856 // case, then normal loads are atomic.
4857 if (!Intrinsics::isMemoryOrderValid(
4858 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
4859 Func->setError("Unexpected memory ordering for AtomicLoad");
4860 return;
4861 }
4862 Variable *T;
4863
4864 if (DestTy == IceType_i64) {
4865       // ldrex is the only ARM instruction that is guaranteed to load a 64-bit
4866 // integer atomically. Everything else works with a regular ldr.
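      // Roughly, the expected emission for this path is (an illustrative
      // sketch; actual registers are chosen by the register allocator):
      //   ldrexd r0, r1, [addr]
      //   dmb
      //   mov destLo, r0
      //   mov destHi, r1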
4867 T = makeI64RegPair();
4868 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64));
4869 } else {
4870 T = makeReg(DestTy);
4871 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy));
4872 }
4873 _dmb();
4874 lowerAssign(InstAssign::create(Func, Dest, T));
4875     // Add a fake-use of T to ensure the atomic load is not removed if Dest is
4876 // unused.
4877 Context.insert<InstFakeUse>(T);
4878 return;
4879 }
4880 case Intrinsics::AtomicStore: {
4881 // We require the memory address to be naturally aligned. Given that is the
4882     // case, then normal stores are atomic.
4883 if (!Intrinsics::isMemoryOrderValid(
4884 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
4885 Func->setError("Unexpected memory ordering for AtomicStore");
4886 return;
4887 }
4888
4889 auto *Value = Instr->getArg(0);
4890 if (Value->getType() == IceType_i64) {
4891 auto *ValueR = makeI64RegPair();
4892 Context.insert<InstFakeDef>(ValueR);
4893 lowerAssign(InstAssign::create(Func, ValueR, Value));
4894 _dmb();
4895 lowerLoadLinkedStoreExclusive(
4896 IceType_i64, Instr->getArg(1), [this, ValueR](Variable *Tmp) {
4897 // The following fake-use prevents the ldrex instruction from being
4898 // dead code eliminated.
4899 Context.insert<InstFakeUse>(llvm::cast<Variable>(loOperand(Tmp)));
4900 Context.insert<InstFakeUse>(llvm::cast<Variable>(hiOperand(Tmp)));
4901 Context.insert<InstFakeUse>(Tmp);
4902 return ValueR;
4903 });
4904 Context.insert<InstFakeUse>(ValueR);
4905 _dmb();
4906 return;
4907 }
4908
4909 auto *ValueR = legalizeToReg(Instr->getArg(0));
4910 const auto ValueTy = ValueR->getType();
4911 assert(isScalarIntegerType(ValueTy));
4912 auto *Addr = legalizeToReg(Instr->getArg(1));
4913
4914     // Non-64-bit stores are atomic as long as the address is aligned. This is
4915     // PNaCl, so addresses are aligned.
4916 _dmb();
4917 _str(ValueR, formMemoryOperand(Addr, ValueTy));
4918 _dmb();
4919 return;
4920 }
4921 case Intrinsics::AtomicCmpxchg: {
4922 // retry:
4923 // ldrex tmp, [addr]
4924 // cmp tmp, expected
4925 // mov expected, tmp
4926 // strexeq success, new, [addr]
4927 // cmpeq success, #0
4928 // bne retry
4929 // mov dest, expected
4930 assert(isScalarIntegerType(DestTy));
4931     // We require the memory address to be naturally aligned; the exclusive
4932     // load/store loop below relies on that.
4933 if (!Intrinsics::isMemoryOrderValid(
4934 ID, getConstantMemoryOrder(Instr->getArg(3)),
4935 getConstantMemoryOrder(Instr->getArg(4)))) {
4936 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
4937 return;
4938 }
4939
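    // In the 64-bit case below, Expected and New live in register pairs, and
    // the sketch above becomes, roughly (illustrative only):
    //   ldrexd   r0, r1, [addr]
    //   cmp      r0, expectedLo
    //   cmpeq    r1, expectedHi
    //   strexdeq success, newLo, newHi, [addr]
    //   cmpeq    success, #0
    //   bne      retry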
4940 if (DestTy == IceType_i64) {
4941 Variable *LoadedValue = nullptr;
4942
4943 auto *New = makeI64RegPair();
4944 Context.insert<InstFakeDef>(New);
4945 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
4946
4947 auto *Expected = makeI64RegPair();
4948 Context.insert<InstFakeDef>(Expected);
4949 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
4950
4951 _dmb();
4952 lowerLoadLinkedStoreExclusive(
4953 DestTy, Instr->getArg(0),
4954 [this, Expected, New, &LoadedValue](Variable *Tmp) {
4955 auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected));
4956 auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected));
4957 auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp));
4958 auto *TmpHiR = llvm::cast<Variable>(hiOperand(Tmp));
4959 _cmp(TmpLoR, ExpectedLoR);
4960 _cmp(TmpHiR, ExpectedHiR, CondARM32::EQ);
4961 LoadedValue = Tmp;
4962 return New;
4963 },
4964 CondARM32::EQ);
4965 _dmb();
4966
4967 Context.insert<InstFakeUse>(LoadedValue);
4968 lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
4969 // The fake-use Expected prevents the assignments to Expected (above)
4970 // from being removed if Dest is not used.
4971 Context.insert<InstFakeUse>(Expected);
4972 // New needs to be alive here, or its live range will end in the
4973 // strex instruction.
4974 Context.insert<InstFakeUse>(New);
4975 return;
4976 }
4977
4978 auto *New = legalizeToReg(Instr->getArg(2));
4979 auto *Expected = legalizeToReg(Instr->getArg(1));
4980 Variable *LoadedValue = nullptr;
4981
4982 _dmb();
4983 lowerLoadLinkedStoreExclusive(
4984 DestTy, Instr->getArg(0),
4985 [this, Expected, New, &LoadedValue](Variable *Tmp) {
4986 lowerIcmpCond(InstIcmp::Eq, Tmp, Expected);
4987 LoadedValue = Tmp;
4988 return New;
4989 },
4990 CondARM32::EQ);
4991 _dmb();
4992
4993 lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
4994 Context.insert<InstFakeUse>(Expected);
4995 Context.insert<InstFakeUse>(New);
4996 return;
4997 }
4998 case Intrinsics::AtomicRMW: {
4999 if (!Intrinsics::isMemoryOrderValid(
5000 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
5001 Func->setError("Unexpected memory ordering for AtomicRMW");
5002 return;
5003 }
5004 lowerAtomicRMW(
5005 Dest,
5006 static_cast<uint32_t>(
5007 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
5008 Instr->getArg(1), Instr->getArg(2));
5009 return;
5010 }
5011 case Intrinsics::Bswap: {
5012 Operand *Val = Instr->getArg(0);
5013 Type Ty = Val->getType();
5014 if (Ty == IceType_i64) {
5015 Val = legalizeUndef(Val);
5016 Variable *Val_Lo = legalizeToReg(loOperand(Val));
5017 Variable *Val_Hi = legalizeToReg(hiOperand(Val));
5018 Variable *T_Lo = makeReg(IceType_i32);
5019 Variable *T_Hi = makeReg(IceType_i32);
5020 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5021 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5022 _rev(T_Lo, Val_Lo);
5023 _rev(T_Hi, Val_Hi);
5024 _mov(DestLo, T_Hi);
5025 _mov(DestHi, T_Lo);
5026 } else {
5027 assert(Ty == IceType_i32 || Ty == IceType_i16);
5028 Variable *ValR = legalizeToReg(Val);
5029 Variable *T = makeReg(Ty);
5030 _rev(T, ValR);
5031 if (Val->getType() == IceType_i16) {
5032 Operand *_16 = shAmtImm(16);
5033 _lsr(T, T, _16);
5034 }
5035 _mov(Dest, T);
5036 }
5037 return;
5038 }
5039 case Intrinsics::Ctpop: {
5040 llvm::report_fatal_error("Ctpop should have been prelowered.");
5041 }
5042 case Intrinsics::Ctlz: {
5043 // The "is zero undef" parameter is ignored and we always return a
5044 // well-defined value.
5045 Operand *Val = Instr->getArg(0);
5046 Variable *ValLoR;
5047 Variable *ValHiR = nullptr;
5048 if (Val->getType() == IceType_i64) {
5049 Val = legalizeUndef(Val);
5050 ValLoR = legalizeToReg(loOperand(Val));
5051 ValHiR = legalizeToReg(hiOperand(Val));
5052 } else {
5053 ValLoR = legalizeToReg(Val);
5054 }
5055 lowerCLZ(Dest, ValLoR, ValHiR);
5056 return;
5057 }
5058 case Intrinsics::Cttz: {
5059 // Essentially like Clz, but reverse the bits first.
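    // For the 32-bit case this amounts to, roughly (illustrative sketch):
    //   rbit t, x
    //   clz  dest, t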
5060 Operand *Val = Instr->getArg(0);
5061 Variable *ValLoR;
5062 Variable *ValHiR = nullptr;
5063 if (Val->getType() == IceType_i64) {
5064 Val = legalizeUndef(Val);
5065 ValLoR = legalizeToReg(loOperand(Val));
5066 ValHiR = legalizeToReg(hiOperand(Val));
5067 Variable *TLo = makeReg(IceType_i32);
5068 Variable *THi = makeReg(IceType_i32);
5069 _rbit(TLo, ValLoR);
5070 _rbit(THi, ValHiR);
5071 ValLoR = THi;
5072 ValHiR = TLo;
5073 } else {
5074 ValLoR = legalizeToReg(Val);
5075 Variable *T = makeReg(IceType_i32);
5076 _rbit(T, ValLoR);
5077 ValLoR = T;
5078 }
5079 lowerCLZ(Dest, ValLoR, ValHiR);
5080 return;
5081 }
5082 case Intrinsics::Fabs: {
5083 Variable *T = makeReg(DestTy);
5084 _vabs(T, legalizeToReg(Instr->getArg(0)));
5085 _mov(Dest, T);
5086 return;
5087 }
5088 case Intrinsics::Longjmp: {
5089 llvm::report_fatal_error("longjmp should have been prelowered.");
5090 }
5091 case Intrinsics::Memcpy: {
5092 llvm::report_fatal_error("memcpy should have been prelowered.");
5093 }
5094 case Intrinsics::Memmove: {
5095 llvm::report_fatal_error("memmove should have been prelowered.");
5096 }
5097 case Intrinsics::Memset: {
5098     llvm::report_fatal_error("memset should have been prelowered.");
5099 }
5100 case Intrinsics::Setjmp: {
5101 llvm::report_fatal_error("setjmp should have been prelowered.");
5102 }
5103 case Intrinsics::Sqrt: {
5104 Variable *Src = legalizeToReg(Instr->getArg(0));
5105 Variable *T = makeReg(DestTy);
5106 _vsqrt(T, Src);
5107 _mov(Dest, T);
5108 return;
5109 }
5110 case Intrinsics::Stacksave: {
5111 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5112 _mov(Dest, SP);
5113 return;
5114 }
5115 case Intrinsics::Stackrestore: {
5116 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5117 Variable *Val = legalizeToReg(Instr->getArg(0));
5118 _mov_redefined(SP, Val);
5119 return;
5120 }
5121 case Intrinsics::Trap:
5122 _trap();
5123 return;
5124 case Intrinsics::AddSaturateSigned:
5125 case Intrinsics::AddSaturateUnsigned: {
5126 bool Unsigned = (ID == Intrinsics::AddSaturateUnsigned);
5127 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5128 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5129 Variable *T = makeReg(DestTy);
5130 _vqadd(T, Src0, Src1, Unsigned);
5131 _mov(Dest, T);
5132 return;
5133 }
5134 case Intrinsics::LoadSubVector: {
5135 assert(llvm::isa<ConstantInteger32>(Instr->getArg(1)) &&
5136 "LoadSubVector second argument must be a constant");
5137 Variable *Dest = Instr->getDest();
5138 Type Ty = Dest->getType();
5139 auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(1));
5140 Operand *Addr = Instr->getArg(0);
5141 OperandARM32Mem *Src = formMemoryOperand(Addr, Ty);
5142 doMockBoundsCheck(Src);
5143
5144 if (Dest->isRematerializable()) {
5145 Context.insert<InstFakeDef>(Dest);
5146 return;
5147 }
5148
5149 auto *T = makeReg(Ty);
5150 switch (SubVectorSize->getValue()) {
5151 case 4:
5152 _vldr1d(T, Src);
5153 break;
5154 case 8:
5155 _vldr1q(T, Src);
5156 break;
5157 default:
5158 Func->setError("Unexpected size for LoadSubVector");
5159 return;
5160 }
5161 _mov(Dest, T);
5162 return;
5163 }
5164 case Intrinsics::StoreSubVector: {
5165 assert(llvm::isa<ConstantInteger32>(Instr->getArg(2)) &&
5166 "StoreSubVector third argument must be a constant");
5167 auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(2));
5168 Variable *Value = legalizeToReg(Instr->getArg(0));
5169 Operand *Addr = Instr->getArg(1);
5170 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5171 doMockBoundsCheck(NewAddr);
5172
5173 Value = legalizeToReg(Value);
5174
5175 switch (SubVectorSize->getValue()) {
5176 case 4:
5177 _vstr1d(Value, NewAddr);
5178 break;
5179 case 8:
5180 _vstr1q(Value, NewAddr);
5181 break;
5182 default:
5183 Func->setError("Unexpected size for StoreSubVector");
5184 return;
5185 }
5186 return;
5187 }
5188 case Intrinsics::MultiplyAddPairs: {
5189 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5190 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5191 Variable *T = makeReg(DestTy);
5192 _vmlap(T, Src0, Src1);
5193 _mov(Dest, T);
5194 return;
5195 }
5196 case Intrinsics::MultiplyHighSigned:
5197 case Intrinsics::MultiplyHighUnsigned: {
5198 bool Unsigned = (ID == Intrinsics::MultiplyHighUnsigned);
5199 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5200 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5201 Variable *T = makeReg(DestTy);
5202 _vmulh(T, Src0, Src1, Unsigned);
5203 _mov(Dest, T);
5204 return;
5205 }
5206 case Intrinsics::Nearbyint: {
5207 UnimplementedLoweringError(this, Instr);
5208 return;
5209 }
5210 case Intrinsics::Round: {
5211 UnimplementedLoweringError(this, Instr);
5212 return;
5213 }
5214 case Intrinsics::SignMask: {
5215 UnimplementedLoweringError(this, Instr);
5216 return;
5217 }
5218 case Intrinsics::SubtractSaturateSigned:
5219 case Intrinsics::SubtractSaturateUnsigned: {
5220 bool Unsigned = (ID == Intrinsics::SubtractSaturateUnsigned);
5221 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5222 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5223 Variable *T = makeReg(DestTy);
5224 _vqsub(T, Src0, Src1, Unsigned);
5225 _mov(Dest, T);
5226 return;
5227 }
5228 case Intrinsics::VectorPackSigned:
5229 case Intrinsics::VectorPackUnsigned: {
5230 bool Unsigned = (ID == Intrinsics::VectorPackUnsigned);
5231 bool Saturating = true;
5232 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5233 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5234 Variable *T = makeReg(DestTy);
5235 _vqmovn2(T, Src0, Src1, Unsigned, Saturating);
5236 _mov(Dest, T);
5237 return;
5238 }
5239 default: // UnknownIntrinsic
5240 Func->setError("Unexpected intrinsic");
5241 return;
5242 }
5243 return;
5244 }
5245
lowerCLZ(Variable * Dest,Variable * ValLoR,Variable * ValHiR)5246 void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
5247 Type Ty = Dest->getType();
5248 assert(Ty == IceType_i32 || Ty == IceType_i64);
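  // For i64 the expansion below is, roughly (an illustrative sketch; the
  // conditional clz keeps the low-word result when the high word is zero):
  //   clz   T, ValLo
  //   cmp   ValHi, #0
  //   add   T2, T, #32
  //   clzne T2, ValHi
  //   mov   DestLo, T2
  //   mov   DestHi, #0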
5249 Variable *T = makeReg(IceType_i32);
5250 _clz(T, ValLoR);
5251 if (Ty == IceType_i64) {
5252 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5253 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5254 Operand *Zero =
5255 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
5256 Operand *ThirtyTwo =
5257 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
5258 _cmp(ValHiR, Zero);
5259 Variable *T2 = makeReg(IceType_i32);
5260 _add(T2, T, ThirtyTwo);
5261 _clz(T2, ValHiR, CondARM32::NE);
5262 // T2 is actually a source as well when the predicate is not AL (since it
5263 // may leave T2 alone). We use _set_dest_redefined to prolong the liveness
5264     // of T2 as if it were used as a source.
5265 _set_dest_redefined();
5266 _mov(DestLo, T2);
5267 Variable *T3 = makeReg(Zero->getType());
5268 _mov(T3, Zero);
5269 _mov(DestHi, T3);
5270 return;
5271 }
5272 _mov(Dest, T);
5273 return;
5274 }
5275
lowerLoad(const InstLoad * Load)5276 void TargetARM32::lowerLoad(const InstLoad *Load) {
5277 // A Load instruction can be treated the same as an Assign instruction, after
5278 // the source operand is transformed into an OperandARM32Mem operand.
5279 Type Ty = Load->getDest()->getType();
5280 Operand *Src0 = formMemoryOperand(Load->getLoadAddress(), Ty);
5281 Variable *DestLoad = Load->getDest();
5282
5283   // TODO(jvoung): handle folding opportunities. Sign and zero extension can
5284 // be folded into a load.
5285 auto *Assign = InstAssign::create(Func, DestLoad, Src0);
5286 lowerAssign(Assign);
5287 }
5288
5289 namespace {
dumpAddressOpt(const Cfg * Func,const Variable * Base,int32_t Offset,const Variable * OffsetReg,int16_t OffsetRegShAmt,const Inst * Reason)5290 void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
5291 const Variable *OffsetReg, int16_t OffsetRegShAmt,
5292 const Inst *Reason) {
5293 if (!BuildDefs::dump())
5294 return;
5295 if (!Func->isVerbose(IceV_AddrOpt))
5296 return;
5297 OstreamLocker _(Func->getContext());
5298 Ostream &Str = Func->getContext()->getStrDump();
5299 Str << "Instruction: ";
5300 Reason->dumpDecorated(Func);
5301 Str << " results in Base=";
5302 if (Base)
5303 Base->dump(Func);
5304 else
5305 Str << "<null>";
5306 Str << ", OffsetReg=";
5307 if (OffsetReg)
5308 OffsetReg->dump(Func);
5309 else
5310 Str << "<null>";
5311 Str << ", Shift=" << OffsetRegShAmt << ", Offset=" << Offset << "\n";
5312 }
5313
matchAssign(const VariablesMetadata * VMetadata,Variable ** Var,int32_t * Offset,const Inst ** Reason)5314 bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
5315 int32_t *Offset, const Inst **Reason) {
5316 // Var originates from Var=SrcVar ==> set Var:=SrcVar
5317 if (*Var == nullptr)
5318 return false;
5319 const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
5320 if (!VarAssign)
5321 return false;
5322 assert(!VMetadata->isMultiDef(*Var));
5323 if (!llvm::isa<InstAssign>(VarAssign))
5324 return false;
5325
5326 Operand *SrcOp = VarAssign->getSrc(0);
5327 bool Optimized = false;
5328 if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
5329 if (!VMetadata->isMultiDef(SrcVar) ||
5330 // TODO: ensure SrcVar stays single-BB
5331 false) {
5332 Optimized = true;
5333 *Var = SrcVar;
5334 } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5335 int32_t MoreOffset = Const->getValue();
5336       if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5337         return false;
5338       const int32_t NewOffset = MoreOffset + *Offset;
5339       *Var = nullptr;
5340       *Offset = NewOffset;
5341 Optimized = true;
5342 }
5343 }
5344
5345 if (Optimized) {
5346 *Reason = VarAssign;
5347 }
5348
5349 return Optimized;
5350 }
5351
isAddOrSub(const Inst * Instr,InstArithmetic::OpKind * Kind)5352 bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
5353 if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5354 switch (Arith->getOp()) {
5355 default:
5356 return false;
5357 case InstArithmetic::Add:
5358 case InstArithmetic::Sub:
5359 *Kind = Arith->getOp();
5360 return true;
5361 }
5362 }
5363 return false;
5364 }
5365
matchCombinedBaseIndex(const VariablesMetadata * VMetadata,Variable ** Base,Variable ** OffsetReg,int32_t OffsetRegShamt,const Inst ** Reason)5366 bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable **Base,
5367 Variable **OffsetReg, int32_t OffsetRegShamt,
5368 const Inst **Reason) {
5369 // OffsetReg==nullptr && Base is Base=Var1+Var2 ==>
5370 // set Base=Var1, OffsetReg=Var2, Shift=0
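  // For example (illustrative): if the base was computed as "Base = p + i",
  // the access can instead use base register p with index register i, i.e. an
  // ARM "[p, i]" addressing mode.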
5371 if (*Base == nullptr)
5372 return false;
5373 if (*OffsetReg != nullptr)
5374 return false;
5375 (void)OffsetRegShamt;
5376 assert(OffsetRegShamt == 0);
5377 const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5378 if (BaseInst == nullptr)
5379 return false;
5380 assert(!VMetadata->isMultiDef(*Base));
5381 if (BaseInst->getSrcSize() < 2)
5382 return false;
5383 auto *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0));
5384 if (!Var1)
5385 return false;
5386 if (VMetadata->isMultiDef(Var1))
5387 return false;
5388 auto *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1));
5389 if (!Var2)
5390 return false;
5391 if (VMetadata->isMultiDef(Var2))
5392 return false;
5393 InstArithmetic::OpKind _;
5394 if (!isAddOrSub(BaseInst, &_) ||
5395 // TODO: ensure Var1 and Var2 stay single-BB
5396 false)
5397 return false;
5398 *Base = Var1;
5399 *OffsetReg = Var2;
5400 // OffsetRegShamt is already 0.
5401 *Reason = BaseInst;
5402 return true;
5403 }
5404
matchShiftedOffsetReg(const VariablesMetadata * VMetadata,Variable ** OffsetReg,OperandARM32::ShiftKind * Kind,int32_t * OffsetRegShamt,const Inst ** Reason)5405 bool matchShiftedOffsetReg(const VariablesMetadata *VMetadata,
5406 Variable **OffsetReg, OperandARM32::ShiftKind *Kind,
5407 int32_t *OffsetRegShamt, const Inst **Reason) {
5408 // OffsetReg is OffsetReg=Var*Const && log2(Const)+Shift<=32 ==>
5409 // OffsetReg=Var, Shift+=log2(Const)
5410 // OffsetReg is OffsetReg=Var<<Const && Const+Shift<=32 ==>
5411 // OffsetReg=Var, Shift+=Const
5412 // OffsetReg is OffsetReg=Var>>Const && Const-Shift>=-32 ==>
5413 // OffsetReg=Var, Shift-=Const
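  // For example (illustrative): if OffsetReg was computed as "OffsetReg =
  // Var * 8", the multiplication folds into the addressing mode as
  // "Var, LSL #3"; similarly "Var << 2" folds as "Var, LSL #2".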
5414 OperandARM32::ShiftKind NewShiftKind = OperandARM32::kNoShift;
5415 if (*OffsetReg == nullptr)
5416 return false;
5417 auto *IndexInst = VMetadata->getSingleDefinition(*OffsetReg);
5418 if (IndexInst == nullptr)
5419 return false;
5420 assert(!VMetadata->isMultiDef(*OffsetReg));
5421 if (IndexInst->getSrcSize() < 2)
5422 return false;
5423 auto *ArithInst = llvm::dyn_cast<InstArithmetic>(IndexInst);
5424 if (ArithInst == nullptr)
5425 return false;
5426 auto *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0));
5427 if (Var == nullptr)
5428 return false;
5429 auto *Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
5430 if (Const == nullptr) {
5431 assert(!llvm::isa<ConstantInteger32>(ArithInst->getSrc(0)));
5432 return false;
5433 }
5434 if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32)
5435 return false;
5436
5437 uint32_t NewShamt = -1;
5438 switch (ArithInst->getOp()) {
5439 default:
5440 return false;
5441 case InstArithmetic::Shl: {
5442 NewShiftKind = OperandARM32::LSL;
5443 NewShamt = Const->getValue();
5444 if (NewShamt > 31)
5445 return false;
5446 } break;
5447 case InstArithmetic::Lshr: {
5448 NewShiftKind = OperandARM32::LSR;
5449 NewShamt = Const->getValue();
5450 if (NewShamt > 31)
5451 return false;
5452 } break;
5453 case InstArithmetic::Ashr: {
5454 NewShiftKind = OperandARM32::ASR;
5455 NewShamt = Const->getValue();
5456 if (NewShamt > 31)
5457 return false;
5458 } break;
5459 case InstArithmetic::Udiv:
5460 case InstArithmetic::Mul: {
5461 const uint32_t UnsignedConst = Const->getValue();
5462 NewShamt = llvm::findFirstSet(UnsignedConst);
5463 if (NewShamt != llvm::findLastSet(UnsignedConst)) {
5464 // First bit set is not the same as the last bit set, so Const is not
5465 // a power of 2.
5466 return false;
5467 }
5468 NewShiftKind = ArithInst->getOp() == InstArithmetic::Udiv
5469 ? OperandARM32::LSR
5470 : OperandARM32::LSL;
5471 } break;
5472 }
5473 // Allowed "transitions":
5474   //   kNoShift -> * iff NewShamt <= 31
5475   //   LSL -> LSL iff NewShamt + OffsetRegShamt <= 31
5476   //   LSR -> LSR iff NewShamt + OffsetRegShamt <= 31
5477   //   ASR -> ASR iff NewShamt + OffsetRegShamt <= 31
5478 if (*Kind != OperandARM32::kNoShift && *Kind != NewShiftKind) {
5479 return false;
5480 }
5481 const int32_t NewOffsetRegShamt = *OffsetRegShamt + NewShamt;
5482 if (NewOffsetRegShamt > 31)
5483 return false;
5484 *OffsetReg = Var;
5485 *OffsetRegShamt = NewOffsetRegShamt;
5486 *Kind = NewShiftKind;
5487 *Reason = IndexInst;
5488 return true;
5489 }
5490
matchOffsetBase(const VariablesMetadata * VMetadata,Variable ** Base,int32_t * Offset,const Inst ** Reason)5491 bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
5492 int32_t *Offset, const Inst **Reason) {
5493 // Base is Base=Var+Const || Base is Base=Const+Var ==>
5494 // set Base=Var, Offset+=Const
5495 // Base is Base=Var-Const ==>
5496 // set Base=Var, Offset-=Const
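  // For example (illustrative): if the base was computed as "Base = p + 16",
  // the constant folds into the immediate, leaving base register p and
  // Offset += 16, i.e. an ARM "[p, #16]" addressing mode.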
5497 if (*Base == nullptr)
5498 return false;
5499 const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5500 if (BaseInst == nullptr) {
5501 return false;
5502 }
5503 assert(!VMetadata->isMultiDef(*Base));
5504
5505 auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
5506 if (ArithInst == nullptr)
5507 return false;
5508 InstArithmetic::OpKind Kind;
5509 if (!isAddOrSub(ArithInst, &Kind))
5510 return false;
5511 bool IsAdd = Kind == InstArithmetic::Add;
5512 Operand *Src0 = ArithInst->getSrc(0);
5513 Operand *Src1 = ArithInst->getSrc(1);
5514 auto *Var0 = llvm::dyn_cast<Variable>(Src0);
5515 auto *Var1 = llvm::dyn_cast<Variable>(Src1);
5516 auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
5517 auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
5518 Variable *NewBase = nullptr;
5519 int32_t NewOffset = *Offset;
5520
5521 if (Var0 == nullptr && Const0 == nullptr) {
5522 assert(llvm::isa<ConstantRelocatable>(Src0));
5523 return false;
5524 }
5525
5526 if (Var1 == nullptr && Const1 == nullptr) {
5527 assert(llvm::isa<ConstantRelocatable>(Src1));
5528 return false;
5529 }
5530
5531 if (Var0 && Var1)
5532 // TODO(jpp): merge base/index splitting into here.
5533 return false;
5534 if (!IsAdd && Var1)
5535 return false;
5536 if (Var0)
5537 NewBase = Var0;
5538 else if (Var1)
5539 NewBase = Var1;
5540 // Compute the updated constant offset.
5541 if (Const0) {
5542 int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
5543 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5544 return false;
5545 NewOffset += MoreOffset;
5546 }
5547 if (Const1) {
5548 int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
5549 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5550 return false;
5551 NewOffset += MoreOffset;
5552 }
5553
5554 // Update the computed address parameters once we are sure optimization
5555 // is valid.
5556 *Base = NewBase;
5557 *Offset = NewOffset;
5558 *Reason = BaseInst;
5559 return true;
5560 }
5561 } // end of anonymous namespace
5562
formAddressingMode(Type Ty,Cfg * Func,const Inst * LdSt,Operand * Base)5563 OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func,
5564 const Inst *LdSt,
5565 Operand *Base) {
5566 assert(Base != nullptr);
5567 int32_t OffsetImm = 0;
5568 Variable *OffsetReg = nullptr;
5569 int32_t OffsetRegShamt = 0;
5570 OperandARM32::ShiftKind ShiftKind = OperandARM32::kNoShift;
5571
5572 Func->resetCurrentNode();
5573 if (Func->isVerbose(IceV_AddrOpt)) {
5574 OstreamLocker _(Func->getContext());
5575 Ostream &Str = Func->getContext()->getStrDump();
5576 Str << "\nAddress mode formation:\t";
5577 LdSt->dumpDecorated(Func);
5578 }
5579
5580 if (isVectorType(Ty))
5581     // Vector loads and stores do not allow offsets, and only support the
5582     // "[reg]" addressing mode (the other supported modes are write-back).
5583 return nullptr;
5584
5585 auto *BaseVar = llvm::dyn_cast<Variable>(Base);
5586 if (BaseVar == nullptr)
5587 return nullptr;
5588
5589 (void)MemTraitsSize;
5590 assert(Ty < MemTraitsSize);
5591 auto *TypeTraits = &MemTraits[Ty];
5592 const bool CanHaveIndex = TypeTraits->CanHaveIndex;
5593 const bool CanHaveShiftedIndex = TypeTraits->CanHaveShiftedIndex;
5594 const bool CanHaveImm = TypeTraits->CanHaveImm;
5595 const int32_t ValidImmMask = TypeTraits->ValidImmMask;
5596 (void)ValidImmMask;
5597 assert(!CanHaveImm || ValidImmMask >= 0);
5598
5599 const VariablesMetadata *VMetadata = Func->getVMetadata();
5600 const Inst *Reason = nullptr;
5601
5602 do {
5603 if (Reason != nullptr) {
5604 dumpAddressOpt(Func, BaseVar, OffsetImm, OffsetReg, OffsetRegShamt,
5605 Reason);
5606 Reason = nullptr;
5607 }
5608
5609 if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5610 continue;
5611 }
5612
5613 if (CanHaveIndex &&
5614 matchAssign(VMetadata, &OffsetReg, &OffsetImm, &Reason)) {
5615 continue;
5616 }
5617
5618 if (CanHaveIndex && matchCombinedBaseIndex(VMetadata, &BaseVar, &OffsetReg,
5619 OffsetRegShamt, &Reason)) {
5620 continue;
5621 }
5622
5623 if (CanHaveShiftedIndex) {
5624 if (matchShiftedOffsetReg(VMetadata, &OffsetReg, &ShiftKind,
5625 &OffsetRegShamt, &Reason)) {
5626 continue;
5627 }
5628
5629 if ((OffsetRegShamt == 0) &&
5630 matchShiftedOffsetReg(VMetadata, &BaseVar, &ShiftKind,
5631 &OffsetRegShamt, &Reason)) {
5632 std::swap(BaseVar, OffsetReg);
5633 continue;
5634 }
5635 }
5636
5637 if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5638 continue;
5639 }
5640 } while (Reason);
5641
5642 if (BaseVar == nullptr) {
5643 // [OffsetReg{, LSL Shamt}{, #OffsetImm}] is not legal in ARM, so we have to
5644 // legalize the addressing mode to [BaseReg, OffsetReg{, LSL Shamt}].
5645 // Instead of a zeroed BaseReg, we initialize it with OffsetImm:
5646 //
5647 // [OffsetReg{, LSL Shamt}{, #OffsetImm}] ->
5648 // mov BaseReg, #OffsetImm
5649 // use of [BaseReg, OffsetReg{, LSL Shamt}]
5650 //
5651 const Type PointerType = getPointerType();
5652 BaseVar = makeReg(PointerType);
5653 Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
5654 OffsetImm = 0;
5655 } else if (OffsetImm != 0) {
5656     // ARM ldr/str instructions have limited-range immediates. The formation
5657     // loop above materialized the immediate carelessly, so we ensure the
5658 // generated offset is sane.
5659 const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
5660 const InstArithmetic::OpKind Op =
5661 OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
5662
5663 if (!CanHaveImm || !isLegalMemOffset(Ty, OffsetImm) ||
5664 OffsetReg != nullptr) {
5665 if (OffsetReg == nullptr) {
5666 // We formed a [Base, #const] addressing mode which is not encodable in
5667 // ARM. There is little point in forming an address mode now if we don't
5668 // have an offset. Effectively, we would end up with something like
5669 //
5670 // [Base, #const] -> add T, Base, #const
5671 // use of [T]
5672 //
5673 // Which is exactly what we already have. So we just bite the bullet
5674 // here and don't form any address mode.
5675 return nullptr;
5676 }
5677 // We formed [Base, Offset {, LSL Amnt}, #const]. Oops. Legalize it to
5678 //
5679 // [Base, Offset, {LSL amount}, #const] ->
5680 // add T, Base, #const
5681 // use of [T, Offset {, LSL amount}]
5682 const Type PointerType = getPointerType();
5683 Variable *T = makeReg(PointerType);
5684 Context.insert<InstArithmetic>(Op, T, BaseVar,
5685 Ctx->getConstantInt32(PositiveOffset));
5686 BaseVar = T;
5687 OffsetImm = 0;
5688 }
5689 }
5690
5691 assert(BaseVar != nullptr);
5692 assert(OffsetImm == 0 || OffsetReg == nullptr);
5693 assert(OffsetReg == nullptr || CanHaveIndex);
5694 assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm
5695 : (ValidImmMask & OffsetImm) == OffsetImm);
5696
5697 if (OffsetReg != nullptr) {
5698 Variable *OffsetR = makeReg(getPointerType());
5699 Context.insert<InstAssign>(OffsetR, OffsetReg);
5700 return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetR, ShiftKind,
5701 OffsetRegShamt);
5702 }
5703
5704 return OperandARM32Mem::create(
5705 Func, Ty, BaseVar,
5706 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
5707 }
5708
doAddressOptLoad()5709 void TargetARM32::doAddressOptLoad() {
5710 Inst *Instr = iteratorToInst(Context.getCur());
5711 assert(llvm::isa<InstLoad>(Instr));
5712 Variable *Dest = Instr->getDest();
5713 Operand *Addr = Instr->getSrc(0);
5714 if (OperandARM32Mem *Mem =
5715 formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
5716 Instr->setDeleted();
5717 Context.insert<InstLoad>(Dest, Mem);
5718 }
5719 }
5720
lowerPhi(const InstPhi *)5721 void TargetARM32::lowerPhi(const InstPhi * /*Instr*/) {
5722 Func->setError("Phi found in regular instruction list");
5723 }
5724
lowerRet(const InstRet * Instr)5725 void TargetARM32::lowerRet(const InstRet *Instr) {
5726 Variable *Reg = nullptr;
5727 if (Instr->hasRetValue()) {
5728 Operand *Src0 = Instr->getRetValue();
5729 Type Ty = Src0->getType();
5730 if (Ty == IceType_i64) {
5731 Src0 = legalizeUndef(Src0);
5732 Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0);
5733 Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1);
5734 Reg = R0;
5735 Context.insert<InstFakeUse>(R1);
5736 } else if (Ty == IceType_f32) {
5737 Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0);
5738 Reg = S0;
5739 } else if (Ty == IceType_f64) {
5740 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
5741 Reg = D0;
5742 } else if (isVectorType(Src0->getType())) {
5743 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
5744 Reg = Q0;
5745 } else {
5746 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
5747 Reg = makeReg(Src0F->getType(), RegARM32::Reg_r0);
5748 _mov(Reg, Src0F, CondARM32::AL);
5749 }
5750 }
5751 // Add a ret instruction even if sandboxing is enabled, because addEpilog
5752 // explicitly looks for a ret instruction as a marker for where to insert the
5753 // frame removal instructions. addEpilog is responsible for restoring the
5754 // "lr" register as needed prior to this ret instruction.
5755 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
5756
5757 // Add a fake use of sp to make sure sp stays alive for the entire function.
5758 // Otherwise post-call sp adjustments get dead-code eliminated.
5759 // TODO: Are there more places where the fake use should be inserted? E.g.
5760 // "void f(int n){while(1) g(n);}" may not have a ret instruction.
5761 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5762 Context.insert<InstFakeUse>(SP);
5763 }
5764
lowerShuffleVector(const InstShuffleVector * Instr)5765 void TargetARM32::lowerShuffleVector(const InstShuffleVector *Instr) {
5766 auto *Dest = Instr->getDest();
5767 const Type DestTy = Dest->getType();
5768
5769 auto *T = makeReg(DestTy);
5770 auto *Src0 = Instr->getSrc(0);
5771 auto *Src1 = Instr->getSrc(1);
5772 const SizeT NumElements = typeNumElements(DestTy);
5773 const Type ElementType = typeElementType(DestTy);
5774
5775 bool Replicate = true;
5776 for (SizeT I = 1; Replicate && I < Instr->getNumIndexes(); ++I) {
5777 if (Instr->getIndexValue(I) != Instr->getIndexValue(0)) {
5778 Replicate = false;
5779 }
5780 }
5781
5782 if (Replicate) {
5783 Variable *Src0Var = legalizeToReg(Src0);
5784 _vdup(T, Src0Var, Instr->getIndexValue(0));
5785 _mov(Dest, T);
5786 return;
5787 }
5788
5789 switch (DestTy) {
5790 case IceType_v8i1:
5791 case IceType_v8i16: {
5792 static constexpr SizeT ExpectedNumElements = 8;
5793 assert(ExpectedNumElements == Instr->getNumIndexes());
5794 (void)ExpectedNumElements;
5795
5796 if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3)) {
5797 Variable *Src0R = legalizeToReg(Src0);
5798 _vzip(T, Src0R, Src0R);
5799 _mov(Dest, T);
5800 return;
5801 }
5802
5803 if (Instr->indexesAre(0, 8, 1, 9, 2, 10, 3, 11)) {
5804 Variable *Src0R = legalizeToReg(Src0);
5805 Variable *Src1R = legalizeToReg(Src1);
5806 _vzip(T, Src0R, Src1R);
5807 _mov(Dest, T);
5808 return;
5809 }
5810
5811 if (Instr->indexesAre(0, 2, 4, 6, 0, 2, 4, 6)) {
5812 Variable *Src0R = legalizeToReg(Src0);
5813 _vqmovn2(T, Src0R, Src0R, false, false);
5814 _mov(Dest, T);
5815 return;
5816 }
5817 } break;
5818 case IceType_v16i1:
5819 case IceType_v16i8: {
5820 static constexpr SizeT ExpectedNumElements = 16;
5821 assert(ExpectedNumElements == Instr->getNumIndexes());
5822 (void)ExpectedNumElements;
5823
5824 if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7)) {
5825 Variable *Src0R = legalizeToReg(Src0);
5826 _vzip(T, Src0R, Src0R);
5827 _mov(Dest, T);
5828 return;
5829 }
5830
5831 if (Instr->indexesAre(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7,
5832 23)) {
5833 Variable *Src0R = legalizeToReg(Src0);
5834 Variable *Src1R = legalizeToReg(Src1);
5835 _vzip(T, Src0R, Src1R);
5836 _mov(Dest, T);
5837 return;
5838 }
5839 } break;
5840 case IceType_v4i1:
5841 case IceType_v4i32:
5842 case IceType_v4f32: {
5843 static constexpr SizeT ExpectedNumElements = 4;
5844 assert(ExpectedNumElements == Instr->getNumIndexes());
5845 (void)ExpectedNumElements;
5846
5847 if (Instr->indexesAre(0, 0, 1, 1)) {
5848 Variable *Src0R = legalizeToReg(Src0);
5849 _vzip(T, Src0R, Src0R);
5850 _mov(Dest, T);
5851 return;
5852 }
5853
5854 if (Instr->indexesAre(0, 4, 1, 5)) {
5855 Variable *Src0R = legalizeToReg(Src0);
5856 Variable *Src1R = legalizeToReg(Src1);
5857 _vzip(T, Src0R, Src1R);
5858 _mov(Dest, T);
5859 return;
5860 }
5861
5862 if (Instr->indexesAre(0, 1, 4, 5)) {
5863 Variable *Src0R = legalizeToReg(Src0);
5864 Variable *Src1R = legalizeToReg(Src1);
5865 _vmovlh(T, Src0R, Src1R);
5866 _mov(Dest, T);
5867 return;
5868 }
5869
5870 if (Instr->indexesAre(2, 3, 2, 3)) {
5871 Variable *Src0R = legalizeToReg(Src0);
5872 _vmovhl(T, Src0R, Src0R);
5873 _mov(Dest, T);
5874 return;
5875 }
5876
5877 if (Instr->indexesAre(2, 3, 6, 7)) {
5878 Variable *Src0R = legalizeToReg(Src0);
5879 Variable *Src1R = legalizeToReg(Src1);
5880 _vmovhl(T, Src1R, Src0R);
5881 _mov(Dest, T);
5882 return;
5883 }
5884 } break;
5885 default:
5886     // TODO(jpp): figure out how to properly lower this without scalarization.
5887     break;
5888 }
5889
5890 // Unoptimized shuffle. Perform a series of inserts and extracts.
5891 Context.insert<InstFakeDef>(T);
5892 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
5893 auto *Index = Instr->getIndex(I);
5894 const SizeT Elem = Index->getValue();
5895 auto *ExtElmt = makeReg(ElementType);
5896 if (Elem < NumElements) {
5897 lowerExtractElement(
5898 InstExtractElement::create(Func, ExtElmt, Src0, Index));
5899 } else {
5900 lowerExtractElement(InstExtractElement::create(
5901 Func, ExtElmt, Src1,
5902 Ctx->getConstantInt32(Index->getValue() - NumElements)));
5903 }
5904 auto *NewT = makeReg(DestTy);
5905 lowerInsertElement(InstInsertElement::create(Func, NewT, T, ExtElmt,
5906 Ctx->getConstantInt32(I)));
5907 T = NewT;
5908 }
5909 _mov(Dest, T);
5910 }
5911
lowerSelect(const InstSelect * Instr)5912 void TargetARM32::lowerSelect(const InstSelect *Instr) {
5913 Variable *Dest = Instr->getDest();
5914 Type DestTy = Dest->getType();
5915 Operand *SrcT = Instr->getTrueOperand();
5916 Operand *SrcF = Instr->getFalseOperand();
5917 Operand *Condition = Instr->getCondition();
5918
5919 if (!isVectorType(DestTy)) {
5920 lowerInt1ForSelect(Dest, Condition, legalizeUndef(SrcT),
5921 legalizeUndef(SrcF));
5922 return;
5923 }
5924
5925 Type TType = DestTy;
5926 switch (DestTy) {
5927 default:
5928 llvm::report_fatal_error("Unexpected type for vector select.");
5929 case IceType_v4i1:
5930 TType = IceType_v4i32;
5931 break;
5932 case IceType_v8i1:
5933 TType = IceType_v8i16;
5934 break;
5935 case IceType_v16i1:
5936 TType = IceType_v16i8;
5937 break;
5938 case IceType_v4f32:
5939 TType = IceType_v4i32;
5940 break;
5941 case IceType_v4i32:
5942 case IceType_v8i16:
5943 case IceType_v16i8:
5944 break;
5945 }
5946 auto *T = makeReg(TType);
5947 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
5948 auto *SrcTR = legalizeToReg(SrcT);
5949 auto *SrcFR = legalizeToReg(SrcF);
5950 _vbsl(T, SrcTR, SrcFR)->setDestRedefined();
5951 _mov(Dest, T);
5952 }
5953
lowerStore(const InstStore * Instr)5954 void TargetARM32::lowerStore(const InstStore *Instr) {
5955 Operand *Value = Instr->getData();
5956 Operand *Addr = Instr->getStoreAddress();
5957 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5958 Type Ty = NewAddr->getType();
5959
5960 if (Ty == IceType_i64) {
5961 Value = legalizeUndef(Value);
5962 Variable *ValueHi = legalizeToReg(hiOperand(Value));
5963 Variable *ValueLo = legalizeToReg(loOperand(Value));
5964 _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
5965 _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
5966 } else {
5967 Variable *ValueR = legalizeToReg(Value);
5968 _str(ValueR, NewAddr);
5969 }
5970 }
5971
doAddressOptStore()5972 void TargetARM32::doAddressOptStore() {
5973 Inst *Instr = iteratorToInst(Context.getCur());
5974 assert(llvm::isa<InstStore>(Instr));
5975 Operand *Src = Instr->getSrc(0);
5976 Operand *Addr = Instr->getSrc(1);
5977 if (OperandARM32Mem *Mem =
5978 formAddressingMode(Src->getType(), Func, Instr, Addr)) {
5979 Instr->setDeleted();
5980 Context.insert<InstStore>(Src, Mem);
5981 }
5982 }
5983
lowerSwitch(const InstSwitch * Instr)5984 void TargetARM32::lowerSwitch(const InstSwitch *Instr) {
5985 // This implements the most naive possible lowering.
5986   // cmp a,val[0]; beq label[0]; cmp a,val[1]; beq label[1]; ... b default
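  // For an i64 comparison value each case uses a pair of compares, roughly
  // (illustrative sketch):
  //   cmp   srcLo, #valLo
  //   cmpeq srcHi, #valHi
  //   beq   label[i]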
5987 Operand *Src0 = Instr->getComparison();
5988 SizeT NumCases = Instr->getNumCases();
5989 if (Src0->getType() == IceType_i64) {
5990 Src0 = legalizeUndef(Src0);
5991 Variable *Src0Lo = legalizeToReg(loOperand(Src0));
5992 Variable *Src0Hi = legalizeToReg(hiOperand(Src0));
5993 for (SizeT I = 0; I < NumCases; ++I) {
5994 Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
5995 Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
5996 ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex);
5997 ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex);
5998 _cmp(Src0Lo, ValueLo);
5999 _cmp(Src0Hi, ValueHi, CondARM32::EQ);
6000 _br(Instr->getLabel(I), CondARM32::EQ);
6001 }
6002 _br(Instr->getLabelDefault());
6003 return;
6004 }
6005
6006 Variable *Src0Var = legalizeToReg(Src0);
6007 // If Src0 is not an i32, we left shift it -- see the icmp lowering for the
6008 // reason.
6009 assert(Src0Var->mustHaveReg());
6010 const size_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
6011 assert(ShiftAmt < 32);
6012 if (ShiftAmt > 0) {
6013 Operand *ShAmtImm = shAmtImm(ShiftAmt);
6014 Variable *T = makeReg(IceType_i32);
6015 _lsl(T, Src0Var, ShAmtImm);
6016 Src0Var = T;
6017 }
6018
6019 for (SizeT I = 0; I < NumCases; ++I) {
6020 Operand *Value = Ctx->getConstantInt32(Instr->getValue(I) << ShiftAmt);
6021 Value = legalize(Value, Legal_Reg | Legal_Flex);
6022 _cmp(Src0Var, Value);
6023 _br(Instr->getLabel(I), CondARM32::EQ);
6024 }
6025 _br(Instr->getLabelDefault());
6026 }
6027
lowerBreakpoint(const InstBreakpoint * Instr)6028 void TargetARM32::lowerBreakpoint(const InstBreakpoint *Instr) {
6029 UnimplementedLoweringError(this, Instr);
6030 }
6031
lowerUnreachable(const InstUnreachable *)6032 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) {
6033 _trap();
6034 }
6035
prelowerPhis()6036 void TargetARM32::prelowerPhis() {
6037 CfgNode *Node = Context.getNode();
6038 PhiLowering::prelowerPhis32Bit(this, Node, Func);
6039 }
6040
makeVectorOfZeros(Type Ty,RegNumT RegNum)6041 Variable *TargetARM32::makeVectorOfZeros(Type Ty, RegNumT RegNum) {
6042 Variable *Reg = makeReg(Ty, RegNum);
6043 Context.insert<InstFakeDef>(Reg);
6044 assert(isVectorType(Ty));
6045 _veor(Reg, Reg, Reg);
6046 return Reg;
6047 }
6048
6049 // Helper for legalize() to emit the right code to lower an operand to a
6050 // register of the appropriate type.
copyToReg(Operand * Src,RegNumT RegNum)6051 Variable *TargetARM32::copyToReg(Operand *Src, RegNumT RegNum) {
6052 Type Ty = Src->getType();
6053 Variable *Reg = makeReg(Ty, RegNum);
6054 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) {
6055 _ldr(Reg, Mem);
6056 } else {
6057 _mov(Reg, Src);
6058 }
6059 return Reg;
6060 }
6061
6062 // TODO(jpp): remove unneeded else clauses in legalize.
legalize(Operand * From,LegalMask Allowed,RegNumT RegNum)6063 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
6064 RegNumT RegNum) {
6065 Type Ty = From->getType();
6066 // Assert that a physical register is allowed. To date, all calls to
6067 // legalize() allow a physical register. Legal_Flex converts registers to the
6068   // appropriate OperandARM32FlexReg type as needed.
6069 assert(Allowed & Legal_Reg);
6070
6071   // Copied verbatim from TargetX86Base<Machine>.
6072 if (RegNum.hasNoValue()) {
6073 if (Variable *Subst = getContext().availabilityGet(From)) {
6074 // At this point we know there is a potential substitution available.
6075 if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
6076 !Subst->hasReg()) {
6077 // At this point we know the substitution will have a register.
6078 if (From->getType() == Subst->getType()) {
6079 // At this point we know the substitution's register is compatible.
6080 return Subst;
6081 }
6082 }
6083 }
6084 }
6085
6086 // Go through the various types of operands: OperandARM32Mem,
6087 // OperandARM32Flex, Constant, and Variable. Given the above assertion, if
6088 // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we
6089 // can always copy to a register.
6090 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
6091 // Before doing anything with a Mem operand, we need to ensure that the
6092 // Base and Index components are in physical registers.
6093 Variable *Base = Mem->getBase();
6094 Variable *Index = Mem->getIndex();
6095 ConstantInteger32 *Offset = Mem->getOffset();
6096 assert(Index == nullptr || Offset == nullptr);
6097 Variable *RegBase = nullptr;
6098 Variable *RegIndex = nullptr;
6099 assert(Base);
6100 RegBase = llvm::cast<Variable>(
6101 legalize(Base, Legal_Reg | Legal_Rematerializable));
6102 assert(Ty < MemTraitsSize);
6103 if (Index) {
6104 assert(Offset == nullptr);
6105 assert(MemTraits[Ty].CanHaveIndex);
6106 RegIndex = legalizeToReg(Index);
6107 }
6108 if (Offset && Offset->getValue() != 0) {
6109 assert(Index == nullptr);
6110 static constexpr bool ZeroExt = false;
6111 assert(MemTraits[Ty].CanHaveImm);
6112 if (!OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
6113 llvm::report_fatal_error("Invalid memory offset.");
6114 }
6115 }
6116
6117 // Create a new operand if there was a change.
6118 if (Base != RegBase || Index != RegIndex) {
6119 // There is only a reg +/- reg or reg + imm form.
6120 // Figure out which to re-create.
6121 if (RegIndex) {
6122 Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex,
6123 Mem->getShiftOp(), Mem->getShiftAmt(),
6124 Mem->getAddrMode());
6125 } else {
6126 Mem = OperandARM32Mem::create(Func, Ty, RegBase, Offset,
6127 Mem->getAddrMode());
6128 }
6129 }
6130 if (Allowed & Legal_Mem) {
6131 From = Mem;
6132 } else {
6133 Variable *Reg = makeReg(Ty, RegNum);
6134 _ldr(Reg, Mem);
6135 From = Reg;
6136 }
6137 return From;
6138 }
6139
6140 if (auto *Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
6141 if (!(Allowed & Legal_Flex)) {
6142 if (auto *FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
6143 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
6144 From = FlexReg->getReg();
6145 // Fall through and let From be checked as a Variable below, where it
6146 // may or may not need a register.
6147 } else {
6148 return copyToReg(Flex, RegNum);
6149 }
6150 } else {
6151 return copyToReg(Flex, RegNum);
6152 }
6153 } else {
6154 return From;
6155 }
6156 }
6157
6158 if (llvm::isa<Constant>(From)) {
6159 if (llvm::isa<ConstantUndef>(From)) {
6160 From = legalizeUndef(From, RegNum);
6161 if (isVectorType(Ty))
6162 return From;
6163 }
6164 // There should be no constants of vector type (other than undef).
6165 assert(!isVectorType(Ty));
6166 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
6167 uint32_t RotateAmt;
6168 uint32_t Immed_8;
6169 uint32_t Value = static_cast<uint32_t>(C32->getValue());
6170 if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
6171 // The immediate can be encoded as a Flex immediate. We may return the
6172 // Flex operand if the caller has Allow'ed it.
6173 auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
6174 const bool CanBeFlex = Allowed & Legal_Flex;
6175 if (CanBeFlex)
6176 return OpF;
6177 return copyToReg(OpF, RegNum);
6178 } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt,
6179 &Immed_8)) {
6180 // Even though the immediate can't be encoded as a Flex operand, its
6181 // inverted bit pattern can, thus we use ARM's mvn to load the 32-bit
6182 // constant with a single instruction.
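        // For example (illustrative): 0xFFFFFFFB is not encodable as a rotated
        // 8-bit immediate, but its inverse 0x00000004 is, so the constant can
        // be materialized as "mvn reg, #4".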
6183 auto *InvOpF =
6184 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
6185 Variable *Reg = makeReg(Ty, RegNum);
6186 _mvn(Reg, InvOpF);
6187 return Reg;
6188 } else {
6189 // Do a movw/movt to a register.
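        // For example (illustrative): 0x12345678 would be materialized roughly
        // as "movw reg, #0x5678" followed by "movt reg, #0x1234".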
6190 Variable *Reg = makeReg(Ty, RegNum);
6191 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
6192 _movw(Reg,
6193 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
6194 if (UpperBits != 0) {
6195 _movt(Reg, Ctx->getConstantInt32(UpperBits));
6196 }
6197 return Reg;
6198 }
6199 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
6200 Variable *Reg = makeReg(Ty, RegNum);
6201 _movw(Reg, C);
6202 _movt(Reg, C);
6203 return Reg;
6204 } else {
6205 assert(isScalarFloatingType(Ty));
6206 uint32_t ModifiedImm;
6207 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
6208 Variable *T = makeReg(Ty, RegNum);
6209 _mov(T,
6210 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
6211 return T;
6212 }
6213
6214 if (Ty == IceType_f64 && isFloatingPointZero(From)) {
6215 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32
6216 // because ARM does not have a veor instruction with S registers.
6217 Variable *T = makeReg(IceType_f64, RegNum);
6218 Context.insert<InstFakeDef>(T);
6219 _veor(T, T, T);
6220 return T;
6221 }
6222
6223 // Load floats/doubles from literal pool.
6224 auto *CFrom = llvm::cast<Constant>(From);
6225 assert(CFrom->getShouldBePooled());
6226 Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
6227 Variable *BaseReg = makeReg(getPointerType());
6228 _movw(BaseReg, Offset);
6229 _movt(BaseReg, Offset);
6230 From = formMemoryOperand(BaseReg, Ty);
6231 return copyToReg(From, RegNum);
6232 }
6233 }
6234
6235 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
6236 if (Var->isRematerializable()) {
6237 if (Allowed & Legal_Rematerializable) {
6238 return From;
6239 }
6240
6241 Variable *T = makeReg(Var->getType(), RegNum);
6242 _mov(T, Var);
6243 return T;
6244 }
6245 // Check if the variable is guaranteed a physical register. This can happen
6246 // either when the variable is pre-colored or when it is assigned infinite
6247 // weight.
6248 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
6249 // We need a new physical register for the operand if:
6250 // Mem is not allowed and Var isn't guaranteed a physical
6251 // register, or
6252 // RegNum is required and Var->getRegNum() doesn't match.
6253 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
6254 (RegNum.hasValue() && (RegNum != Var->getRegNum()))) {
6255 From = copyToReg(From, RegNum);
6256 }
6257 return From;
6258 }
6259 llvm::report_fatal_error("Unhandled operand kind in legalize()");
6260
6261 return From;
6262 }
6263
6264 /// Provide a trivial wrapper to legalize() for this common usage.
legalizeToReg(Operand * From,RegNumT RegNum)6265 Variable *TargetARM32::legalizeToReg(Operand *From, RegNumT RegNum) {
6266 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
6267 }
6268
6269 /// Legalize undef values to concrete values.
legalizeUndef(Operand * From,RegNumT RegNum)6270 Operand *TargetARM32::legalizeUndef(Operand *From, RegNumT RegNum) {
6271 Type Ty = From->getType();
6272 if (llvm::isa<ConstantUndef>(From)) {
6273 // Lower undefs to zero. Another option is to lower undefs to an
6274 // uninitialized register; however, using an uninitialized register results
6275 // in less predictable code.
6276 //
6277 // If in the future the implementation is changed to lower undef values to
6278 // uninitialized registers, a FakeDef will be needed:
6279 // Context.insert(InstFakeDef::create(Func, Reg)); This is in order to
6280 // ensure that the live range of Reg is not overestimated. If the constant
6281 // being lowered is a 64 bit value, then the result should be split and the
6282 // lo and hi components will need to go in uninitialized registers.
6283 if (isVectorType(Ty))
6284 return makeVectorOfZeros(Ty, RegNum);
6285 return Ctx->getConstantZero(Ty);
6286 }
6287 return From;
6288 }
6289
formMemoryOperand(Operand * Operand,Type Ty)6290 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
6291 auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
6292 // It may be the case that address mode optimization already creates an
6293 // OperandARM32Mem, so in that case it wouldn't need another level of
6294 // transformation.
6295 if (Mem) {
6296 return llvm::cast<OperandARM32Mem>(legalize(Mem));
6297 }
6298 // If we didn't do address mode optimization, then we only have a
6299 // base/offset to work with. ARM always requires a base register, so
6300 // just use that to hold the operand.
6301 auto *Base = llvm::cast<Variable>(
6302 legalize(Operand, Legal_Reg | Legal_Rematerializable));
6303 return OperandARM32Mem::create(
6304 Func, Ty, Base,
6305 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
6306 }
6307
makeI64RegPair()6308 Variable64On32 *TargetARM32::makeI64RegPair() {
6309 Variable64On32 *Reg =
6310 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
6311 Reg->setMustHaveReg();
6312 Reg->initHiLo(Func);
6313 Reg->getLo()->setMustNotHaveReg();
6314 Reg->getHi()->setMustNotHaveReg();
6315 return Reg;
6316 }
6317
makeReg(Type Type,RegNumT RegNum)6318 Variable *TargetARM32::makeReg(Type Type, RegNumT RegNum) {
6319 // There aren't any 64-bit integer registers for ARM32.
6320 assert(Type != IceType_i64);
6321 assert(AllowTemporaryWithNoReg || RegNum.hasValue());
6322 Variable *Reg = Func->makeVariable(Type);
6323 if (RegNum.hasValue())
6324 Reg->setRegNum(RegNum);
6325 else
6326 Reg->setMustHaveReg();
6327 return Reg;
6328 }
6329
alignRegisterPow2(Variable * Reg,uint32_t Align,RegNumT TmpRegNum)6330 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align,
6331 RegNumT TmpRegNum) {
6332 assert(llvm::isPowerOf2_32(Align));
6333 uint32_t RotateAmt;
6334 uint32_t Immed_8;
6335 Operand *Mask;
6336 // Use AND or BIC to mask off the bits, depending on which immediate fits (if
6337 // it fits at all). Assume Align is usually small, in which case BIC works
6338 // better. Thus, this rounds down to the alignment.
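  // For example (illustrative): with Align == 8 the mask Align - 1 == 7 fits a
  // flex immediate, so "bic Reg, Reg, #7" rounds Reg down to a multiple of 8.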
6339 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
6340 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex,
6341 TmpRegNum);
6342 _bic(Reg, Reg, Mask);
6343 } else {
6344 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex,
6345 TmpRegNum);
6346 _and(Reg, Reg, Mask);
6347 }
6348 }
6349
postLower()6350 void TargetARM32::postLower() {
6351 if (Func->getOptLevel() == Opt_m1)
6352 return;
6353 markRedefinitions();
6354 Context.availabilityUpdate();
6355 }
6356
emit(const ConstantInteger32 * C) const6357 void TargetARM32::emit(const ConstantInteger32 *C) const {
6358 if (!BuildDefs::dump())
6359 return;
6360 Ostream &Str = Ctx->getStrEmit();
6361 Str << "#" << C->getValue();
6362 }
6363
emit(const ConstantInteger64 *) const6364 void TargetARM32::emit(const ConstantInteger64 *) const {
6365 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
6366 }
6367
emit(const ConstantFloat * C) const6368 void TargetARM32::emit(const ConstantFloat *C) const {
6369 (void)C;
6370 UnimplementedError(getFlags());
6371 }
6372
emit(const ConstantDouble * C) const6373 void TargetARM32::emit(const ConstantDouble *C) const {
6374 (void)C;
6375 UnimplementedError(getFlags());
6376 }
6377
emit(const ConstantUndef *) const6378 void TargetARM32::emit(const ConstantUndef *) const {
6379 llvm::report_fatal_error("undef value encountered by emitter.");
6380 }
6381
emit(const ConstantRelocatable * C) const6382 void TargetARM32::emit(const ConstantRelocatable *C) const {
6383 if (!BuildDefs::dump())
6384 return;
6385 Ostream &Str = Ctx->getStrEmit();
6386 Str << "#";
6387 emitWithoutPrefix(C);
6388 }
6389
void TargetARM32::lowerInt1ForSelect(Variable *Dest, Operand *Boolean,
                                     Operand *TrueValue, Operand *FalseValue) {
  Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);

  assert(Boolean->getType() == IceType_i1);

  bool NeedsAnd1 = false;
  if (TrueValue->getType() == IceType_i1) {
    assert(FalseValue->getType() == IceType_i1);

    Variable *TrueValueV = Func->makeVariable(IceType_i1);
    SafeBoolChain Src0Safe = lowerInt1(TrueValueV, TrueValue);
    TrueValue = TrueValueV;

    Variable *FalseValueV = Func->makeVariable(IceType_i1);
    SafeBoolChain Src1Safe = lowerInt1(FalseValueV, FalseValue);
    FalseValue = FalseValueV;

    NeedsAnd1 = Src0Safe == SBC_No || Src1Safe == SBC_No;
  }

  Variable *DestLo = (Dest->getType() == IceType_i64)
                         ? llvm::cast<Variable>(loOperand(Dest))
                         : Dest;
  Variable *DestHi = (Dest->getType() == IceType_i64)
                         ? llvm::cast<Variable>(hiOperand(Dest))
                         : nullptr;
  Operand *FalseValueLo = (FalseValue->getType() == IceType_i64)
                              ? loOperand(FalseValue)
                              : FalseValue;
  Operand *FalseValueHi =
      (FalseValue->getType() == IceType_i64) ? hiOperand(FalseValue) : nullptr;

  Operand *TrueValueLo =
      (TrueValue->getType() == IceType_i64) ? loOperand(TrueValue) : TrueValue;
  Operand *TrueValueHi =
      (TrueValue->getType() == IceType_i64) ? hiOperand(TrueValue) : nullptr;

  Variable *T_Lo = makeReg(DestLo->getType());
  Variable *T_Hi = (DestHi == nullptr) ? nullptr : makeReg(DestHi->getType());

  _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex));
  if (DestHi) {
    _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex));
  }
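  // At this point T_Lo/T_Hi hold FalseValue; the predicated moves below
  // overwrite them with TrueValue whenever the select condition holds.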

  CondWhenTrue Cond(CondARM32::kNone);
  // FlagsWereSet is used to determine whether Boolean was folded or not. If
  // not, an explicit _tst instruction is added below.
  bool FlagsWereSet = false;
  if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
    switch (Producer->getKind()) {
    default:
      llvm::report_fatal_error("Unexpected producer.");
    case Inst::Icmp: {
      Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
      FlagsWereSet = true;
    } break;
    case Inst::Fcmp: {
      Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
      FlagsWereSet = true;
    } break;
    case Inst::Cast: {
      const auto *CastProducer = llvm::cast<InstCast>(Producer);
      assert(CastProducer->getCastKind() == InstCast::Trunc);
      Boolean = CastProducer->getSrc(0);
      // No flags were set, so a _tst(Src, 1) will be emitted below. Don't
      // bother legalizing Src to a Reg because it will be legalized before
      // emitting the tst instruction.
      FlagsWereSet = false;
    } break;
    case Inst::Arithmetic: {
      // This is a special case: we eagerly assumed Producer could be folded,
      // but in reality, it can't. No reason to panic: we just lower it using
      // the regular lowerArithmetic helper.
      const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
      lowerArithmetic(ArithProducer);
      Boolean = ArithProducer->getDest();
      // No flags were set, so a _tst(Dest, 1) will be emitted below. Don't
      // bother legalizing Dest to a Reg because it will be legalized before
      // emitting the tst instruction.
      FlagsWereSet = false;
    } break;
    }
  }

  if (!FlagsWereSet) {
    // No flags have been set, so emit a tst Boolean, 1.
    Variable *Src = legalizeToReg(Boolean);
    _tst(Src, _1);
    Cond = CondWhenTrue(CondARM32::NE); // i.e., CondARM32::NotZero.
  }

  if (Cond.WhenTrue0 == CondARM32::kNone) {
    assert(Cond.WhenTrue1 == CondARM32::kNone);
  } else {
    _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
                   Cond.WhenTrue0);
    if (DestHi) {
      _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
                     Cond.WhenTrue0);
    }
  }

  if (Cond.WhenTrue1 != CondARM32::kNone) {
    _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
                   Cond.WhenTrue1);
    if (DestHi) {
      _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
                     Cond.WhenTrue1);
    }
  }

  if (NeedsAnd1) {
    // We lowered something that is unsafe (i.e., that cannot be proven to be
    // exactly zero or one). Truncate the result back to i1.
    _and(T_Lo, T_Lo, _1);
  }

  _mov(DestLo, T_Lo);
  if (DestHi) {
    _mov(DestHi, T_Hi);
  }
}

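// Lowers an i1 computation into Dest. Returns SBC_Yes when the result is
// guaranteed to be 0 or 1, and SBC_No when the upper bits may hold garbage
// (e.g., the value came from a trunc), in which case callers must mask the
// result with 1 before relying on it.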
TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest,
                                                  Operand *Boolean) {
  assert(Boolean->getType() == IceType_i1);
  Variable *T = makeReg(IceType_i1);
  Operand *_0 =
      legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex);
  Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);

  SafeBoolChain Safe = SBC_Yes;
  if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
    switch (Producer->getKind()) {
    default:
      llvm::report_fatal_error("Unexpected producer.");
    case Inst::Icmp: {
      _mov(T, _0);
      CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
      assert(Cond.WhenTrue0 != CondARM32::AL);
      assert(Cond.WhenTrue0 != CondARM32::kNone);
      assert(Cond.WhenTrue1 == CondARM32::kNone);
      _mov_redefined(T, _1, Cond.WhenTrue0);
    } break;
    case Inst::Fcmp: {
      _mov(T, _0);
      Inst *MovZero = Context.getLastInserted();
      CondWhenTrue Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
      if (Cond.WhenTrue0 == CondARM32::AL) {
        assert(Cond.WhenTrue1 == CondARM32::kNone);
        MovZero->setDeleted();
        _mov(T, _1);
      } else if (Cond.WhenTrue0 != CondARM32::kNone) {
        _mov_redefined(T, _1, Cond.WhenTrue0);
      }
      if (Cond.WhenTrue1 != CondARM32::kNone) {
        assert(Cond.WhenTrue0 != CondARM32::kNone);
        assert(Cond.WhenTrue0 != CondARM32::AL);
        _mov_redefined(T, _1, Cond.WhenTrue1);
      }
    } break;
    case Inst::Cast: {
      const auto *CastProducer = llvm::cast<InstCast>(Producer);
      assert(CastProducer->getCastKind() == InstCast::Trunc);
      Operand *Src = CastProducer->getSrc(0);
      if (Src->getType() == IceType_i64)
        Src = loOperand(Src);
      _mov(T, legalize(Src, Legal_Reg | Legal_Flex));
      Safe = SBC_No;
    } break;
    case Inst::Arithmetic: {
      const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
      Safe = lowerInt1Arithmetic(ArithProducer);
      _mov(T, ArithProducer->getDest());
    } break;
    }
  } else {
    _mov(T, legalize(Boolean, Legal_Reg | Legal_Flex));
  }

  _mov(Dest, T);
  return Safe;
}

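// The BoolFolding/FpFolding/IntFolding helpers below describe which
// instructions may act as folded producers (their result is consumed directly
// by the lowering of another instruction) and which instructions are allowed
// to consume them.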
namespace {
namespace BoolFolding {
bool shouldTrackProducer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Icmp:
  case Inst::Fcmp:
    return true;
  case Inst::Cast: {
    switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
    default:
      return false;
    case InstCast::Trunc:
      return true;
    }
  }
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::And:
    case InstArithmetic::Or:
      return true;
    }
  }
  }
}

bool isValidConsumer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Br:
    return true;
  case Inst::Select:
    return !isVectorType(Instr.getDest()->getType());
  case Inst::Cast: {
    switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
    default:
      return false;
    case InstCast::Sext:
      return !isVectorType(Instr.getDest()->getType());
    case InstCast::Zext:
      return !isVectorType(Instr.getDest()->getType());
    }
  }
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::And:
      return !isVectorType(Instr.getDest()->getType());
    case InstArithmetic::Or:
      return !isVectorType(Instr.getDest()->getType());
    }
  }
  }
}
} // end of namespace BoolFolding

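// FpFolding tracks scalar fmul producers whose single use is an fadd/fsub
// consumer, presumably so the pair can be lowered as a fused
// multiply-accumulate (vmla/vmls).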
namespace FpFolding {
bool shouldTrackProducer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::Fmul:
      return true;
    }
  }
  }
}

bool isValidConsumer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
      return true;
    }
  }
  }
}
} // end of namespace FpFolding

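// IntFolding is the integer analogue: a 32-bit mul feeding a single add/sub
// consumer, presumably so the pair can be lowered as mla/mls.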
namespace IntFolding {
bool shouldTrackProducer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::Mul:
      return true;
    }
  }
  }
}

bool isValidConsumer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::Add:
    case InstArithmetic::Sub:
      return true;
    }
  }
  }
}
} // end of namespace IntFolding
} // end of anonymous namespace

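// Scans a node and records which single-use, block-local computations can be
// folded into their consumer: candidates are recorded when their dest is
// produced by a whitelisted instruction, then dropped again if any use is not
// a valid consumer, if there is more than one use, or if the dest is live-out.
// Surviving producers are marked dead so regular lowering skips them and they
// are only materialized at their consumer.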
void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) {
  for (Inst &Instr : Node->getInsts()) {
    // Check whether Instr is a valid producer.
    Variable *Dest = Instr.getDest();
    if (!Instr.isDeleted() // only consider non-deleted instructions; and
        && Dest // only instructions with an actual dest var; and
        && Dest->getType() == IceType_i1 // only bool-type dest vars; and
        && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, IceType_i1));
    }
    if (!Instr.isDeleted() // only consider non-deleted instructions; and
        && Dest // only instructions with an actual dest var; and
        && isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and
        && FpFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, Dest->getType()));
    }
    if (!Instr.isDeleted() // only consider non-deleted instructions; and
        && Dest // only instructions with an actual dest var; and
        && Dest->getType() == IceType_i32 // i32 only dest vars; and
        && IntFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, IceType_i32));
    }
    // Check each src variable against the map.
    FOREACH_VAR_IN_INST(Var, Instr) {
      SizeT VarNum = Var->getIndex();
      auto ComputationIter = KnownComputations.find(VarNum);
      if (ComputationIter == KnownComputations.end()) {
        continue;
      }

      ++ComputationIter->second.NumUses;
      switch (ComputationIter->second.ComputationType) {
      default:
        KnownComputations.erase(VarNum);
        continue;
      case IceType_i1:
        if (!BoolFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      case IceType_i32:
        if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      case IceType_f32:
      case IceType_f64:
        if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      }

      if (Instr.isLastUse(Var)) {
        ComputationIter->second.IsLiveOut = false;
      }
    }
  }

  for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
       Iter != End;) {
    // Disable the folding if its dest may be live beyond this block.
    if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
      Iter = KnownComputations.erase(Iter);
      continue;
    }

    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    Iter->second.Instr->setDead();
    ++Iter;
  }
}

TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}

void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
                                   const std::string &SectionSuffix) {
  const bool IsPIC = false;
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix,
                             IsPIC);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker _(Ctx);
    for (const VariableDeclaration *Var : Vars) {
      if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
        emitGlobal(*Var, SectionSuffix);
      }
    }
  } break;
  }
}

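// The traits below describe, per floating-point type, the assembler directive
// and the bit pattern used when emitting pooled constants as text.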
namespace {
template <typename T> struct ConstantPoolEmitterTraits;

static_assert(sizeof(uint64_t) == 8,
              "uint64_t is supposed to be 8 bytes wide.");

// TODO(jpp): implement the following when implementing constant randomization:
// * template <> struct ConstantPoolEmitterTraits<uint8_t>
// * template <> struct ConstantPoolEmitterTraits<uint16_t>
// * template <> struct ConstantPoolEmitterTraits<uint32_t>
template <> struct ConstantPoolEmitterTraits<float> {
  using ConstantType = ConstantFloat;
  static constexpr Type IceType = IceType_f32;
  // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
  // about them being constexpr.
  static const char AsmTag[];
  static const char TypeName[];
  static uint64_t bitcastToUint64(float Value) {
    static_assert(sizeof(Value) == sizeof(uint32_t),
                  "Float should be 4 bytes.");
    const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
    return static_cast<uint64_t>(IntValue);
  }
};
const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".long";
const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";

template <> struct ConstantPoolEmitterTraits<double> {
  using ConstantType = ConstantDouble;
  static constexpr Type IceType = IceType_f64;
  static const char AsmTag[];
  static const char TypeName[];
  static uint64_t bitcastToUint64(double Value) {
    static_assert(sizeof(double) == sizeof(uint64_t),
                  "Double should be 8 bytes.");
    return Utils::bitCopy<uint64_t>(Value);
  }
};
const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";

template <typename T>
void emitConstant(
    Ostream &Str,
    const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
  using Traits = ConstantPoolEmitterTraits<T>;
  Str << Const->getLabelName();
  Str << ":\n\t" << Traits::AsmTag << "\t0x";
  T Value = Const->getValue();
  Str.write_hex(Traits::bitcastToUint64(Value));
  Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
}
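// For illustration (the actual label text comes from getLabelName()), a pooled
// f32 constant 1.0 is emitted roughly as:
//   <label>:
//       .long   0x3f800000   /* f32 1 */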

template <typename T> void emitConstantPool(GlobalContext *Ctx) {
  if (!BuildDefs::dump()) {
    return;
  }

  using Traits = ConstantPoolEmitterTraits<T>;
  static constexpr size_t MinimumAlignment = 4;
  SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
  assert((Align % 4) == 0 && "Constants should be aligned");
  Ostream &Str = Ctx->getStrEmit();
  ConstantList Pool = Ctx->getConstantPool(Traits::IceType);

  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
      << "\n"
      << "\t.align\t" << Align << "\n";

  for (Constant *C : Pool) {
    if (!C->getShouldBePooled()) {
      continue;
    }

    emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
  }
}
} // end of anonymous namespace

void TargetDataARM32::lowerConstants() {
  if (getFlags().getDisableTranslation())
    return;
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeConstantPool<ConstantFloat>(IceType_f32);
    Writer->writeConstantPool<ConstantDouble>(IceType_f64);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker _(Ctx);
    emitConstantPool<float>(Ctx);
    emitConstantPool<double>(Ctx);
    break;
  }
  }
}

void TargetDataARM32::lowerJumpTables() {
  if (getFlags().getDisableTranslation())
    return;
  switch (getFlags().getOutFileType()) {
  case FT_Elf:
    if (!Ctx->getJumpTables().empty()) {
      llvm::report_fatal_error("ARM32 does not support jump tables yet.");
    }
    break;
  case FT_Asm:
    // Already emitted from Cfg.
    break;
  case FT_Iasm: {
    // TODO(kschimpf): Fill this in when we get more information.
    break;
  }
  }
}

TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx), CPUFeatures(getFlags()) {}

void TargetHeaderARM32::lower() {
  OstreamLocker _(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  Str << ".syntax unified\n";
  // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
  // "Addenda to, and Errata in, the ABI for the ARM architecture"
  // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
  //
  // Tag_conformance should be emitted first, in a file-scope sub-subsection of
  // the first public subsection of the attributes.
  Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
  // Chromebooks are at least Cortex-A15, but we target Cortex-A9 for broader
  // compatibility. For some reason, the LLVM ARM asm parser has the .cpu
  // directive override the mattr specified on the command line, so to test
  // hwdiv we need to set the .cpu directive higher (we can't just rely on
  // --mattr=...).
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".cpu cortex-a15\n";
  } else {
    Str << ".cpu cortex-a9\n";
  }
6951 Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
6952 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
6953 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
6954 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
6955 Str << ".fpu neon\n"
6956 << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
6957 << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
6958 << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
6959 << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
6960 << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
6961 << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
6962 << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
6963 << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
6964 << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
6965 << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
6966 << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
6967 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
6968 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
6969 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
6970 }
6971 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
6972 // However, for compatibility with current NaCl LLVM, don't claim that.
6973 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
6974 }
6975
6976 SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
6977 SmallBitVector TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
6978 SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
6979
6980 } // end of namespace ARM32
6981 } // end of namespace Ice
6982