1*9880d681SAndroid Build Coastguard Worker //===-- AArch64A57FPLoadBalancing.cpp - Balance FP ops statically on A57---===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker // The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker // For best-case performance on Cortex-A57, we should try to use a balanced
10*9880d681SAndroid Build Coastguard Worker // mix of odd and even D-registers when performing a critical sequence of
11*9880d681SAndroid Build Coastguard Worker // independent, non-quadword FP/ASIMD floating-point multiply or
12*9880d681SAndroid Build Coastguard Worker // multiply-accumulate operations.
13*9880d681SAndroid Build Coastguard Worker //
14*9880d681SAndroid Build Coastguard Worker // This pass attempts to detect situations where the register allocation may
15*9880d681SAndroid Build Coastguard Worker // adversely affect this load balancing and to change the registers used so as
16*9880d681SAndroid Build Coastguard Worker // to better utilize the CPU.
17*9880d681SAndroid Build Coastguard Worker //
18*9880d681SAndroid Build Coastguard Worker // Ideally we'd just take each multiply or multiply-accumulate in turn and
19*9880d681SAndroid Build Coastguard Worker // allocate it alternating even or odd registers. However, multiply-accumulates
20*9880d681SAndroid Build Coastguard Worker // are most efficiently performed in the same functional unit as their
21*9880d681SAndroid Build Coastguard Worker // accumulation operand. Therefore this pass tries to find maximal sequences
22*9880d681SAndroid Build Coastguard Worker // ("Chains") of multiply-accumulates linked via their accumulation operand,
23*9880d681SAndroid Build Coastguard Worker // and assign them all the same "color" (oddness/evenness).
24*9880d681SAndroid Build Coastguard Worker //
25*9880d681SAndroid Build Coastguard Worker // This optimization affects S-register and D-register floating point
26*9880d681SAndroid Build Coastguard Worker // multiplies and FMADD/FMAs, as well as vector (floating point only) muls and
27*9880d681SAndroid Build Coastguard Worker // FMADD/FMA. Q register instructions (and 128-bit vector instructions) are
28*9880d681SAndroid Build Coastguard Worker // not affected.
29*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
30*9880d681SAndroid Build Coastguard Worker
31*9880d681SAndroid Build Coastguard Worker #include "AArch64.h"
32*9880d681SAndroid Build Coastguard Worker #include "AArch64InstrInfo.h"
33*9880d681SAndroid Build Coastguard Worker #include "AArch64Subtarget.h"
34*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/BitVector.h"
35*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/EquivalenceClasses.h"
36*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunction.h"
37*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionPass.h"
38*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineInstr.h"
39*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineInstrBuilder.h"
40*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineRegisterInfo.h"
41*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/RegisterClassInfo.h"
42*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/RegisterScavenging.h"
43*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/CommandLine.h"
44*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
45*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/raw_ostream.h"
46*9880d681SAndroid Build Coastguard Worker using namespace llvm;
47*9880d681SAndroid Build Coastguard Worker
48*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "aarch64-a57-fp-load-balancing"
49*9880d681SAndroid Build Coastguard Worker
50*9880d681SAndroid Build Coastguard Worker // Enforce the algorithm to use the scavenged register even when the original
51*9880d681SAndroid Build Coastguard Worker // destination register is the correct color. Used for testing.
52*9880d681SAndroid Build Coastguard Worker static cl::opt<bool>
53*9880d681SAndroid Build Coastguard Worker TransformAll("aarch64-a57-fp-load-balancing-force-all",
54*9880d681SAndroid Build Coastguard Worker cl::desc("Always modify dest registers regardless of color"),
55*9880d681SAndroid Build Coastguard Worker cl::init(false), cl::Hidden);
56*9880d681SAndroid Build Coastguard Worker
57*9880d681SAndroid Build Coastguard Worker // Never use the balance information obtained from chains - return a specific
58*9880d681SAndroid Build Coastguard Worker // color always. Used for testing.
59*9880d681SAndroid Build Coastguard Worker static cl::opt<unsigned>
60*9880d681SAndroid Build Coastguard Worker OverrideBalance("aarch64-a57-fp-load-balancing-override",
61*9880d681SAndroid Build Coastguard Worker cl::desc("Ignore balance information, always return "
62*9880d681SAndroid Build Coastguard Worker "(1: Even, 2: Odd)."),
63*9880d681SAndroid Build Coastguard Worker cl::init(0), cl::Hidden);
64*9880d681SAndroid Build Coastguard Worker
65*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
66*9880d681SAndroid Build Coastguard Worker // Helper functions
67*9880d681SAndroid Build Coastguard Worker
68*9880d681SAndroid Build Coastguard Worker // Is the instruction a type of multiply on 64-bit (or 32-bit) FPRs?
isMul(MachineInstr * MI)69*9880d681SAndroid Build Coastguard Worker static bool isMul(MachineInstr *MI) {
70*9880d681SAndroid Build Coastguard Worker switch (MI->getOpcode()) {
71*9880d681SAndroid Build Coastguard Worker case AArch64::FMULSrr:
72*9880d681SAndroid Build Coastguard Worker case AArch64::FNMULSrr:
73*9880d681SAndroid Build Coastguard Worker case AArch64::FMULDrr:
74*9880d681SAndroid Build Coastguard Worker case AArch64::FNMULDrr:
75*9880d681SAndroid Build Coastguard Worker return true;
76*9880d681SAndroid Build Coastguard Worker default:
77*9880d681SAndroid Build Coastguard Worker return false;
78*9880d681SAndroid Build Coastguard Worker }
79*9880d681SAndroid Build Coastguard Worker }
80*9880d681SAndroid Build Coastguard Worker
81*9880d681SAndroid Build Coastguard Worker // Is the instruction a type of FP multiply-accumulate on 64-bit (or 32-bit) FPRs?
isMla(MachineInstr * MI)82*9880d681SAndroid Build Coastguard Worker static bool isMla(MachineInstr *MI) {
83*9880d681SAndroid Build Coastguard Worker switch (MI->getOpcode()) {
84*9880d681SAndroid Build Coastguard Worker case AArch64::FMSUBSrrr:
85*9880d681SAndroid Build Coastguard Worker case AArch64::FMADDSrrr:
86*9880d681SAndroid Build Coastguard Worker case AArch64::FNMSUBSrrr:
87*9880d681SAndroid Build Coastguard Worker case AArch64::FNMADDSrrr:
88*9880d681SAndroid Build Coastguard Worker case AArch64::FMSUBDrrr:
89*9880d681SAndroid Build Coastguard Worker case AArch64::FMADDDrrr:
90*9880d681SAndroid Build Coastguard Worker case AArch64::FNMSUBDrrr:
91*9880d681SAndroid Build Coastguard Worker case AArch64::FNMADDDrrr:
92*9880d681SAndroid Build Coastguard Worker return true;
93*9880d681SAndroid Build Coastguard Worker default:
94*9880d681SAndroid Build Coastguard Worker return false;
95*9880d681SAndroid Build Coastguard Worker }
96*9880d681SAndroid Build Coastguard Worker }
97*9880d681SAndroid Build Coastguard Worker
98*9880d681SAndroid Build Coastguard Worker namespace llvm {
99*9880d681SAndroid Build Coastguard Worker static void initializeAArch64A57FPLoadBalancingPass(PassRegistry &);
100*9880d681SAndroid Build Coastguard Worker }
101*9880d681SAndroid Build Coastguard Worker
102*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
103*9880d681SAndroid Build Coastguard Worker
104*9880d681SAndroid Build Coastguard Worker namespace {
105*9880d681SAndroid Build Coastguard Worker /// A "color", which is either even or odd. Yes, these aren't really colors
106*9880d681SAndroid Build Coastguard Worker /// but the algorithm is conceptually doing two-color graph coloring.
107*9880d681SAndroid Build Coastguard Worker enum class Color { Even, Odd };
108*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
109*9880d681SAndroid Build Coastguard Worker static const char *ColorNames[2] = { "Even", "Odd" };
110*9880d681SAndroid Build Coastguard Worker #endif
111*9880d681SAndroid Build Coastguard Worker
112*9880d681SAndroid Build Coastguard Worker class Chain;
113*9880d681SAndroid Build Coastguard Worker
114*9880d681SAndroid Build Coastguard Worker class AArch64A57FPLoadBalancing : public MachineFunctionPass {
115*9880d681SAndroid Build Coastguard Worker MachineRegisterInfo *MRI;
116*9880d681SAndroid Build Coastguard Worker const TargetRegisterInfo *TRI;
117*9880d681SAndroid Build Coastguard Worker RegisterClassInfo RCI;
118*9880d681SAndroid Build Coastguard Worker
119*9880d681SAndroid Build Coastguard Worker public:
120*9880d681SAndroid Build Coastguard Worker static char ID;
AArch64A57FPLoadBalancing()121*9880d681SAndroid Build Coastguard Worker explicit AArch64A57FPLoadBalancing() : MachineFunctionPass(ID) {
122*9880d681SAndroid Build Coastguard Worker initializeAArch64A57FPLoadBalancingPass(*PassRegistry::getPassRegistry());
123*9880d681SAndroid Build Coastguard Worker }
124*9880d681SAndroid Build Coastguard Worker
125*9880d681SAndroid Build Coastguard Worker bool runOnMachineFunction(MachineFunction &F) override;
126*9880d681SAndroid Build Coastguard Worker
getRequiredProperties() const127*9880d681SAndroid Build Coastguard Worker MachineFunctionProperties getRequiredProperties() const override {
128*9880d681SAndroid Build Coastguard Worker return MachineFunctionProperties().set(
129*9880d681SAndroid Build Coastguard Worker MachineFunctionProperties::Property::AllVRegsAllocated);
130*9880d681SAndroid Build Coastguard Worker }
131*9880d681SAndroid Build Coastguard Worker
getPassName() const132*9880d681SAndroid Build Coastguard Worker const char *getPassName() const override {
133*9880d681SAndroid Build Coastguard Worker return "A57 FP Anti-dependency breaker";
134*9880d681SAndroid Build Coastguard Worker }
135*9880d681SAndroid Build Coastguard Worker
getAnalysisUsage(AnalysisUsage & AU) const136*9880d681SAndroid Build Coastguard Worker void getAnalysisUsage(AnalysisUsage &AU) const override {
137*9880d681SAndroid Build Coastguard Worker AU.setPreservesCFG();
138*9880d681SAndroid Build Coastguard Worker MachineFunctionPass::getAnalysisUsage(AU);
139*9880d681SAndroid Build Coastguard Worker }
140*9880d681SAndroid Build Coastguard Worker
141*9880d681SAndroid Build Coastguard Worker private:
142*9880d681SAndroid Build Coastguard Worker bool runOnBasicBlock(MachineBasicBlock &MBB);
143*9880d681SAndroid Build Coastguard Worker bool colorChainSet(std::vector<Chain*> GV, MachineBasicBlock &MBB,
144*9880d681SAndroid Build Coastguard Worker int &Balance);
145*9880d681SAndroid Build Coastguard Worker bool colorChain(Chain *G, Color C, MachineBasicBlock &MBB);
146*9880d681SAndroid Build Coastguard Worker int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB);
147*9880d681SAndroid Build Coastguard Worker void scanInstruction(MachineInstr *MI, unsigned Idx,
148*9880d681SAndroid Build Coastguard Worker std::map<unsigned, Chain*> &Active,
149*9880d681SAndroid Build Coastguard Worker std::vector<std::unique_ptr<Chain>> &AllChains);
150*9880d681SAndroid Build Coastguard Worker void maybeKillChain(MachineOperand &MO, unsigned Idx,
151*9880d681SAndroid Build Coastguard Worker std::map<unsigned, Chain*> &RegChains);
152*9880d681SAndroid Build Coastguard Worker Color getColor(unsigned Register);
153*9880d681SAndroid Build Coastguard Worker Chain *getAndEraseNext(Color PreferredColor, std::vector<Chain*> &L);
154*9880d681SAndroid Build Coastguard Worker };
155*9880d681SAndroid Build Coastguard Worker }
156*9880d681SAndroid Build Coastguard Worker
157*9880d681SAndroid Build Coastguard Worker char AArch64A57FPLoadBalancing::ID = 0;
158*9880d681SAndroid Build Coastguard Worker
159*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_BEGIN(AArch64A57FPLoadBalancing, DEBUG_TYPE,
160*9880d681SAndroid Build Coastguard Worker "AArch64 A57 FP Load-Balancing", false, false)
161*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_END(AArch64A57FPLoadBalancing, DEBUG_TYPE,
162*9880d681SAndroid Build Coastguard Worker "AArch64 A57 FP Load-Balancing", false, false)
163*9880d681SAndroid Build Coastguard Worker
164*9880d681SAndroid Build Coastguard Worker namespace {
165*9880d681SAndroid Build Coastguard Worker /// A Chain is a sequence of instructions that are linked together by
166*9880d681SAndroid Build Coastguard Worker /// an accumulation operand. For example:
167*9880d681SAndroid Build Coastguard Worker ///
168*9880d681SAndroid Build Coastguard Worker /// fmul d0<def>, ?
169*9880d681SAndroid Build Coastguard Worker /// fmla d1<def>, ?, ?, d0<kill>
170*9880d681SAndroid Build Coastguard Worker /// fmla d2<def>, ?, ?, d1<kill>
171*9880d681SAndroid Build Coastguard Worker ///
172*9880d681SAndroid Build Coastguard Worker /// There may be other instructions interleaved in the sequence that
173*9880d681SAndroid Build Coastguard Worker /// do not belong to the chain. These other instructions must not use
174*9880d681SAndroid Build Coastguard Worker /// the "chain" register at any point.
175*9880d681SAndroid Build Coastguard Worker ///
176*9880d681SAndroid Build Coastguard Worker /// We currently only support chains where the "chain" operand is killed
177*9880d681SAndroid Build Coastguard Worker /// at each link in the chain for simplicity.
178*9880d681SAndroid Build Coastguard Worker /// A chain has three important instructions - Start, Last and Kill.
179*9880d681SAndroid Build Coastguard Worker /// * The start instruction is the first instruction in the chain.
180*9880d681SAndroid Build Coastguard Worker /// * Last is the final instruction in the chain.
181*9880d681SAndroid Build Coastguard Worker /// * Kill may or may not be defined. If defined, Kill is the instruction
182*9880d681SAndroid Build Coastguard Worker /// where the outgoing value of the Last instruction is killed.
183*9880d681SAndroid Build Coastguard Worker /// This information is important as if we know the outgoing value is
184*9880d681SAndroid Build Coastguard Worker /// killed with no intervening uses, we can safely change its register.
185*9880d681SAndroid Build Coastguard Worker ///
186*9880d681SAndroid Build Coastguard Worker /// Without a kill instruction, we must assume the outgoing value escapes
187*9880d681SAndroid Build Coastguard Worker /// beyond our model and either must not change its register or must
188*9880d681SAndroid Build Coastguard Worker /// create a fixup FMOV to keep the old register value consistent.
189*9880d681SAndroid Build Coastguard Worker ///
190*9880d681SAndroid Build Coastguard Worker class Chain {
191*9880d681SAndroid Build Coastguard Worker public:
192*9880d681SAndroid Build Coastguard Worker /// The important (marker) instructions.
193*9880d681SAndroid Build Coastguard Worker MachineInstr *StartInst, *LastInst, *KillInst;
194*9880d681SAndroid Build Coastguard Worker /// The index, from the start of the basic block, that each marker
195*9880d681SAndroid Build Coastguard Worker /// appears. These are stored so we can do quick interval tests.
196*9880d681SAndroid Build Coastguard Worker unsigned StartInstIdx, LastInstIdx, KillInstIdx;
197*9880d681SAndroid Build Coastguard Worker /// All instructions in the chain.
198*9880d681SAndroid Build Coastguard Worker std::set<MachineInstr*> Insts;
199*9880d681SAndroid Build Coastguard Worker /// True if KillInst cannot be modified. If this is true,
200*9880d681SAndroid Build Coastguard Worker /// we cannot change LastInst's outgoing register.
201*9880d681SAndroid Build Coastguard Worker /// This will be true for tied values and regmasks.
202*9880d681SAndroid Build Coastguard Worker bool KillIsImmutable;
203*9880d681SAndroid Build Coastguard Worker /// The "color" of LastInst. This will be the preferred chain color,
204*9880d681SAndroid Build Coastguard Worker /// as changing intermediate nodes is easy but changing the last
205*9880d681SAndroid Build Coastguard Worker /// instruction can be more tricky.
206*9880d681SAndroid Build Coastguard Worker Color LastColor;
207*9880d681SAndroid Build Coastguard Worker
Chain(MachineInstr * MI,unsigned Idx,Color C)208*9880d681SAndroid Build Coastguard Worker Chain(MachineInstr *MI, unsigned Idx, Color C)
209*9880d681SAndroid Build Coastguard Worker : StartInst(MI), LastInst(MI), KillInst(nullptr),
210*9880d681SAndroid Build Coastguard Worker StartInstIdx(Idx), LastInstIdx(Idx), KillInstIdx(0),
211*9880d681SAndroid Build Coastguard Worker LastColor(C) {
212*9880d681SAndroid Build Coastguard Worker Insts.insert(MI);
213*9880d681SAndroid Build Coastguard Worker }
214*9880d681SAndroid Build Coastguard Worker
215*9880d681SAndroid Build Coastguard Worker /// Add a new instruction into the chain. The instruction's dest operand
216*9880d681SAndroid Build Coastguard Worker /// has the given color.
add(MachineInstr * MI,unsigned Idx,Color C)217*9880d681SAndroid Build Coastguard Worker void add(MachineInstr *MI, unsigned Idx, Color C) {
218*9880d681SAndroid Build Coastguard Worker LastInst = MI;
219*9880d681SAndroid Build Coastguard Worker LastInstIdx = Idx;
220*9880d681SAndroid Build Coastguard Worker LastColor = C;
221*9880d681SAndroid Build Coastguard Worker assert((KillInstIdx == 0 || LastInstIdx < KillInstIdx) &&
222*9880d681SAndroid Build Coastguard Worker "Chain: broken invariant. A Chain can only be killed after its last "
223*9880d681SAndroid Build Coastguard Worker "def");
224*9880d681SAndroid Build Coastguard Worker
225*9880d681SAndroid Build Coastguard Worker Insts.insert(MI);
226*9880d681SAndroid Build Coastguard Worker }
227*9880d681SAndroid Build Coastguard Worker
228*9880d681SAndroid Build Coastguard Worker /// Return true if MI is a member of the chain.
contains(MachineInstr & MI)229*9880d681SAndroid Build Coastguard Worker bool contains(MachineInstr &MI) { return Insts.count(&MI) > 0; }
230*9880d681SAndroid Build Coastguard Worker
231*9880d681SAndroid Build Coastguard Worker /// Return the number of instructions in the chain.
size() const232*9880d681SAndroid Build Coastguard Worker unsigned size() const {
233*9880d681SAndroid Build Coastguard Worker return Insts.size();
234*9880d681SAndroid Build Coastguard Worker }
235*9880d681SAndroid Build Coastguard Worker
236*9880d681SAndroid Build Coastguard Worker /// Inform the chain that its last active register (the dest register of
237*9880d681SAndroid Build Coastguard Worker /// LastInst) is killed by MI with no intervening uses or defs.
setKill(MachineInstr * MI,unsigned Idx,bool Immutable)238*9880d681SAndroid Build Coastguard Worker void setKill(MachineInstr *MI, unsigned Idx, bool Immutable) {
239*9880d681SAndroid Build Coastguard Worker KillInst = MI;
240*9880d681SAndroid Build Coastguard Worker KillInstIdx = Idx;
241*9880d681SAndroid Build Coastguard Worker KillIsImmutable = Immutable;
242*9880d681SAndroid Build Coastguard Worker assert((KillInstIdx == 0 || LastInstIdx < KillInstIdx) &&
243*9880d681SAndroid Build Coastguard Worker "Chain: broken invariant. A Chain can only be killed after its last "
244*9880d681SAndroid Build Coastguard Worker "def");
245*9880d681SAndroid Build Coastguard Worker }
246*9880d681SAndroid Build Coastguard Worker
247*9880d681SAndroid Build Coastguard Worker /// Return the first instruction in the chain.
getStart() const248*9880d681SAndroid Build Coastguard Worker MachineInstr *getStart() const { return StartInst; }
249*9880d681SAndroid Build Coastguard Worker /// Return the last instruction in the chain.
getLast() const250*9880d681SAndroid Build Coastguard Worker MachineInstr *getLast() const { return LastInst; }
251*9880d681SAndroid Build Coastguard Worker /// Return the "kill" instruction (as set with setKill()) or NULL.
getKill() const252*9880d681SAndroid Build Coastguard Worker MachineInstr *getKill() const { return KillInst; }
253*9880d681SAndroid Build Coastguard Worker /// Return an instruction that can be used as an iterator for the end
254*9880d681SAndroid Build Coastguard Worker /// of the chain. This is the maximum of KillInst (if set) and LastInst.
end() const255*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator end() const {
256*9880d681SAndroid Build Coastguard Worker return ++MachineBasicBlock::iterator(KillInst ? KillInst : LastInst);
257*9880d681SAndroid Build Coastguard Worker }
begin() const258*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator begin() const { return getStart(); }
259*9880d681SAndroid Build Coastguard Worker
260*9880d681SAndroid Build Coastguard Worker /// Can the Kill instruction (assuming one exists) be modified?
isKillImmutable() const261*9880d681SAndroid Build Coastguard Worker bool isKillImmutable() const { return KillIsImmutable; }
262*9880d681SAndroid Build Coastguard Worker
263*9880d681SAndroid Build Coastguard Worker /// Return the preferred color of this chain.
getPreferredColor()264*9880d681SAndroid Build Coastguard Worker Color getPreferredColor() {
265*9880d681SAndroid Build Coastguard Worker if (OverrideBalance != 0)
266*9880d681SAndroid Build Coastguard Worker return OverrideBalance == 1 ? Color::Even : Color::Odd;
267*9880d681SAndroid Build Coastguard Worker return LastColor;
268*9880d681SAndroid Build Coastguard Worker }
269*9880d681SAndroid Build Coastguard Worker
270*9880d681SAndroid Build Coastguard Worker /// Return true if this chain (StartInst..KillInst) overlaps with Other.
rangeOverlapsWith(const Chain & Other) const271*9880d681SAndroid Build Coastguard Worker bool rangeOverlapsWith(const Chain &Other) const {
272*9880d681SAndroid Build Coastguard Worker unsigned End = KillInst ? KillInstIdx : LastInstIdx;
273*9880d681SAndroid Build Coastguard Worker unsigned OtherEnd = Other.KillInst ?
274*9880d681SAndroid Build Coastguard Worker Other.KillInstIdx : Other.LastInstIdx;
275*9880d681SAndroid Build Coastguard Worker
276*9880d681SAndroid Build Coastguard Worker return StartInstIdx <= OtherEnd && Other.StartInstIdx <= End;
277*9880d681SAndroid Build Coastguard Worker }
278*9880d681SAndroid Build Coastguard Worker
279*9880d681SAndroid Build Coastguard Worker /// Return true if this chain starts before Other.
startsBefore(const Chain * Other) const280*9880d681SAndroid Build Coastguard Worker bool startsBefore(const Chain *Other) const {
281*9880d681SAndroid Build Coastguard Worker return StartInstIdx < Other->StartInstIdx;
282*9880d681SAndroid Build Coastguard Worker }
283*9880d681SAndroid Build Coastguard Worker
284*9880d681SAndroid Build Coastguard Worker /// Return true if the group will require a fixup MOV at the end.
requiresFixup() const285*9880d681SAndroid Build Coastguard Worker bool requiresFixup() const {
286*9880d681SAndroid Build Coastguard Worker return (getKill() && isKillImmutable()) || !getKill();
287*9880d681SAndroid Build Coastguard Worker }
288*9880d681SAndroid Build Coastguard Worker
289*9880d681SAndroid Build Coastguard Worker /// Return a simple string representation of the chain.
str() const290*9880d681SAndroid Build Coastguard Worker std::string str() const {
291*9880d681SAndroid Build Coastguard Worker std::string S;
292*9880d681SAndroid Build Coastguard Worker raw_string_ostream OS(S);
293*9880d681SAndroid Build Coastguard Worker
294*9880d681SAndroid Build Coastguard Worker OS << "{";
295*9880d681SAndroid Build Coastguard Worker StartInst->print(OS, /* SkipOpers= */true);
296*9880d681SAndroid Build Coastguard Worker OS << " -> ";
297*9880d681SAndroid Build Coastguard Worker LastInst->print(OS, /* SkipOpers= */true);
298*9880d681SAndroid Build Coastguard Worker if (KillInst) {
299*9880d681SAndroid Build Coastguard Worker OS << " (kill @ ";
300*9880d681SAndroid Build Coastguard Worker KillInst->print(OS, /* SkipOpers= */true);
301*9880d681SAndroid Build Coastguard Worker OS << ")";
302*9880d681SAndroid Build Coastguard Worker }
303*9880d681SAndroid Build Coastguard Worker OS << "}";
304*9880d681SAndroid Build Coastguard Worker
305*9880d681SAndroid Build Coastguard Worker return OS.str();
306*9880d681SAndroid Build Coastguard Worker }
307*9880d681SAndroid Build Coastguard Worker
308*9880d681SAndroid Build Coastguard Worker };
309*9880d681SAndroid Build Coastguard Worker
310*9880d681SAndroid Build Coastguard Worker } // end anonymous namespace
311*9880d681SAndroid Build Coastguard Worker
312*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
313*9880d681SAndroid Build Coastguard Worker
runOnMachineFunction(MachineFunction & F)314*9880d681SAndroid Build Coastguard Worker bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
315*9880d681SAndroid Build Coastguard Worker if (skipFunction(*F.getFunction()))
316*9880d681SAndroid Build Coastguard Worker return false;
317*9880d681SAndroid Build Coastguard Worker
318*9880d681SAndroid Build Coastguard Worker if (!F.getSubtarget<AArch64Subtarget>().balanceFPOps())
319*9880d681SAndroid Build Coastguard Worker return false;
320*9880d681SAndroid Build Coastguard Worker
321*9880d681SAndroid Build Coastguard Worker bool Changed = false;
322*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
323*9880d681SAndroid Build Coastguard Worker
324*9880d681SAndroid Build Coastguard Worker MRI = &F.getRegInfo();
325*9880d681SAndroid Build Coastguard Worker TRI = F.getRegInfo().getTargetRegisterInfo();
326*9880d681SAndroid Build Coastguard Worker RCI.runOnMachineFunction(F);
327*9880d681SAndroid Build Coastguard Worker
328*9880d681SAndroid Build Coastguard Worker for (auto &MBB : F) {
329*9880d681SAndroid Build Coastguard Worker Changed |= runOnBasicBlock(MBB);
330*9880d681SAndroid Build Coastguard Worker }
331*9880d681SAndroid Build Coastguard Worker
332*9880d681SAndroid Build Coastguard Worker return Changed;
333*9880d681SAndroid Build Coastguard Worker }
334*9880d681SAndroid Build Coastguard Worker
runOnBasicBlock(MachineBasicBlock & MBB)335*9880d681SAndroid Build Coastguard Worker bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
336*9880d681SAndroid Build Coastguard Worker bool Changed = false;
337*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "Running on MBB: " << MBB << " - scanning instructions...\n");
338*9880d681SAndroid Build Coastguard Worker
339*9880d681SAndroid Build Coastguard Worker // First, scan the basic block producing a set of chains.
340*9880d681SAndroid Build Coastguard Worker
341*9880d681SAndroid Build Coastguard Worker // The currently "active" chains - chains that can be added to and haven't
342*9880d681SAndroid Build Coastguard Worker // been killed yet. This is keyed by register - all chains can only have one
343*9880d681SAndroid Build Coastguard Worker // "link" register between each inst in the chain.
344*9880d681SAndroid Build Coastguard Worker std::map<unsigned, Chain*> ActiveChains;
345*9880d681SAndroid Build Coastguard Worker std::vector<std::unique_ptr<Chain>> AllChains;
346*9880d681SAndroid Build Coastguard Worker unsigned Idx = 0;
347*9880d681SAndroid Build Coastguard Worker for (auto &MI : MBB)
348*9880d681SAndroid Build Coastguard Worker scanInstruction(&MI, Idx++, ActiveChains, AllChains);
349*9880d681SAndroid Build Coastguard Worker
350*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "Scan complete, "<< AllChains.size() << " chains created.\n");
351*9880d681SAndroid Build Coastguard Worker
352*9880d681SAndroid Build Coastguard Worker // Group the chains into disjoint sets based on their liveness range. This is
353*9880d681SAndroid Build Coastguard Worker // a poor-man's version of graph coloring. Ideally we'd create an interference
354*9880d681SAndroid Build Coastguard Worker // graph and perform full-on graph coloring on that, but;
355*9880d681SAndroid Build Coastguard Worker // (a) That's rather heavyweight for only two colors.
356*9880d681SAndroid Build Coastguard Worker // (b) We expect multiple disjoint interference regions - in practice the live
357*9880d681SAndroid Build Coastguard Worker // range of chains is quite small and they are clustered between loads
358*9880d681SAndroid Build Coastguard Worker // and stores.
359*9880d681SAndroid Build Coastguard Worker EquivalenceClasses<Chain*> EC;
360*9880d681SAndroid Build Coastguard Worker for (auto &I : AllChains)
361*9880d681SAndroid Build Coastguard Worker EC.insert(I.get());
362*9880d681SAndroid Build Coastguard Worker
363*9880d681SAndroid Build Coastguard Worker for (auto &I : AllChains)
364*9880d681SAndroid Build Coastguard Worker for (auto &J : AllChains)
365*9880d681SAndroid Build Coastguard Worker if (I != J && I->rangeOverlapsWith(*J))
366*9880d681SAndroid Build Coastguard Worker EC.unionSets(I.get(), J.get());
367*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "Created " << EC.getNumClasses() << " disjoint sets.\n");
368*9880d681SAndroid Build Coastguard Worker
369*9880d681SAndroid Build Coastguard Worker // Now we assume that every member of an equivalence class interferes
370*9880d681SAndroid Build Coastguard Worker // with every other member of that class, and with no members of other classes.
371*9880d681SAndroid Build Coastguard Worker
372*9880d681SAndroid Build Coastguard Worker // Convert the EquivalenceClasses to a simpler set of sets.
373*9880d681SAndroid Build Coastguard Worker std::vector<std::vector<Chain*> > V;
374*9880d681SAndroid Build Coastguard Worker for (auto I = EC.begin(), E = EC.end(); I != E; ++I) {
375*9880d681SAndroid Build Coastguard Worker std::vector<Chain*> Cs(EC.member_begin(I), EC.member_end());
376*9880d681SAndroid Build Coastguard Worker if (Cs.empty()) continue;
377*9880d681SAndroid Build Coastguard Worker V.push_back(std::move(Cs));
378*9880d681SAndroid Build Coastguard Worker }
379*9880d681SAndroid Build Coastguard Worker
380*9880d681SAndroid Build Coastguard Worker // Now we have a set of sets, order them by start address so
381*9880d681SAndroid Build Coastguard Worker // we can iterate over them sequentially.
382*9880d681SAndroid Build Coastguard Worker std::sort(V.begin(), V.end(),
383*9880d681SAndroid Build Coastguard Worker [](const std::vector<Chain*> &A,
384*9880d681SAndroid Build Coastguard Worker const std::vector<Chain*> &B) {
385*9880d681SAndroid Build Coastguard Worker return A.front()->startsBefore(B.front());
386*9880d681SAndroid Build Coastguard Worker });
387*9880d681SAndroid Build Coastguard Worker
388*9880d681SAndroid Build Coastguard Worker // As we only have two colors, we can track the global (BB-level) balance of
389*9880d681SAndroid Build Coastguard Worker // odds versus evens. We aim to keep this near zero to keep both execution
390*9880d681SAndroid Build Coastguard Worker // units fed.
391*9880d681SAndroid Build Coastguard Worker // Positive means we're even-heavy, negative we're odd-heavy.
392*9880d681SAndroid Build Coastguard Worker //
393*9880d681SAndroid Build Coastguard Worker // FIXME: If chains have interdependencies, for example:
394*9880d681SAndroid Build Coastguard Worker // mul r0, r1, r2
395*9880d681SAndroid Build Coastguard Worker // mul r3, r0, r1
396*9880d681SAndroid Build Coastguard Worker // We do not model this and may color each one differently, assuming we'll
397*9880d681SAndroid Build Coastguard Worker // get ILP when we obviously can't. This hasn't been seen to be a problem
398*9880d681SAndroid Build Coastguard Worker // in practice so far, so we simplify the algorithm by ignoring it.
399*9880d681SAndroid Build Coastguard Worker int Parity = 0;
400*9880d681SAndroid Build Coastguard Worker
401*9880d681SAndroid Build Coastguard Worker for (auto &I : V)
402*9880d681SAndroid Build Coastguard Worker Changed |= colorChainSet(std::move(I), MBB, Parity);
403*9880d681SAndroid Build Coastguard Worker
404*9880d681SAndroid Build Coastguard Worker return Changed;
405*9880d681SAndroid Build Coastguard Worker }
406*9880d681SAndroid Build Coastguard Worker
getAndEraseNext(Color PreferredColor,std::vector<Chain * > & L)407*9880d681SAndroid Build Coastguard Worker Chain *AArch64A57FPLoadBalancing::getAndEraseNext(Color PreferredColor,
408*9880d681SAndroid Build Coastguard Worker std::vector<Chain*> &L) {
409*9880d681SAndroid Build Coastguard Worker if (L.empty())
410*9880d681SAndroid Build Coastguard Worker return nullptr;
411*9880d681SAndroid Build Coastguard Worker
412*9880d681SAndroid Build Coastguard Worker // We try and get the best candidate from L to color next, given that our
413*9880d681SAndroid Build Coastguard Worker // preferred color is "PreferredColor". L is ordered from larger to smaller
414*9880d681SAndroid Build Coastguard Worker // chains. It is beneficial to color the large chains before the small chains,
415*9880d681SAndroid Build Coastguard Worker // but if we can't find a chain of the maximum length with the preferred color,
416*9880d681SAndroid Build Coastguard Worker // we fuzz the size and look for slightly smaller chains before giving up and
417*9880d681SAndroid Build Coastguard Worker // returning a chain that must be recolored.
418*9880d681SAndroid Build Coastguard Worker
419*9880d681SAndroid Build Coastguard Worker // FIXME: Does this need to be configurable?
420*9880d681SAndroid Build Coastguard Worker const unsigned SizeFuzz = 1;
421*9880d681SAndroid Build Coastguard Worker unsigned MinSize = L.front()->size() - SizeFuzz;
422*9880d681SAndroid Build Coastguard Worker for (auto I = L.begin(), E = L.end(); I != E; ++I) {
423*9880d681SAndroid Build Coastguard Worker if ((*I)->size() <= MinSize) {
424*9880d681SAndroid Build Coastguard Worker // We've gone past the size limit. Return the previous item.
425*9880d681SAndroid Build Coastguard Worker Chain *Ch = *--I;
426*9880d681SAndroid Build Coastguard Worker L.erase(I);
427*9880d681SAndroid Build Coastguard Worker return Ch;
428*9880d681SAndroid Build Coastguard Worker }
429*9880d681SAndroid Build Coastguard Worker
430*9880d681SAndroid Build Coastguard Worker if ((*I)->getPreferredColor() == PreferredColor) {
431*9880d681SAndroid Build Coastguard Worker Chain *Ch = *I;
432*9880d681SAndroid Build Coastguard Worker L.erase(I);
433*9880d681SAndroid Build Coastguard Worker return Ch;
434*9880d681SAndroid Build Coastguard Worker }
435*9880d681SAndroid Build Coastguard Worker }
436*9880d681SAndroid Build Coastguard Worker
437*9880d681SAndroid Build Coastguard Worker // Bailout case - just return the first item.
438*9880d681SAndroid Build Coastguard Worker Chain *Ch = L.front();
439*9880d681SAndroid Build Coastguard Worker L.erase(L.begin());
440*9880d681SAndroid Build Coastguard Worker return Ch;
441*9880d681SAndroid Build Coastguard Worker }
442*9880d681SAndroid Build Coastguard Worker
colorChainSet(std::vector<Chain * > GV,MachineBasicBlock & MBB,int & Parity)443*9880d681SAndroid Build Coastguard Worker bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
444*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB,
445*9880d681SAndroid Build Coastguard Worker int &Parity) {
446*9880d681SAndroid Build Coastguard Worker bool Changed = false;
447*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "colorChainSet(): #sets=" << GV.size() << "\n");
448*9880d681SAndroid Build Coastguard Worker
449*9880d681SAndroid Build Coastguard Worker // Sort by descending size order so that we allocate the most important
450*9880d681SAndroid Build Coastguard Worker // sets first.
451*9880d681SAndroid Build Coastguard Worker // Tie-break equivalent sizes by sorting chains requiring fixups before
452*9880d681SAndroid Build Coastguard Worker // those without fixups. The logic here is that we should look at the
453*9880d681SAndroid Build Coastguard Worker // chains that we cannot change before we look at those we can,
454*9880d681SAndroid Build Coastguard Worker // so the parity counter is updated and we know what color we should
455*9880d681SAndroid Build Coastguard Worker // change them to!
456*9880d681SAndroid Build Coastguard Worker // Final tie-break with instruction order so pass output is stable (i.e. not
457*9880d681SAndroid Build Coastguard Worker // dependent on malloc'd pointer values).
458*9880d681SAndroid Build Coastguard Worker std::sort(GV.begin(), GV.end(), [](const Chain *G1, const Chain *G2) {
459*9880d681SAndroid Build Coastguard Worker if (G1->size() != G2->size())
460*9880d681SAndroid Build Coastguard Worker return G1->size() > G2->size();
461*9880d681SAndroid Build Coastguard Worker if (G1->requiresFixup() != G2->requiresFixup())
462*9880d681SAndroid Build Coastguard Worker return G1->requiresFixup() > G2->requiresFixup();
463*9880d681SAndroid Build Coastguard Worker // Make sure startsBefore() produces a stable final order.
464*9880d681SAndroid Build Coastguard Worker assert((G1 == G2 || (G1->startsBefore(G2) ^ G2->startsBefore(G1))) &&
465*9880d681SAndroid Build Coastguard Worker "Starts before not total order!");
466*9880d681SAndroid Build Coastguard Worker return G1->startsBefore(G2);
467*9880d681SAndroid Build Coastguard Worker });
468*9880d681SAndroid Build Coastguard Worker
469*9880d681SAndroid Build Coastguard Worker Color PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
470*9880d681SAndroid Build Coastguard Worker while (Chain *G = getAndEraseNext(PreferredColor, GV)) {
471*9880d681SAndroid Build Coastguard Worker // Start off by assuming we'll color to our own preferred color.
472*9880d681SAndroid Build Coastguard Worker Color C = PreferredColor;
473*9880d681SAndroid Build Coastguard Worker if (Parity == 0)
474*9880d681SAndroid Build Coastguard Worker // But if we really don't care, use the chain's preferred color.
475*9880d681SAndroid Build Coastguard Worker C = G->getPreferredColor();
476*9880d681SAndroid Build Coastguard Worker
477*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << " - Parity=" << Parity << ", Color="
478*9880d681SAndroid Build Coastguard Worker << ColorNames[(int)C] << "\n");
479*9880d681SAndroid Build Coastguard Worker
480*9880d681SAndroid Build Coastguard Worker // If we'll need a fixup FMOV, don't bother. Testing has shown that this
481*9880d681SAndroid Build Coastguard Worker // happens infrequently and when it does it has at least a 50% chance of
482*9880d681SAndroid Build Coastguard Worker // slowing code down instead of speeding it up.
483*9880d681SAndroid Build Coastguard Worker if (G->requiresFixup() && C != G->getPreferredColor()) {
484*9880d681SAndroid Build Coastguard Worker C = G->getPreferredColor();
485*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << " - " << G->str() << " - not worthwhile changing; "
486*9880d681SAndroid Build Coastguard Worker "color remains " << ColorNames[(int)C] << "\n");
487*9880d681SAndroid Build Coastguard Worker }
488*9880d681SAndroid Build Coastguard Worker
489*9880d681SAndroid Build Coastguard Worker Changed |= colorChain(G, C, MBB);
490*9880d681SAndroid Build Coastguard Worker
491*9880d681SAndroid Build Coastguard Worker Parity += (C == Color::Even) ? G->size() : -G->size();
492*9880d681SAndroid Build Coastguard Worker PreferredColor = Parity < 0 ? Color::Even : Color::Odd;
493*9880d681SAndroid Build Coastguard Worker }
494*9880d681SAndroid Build Coastguard Worker
495*9880d681SAndroid Build Coastguard Worker return Changed;
496*9880d681SAndroid Build Coastguard Worker }
497*9880d681SAndroid Build Coastguard Worker
scavengeRegister(Chain * G,Color C,MachineBasicBlock & MBB)498*9880d681SAndroid Build Coastguard Worker int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
499*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB) {
500*9880d681SAndroid Build Coastguard Worker RegScavenger RS;
501*9880d681SAndroid Build Coastguard Worker RS.enterBasicBlock(MBB);
502*9880d681SAndroid Build Coastguard Worker RS.forward(MachineBasicBlock::iterator(G->getStart()));
503*9880d681SAndroid Build Coastguard Worker
504*9880d681SAndroid Build Coastguard Worker // Can we find an appropriate register that is available throughout the life
505*9880d681SAndroid Build Coastguard Worker // of the chain?
506*9880d681SAndroid Build Coastguard Worker unsigned RegClassID = G->getStart()->getDesc().OpInfo[0].RegClass;
507*9880d681SAndroid Build Coastguard Worker BitVector AvailableRegs = RS.getRegsAvailable(TRI->getRegClass(RegClassID));
508*9880d681SAndroid Build Coastguard Worker for (MachineBasicBlock::iterator I = G->begin(), E = G->end(); I != E; ++I) {
509*9880d681SAndroid Build Coastguard Worker RS.forward(I);
510*9880d681SAndroid Build Coastguard Worker AvailableRegs &= RS.getRegsAvailable(TRI->getRegClass(RegClassID));
511*9880d681SAndroid Build Coastguard Worker
512*9880d681SAndroid Build Coastguard Worker // Remove any registers clobbered by a regmask or any def register that is
513*9880d681SAndroid Build Coastguard Worker // immediately dead.
514*9880d681SAndroid Build Coastguard Worker for (auto J : I->operands()) {
515*9880d681SAndroid Build Coastguard Worker if (J.isRegMask())
516*9880d681SAndroid Build Coastguard Worker AvailableRegs.clearBitsNotInMask(J.getRegMask());
517*9880d681SAndroid Build Coastguard Worker
518*9880d681SAndroid Build Coastguard Worker if (J.isReg() && J.isDef()) {
519*9880d681SAndroid Build Coastguard Worker MCRegAliasIterator AI(J.getReg(), TRI, /*IncludeSelf=*/true);
520*9880d681SAndroid Build Coastguard Worker if (J.isDead())
521*9880d681SAndroid Build Coastguard Worker for (; AI.isValid(); ++AI)
522*9880d681SAndroid Build Coastguard Worker AvailableRegs.reset(*AI);
523*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
524*9880d681SAndroid Build Coastguard Worker else
525*9880d681SAndroid Build Coastguard Worker for (; AI.isValid(); ++AI)
526*9880d681SAndroid Build Coastguard Worker assert(!AvailableRegs[*AI] &&
527*9880d681SAndroid Build Coastguard Worker "Non-dead def should have been removed by now!");
528*9880d681SAndroid Build Coastguard Worker #endif
529*9880d681SAndroid Build Coastguard Worker }
530*9880d681SAndroid Build Coastguard Worker }
531*9880d681SAndroid Build Coastguard Worker }
532*9880d681SAndroid Build Coastguard Worker
533*9880d681SAndroid Build Coastguard Worker // Make sure we allocate in-order, to get the cheapest registers first.
534*9880d681SAndroid Build Coastguard Worker auto Ord = RCI.getOrder(TRI->getRegClass(RegClassID));
535*9880d681SAndroid Build Coastguard Worker for (auto Reg : Ord) {
536*9880d681SAndroid Build Coastguard Worker if (!AvailableRegs[Reg])
537*9880d681SAndroid Build Coastguard Worker continue;
538*9880d681SAndroid Build Coastguard Worker if (C == getColor(Reg))
539*9880d681SAndroid Build Coastguard Worker return Reg;
540*9880d681SAndroid Build Coastguard Worker }
541*9880d681SAndroid Build Coastguard Worker
542*9880d681SAndroid Build Coastguard Worker return -1;
543*9880d681SAndroid Build Coastguard Worker }
544*9880d681SAndroid Build Coastguard Worker
colorChain(Chain * G,Color C,MachineBasicBlock & MBB)545*9880d681SAndroid Build Coastguard Worker bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
546*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB) {
547*9880d681SAndroid Build Coastguard Worker bool Changed = false;
548*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << " - colorChain(" << G->str() << ", "
549*9880d681SAndroid Build Coastguard Worker << ColorNames[(int)C] << ")\n");
550*9880d681SAndroid Build Coastguard Worker
551*9880d681SAndroid Build Coastguard Worker // Try and obtain a free register of the right class. Without a register
552*9880d681SAndroid Build Coastguard Worker // to play with we cannot continue.
553*9880d681SAndroid Build Coastguard Worker int Reg = scavengeRegister(G, C, MBB);
554*9880d681SAndroid Build Coastguard Worker if (Reg == -1) {
555*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "Scavenging (thus coloring) failed!\n");
556*9880d681SAndroid Build Coastguard Worker return false;
557*9880d681SAndroid Build Coastguard Worker }
558*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << " - Scavenged register: " << TRI->getName(Reg) << "\n");
559*9880d681SAndroid Build Coastguard Worker
560*9880d681SAndroid Build Coastguard Worker std::map<unsigned, unsigned> Substs;
561*9880d681SAndroid Build Coastguard Worker for (MachineInstr &I : *G) {
562*9880d681SAndroid Build Coastguard Worker if (!G->contains(I) && (&I != G->getKill() || G->isKillImmutable()))
563*9880d681SAndroid Build Coastguard Worker continue;
564*9880d681SAndroid Build Coastguard Worker
565*9880d681SAndroid Build Coastguard Worker // I is a member of G, or I is a mutable instruction that kills G.
566*9880d681SAndroid Build Coastguard Worker
567*9880d681SAndroid Build Coastguard Worker std::vector<unsigned> ToErase;
568*9880d681SAndroid Build Coastguard Worker for (auto &U : I.operands()) {
569*9880d681SAndroid Build Coastguard Worker if (U.isReg() && U.isUse() && Substs.find(U.getReg()) != Substs.end()) {
570*9880d681SAndroid Build Coastguard Worker unsigned OrigReg = U.getReg();
571*9880d681SAndroid Build Coastguard Worker U.setReg(Substs[OrigReg]);
572*9880d681SAndroid Build Coastguard Worker if (U.isKill())
573*9880d681SAndroid Build Coastguard Worker // Don't erase straight away, because there may be other operands
574*9880d681SAndroid Build Coastguard Worker // that also reference this substitution!
575*9880d681SAndroid Build Coastguard Worker ToErase.push_back(OrigReg);
576*9880d681SAndroid Build Coastguard Worker } else if (U.isRegMask()) {
577*9880d681SAndroid Build Coastguard Worker for (auto J : Substs) {
578*9880d681SAndroid Build Coastguard Worker if (U.clobbersPhysReg(J.first))
579*9880d681SAndroid Build Coastguard Worker ToErase.push_back(J.first);
580*9880d681SAndroid Build Coastguard Worker }
581*9880d681SAndroid Build Coastguard Worker }
582*9880d681SAndroid Build Coastguard Worker }
583*9880d681SAndroid Build Coastguard Worker // Now it's safe to remove the substs identified earlier.
584*9880d681SAndroid Build Coastguard Worker for (auto J : ToErase)
585*9880d681SAndroid Build Coastguard Worker Substs.erase(J);
586*9880d681SAndroid Build Coastguard Worker
587*9880d681SAndroid Build Coastguard Worker // Only change the def if this isn't the last instruction.
588*9880d681SAndroid Build Coastguard Worker if (&I != G->getKill()) {
589*9880d681SAndroid Build Coastguard Worker MachineOperand &MO = I.getOperand(0);
590*9880d681SAndroid Build Coastguard Worker
591*9880d681SAndroid Build Coastguard Worker bool Change = TransformAll || getColor(MO.getReg()) != C;
592*9880d681SAndroid Build Coastguard Worker if (G->requiresFixup() && &I == G->getLast())
593*9880d681SAndroid Build Coastguard Worker Change = false;
594*9880d681SAndroid Build Coastguard Worker
595*9880d681SAndroid Build Coastguard Worker if (Change) {
596*9880d681SAndroid Build Coastguard Worker Substs[MO.getReg()] = Reg;
597*9880d681SAndroid Build Coastguard Worker MO.setReg(Reg);
598*9880d681SAndroid Build Coastguard Worker
599*9880d681SAndroid Build Coastguard Worker Changed = true;
600*9880d681SAndroid Build Coastguard Worker }
601*9880d681SAndroid Build Coastguard Worker }
602*9880d681SAndroid Build Coastguard Worker }
603*9880d681SAndroid Build Coastguard Worker assert(Substs.size() == 0 && "No substitutions should be left active!");
604*9880d681SAndroid Build Coastguard Worker
605*9880d681SAndroid Build Coastguard Worker if (G->getKill()) {
606*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << " - Kill instruction seen.\n");
607*9880d681SAndroid Build Coastguard Worker } else {
608*9880d681SAndroid Build Coastguard Worker // We didn't have a kill instruction, but we didn't seem to need to change
609*9880d681SAndroid Build Coastguard Worker // the destination register anyway.
610*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << " - Destination register not changed.\n");
611*9880d681SAndroid Build Coastguard Worker }
612*9880d681SAndroid Build Coastguard Worker return Changed;
613*9880d681SAndroid Build Coastguard Worker }
614*9880d681SAndroid Build Coastguard Worker
scanInstruction(MachineInstr * MI,unsigned Idx,std::map<unsigned,Chain * > & ActiveChains,std::vector<std::unique_ptr<Chain>> & AllChains)615*9880d681SAndroid Build Coastguard Worker void AArch64A57FPLoadBalancing::scanInstruction(
616*9880d681SAndroid Build Coastguard Worker MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
617*9880d681SAndroid Build Coastguard Worker std::vector<std::unique_ptr<Chain>> &AllChains) {
618*9880d681SAndroid Build Coastguard Worker // Inspect "MI", updating ActiveChains and AllChains.
619*9880d681SAndroid Build Coastguard Worker
620*9880d681SAndroid Build Coastguard Worker if (isMul(MI)) {
621*9880d681SAndroid Build Coastguard Worker
622*9880d681SAndroid Build Coastguard Worker for (auto &I : MI->uses())
623*9880d681SAndroid Build Coastguard Worker maybeKillChain(I, Idx, ActiveChains);
624*9880d681SAndroid Build Coastguard Worker for (auto &I : MI->defs())
625*9880d681SAndroid Build Coastguard Worker maybeKillChain(I, Idx, ActiveChains);
626*9880d681SAndroid Build Coastguard Worker
627*9880d681SAndroid Build Coastguard Worker // Create a new chain. Multiplies don't require forwarding so can go on any
628*9880d681SAndroid Build Coastguard Worker // unit.
629*9880d681SAndroid Build Coastguard Worker unsigned DestReg = MI->getOperand(0).getReg();
630*9880d681SAndroid Build Coastguard Worker
631*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "New chain started for register "
632*9880d681SAndroid Build Coastguard Worker << TRI->getName(DestReg) << " at " << *MI);
633*9880d681SAndroid Build Coastguard Worker
634*9880d681SAndroid Build Coastguard Worker auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
635*9880d681SAndroid Build Coastguard Worker ActiveChains[DestReg] = G.get();
636*9880d681SAndroid Build Coastguard Worker AllChains.push_back(std::move(G));
637*9880d681SAndroid Build Coastguard Worker
638*9880d681SAndroid Build Coastguard Worker } else if (isMla(MI)) {
639*9880d681SAndroid Build Coastguard Worker
640*9880d681SAndroid Build Coastguard Worker // It is beneficial to keep MLAs on the same functional unit as their
641*9880d681SAndroid Build Coastguard Worker // accumulator operand.
642*9880d681SAndroid Build Coastguard Worker unsigned DestReg = MI->getOperand(0).getReg();
643*9880d681SAndroid Build Coastguard Worker unsigned AccumReg = MI->getOperand(3).getReg();
644*9880d681SAndroid Build Coastguard Worker
645*9880d681SAndroid Build Coastguard Worker maybeKillChain(MI->getOperand(1), Idx, ActiveChains);
646*9880d681SAndroid Build Coastguard Worker maybeKillChain(MI->getOperand(2), Idx, ActiveChains);
647*9880d681SAndroid Build Coastguard Worker if (DestReg != AccumReg)
648*9880d681SAndroid Build Coastguard Worker maybeKillChain(MI->getOperand(0), Idx, ActiveChains);
649*9880d681SAndroid Build Coastguard Worker
650*9880d681SAndroid Build Coastguard Worker if (ActiveChains.find(AccumReg) != ActiveChains.end()) {
651*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "Chain found for accumulator register "
652*9880d681SAndroid Build Coastguard Worker << TRI->getName(AccumReg) << " in MI " << *MI);
653*9880d681SAndroid Build Coastguard Worker
654*9880d681SAndroid Build Coastguard Worker // For simplicity we only chain together sequences of MULs/MLAs where the
655*9880d681SAndroid Build Coastguard Worker // accumulator register is killed on each instruction. This means we don't
656*9880d681SAndroid Build Coastguard Worker // need to track other uses of the registers we want to rewrite.
657*9880d681SAndroid Build Coastguard Worker //
658*9880d681SAndroid Build Coastguard Worker // FIXME: We could extend to handle the non-kill cases for more coverage.
659*9880d681SAndroid Build Coastguard Worker if (MI->getOperand(3).isKill()) {
660*9880d681SAndroid Build Coastguard Worker // Add to chain.
661*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "Instruction was successfully added to chain.\n");
662*9880d681SAndroid Build Coastguard Worker ActiveChains[AccumReg]->add(MI, Idx, getColor(DestReg));
663*9880d681SAndroid Build Coastguard Worker // Handle cases where the destination is not the same as the accumulator.
664*9880d681SAndroid Build Coastguard Worker if (DestReg != AccumReg) {
665*9880d681SAndroid Build Coastguard Worker ActiveChains[DestReg] = ActiveChains[AccumReg];
666*9880d681SAndroid Build Coastguard Worker ActiveChains.erase(AccumReg);
667*9880d681SAndroid Build Coastguard Worker }
668*9880d681SAndroid Build Coastguard Worker return;
669*9880d681SAndroid Build Coastguard Worker }
670*9880d681SAndroid Build Coastguard Worker
671*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "Cannot add to chain because accumulator operand wasn't "
672*9880d681SAndroid Build Coastguard Worker << "marked <kill>!\n");
673*9880d681SAndroid Build Coastguard Worker maybeKillChain(MI->getOperand(3), Idx, ActiveChains);
674*9880d681SAndroid Build Coastguard Worker }
675*9880d681SAndroid Build Coastguard Worker
676*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "Creating new chain for dest register "
677*9880d681SAndroid Build Coastguard Worker << TRI->getName(DestReg) << "\n");
678*9880d681SAndroid Build Coastguard Worker auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
679*9880d681SAndroid Build Coastguard Worker ActiveChains[DestReg] = G.get();
680*9880d681SAndroid Build Coastguard Worker AllChains.push_back(std::move(G));
681*9880d681SAndroid Build Coastguard Worker
682*9880d681SAndroid Build Coastguard Worker } else {
683*9880d681SAndroid Build Coastguard Worker
684*9880d681SAndroid Build Coastguard Worker // Non-MUL or MLA instruction. Invalidate any chain in the uses or defs
685*9880d681SAndroid Build Coastguard Worker // lists.
686*9880d681SAndroid Build Coastguard Worker for (auto &I : MI->uses())
687*9880d681SAndroid Build Coastguard Worker maybeKillChain(I, Idx, ActiveChains);
688*9880d681SAndroid Build Coastguard Worker for (auto &I : MI->defs())
689*9880d681SAndroid Build Coastguard Worker maybeKillChain(I, Idx, ActiveChains);
690*9880d681SAndroid Build Coastguard Worker
691*9880d681SAndroid Build Coastguard Worker }
692*9880d681SAndroid Build Coastguard Worker }
693*9880d681SAndroid Build Coastguard Worker
694*9880d681SAndroid Build Coastguard Worker void AArch64A57FPLoadBalancing::
maybeKillChain(MachineOperand & MO,unsigned Idx,std::map<unsigned,Chain * > & ActiveChains)695*9880d681SAndroid Build Coastguard Worker maybeKillChain(MachineOperand &MO, unsigned Idx,
696*9880d681SAndroid Build Coastguard Worker std::map<unsigned, Chain*> &ActiveChains) {
697*9880d681SAndroid Build Coastguard Worker // Given an operand and the set of active chains (keyed by register),
698*9880d681SAndroid Build Coastguard Worker // determine if a chain should be ended and remove from ActiveChains.
699*9880d681SAndroid Build Coastguard Worker MachineInstr *MI = MO.getParent();
700*9880d681SAndroid Build Coastguard Worker
701*9880d681SAndroid Build Coastguard Worker if (MO.isReg()) {
702*9880d681SAndroid Build Coastguard Worker
703*9880d681SAndroid Build Coastguard Worker // If this is a KILL of a current chain, record it.
704*9880d681SAndroid Build Coastguard Worker if (MO.isKill() && ActiveChains.find(MO.getReg()) != ActiveChains.end()) {
705*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "Kill seen for chain " << TRI->getName(MO.getReg())
706*9880d681SAndroid Build Coastguard Worker << "\n");
707*9880d681SAndroid Build Coastguard Worker ActiveChains[MO.getReg()]->setKill(MI, Idx, /*Immutable=*/MO.isTied());
708*9880d681SAndroid Build Coastguard Worker }
709*9880d681SAndroid Build Coastguard Worker ActiveChains.erase(MO.getReg());
710*9880d681SAndroid Build Coastguard Worker
711*9880d681SAndroid Build Coastguard Worker } else if (MO.isRegMask()) {
712*9880d681SAndroid Build Coastguard Worker
713*9880d681SAndroid Build Coastguard Worker for (auto I = ActiveChains.begin(), E = ActiveChains.end();
714*9880d681SAndroid Build Coastguard Worker I != E;) {
715*9880d681SAndroid Build Coastguard Worker if (MO.clobbersPhysReg(I->first)) {
716*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "Kill (regmask) seen for chain "
717*9880d681SAndroid Build Coastguard Worker << TRI->getName(I->first) << "\n");
718*9880d681SAndroid Build Coastguard Worker I->second->setKill(MI, Idx, /*Immutable=*/true);
719*9880d681SAndroid Build Coastguard Worker ActiveChains.erase(I++);
720*9880d681SAndroid Build Coastguard Worker } else
721*9880d681SAndroid Build Coastguard Worker ++I;
722*9880d681SAndroid Build Coastguard Worker }
723*9880d681SAndroid Build Coastguard Worker
724*9880d681SAndroid Build Coastguard Worker }
725*9880d681SAndroid Build Coastguard Worker }
726*9880d681SAndroid Build Coastguard Worker
getColor(unsigned Reg)727*9880d681SAndroid Build Coastguard Worker Color AArch64A57FPLoadBalancing::getColor(unsigned Reg) {
728*9880d681SAndroid Build Coastguard Worker if ((TRI->getEncodingValue(Reg) % 2) == 0)
729*9880d681SAndroid Build Coastguard Worker return Color::Even;
730*9880d681SAndroid Build Coastguard Worker else
731*9880d681SAndroid Build Coastguard Worker return Color::Odd;
732*9880d681SAndroid Build Coastguard Worker }
733*9880d681SAndroid Build Coastguard Worker
734*9880d681SAndroid Build Coastguard Worker // Factory function used by AArch64TargetMachine to add the pass to the passmanager.
createAArch64A57FPLoadBalancing()735*9880d681SAndroid Build Coastguard Worker FunctionPass *llvm::createAArch64A57FPLoadBalancing() {
736*9880d681SAndroid Build Coastguard Worker return new AArch64A57FPLoadBalancing();
737*9880d681SAndroid Build Coastguard Worker }
738