1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker * Copyright © 2018 Valve Corporation
3*61046927SAndroid Build Coastguard Worker *
4*61046927SAndroid Build Coastguard Worker * SPDX-License-Identifier: MIT
5*61046927SAndroid Build Coastguard Worker */
6*61046927SAndroid Build Coastguard Worker
7*61046927SAndroid Build Coastguard Worker #include "aco_builder.h"
8*61046927SAndroid Build Coastguard Worker #include "aco_ir.h"
9*61046927SAndroid Build Coastguard Worker
10*61046927SAndroid Build Coastguard Worker #include "common/amdgfxregs.h"
11*61046927SAndroid Build Coastguard Worker
12*61046927SAndroid Build Coastguard Worker #include <algorithm>
13*61046927SAndroid Build Coastguard Worker #include <unordered_set>
14*61046927SAndroid Build Coastguard Worker #include <vector>
15*61046927SAndroid Build Coastguard Worker
/* Scheduler tuning knobs.
 *
 * Every macro that mentions num_waves expands to an expression reading
 * `ctx.num_waves`, so it can only be used where a variable named `ctx` with
 * such a member is in scope.
 */

/* Window sizes. */
#define SMEM_WINDOW_SIZE    (350 - ctx.num_waves * 35)
#define VMEM_WINDOW_SIZE    (1024 - ctx.num_waves * 64)
#define LDS_WINDOW_SIZE     64
#define POS_EXP_WINDOW_SIZE 512

/* Move limits. */
#define SMEM_MAX_MOVES    (64 - ctx.num_waves * 4)
#define VMEM_MAX_MOVES    (256 - ctx.num_waves * 16)
#define LDSDIR_MAX_MOVES  10
#define LDS_MAX_MOVES     32
#define POS_EXP_MAX_MOVES 512

/* creating clauses decreases def-use distances, so make it less aggressive the lower num_waves is */
#define VMEM_CLAUSE_MAX_GRAB_DIST       (ctx.num_waves * 2)
#define VMEM_STORE_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 4)
28*61046927SAndroid Build Coastguard Worker
29*61046927SAndroid Build Coastguard Worker namespace aco {
30*61046927SAndroid Build Coastguard Worker
31*61046927SAndroid Build Coastguard Worker namespace {
32*61046927SAndroid Build Coastguard Worker
/* Outcome of a single attempt to move an instruction. */
enum MoveResult {
   /* The instruction was moved. */
   move_success,
   /* Rejected: a temporary links the candidate to an instruction it depends on
    * (or that depends on it). */
   move_fail_ssa,
   /* Rejected: one of the candidate's operands is in the tracked RAR dependency set. */
   move_fail_rar,
   /* Rejected: the resulting register demand would exceed the allowed maximum. */
   move_fail_pressure,
};
39*61046927SAndroid Build Coastguard Worker
40*61046927SAndroid Build Coastguard Worker /**
41*61046927SAndroid Build Coastguard Worker * Cursor for downwards moves, where a single instruction is moved towards
42*61046927SAndroid Build Coastguard Worker * or below a group of instruction that hardware can execute as a clause.
43*61046927SAndroid Build Coastguard Worker */
44*61046927SAndroid Build Coastguard Worker struct DownwardsCursor {
45*61046927SAndroid Build Coastguard Worker int source_idx; /* Current instruction to consider for moving */
46*61046927SAndroid Build Coastguard Worker
47*61046927SAndroid Build Coastguard Worker int insert_idx_clause; /* First clause instruction */
48*61046927SAndroid Build Coastguard Worker int insert_idx; /* First instruction *after* the clause */
49*61046927SAndroid Build Coastguard Worker
50*61046927SAndroid Build Coastguard Worker /* Maximum demand of all clause instructions,
51*61046927SAndroid Build Coastguard Worker * i.e. from insert_idx_clause (inclusive) to insert_idx (exclusive) */
52*61046927SAndroid Build Coastguard Worker RegisterDemand clause_demand;
53*61046927SAndroid Build Coastguard Worker /* Maximum demand of instructions from source_idx to insert_idx_clause (both exclusive) */
54*61046927SAndroid Build Coastguard Worker RegisterDemand total_demand;
55*61046927SAndroid Build Coastguard Worker
DownwardsCursoraco::__anond3dcce770111::DownwardsCursor56*61046927SAndroid Build Coastguard Worker DownwardsCursor(int current_idx, RegisterDemand initial_clause_demand)
57*61046927SAndroid Build Coastguard Worker : source_idx(current_idx - 1), insert_idx_clause(current_idx), insert_idx(current_idx + 1),
58*61046927SAndroid Build Coastguard Worker clause_demand(initial_clause_demand)
59*61046927SAndroid Build Coastguard Worker {}
60*61046927SAndroid Build Coastguard Worker
61*61046927SAndroid Build Coastguard Worker void verify_invariants(const Block* block);
62*61046927SAndroid Build Coastguard Worker };
63*61046927SAndroid Build Coastguard Worker
64*61046927SAndroid Build Coastguard Worker /**
65*61046927SAndroid Build Coastguard Worker * Cursor for upwards moves, where a single instruction is moved below
66*61046927SAndroid Build Coastguard Worker * another instruction.
67*61046927SAndroid Build Coastguard Worker */
68*61046927SAndroid Build Coastguard Worker struct UpwardsCursor {
69*61046927SAndroid Build Coastguard Worker int source_idx; /* Current instruction to consider for moving */
70*61046927SAndroid Build Coastguard Worker int insert_idx; /* Instruction to move in front of */
71*61046927SAndroid Build Coastguard Worker
72*61046927SAndroid Build Coastguard Worker /* Maximum demand of instructions from insert_idx (inclusive) to source_idx (exclusive) */
73*61046927SAndroid Build Coastguard Worker RegisterDemand total_demand;
74*61046927SAndroid Build Coastguard Worker
UpwardsCursoraco::__anond3dcce770111::UpwardsCursor75*61046927SAndroid Build Coastguard Worker UpwardsCursor(int source_idx_) : source_idx(source_idx_)
76*61046927SAndroid Build Coastguard Worker {
77*61046927SAndroid Build Coastguard Worker insert_idx = -1; /* to be initialized later */
78*61046927SAndroid Build Coastguard Worker }
79*61046927SAndroid Build Coastguard Worker
has_insert_idxaco::__anond3dcce770111::UpwardsCursor80*61046927SAndroid Build Coastguard Worker bool has_insert_idx() const { return insert_idx != -1; }
81*61046927SAndroid Build Coastguard Worker void verify_invariants(const Block* block);
82*61046927SAndroid Build Coastguard Worker };
83*61046927SAndroid Build Coastguard Worker
84*61046927SAndroid Build Coastguard Worker struct MoveState {
85*61046927SAndroid Build Coastguard Worker RegisterDemand max_registers;
86*61046927SAndroid Build Coastguard Worker
87*61046927SAndroid Build Coastguard Worker Block* block;
88*61046927SAndroid Build Coastguard Worker Instruction* current;
89*61046927SAndroid Build Coastguard Worker bool improved_rar;
90*61046927SAndroid Build Coastguard Worker
91*61046927SAndroid Build Coastguard Worker std::vector<bool> depends_on;
92*61046927SAndroid Build Coastguard Worker /* Two are needed because, for downwards VMEM scheduling, one needs to
93*61046927SAndroid Build Coastguard Worker * exclude the instructions in the clause, since new instructions in the
94*61046927SAndroid Build Coastguard Worker * clause are not moved past any other instructions in the clause. */
95*61046927SAndroid Build Coastguard Worker std::vector<bool> RAR_dependencies;
96*61046927SAndroid Build Coastguard Worker std::vector<bool> RAR_dependencies_clause;
97*61046927SAndroid Build Coastguard Worker
98*61046927SAndroid Build Coastguard Worker /* for moving instructions before the current instruction to after it */
99*61046927SAndroid Build Coastguard Worker DownwardsCursor downwards_init(int current_idx, bool improved_rar, bool may_form_clauses);
100*61046927SAndroid Build Coastguard Worker MoveResult downwards_move(DownwardsCursor&, bool clause);
101*61046927SAndroid Build Coastguard Worker void downwards_skip(DownwardsCursor&);
102*61046927SAndroid Build Coastguard Worker
103*61046927SAndroid Build Coastguard Worker /* for moving instructions after the first use of the current instruction upwards */
104*61046927SAndroid Build Coastguard Worker UpwardsCursor upwards_init(int source_idx, bool improved_rar);
105*61046927SAndroid Build Coastguard Worker bool upwards_check_deps(UpwardsCursor&);
106*61046927SAndroid Build Coastguard Worker void upwards_update_insert_idx(UpwardsCursor&);
107*61046927SAndroid Build Coastguard Worker MoveResult upwards_move(UpwardsCursor&);
108*61046927SAndroid Build Coastguard Worker void upwards_skip(UpwardsCursor&);
109*61046927SAndroid Build Coastguard Worker };
110*61046927SAndroid Build Coastguard Worker
111*61046927SAndroid Build Coastguard Worker struct sched_ctx {
112*61046927SAndroid Build Coastguard Worker amd_gfx_level gfx_level;
113*61046927SAndroid Build Coastguard Worker int16_t num_waves;
114*61046927SAndroid Build Coastguard Worker int16_t last_SMEM_stall;
115*61046927SAndroid Build Coastguard Worker int last_SMEM_dep_idx;
116*61046927SAndroid Build Coastguard Worker MoveState mv;
117*61046927SAndroid Build Coastguard Worker bool schedule_pos_exports = true;
118*61046927SAndroid Build Coastguard Worker unsigned schedule_pos_export_div = 1;
119*61046927SAndroid Build Coastguard Worker };
120*61046927SAndroid Build Coastguard Worker
/* This scheduler is a simple bottom-up pass based on ideas from
 * "A Novel Lightweight Instruction Scheduling Algorithm for Just-In-Time Compiler"
 * from Xiaohua Shi and Peng Guo.
 * The basic approach is to iterate over all instructions. When a memory instruction
 * is encountered it tries to move independent instructions from above and below
 * between the memory instruction and its first user.
 * The novelty is that this scheduler cares for the current register pressure:
 * Instructions will only be moved if the register pressure won't exceed a certain bound.
 */
130*61046927SAndroid Build Coastguard Worker
/**
 * Moves one element of a sequence to a new position, preserving the relative
 * order of all other elements.
 *
 * The element at index idx is placed directly in front of the element that
 * was at index before:
 *  - idx < before: the element ends up at index before - 1,
 *  - idx > before: the element ends up at index before,
 *  - idx == before: nothing happens.
 *
 * Only std::next and std::rotate are used, so any forward iterator works
 * (not just random-access ones).
 *
 * \param begin_it iterator to the first element of the sequence
 * \param idx      index of the element to move
 * \param before   index of the element to insert in front of; may be one past
 *                 the last element to move to the very end
 */
template <typename T>
void
move_element(T begin_it, size_t idx, size_t before)
{
   if (idx < before) {
      /* Rotate [idx, before) left by one. */
      auto source = std::next(begin_it, idx);
      auto range_end = std::next(source, before - idx);
      std::rotate(source, std::next(source), range_end);
   } else if (idx > before) {
      /* Rotate [before, idx] right by one. */
      auto dest = std::next(begin_it, before);
      auto source = std::next(dest, idx - before);
      std::rotate(dest, source, std::next(source));
   }
}
145*61046927SAndroid Build Coastguard Worker
146*61046927SAndroid Build Coastguard Worker void
verify_invariants(const Block * block)147*61046927SAndroid Build Coastguard Worker DownwardsCursor::verify_invariants(const Block* block)
148*61046927SAndroid Build Coastguard Worker {
149*61046927SAndroid Build Coastguard Worker assert(source_idx < insert_idx_clause);
150*61046927SAndroid Build Coastguard Worker assert(insert_idx_clause < insert_idx);
151*61046927SAndroid Build Coastguard Worker
152*61046927SAndroid Build Coastguard Worker #ifndef NDEBUG
153*61046927SAndroid Build Coastguard Worker RegisterDemand reference_demand;
154*61046927SAndroid Build Coastguard Worker for (int i = source_idx + 1; i < insert_idx_clause; ++i) {
155*61046927SAndroid Build Coastguard Worker reference_demand.update(block->instructions[i]->register_demand);
156*61046927SAndroid Build Coastguard Worker }
157*61046927SAndroid Build Coastguard Worker assert(total_demand == reference_demand);
158*61046927SAndroid Build Coastguard Worker
159*61046927SAndroid Build Coastguard Worker reference_demand = {};
160*61046927SAndroid Build Coastguard Worker for (int i = insert_idx_clause; i < insert_idx; ++i) {
161*61046927SAndroid Build Coastguard Worker reference_demand.update(block->instructions[i]->register_demand);
162*61046927SAndroid Build Coastguard Worker }
163*61046927SAndroid Build Coastguard Worker assert(clause_demand == reference_demand);
164*61046927SAndroid Build Coastguard Worker #endif
165*61046927SAndroid Build Coastguard Worker }
166*61046927SAndroid Build Coastguard Worker
167*61046927SAndroid Build Coastguard Worker DownwardsCursor
downwards_init(int current_idx,bool improved_rar_,bool may_form_clauses)168*61046927SAndroid Build Coastguard Worker MoveState::downwards_init(int current_idx, bool improved_rar_, bool may_form_clauses)
169*61046927SAndroid Build Coastguard Worker {
170*61046927SAndroid Build Coastguard Worker improved_rar = improved_rar_;
171*61046927SAndroid Build Coastguard Worker
172*61046927SAndroid Build Coastguard Worker std::fill(depends_on.begin(), depends_on.end(), false);
173*61046927SAndroid Build Coastguard Worker if (improved_rar) {
174*61046927SAndroid Build Coastguard Worker std::fill(RAR_dependencies.begin(), RAR_dependencies.end(), false);
175*61046927SAndroid Build Coastguard Worker if (may_form_clauses)
176*61046927SAndroid Build Coastguard Worker std::fill(RAR_dependencies_clause.begin(), RAR_dependencies_clause.end(), false);
177*61046927SAndroid Build Coastguard Worker }
178*61046927SAndroid Build Coastguard Worker
179*61046927SAndroid Build Coastguard Worker for (const Operand& op : current->operands) {
180*61046927SAndroid Build Coastguard Worker if (op.isTemp()) {
181*61046927SAndroid Build Coastguard Worker depends_on[op.tempId()] = true;
182*61046927SAndroid Build Coastguard Worker if (improved_rar && op.isFirstKill())
183*61046927SAndroid Build Coastguard Worker RAR_dependencies[op.tempId()] = true;
184*61046927SAndroid Build Coastguard Worker }
185*61046927SAndroid Build Coastguard Worker }
186*61046927SAndroid Build Coastguard Worker
187*61046927SAndroid Build Coastguard Worker DownwardsCursor cursor(current_idx, block->instructions[current_idx]->register_demand);
188*61046927SAndroid Build Coastguard Worker cursor.verify_invariants(block);
189*61046927SAndroid Build Coastguard Worker return cursor;
190*61046927SAndroid Build Coastguard Worker }
191*61046927SAndroid Build Coastguard Worker
192*61046927SAndroid Build Coastguard Worker /* If add_to_clause is true, the current clause is extended by moving the
193*61046927SAndroid Build Coastguard Worker * instruction at source_idx in front of the clause. Otherwise, the instruction
194*61046927SAndroid Build Coastguard Worker * is moved past the end of the clause without extending it */
195*61046927SAndroid Build Coastguard Worker MoveResult
downwards_move(DownwardsCursor & cursor,bool add_to_clause)196*61046927SAndroid Build Coastguard Worker MoveState::downwards_move(DownwardsCursor& cursor, bool add_to_clause)
197*61046927SAndroid Build Coastguard Worker {
198*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& instr = block->instructions[cursor.source_idx];
199*61046927SAndroid Build Coastguard Worker
200*61046927SAndroid Build Coastguard Worker for (const Definition& def : instr->definitions)
201*61046927SAndroid Build Coastguard Worker if (def.isTemp() && depends_on[def.tempId()])
202*61046927SAndroid Build Coastguard Worker return move_fail_ssa;
203*61046927SAndroid Build Coastguard Worker
204*61046927SAndroid Build Coastguard Worker /* check if one of candidate's operands is killed by depending instruction */
205*61046927SAndroid Build Coastguard Worker std::vector<bool>& RAR_deps =
206*61046927SAndroid Build Coastguard Worker improved_rar ? (add_to_clause ? RAR_dependencies_clause : RAR_dependencies) : depends_on;
207*61046927SAndroid Build Coastguard Worker for (const Operand& op : instr->operands) {
208*61046927SAndroid Build Coastguard Worker if (op.isTemp() && RAR_deps[op.tempId()]) {
209*61046927SAndroid Build Coastguard Worker // FIXME: account for difference in register pressure
210*61046927SAndroid Build Coastguard Worker return move_fail_rar;
211*61046927SAndroid Build Coastguard Worker }
212*61046927SAndroid Build Coastguard Worker }
213*61046927SAndroid Build Coastguard Worker
214*61046927SAndroid Build Coastguard Worker if (add_to_clause) {
215*61046927SAndroid Build Coastguard Worker for (const Operand& op : instr->operands) {
216*61046927SAndroid Build Coastguard Worker if (op.isTemp()) {
217*61046927SAndroid Build Coastguard Worker depends_on[op.tempId()] = true;
218*61046927SAndroid Build Coastguard Worker if (op.isFirstKill())
219*61046927SAndroid Build Coastguard Worker RAR_dependencies[op.tempId()] = true;
220*61046927SAndroid Build Coastguard Worker }
221*61046927SAndroid Build Coastguard Worker }
222*61046927SAndroid Build Coastguard Worker }
223*61046927SAndroid Build Coastguard Worker
224*61046927SAndroid Build Coastguard Worker const int dest_insert_idx = add_to_clause ? cursor.insert_idx_clause : cursor.insert_idx;
225*61046927SAndroid Build Coastguard Worker RegisterDemand register_pressure = cursor.total_demand;
226*61046927SAndroid Build Coastguard Worker if (!add_to_clause) {
227*61046927SAndroid Build Coastguard Worker register_pressure.update(cursor.clause_demand);
228*61046927SAndroid Build Coastguard Worker }
229*61046927SAndroid Build Coastguard Worker
230*61046927SAndroid Build Coastguard Worker /* Check the new demand of the instructions being moved over */
231*61046927SAndroid Build Coastguard Worker const RegisterDemand candidate_diff = get_live_changes(instr.get());
232*61046927SAndroid Build Coastguard Worker if (RegisterDemand(register_pressure - candidate_diff).exceeds(max_registers))
233*61046927SAndroid Build Coastguard Worker return move_fail_pressure;
234*61046927SAndroid Build Coastguard Worker
235*61046927SAndroid Build Coastguard Worker /* New demand for the moved instruction */
236*61046927SAndroid Build Coastguard Worker const RegisterDemand temp = get_temp_registers(instr.get());
237*61046927SAndroid Build Coastguard Worker const RegisterDemand temp2 = get_temp_registers(block->instructions[dest_insert_idx - 1].get());
238*61046927SAndroid Build Coastguard Worker const RegisterDemand new_demand =
239*61046927SAndroid Build Coastguard Worker block->instructions[dest_insert_idx - 1]->register_demand - temp2 + temp;
240*61046927SAndroid Build Coastguard Worker if (new_demand.exceeds(max_registers))
241*61046927SAndroid Build Coastguard Worker return move_fail_pressure;
242*61046927SAndroid Build Coastguard Worker
243*61046927SAndroid Build Coastguard Worker /* move the candidate below the memory load */
244*61046927SAndroid Build Coastguard Worker move_element(block->instructions.begin(), cursor.source_idx, dest_insert_idx);
245*61046927SAndroid Build Coastguard Worker
246*61046927SAndroid Build Coastguard Worker /* update register pressure */
247*61046927SAndroid Build Coastguard Worker for (int i = cursor.source_idx; i < dest_insert_idx - 1; i++)
248*61046927SAndroid Build Coastguard Worker block->instructions[i]->register_demand -= candidate_diff;
249*61046927SAndroid Build Coastguard Worker block->instructions[dest_insert_idx - 1]->register_demand = new_demand;
250*61046927SAndroid Build Coastguard Worker cursor.insert_idx_clause--;
251*61046927SAndroid Build Coastguard Worker if (cursor.source_idx != cursor.insert_idx_clause) {
252*61046927SAndroid Build Coastguard Worker /* Update demand if we moved over any instructions before the clause */
253*61046927SAndroid Build Coastguard Worker cursor.total_demand -= candidate_diff;
254*61046927SAndroid Build Coastguard Worker } else {
255*61046927SAndroid Build Coastguard Worker assert(cursor.total_demand == RegisterDemand{});
256*61046927SAndroid Build Coastguard Worker }
257*61046927SAndroid Build Coastguard Worker if (add_to_clause) {
258*61046927SAndroid Build Coastguard Worker cursor.clause_demand.update(new_demand);
259*61046927SAndroid Build Coastguard Worker } else {
260*61046927SAndroid Build Coastguard Worker cursor.clause_demand -= candidate_diff;
261*61046927SAndroid Build Coastguard Worker cursor.insert_idx--;
262*61046927SAndroid Build Coastguard Worker }
263*61046927SAndroid Build Coastguard Worker
264*61046927SAndroid Build Coastguard Worker cursor.source_idx--;
265*61046927SAndroid Build Coastguard Worker cursor.verify_invariants(block);
266*61046927SAndroid Build Coastguard Worker return move_success;
267*61046927SAndroid Build Coastguard Worker }
268*61046927SAndroid Build Coastguard Worker
269*61046927SAndroid Build Coastguard Worker void
downwards_skip(DownwardsCursor & cursor)270*61046927SAndroid Build Coastguard Worker MoveState::downwards_skip(DownwardsCursor& cursor)
271*61046927SAndroid Build Coastguard Worker {
272*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& instr = block->instructions[cursor.source_idx];
273*61046927SAndroid Build Coastguard Worker
274*61046927SAndroid Build Coastguard Worker for (const Operand& op : instr->operands) {
275*61046927SAndroid Build Coastguard Worker if (op.isTemp()) {
276*61046927SAndroid Build Coastguard Worker depends_on[op.tempId()] = true;
277*61046927SAndroid Build Coastguard Worker if (improved_rar && op.isFirstKill()) {
278*61046927SAndroid Build Coastguard Worker RAR_dependencies[op.tempId()] = true;
279*61046927SAndroid Build Coastguard Worker RAR_dependencies_clause[op.tempId()] = true;
280*61046927SAndroid Build Coastguard Worker }
281*61046927SAndroid Build Coastguard Worker }
282*61046927SAndroid Build Coastguard Worker }
283*61046927SAndroid Build Coastguard Worker cursor.total_demand.update(instr->register_demand);
284*61046927SAndroid Build Coastguard Worker cursor.source_idx--;
285*61046927SAndroid Build Coastguard Worker cursor.verify_invariants(block);
286*61046927SAndroid Build Coastguard Worker }
287*61046927SAndroid Build Coastguard Worker
288*61046927SAndroid Build Coastguard Worker void
verify_invariants(const Block * block)289*61046927SAndroid Build Coastguard Worker UpwardsCursor::verify_invariants(const Block* block)
290*61046927SAndroid Build Coastguard Worker {
291*61046927SAndroid Build Coastguard Worker #ifndef NDEBUG
292*61046927SAndroid Build Coastguard Worker if (!has_insert_idx()) {
293*61046927SAndroid Build Coastguard Worker return;
294*61046927SAndroid Build Coastguard Worker }
295*61046927SAndroid Build Coastguard Worker
296*61046927SAndroid Build Coastguard Worker assert(insert_idx < source_idx);
297*61046927SAndroid Build Coastguard Worker
298*61046927SAndroid Build Coastguard Worker RegisterDemand reference_demand;
299*61046927SAndroid Build Coastguard Worker for (int i = insert_idx; i < source_idx; ++i) {
300*61046927SAndroid Build Coastguard Worker reference_demand.update(block->instructions[i]->register_demand);
301*61046927SAndroid Build Coastguard Worker }
302*61046927SAndroid Build Coastguard Worker assert(total_demand == reference_demand);
303*61046927SAndroid Build Coastguard Worker #endif
304*61046927SAndroid Build Coastguard Worker }
305*61046927SAndroid Build Coastguard Worker
306*61046927SAndroid Build Coastguard Worker UpwardsCursor
upwards_init(int source_idx,bool improved_rar_)307*61046927SAndroid Build Coastguard Worker MoveState::upwards_init(int source_idx, bool improved_rar_)
308*61046927SAndroid Build Coastguard Worker {
309*61046927SAndroid Build Coastguard Worker improved_rar = improved_rar_;
310*61046927SAndroid Build Coastguard Worker
311*61046927SAndroid Build Coastguard Worker std::fill(depends_on.begin(), depends_on.end(), false);
312*61046927SAndroid Build Coastguard Worker std::fill(RAR_dependencies.begin(), RAR_dependencies.end(), false);
313*61046927SAndroid Build Coastguard Worker
314*61046927SAndroid Build Coastguard Worker for (const Definition& def : current->definitions) {
315*61046927SAndroid Build Coastguard Worker if (def.isTemp())
316*61046927SAndroid Build Coastguard Worker depends_on[def.tempId()] = true;
317*61046927SAndroid Build Coastguard Worker }
318*61046927SAndroid Build Coastguard Worker
319*61046927SAndroid Build Coastguard Worker return UpwardsCursor(source_idx);
320*61046927SAndroid Build Coastguard Worker }
321*61046927SAndroid Build Coastguard Worker
322*61046927SAndroid Build Coastguard Worker bool
upwards_check_deps(UpwardsCursor & cursor)323*61046927SAndroid Build Coastguard Worker MoveState::upwards_check_deps(UpwardsCursor& cursor)
324*61046927SAndroid Build Coastguard Worker {
325*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& instr = block->instructions[cursor.source_idx];
326*61046927SAndroid Build Coastguard Worker for (const Operand& op : instr->operands) {
327*61046927SAndroid Build Coastguard Worker if (op.isTemp() && depends_on[op.tempId()])
328*61046927SAndroid Build Coastguard Worker return false;
329*61046927SAndroid Build Coastguard Worker }
330*61046927SAndroid Build Coastguard Worker return true;
331*61046927SAndroid Build Coastguard Worker }
332*61046927SAndroid Build Coastguard Worker
333*61046927SAndroid Build Coastguard Worker void
upwards_update_insert_idx(UpwardsCursor & cursor)334*61046927SAndroid Build Coastguard Worker MoveState::upwards_update_insert_idx(UpwardsCursor& cursor)
335*61046927SAndroid Build Coastguard Worker {
336*61046927SAndroid Build Coastguard Worker cursor.insert_idx = cursor.source_idx;
337*61046927SAndroid Build Coastguard Worker cursor.total_demand = block->instructions[cursor.insert_idx]->register_demand;
338*61046927SAndroid Build Coastguard Worker }
339*61046927SAndroid Build Coastguard Worker
340*61046927SAndroid Build Coastguard Worker MoveResult
upwards_move(UpwardsCursor & cursor)341*61046927SAndroid Build Coastguard Worker MoveState::upwards_move(UpwardsCursor& cursor)
342*61046927SAndroid Build Coastguard Worker {
343*61046927SAndroid Build Coastguard Worker assert(cursor.has_insert_idx());
344*61046927SAndroid Build Coastguard Worker
345*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& instr = block->instructions[cursor.source_idx];
346*61046927SAndroid Build Coastguard Worker for (const Operand& op : instr->operands) {
347*61046927SAndroid Build Coastguard Worker if (op.isTemp() && depends_on[op.tempId()])
348*61046927SAndroid Build Coastguard Worker return move_fail_ssa;
349*61046927SAndroid Build Coastguard Worker }
350*61046927SAndroid Build Coastguard Worker
351*61046927SAndroid Build Coastguard Worker /* check if candidate uses/kills an operand which is used by a dependency */
352*61046927SAndroid Build Coastguard Worker for (const Operand& op : instr->operands) {
353*61046927SAndroid Build Coastguard Worker if (op.isTemp() && (!improved_rar || op.isFirstKill()) && RAR_dependencies[op.tempId()])
354*61046927SAndroid Build Coastguard Worker return move_fail_rar;
355*61046927SAndroid Build Coastguard Worker }
356*61046927SAndroid Build Coastguard Worker
357*61046927SAndroid Build Coastguard Worker /* check if register pressure is low enough: the diff is negative if register pressure is
358*61046927SAndroid Build Coastguard Worker * decreased */
359*61046927SAndroid Build Coastguard Worker const RegisterDemand candidate_diff = get_live_changes(instr.get());
360*61046927SAndroid Build Coastguard Worker const RegisterDemand temp = get_temp_registers(instr.get());
361*61046927SAndroid Build Coastguard Worker if (RegisterDemand(cursor.total_demand + candidate_diff).exceeds(max_registers))
362*61046927SAndroid Build Coastguard Worker return move_fail_pressure;
363*61046927SAndroid Build Coastguard Worker const RegisterDemand temp2 =
364*61046927SAndroid Build Coastguard Worker get_temp_registers(block->instructions[cursor.insert_idx - 1].get());
365*61046927SAndroid Build Coastguard Worker const RegisterDemand new_demand =
366*61046927SAndroid Build Coastguard Worker block->instructions[cursor.insert_idx - 1]->register_demand - temp2 + candidate_diff + temp;
367*61046927SAndroid Build Coastguard Worker if (new_demand.exceeds(max_registers))
368*61046927SAndroid Build Coastguard Worker return move_fail_pressure;
369*61046927SAndroid Build Coastguard Worker
370*61046927SAndroid Build Coastguard Worker /* move the candidate above the insert_idx */
371*61046927SAndroid Build Coastguard Worker move_element(block->instructions.begin(), cursor.source_idx, cursor.insert_idx);
372*61046927SAndroid Build Coastguard Worker
373*61046927SAndroid Build Coastguard Worker /* update register pressure */
374*61046927SAndroid Build Coastguard Worker block->instructions[cursor.insert_idx]->register_demand = new_demand;
375*61046927SAndroid Build Coastguard Worker for (int i = cursor.insert_idx + 1; i <= cursor.source_idx; i++)
376*61046927SAndroid Build Coastguard Worker block->instructions[i]->register_demand += candidate_diff;
377*61046927SAndroid Build Coastguard Worker cursor.total_demand += candidate_diff;
378*61046927SAndroid Build Coastguard Worker
379*61046927SAndroid Build Coastguard Worker cursor.total_demand.update(block->instructions[cursor.source_idx]->register_demand);
380*61046927SAndroid Build Coastguard Worker
381*61046927SAndroid Build Coastguard Worker cursor.insert_idx++;
382*61046927SAndroid Build Coastguard Worker cursor.source_idx++;
383*61046927SAndroid Build Coastguard Worker
384*61046927SAndroid Build Coastguard Worker cursor.verify_invariants(block);
385*61046927SAndroid Build Coastguard Worker
386*61046927SAndroid Build Coastguard Worker return move_success;
387*61046927SAndroid Build Coastguard Worker }
388*61046927SAndroid Build Coastguard Worker
389*61046927SAndroid Build Coastguard Worker void
upwards_skip(UpwardsCursor & cursor)390*61046927SAndroid Build Coastguard Worker MoveState::upwards_skip(UpwardsCursor& cursor)
391*61046927SAndroid Build Coastguard Worker {
392*61046927SAndroid Build Coastguard Worker if (cursor.has_insert_idx()) {
393*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& instr = block->instructions[cursor.source_idx];
394*61046927SAndroid Build Coastguard Worker for (const Definition& def : instr->definitions) {
395*61046927SAndroid Build Coastguard Worker if (def.isTemp())
396*61046927SAndroid Build Coastguard Worker depends_on[def.tempId()] = true;
397*61046927SAndroid Build Coastguard Worker }
398*61046927SAndroid Build Coastguard Worker for (const Operand& op : instr->operands) {
399*61046927SAndroid Build Coastguard Worker if (op.isTemp())
400*61046927SAndroid Build Coastguard Worker RAR_dependencies[op.tempId()] = true;
401*61046927SAndroid Build Coastguard Worker }
402*61046927SAndroid Build Coastguard Worker cursor.total_demand.update(instr->register_demand);
403*61046927SAndroid Build Coastguard Worker }
404*61046927SAndroid Build Coastguard Worker
405*61046927SAndroid Build Coastguard Worker cursor.source_idx++;
406*61046927SAndroid Build Coastguard Worker
407*61046927SAndroid Build Coastguard Worker cursor.verify_invariants(block);
408*61046927SAndroid Build Coastguard Worker }
409*61046927SAndroid Build Coastguard Worker
410*61046927SAndroid Build Coastguard Worker bool
is_done_sendmsg(amd_gfx_level gfx_level,const Instruction * instr)411*61046927SAndroid Build Coastguard Worker is_done_sendmsg(amd_gfx_level gfx_level, const Instruction* instr)
412*61046927SAndroid Build Coastguard Worker {
413*61046927SAndroid Build Coastguard Worker if (gfx_level <= GFX10_3 && instr->opcode == aco_opcode::s_sendmsg)
414*61046927SAndroid Build Coastguard Worker return (instr->salu().imm & sendmsg_id_mask) == sendmsg_gs_done;
415*61046927SAndroid Build Coastguard Worker return false;
416*61046927SAndroid Build Coastguard Worker }
417*61046927SAndroid Build Coastguard Worker
418*61046927SAndroid Build Coastguard Worker bool
is_pos_prim_export(amd_gfx_level gfx_level,const Instruction * instr)419*61046927SAndroid Build Coastguard Worker is_pos_prim_export(amd_gfx_level gfx_level, const Instruction* instr)
420*61046927SAndroid Build Coastguard Worker {
421*61046927SAndroid Build Coastguard Worker /* Because of NO_PC_EXPORT=1, a done=1 position or primitive export can launch PS waves before
422*61046927SAndroid Build Coastguard Worker * the NGG/VS wave finishes if there are no parameter exports.
423*61046927SAndroid Build Coastguard Worker */
424*61046927SAndroid Build Coastguard Worker return instr->opcode == aco_opcode::exp && instr->exp().dest >= V_008DFC_SQ_EXP_POS &&
425*61046927SAndroid Build Coastguard Worker instr->exp().dest <= V_008DFC_SQ_EXP_PRIM && gfx_level >= GFX10;
426*61046927SAndroid Build Coastguard Worker }
427*61046927SAndroid Build Coastguard Worker
428*61046927SAndroid Build Coastguard Worker memory_sync_info
get_sync_info_with_hack(const Instruction * instr)429*61046927SAndroid Build Coastguard Worker get_sync_info_with_hack(const Instruction* instr)
430*61046927SAndroid Build Coastguard Worker {
431*61046927SAndroid Build Coastguard Worker memory_sync_info sync = get_sync_info(instr);
432*61046927SAndroid Build Coastguard Worker if (instr->isSMEM() && !instr->operands.empty() && instr->operands[0].bytes() == 16) {
433*61046927SAndroid Build Coastguard Worker // FIXME: currently, it doesn't seem beneficial to omit this due to how our scheduler works
434*61046927SAndroid Build Coastguard Worker sync.storage = (storage_class)(sync.storage | storage_buffer);
435*61046927SAndroid Build Coastguard Worker sync.semantics =
436*61046927SAndroid Build Coastguard Worker (memory_semantics)((sync.semantics | semantic_private) & ~semantic_can_reorder);
437*61046927SAndroid Build Coastguard Worker }
438*61046927SAndroid Build Coastguard Worker return sync;
439*61046927SAndroid Build Coastguard Worker }
440*61046927SAndroid Build Coastguard Worker
/* Summary of the synchronization-relevant events of one or more instructions,
 * as accumulated by add_memory_event(). Apart from has_control_barrier, every
 * member is a bitmask of storage classes.
 *
 * The struct is cleared with memset() by its users, so it has to stay a plain
 * trivially-copyable aggregate. */
struct memory_event_set {
   /* Set for a GS_DONE sendmsg, a position/primitive export, or a p_barrier
    * whose exec_scope is wider than scope_invocation. */
   bool has_control_barrier;

   /* Storage of p_barrier instructions carrying semantic_acquire. */
   unsigned bar_acquire;
   /* Storage of p_barrier instructions carrying semantic_release. */
   unsigned bar_release;
   /* Storage of all p_barrier instructions, regardless of semantics. */
   unsigned bar_classes;

   /* Storage of memory accesses carrying semantic_acquire. */
   unsigned access_acquire;
   /* Storage of memory accesses carrying semantic_release. */
   unsigned access_release;
   /* Storage of non-private accesses without semantic_atomic. */
   unsigned access_relaxed;
   /* Storage of non-private accesses with semantic_atomic. */
   unsigned access_atomic;
};
453*61046927SAndroid Build Coastguard Worker
454*61046927SAndroid Build Coastguard Worker struct hazard_query {
455*61046927SAndroid Build Coastguard Worker amd_gfx_level gfx_level;
456*61046927SAndroid Build Coastguard Worker bool contains_spill;
457*61046927SAndroid Build Coastguard Worker bool contains_sendmsg;
458*61046927SAndroid Build Coastguard Worker bool uses_exec;
459*61046927SAndroid Build Coastguard Worker bool writes_exec;
460*61046927SAndroid Build Coastguard Worker memory_event_set mem_events;
461*61046927SAndroid Build Coastguard Worker unsigned aliasing_storage; /* storage classes which are accessed (non-SMEM) */
462*61046927SAndroid Build Coastguard Worker unsigned aliasing_storage_smem; /* storage classes which are accessed (SMEM) */
463*61046927SAndroid Build Coastguard Worker };
464*61046927SAndroid Build Coastguard Worker
465*61046927SAndroid Build Coastguard Worker void
init_hazard_query(const sched_ctx & ctx,hazard_query * query)466*61046927SAndroid Build Coastguard Worker init_hazard_query(const sched_ctx& ctx, hazard_query* query)
467*61046927SAndroid Build Coastguard Worker {
468*61046927SAndroid Build Coastguard Worker query->gfx_level = ctx.gfx_level;
469*61046927SAndroid Build Coastguard Worker query->contains_spill = false;
470*61046927SAndroid Build Coastguard Worker query->contains_sendmsg = false;
471*61046927SAndroid Build Coastguard Worker query->uses_exec = false;
472*61046927SAndroid Build Coastguard Worker query->writes_exec = false;
473*61046927SAndroid Build Coastguard Worker memset(&query->mem_events, 0, sizeof(query->mem_events));
474*61046927SAndroid Build Coastguard Worker query->aliasing_storage = 0;
475*61046927SAndroid Build Coastguard Worker query->aliasing_storage_smem = 0;
476*61046927SAndroid Build Coastguard Worker }
477*61046927SAndroid Build Coastguard Worker
478*61046927SAndroid Build Coastguard Worker void
add_memory_event(amd_gfx_level gfx_level,memory_event_set * set,Instruction * instr,memory_sync_info * sync)479*61046927SAndroid Build Coastguard Worker add_memory_event(amd_gfx_level gfx_level, memory_event_set* set, Instruction* instr,
480*61046927SAndroid Build Coastguard Worker memory_sync_info* sync)
481*61046927SAndroid Build Coastguard Worker {
482*61046927SAndroid Build Coastguard Worker set->has_control_barrier |= is_done_sendmsg(gfx_level, instr);
483*61046927SAndroid Build Coastguard Worker set->has_control_barrier |= is_pos_prim_export(gfx_level, instr);
484*61046927SAndroid Build Coastguard Worker if (instr->opcode == aco_opcode::p_barrier) {
485*61046927SAndroid Build Coastguard Worker Pseudo_barrier_instruction& bar = instr->barrier();
486*61046927SAndroid Build Coastguard Worker if (bar.sync.semantics & semantic_acquire)
487*61046927SAndroid Build Coastguard Worker set->bar_acquire |= bar.sync.storage;
488*61046927SAndroid Build Coastguard Worker if (bar.sync.semantics & semantic_release)
489*61046927SAndroid Build Coastguard Worker set->bar_release |= bar.sync.storage;
490*61046927SAndroid Build Coastguard Worker set->bar_classes |= bar.sync.storage;
491*61046927SAndroid Build Coastguard Worker
492*61046927SAndroid Build Coastguard Worker set->has_control_barrier |= bar.exec_scope > scope_invocation;
493*61046927SAndroid Build Coastguard Worker }
494*61046927SAndroid Build Coastguard Worker
495*61046927SAndroid Build Coastguard Worker if (!sync->storage)
496*61046927SAndroid Build Coastguard Worker return;
497*61046927SAndroid Build Coastguard Worker
498*61046927SAndroid Build Coastguard Worker if (sync->semantics & semantic_acquire)
499*61046927SAndroid Build Coastguard Worker set->access_acquire |= sync->storage;
500*61046927SAndroid Build Coastguard Worker if (sync->semantics & semantic_release)
501*61046927SAndroid Build Coastguard Worker set->access_release |= sync->storage;
502*61046927SAndroid Build Coastguard Worker
503*61046927SAndroid Build Coastguard Worker if (!(sync->semantics & semantic_private)) {
504*61046927SAndroid Build Coastguard Worker if (sync->semantics & semantic_atomic)
505*61046927SAndroid Build Coastguard Worker set->access_atomic |= sync->storage;
506*61046927SAndroid Build Coastguard Worker else
507*61046927SAndroid Build Coastguard Worker set->access_relaxed |= sync->storage;
508*61046927SAndroid Build Coastguard Worker }
509*61046927SAndroid Build Coastguard Worker }
510*61046927SAndroid Build Coastguard Worker
511*61046927SAndroid Build Coastguard Worker void
add_to_hazard_query(hazard_query * query,Instruction * instr)512*61046927SAndroid Build Coastguard Worker add_to_hazard_query(hazard_query* query, Instruction* instr)
513*61046927SAndroid Build Coastguard Worker {
514*61046927SAndroid Build Coastguard Worker if (instr->opcode == aco_opcode::p_spill || instr->opcode == aco_opcode::p_reload)
515*61046927SAndroid Build Coastguard Worker query->contains_spill = true;
516*61046927SAndroid Build Coastguard Worker query->contains_sendmsg |= instr->opcode == aco_opcode::s_sendmsg;
517*61046927SAndroid Build Coastguard Worker query->uses_exec |= needs_exec_mask(instr);
518*61046927SAndroid Build Coastguard Worker for (const Definition& def : instr->definitions) {
519*61046927SAndroid Build Coastguard Worker if (def.isFixed() && def.physReg() == exec)
520*61046927SAndroid Build Coastguard Worker query->writes_exec = true;
521*61046927SAndroid Build Coastguard Worker }
522*61046927SAndroid Build Coastguard Worker
523*61046927SAndroid Build Coastguard Worker memory_sync_info sync = get_sync_info_with_hack(instr);
524*61046927SAndroid Build Coastguard Worker
525*61046927SAndroid Build Coastguard Worker add_memory_event(query->gfx_level, &query->mem_events, instr, &sync);
526*61046927SAndroid Build Coastguard Worker
527*61046927SAndroid Build Coastguard Worker if (!(sync.semantics & semantic_can_reorder)) {
528*61046927SAndroid Build Coastguard Worker unsigned storage = sync.storage;
529*61046927SAndroid Build Coastguard Worker /* images and buffer/global memory can alias */ // TODO: more precisely, buffer images and
530*61046927SAndroid Build Coastguard Worker // buffer/global memory can alias
531*61046927SAndroid Build Coastguard Worker if (storage & (storage_buffer | storage_image))
532*61046927SAndroid Build Coastguard Worker storage |= storage_buffer | storage_image;
533*61046927SAndroid Build Coastguard Worker if (instr->isSMEM())
534*61046927SAndroid Build Coastguard Worker query->aliasing_storage_smem |= storage;
535*61046927SAndroid Build Coastguard Worker else
536*61046927SAndroid Build Coastguard Worker query->aliasing_storage |= storage;
537*61046927SAndroid Build Coastguard Worker }
538*61046927SAndroid Build Coastguard Worker }
539*61046927SAndroid Build Coastguard Worker
/* Outcome of perform_hazard_query(). */
enum HazardResult {
   /* No hazard was found. */
   hazard_success,
   /* Non-reorderable access to aliasing storage other than shared memory. */
   hazard_fail_reorder_vmem_smem,
   /* Non-reorderable access to aliasing shared memory. */
   hazard_fail_reorder_ds,
   /* s_sendmsg while the query already contains an s_sendmsg. */
   hazard_fail_reorder_sendmsg,
   /* p_spill/p_reload while the query already contains a spill or reload. */
   hazard_fail_spill,
   /* Export instruction (exports are never moved). */
   hazard_fail_export,
   /* Ordering required by a barrier or by acquire/release semantics. */
   hazard_fail_barrier,
   /* Must stop at these failures. The hazard query code doesn't consider them
    * when added. */
   hazard_fail_exec,
   hazard_fail_unreorderable,
};
553*61046927SAndroid Build Coastguard Worker
554*61046927SAndroid Build Coastguard Worker HazardResult
perform_hazard_query(hazard_query * query,Instruction * instr,bool upwards)555*61046927SAndroid Build Coastguard Worker perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
556*61046927SAndroid Build Coastguard Worker {
557*61046927SAndroid Build Coastguard Worker /* don't schedule discards downwards */
558*61046927SAndroid Build Coastguard Worker if (!upwards && instr->opcode == aco_opcode::p_exit_early_if)
559*61046927SAndroid Build Coastguard Worker return hazard_fail_unreorderable;
560*61046927SAndroid Build Coastguard Worker
561*61046927SAndroid Build Coastguard Worker /* In Primitive Ordered Pixel Shading, await overlapped waves as late as possible, and notify
562*61046927SAndroid Build Coastguard Worker * overlapping waves that they can continue execution as early as possible.
563*61046927SAndroid Build Coastguard Worker */
564*61046927SAndroid Build Coastguard Worker if (upwards) {
565*61046927SAndroid Build Coastguard Worker if (instr->opcode == aco_opcode::p_pops_gfx9_add_exiting_wave_id ||
566*61046927SAndroid Build Coastguard Worker is_wait_export_ready(query->gfx_level, instr)) {
567*61046927SAndroid Build Coastguard Worker return hazard_fail_unreorderable;
568*61046927SAndroid Build Coastguard Worker }
569*61046927SAndroid Build Coastguard Worker } else {
570*61046927SAndroid Build Coastguard Worker if (instr->opcode == aco_opcode::p_pops_gfx9_ordered_section_done) {
571*61046927SAndroid Build Coastguard Worker return hazard_fail_unreorderable;
572*61046927SAndroid Build Coastguard Worker }
573*61046927SAndroid Build Coastguard Worker }
574*61046927SAndroid Build Coastguard Worker
575*61046927SAndroid Build Coastguard Worker if (query->uses_exec || query->writes_exec) {
576*61046927SAndroid Build Coastguard Worker for (const Definition& def : instr->definitions) {
577*61046927SAndroid Build Coastguard Worker if (def.isFixed() && def.physReg() == exec)
578*61046927SAndroid Build Coastguard Worker return hazard_fail_exec;
579*61046927SAndroid Build Coastguard Worker }
580*61046927SAndroid Build Coastguard Worker }
581*61046927SAndroid Build Coastguard Worker if (query->writes_exec && needs_exec_mask(instr))
582*61046927SAndroid Build Coastguard Worker return hazard_fail_exec;
583*61046927SAndroid Build Coastguard Worker
584*61046927SAndroid Build Coastguard Worker /* Don't move exports so that they stay closer together.
585*61046927SAndroid Build Coastguard Worker * Since GFX11, export order matters. MRTZ must come first,
586*61046927SAndroid Build Coastguard Worker * then color exports sorted from first to last.
587*61046927SAndroid Build Coastguard Worker * Also, with Primitive Ordered Pixel Shading on GFX11+, the `done` export must not be moved
588*61046927SAndroid Build Coastguard Worker * above the memory accesses before the queue family scope (more precisely, fragment interlock
589*61046927SAndroid Build Coastguard Worker * scope, but it's not available in ACO) release barrier that is expected to be inserted before
590*61046927SAndroid Build Coastguard Worker * the export, as well as before any `s_wait_event export_ready` which enters the ordered
591*61046927SAndroid Build Coastguard Worker * section, because the `done` export exits the ordered section.
592*61046927SAndroid Build Coastguard Worker */
593*61046927SAndroid Build Coastguard Worker if (instr->isEXP() || instr->opcode == aco_opcode::p_dual_src_export_gfx11)
594*61046927SAndroid Build Coastguard Worker return hazard_fail_export;
595*61046927SAndroid Build Coastguard Worker
596*61046927SAndroid Build Coastguard Worker /* don't move non-reorderable instructions */
597*61046927SAndroid Build Coastguard Worker if (instr->opcode == aco_opcode::s_memtime || instr->opcode == aco_opcode::s_memrealtime ||
598*61046927SAndroid Build Coastguard Worker instr->opcode == aco_opcode::s_setprio || instr->opcode == aco_opcode::s_getreg_b32 ||
599*61046927SAndroid Build Coastguard Worker instr->opcode == aco_opcode::p_shader_cycles_hi_lo_hi ||
600*61046927SAndroid Build Coastguard Worker instr->opcode == aco_opcode::p_init_scratch ||
601*61046927SAndroid Build Coastguard Worker instr->opcode == aco_opcode::p_jump_to_epilog ||
602*61046927SAndroid Build Coastguard Worker instr->opcode == aco_opcode::s_sendmsg_rtn_b32 ||
603*61046927SAndroid Build Coastguard Worker instr->opcode == aco_opcode::s_sendmsg_rtn_b64 ||
604*61046927SAndroid Build Coastguard Worker instr->opcode == aco_opcode::p_end_with_regs || instr->opcode == aco_opcode::s_nop ||
605*61046927SAndroid Build Coastguard Worker instr->opcode == aco_opcode::s_sleep)
606*61046927SAndroid Build Coastguard Worker return hazard_fail_unreorderable;
607*61046927SAndroid Build Coastguard Worker
608*61046927SAndroid Build Coastguard Worker memory_event_set instr_set;
609*61046927SAndroid Build Coastguard Worker memset(&instr_set, 0, sizeof(instr_set));
610*61046927SAndroid Build Coastguard Worker memory_sync_info sync = get_sync_info_with_hack(instr);
611*61046927SAndroid Build Coastguard Worker add_memory_event(query->gfx_level, &instr_set, instr, &sync);
612*61046927SAndroid Build Coastguard Worker
613*61046927SAndroid Build Coastguard Worker memory_event_set* first = &instr_set;
614*61046927SAndroid Build Coastguard Worker memory_event_set* second = &query->mem_events;
615*61046927SAndroid Build Coastguard Worker if (upwards)
616*61046927SAndroid Build Coastguard Worker std::swap(first, second);
617*61046927SAndroid Build Coastguard Worker
618*61046927SAndroid Build Coastguard Worker /* everything after barrier(acquire) happens after the atomics/control_barriers before
619*61046927SAndroid Build Coastguard Worker * everything after load(acquire) happens after the load
620*61046927SAndroid Build Coastguard Worker */
621*61046927SAndroid Build Coastguard Worker if ((first->has_control_barrier || first->access_atomic) && second->bar_acquire)
622*61046927SAndroid Build Coastguard Worker return hazard_fail_barrier;
623*61046927SAndroid Build Coastguard Worker if (((first->access_acquire || first->bar_acquire) && second->bar_classes) ||
624*61046927SAndroid Build Coastguard Worker ((first->access_acquire | first->bar_acquire) &
625*61046927SAndroid Build Coastguard Worker (second->access_relaxed | second->access_atomic)))
626*61046927SAndroid Build Coastguard Worker return hazard_fail_barrier;
627*61046927SAndroid Build Coastguard Worker
628*61046927SAndroid Build Coastguard Worker /* everything before barrier(release) happens before the atomics/control_barriers after *
629*61046927SAndroid Build Coastguard Worker * everything before store(release) happens before the store
630*61046927SAndroid Build Coastguard Worker */
631*61046927SAndroid Build Coastguard Worker if (first->bar_release && (second->has_control_barrier || second->access_atomic))
632*61046927SAndroid Build Coastguard Worker return hazard_fail_barrier;
633*61046927SAndroid Build Coastguard Worker if ((first->bar_classes && (second->bar_release || second->access_release)) ||
634*61046927SAndroid Build Coastguard Worker ((first->access_relaxed | first->access_atomic) &
635*61046927SAndroid Build Coastguard Worker (second->bar_release | second->access_release)))
636*61046927SAndroid Build Coastguard Worker return hazard_fail_barrier;
637*61046927SAndroid Build Coastguard Worker
638*61046927SAndroid Build Coastguard Worker /* don't move memory barriers around other memory barriers */
639*61046927SAndroid Build Coastguard Worker if (first->bar_classes && second->bar_classes)
640*61046927SAndroid Build Coastguard Worker return hazard_fail_barrier;
641*61046927SAndroid Build Coastguard Worker
642*61046927SAndroid Build Coastguard Worker /* Don't move memory accesses to before control barriers. I don't think
643*61046927SAndroid Build Coastguard Worker * this is necessary for the Vulkan memory model, but it might be for GLSL450. */
644*61046927SAndroid Build Coastguard Worker unsigned control_classes =
645*61046927SAndroid Build Coastguard Worker storage_buffer | storage_image | storage_shared | storage_task_payload;
646*61046927SAndroid Build Coastguard Worker if (first->has_control_barrier &&
647*61046927SAndroid Build Coastguard Worker ((second->access_atomic | second->access_relaxed) & control_classes))
648*61046927SAndroid Build Coastguard Worker return hazard_fail_barrier;
649*61046927SAndroid Build Coastguard Worker
650*61046927SAndroid Build Coastguard Worker /* don't move memory loads/stores past potentially aliasing loads/stores */
651*61046927SAndroid Build Coastguard Worker unsigned aliasing_storage =
652*61046927SAndroid Build Coastguard Worker instr->isSMEM() ? query->aliasing_storage_smem : query->aliasing_storage;
653*61046927SAndroid Build Coastguard Worker if ((sync.storage & aliasing_storage) && !(sync.semantics & semantic_can_reorder)) {
654*61046927SAndroid Build Coastguard Worker unsigned intersect = sync.storage & aliasing_storage;
655*61046927SAndroid Build Coastguard Worker if (intersect & storage_shared)
656*61046927SAndroid Build Coastguard Worker return hazard_fail_reorder_ds;
657*61046927SAndroid Build Coastguard Worker return hazard_fail_reorder_vmem_smem;
658*61046927SAndroid Build Coastguard Worker }
659*61046927SAndroid Build Coastguard Worker
660*61046927SAndroid Build Coastguard Worker if ((instr->opcode == aco_opcode::p_spill || instr->opcode == aco_opcode::p_reload) &&
661*61046927SAndroid Build Coastguard Worker query->contains_spill)
662*61046927SAndroid Build Coastguard Worker return hazard_fail_spill;
663*61046927SAndroid Build Coastguard Worker
664*61046927SAndroid Build Coastguard Worker if (instr->opcode == aco_opcode::s_sendmsg && query->contains_sendmsg)
665*61046927SAndroid Build Coastguard Worker return hazard_fail_reorder_sendmsg;
666*61046927SAndroid Build Coastguard Worker
667*61046927SAndroid Build Coastguard Worker return hazard_success;
668*61046927SAndroid Build Coastguard Worker }
669*61046927SAndroid Build Coastguard Worker
670*61046927SAndroid Build Coastguard Worker unsigned
get_likely_cost(Instruction * instr)671*61046927SAndroid Build Coastguard Worker get_likely_cost(Instruction* instr)
672*61046927SAndroid Build Coastguard Worker {
673*61046927SAndroid Build Coastguard Worker if (instr->opcode == aco_opcode::p_split_vector ||
674*61046927SAndroid Build Coastguard Worker instr->opcode == aco_opcode::p_extract_vector) {
675*61046927SAndroid Build Coastguard Worker unsigned cost = 0;
676*61046927SAndroid Build Coastguard Worker for (Definition def : instr->definitions) {
677*61046927SAndroid Build Coastguard Worker if (instr->operands[0].isKill() &&
678*61046927SAndroid Build Coastguard Worker def.regClass().type() == instr->operands[0].regClass().type())
679*61046927SAndroid Build Coastguard Worker continue;
680*61046927SAndroid Build Coastguard Worker cost += def.size();
681*61046927SAndroid Build Coastguard Worker }
682*61046927SAndroid Build Coastguard Worker return cost;
683*61046927SAndroid Build Coastguard Worker } else if (instr->opcode == aco_opcode::p_create_vector) {
684*61046927SAndroid Build Coastguard Worker unsigned cost = 0;
685*61046927SAndroid Build Coastguard Worker for (Operand op : instr->operands) {
686*61046927SAndroid Build Coastguard Worker if (op.isTemp() && op.isFirstKill() &&
687*61046927SAndroid Build Coastguard Worker op.regClass().type() == instr->definitions[0].regClass().type())
688*61046927SAndroid Build Coastguard Worker continue;
689*61046927SAndroid Build Coastguard Worker cost += op.size();
690*61046927SAndroid Build Coastguard Worker }
691*61046927SAndroid Build Coastguard Worker return cost;
692*61046927SAndroid Build Coastguard Worker } else {
693*61046927SAndroid Build Coastguard Worker /* For the moment, just assume the same cost for all other instructions. */
694*61046927SAndroid Build Coastguard Worker return 1;
695*61046927SAndroid Build Coastguard Worker }
696*61046927SAndroid Build Coastguard Worker }
697*61046927SAndroid Build Coastguard Worker
698*61046927SAndroid Build Coastguard Worker void
schedule_SMEM(sched_ctx & ctx,Block * block,Instruction * current,int idx)699*61046927SAndroid Build Coastguard Worker schedule_SMEM(sched_ctx& ctx, Block* block, Instruction* current, int idx)
700*61046927SAndroid Build Coastguard Worker {
701*61046927SAndroid Build Coastguard Worker assert(idx != 0);
702*61046927SAndroid Build Coastguard Worker int window_size = SMEM_WINDOW_SIZE;
703*61046927SAndroid Build Coastguard Worker int max_moves = SMEM_MAX_MOVES;
704*61046927SAndroid Build Coastguard Worker int16_t k = 0;
705*61046927SAndroid Build Coastguard Worker
706*61046927SAndroid Build Coastguard Worker /* don't move s_memtime/s_memrealtime */
707*61046927SAndroid Build Coastguard Worker if (current->opcode == aco_opcode::s_memtime || current->opcode == aco_opcode::s_memrealtime ||
708*61046927SAndroid Build Coastguard Worker current->opcode == aco_opcode::s_sendmsg_rtn_b32 ||
709*61046927SAndroid Build Coastguard Worker current->opcode == aco_opcode::s_sendmsg_rtn_b64)
710*61046927SAndroid Build Coastguard Worker return;
711*61046927SAndroid Build Coastguard Worker
712*61046927SAndroid Build Coastguard Worker /* first, check if we have instructions before current to move down */
713*61046927SAndroid Build Coastguard Worker hazard_query hq;
714*61046927SAndroid Build Coastguard Worker init_hazard_query(ctx, &hq);
715*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&hq, current);
716*61046927SAndroid Build Coastguard Worker
717*61046927SAndroid Build Coastguard Worker DownwardsCursor cursor = ctx.mv.downwards_init(idx, false, false);
718*61046927SAndroid Build Coastguard Worker
719*61046927SAndroid Build Coastguard Worker for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int)idx - window_size;
720*61046927SAndroid Build Coastguard Worker candidate_idx--) {
721*61046927SAndroid Build Coastguard Worker assert(candidate_idx >= 0);
722*61046927SAndroid Build Coastguard Worker assert(candidate_idx == cursor.source_idx);
723*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
724*61046927SAndroid Build Coastguard Worker
725*61046927SAndroid Build Coastguard Worker /* break if we'd make the previous SMEM instruction stall */
726*61046927SAndroid Build Coastguard Worker bool can_stall_prev_smem =
727*61046927SAndroid Build Coastguard Worker idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx;
728*61046927SAndroid Build Coastguard Worker if (can_stall_prev_smem && ctx.last_SMEM_stall >= 0)
729*61046927SAndroid Build Coastguard Worker break;
730*61046927SAndroid Build Coastguard Worker
731*61046927SAndroid Build Coastguard Worker /* break when encountering another MEM instruction, logical_start or barriers */
732*61046927SAndroid Build Coastguard Worker if (candidate->opcode == aco_opcode::p_logical_start)
733*61046927SAndroid Build Coastguard Worker break;
734*61046927SAndroid Build Coastguard Worker /* only move VMEM instructions below descriptor loads. be more aggressive at higher num_waves
735*61046927SAndroid Build Coastguard Worker * to help create more vmem clauses */
736*61046927SAndroid Build Coastguard Worker if ((candidate->isVMEM() || candidate->isFlatLike()) &&
737*61046927SAndroid Build Coastguard Worker (cursor.insert_idx - cursor.source_idx > (ctx.num_waves * 4) ||
738*61046927SAndroid Build Coastguard Worker current->operands[0].size() == 4))
739*61046927SAndroid Build Coastguard Worker break;
740*61046927SAndroid Build Coastguard Worker /* don't move descriptor loads below buffer loads */
741*61046927SAndroid Build Coastguard Worker if (candidate->isSMEM() && !candidate->operands.empty() && current->operands[0].size() == 4 &&
742*61046927SAndroid Build Coastguard Worker candidate->operands[0].size() == 2)
743*61046927SAndroid Build Coastguard Worker break;
744*61046927SAndroid Build Coastguard Worker
745*61046927SAndroid Build Coastguard Worker bool can_move_down = true;
746*61046927SAndroid Build Coastguard Worker
747*61046927SAndroid Build Coastguard Worker HazardResult haz = perform_hazard_query(&hq, candidate.get(), false);
748*61046927SAndroid Build Coastguard Worker if (haz == hazard_fail_reorder_ds || haz == hazard_fail_spill ||
749*61046927SAndroid Build Coastguard Worker haz == hazard_fail_reorder_sendmsg || haz == hazard_fail_barrier ||
750*61046927SAndroid Build Coastguard Worker haz == hazard_fail_export)
751*61046927SAndroid Build Coastguard Worker can_move_down = false;
752*61046927SAndroid Build Coastguard Worker else if (haz != hazard_success)
753*61046927SAndroid Build Coastguard Worker break;
754*61046927SAndroid Build Coastguard Worker
755*61046927SAndroid Build Coastguard Worker /* don't use LDS/GDS instructions to hide latency since it can
756*61046927SAndroid Build Coastguard Worker * significantly worsen LDS scheduling */
757*61046927SAndroid Build Coastguard Worker if (candidate->isDS() || !can_move_down) {
758*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&hq, candidate.get());
759*61046927SAndroid Build Coastguard Worker ctx.mv.downwards_skip(cursor);
760*61046927SAndroid Build Coastguard Worker continue;
761*61046927SAndroid Build Coastguard Worker }
762*61046927SAndroid Build Coastguard Worker
763*61046927SAndroid Build Coastguard Worker MoveResult res = ctx.mv.downwards_move(cursor, false);
764*61046927SAndroid Build Coastguard Worker if (res == move_fail_ssa || res == move_fail_rar) {
765*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&hq, candidate.get());
766*61046927SAndroid Build Coastguard Worker ctx.mv.downwards_skip(cursor);
767*61046927SAndroid Build Coastguard Worker continue;
768*61046927SAndroid Build Coastguard Worker } else if (res == move_fail_pressure) {
769*61046927SAndroid Build Coastguard Worker break;
770*61046927SAndroid Build Coastguard Worker }
771*61046927SAndroid Build Coastguard Worker
772*61046927SAndroid Build Coastguard Worker if (candidate_idx < ctx.last_SMEM_dep_idx)
773*61046927SAndroid Build Coastguard Worker ctx.last_SMEM_stall++;
774*61046927SAndroid Build Coastguard Worker k++;
775*61046927SAndroid Build Coastguard Worker }
776*61046927SAndroid Build Coastguard Worker
777*61046927SAndroid Build Coastguard Worker /* find the first instruction depending on current or find another MEM */
778*61046927SAndroid Build Coastguard Worker UpwardsCursor up_cursor = ctx.mv.upwards_init(idx + 1, false);
779*61046927SAndroid Build Coastguard Worker
780*61046927SAndroid Build Coastguard Worker bool found_dependency = false;
781*61046927SAndroid Build Coastguard Worker /* second, check if we have instructions after current to move up */
782*61046927SAndroid Build Coastguard Worker for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int)idx + window_size;
783*61046927SAndroid Build Coastguard Worker candidate_idx++) {
784*61046927SAndroid Build Coastguard Worker assert(candidate_idx == up_cursor.source_idx);
785*61046927SAndroid Build Coastguard Worker assert(candidate_idx < (int)block->instructions.size());
786*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
787*61046927SAndroid Build Coastguard Worker
788*61046927SAndroid Build Coastguard Worker if (candidate->opcode == aco_opcode::p_logical_end)
789*61046927SAndroid Build Coastguard Worker break;
790*61046927SAndroid Build Coastguard Worker
791*61046927SAndroid Build Coastguard Worker /* check if candidate depends on current */
792*61046927SAndroid Build Coastguard Worker bool is_dependency = !found_dependency && !ctx.mv.upwards_check_deps(up_cursor);
793*61046927SAndroid Build Coastguard Worker /* no need to steal from following VMEM instructions */
794*61046927SAndroid Build Coastguard Worker if (is_dependency && (candidate->isVMEM() || candidate->isFlatLike()))
795*61046927SAndroid Build Coastguard Worker break;
796*61046927SAndroid Build Coastguard Worker
797*61046927SAndroid Build Coastguard Worker if (found_dependency) {
798*61046927SAndroid Build Coastguard Worker HazardResult haz = perform_hazard_query(&hq, candidate.get(), true);
799*61046927SAndroid Build Coastguard Worker if (haz == hazard_fail_reorder_ds || haz == hazard_fail_spill ||
800*61046927SAndroid Build Coastguard Worker haz == hazard_fail_reorder_sendmsg || haz == hazard_fail_barrier ||
801*61046927SAndroid Build Coastguard Worker haz == hazard_fail_export)
802*61046927SAndroid Build Coastguard Worker is_dependency = true;
803*61046927SAndroid Build Coastguard Worker else if (haz != hazard_success)
804*61046927SAndroid Build Coastguard Worker break;
805*61046927SAndroid Build Coastguard Worker }
806*61046927SAndroid Build Coastguard Worker
807*61046927SAndroid Build Coastguard Worker if (is_dependency) {
808*61046927SAndroid Build Coastguard Worker if (!found_dependency) {
809*61046927SAndroid Build Coastguard Worker ctx.mv.upwards_update_insert_idx(up_cursor);
810*61046927SAndroid Build Coastguard Worker init_hazard_query(ctx, &hq);
811*61046927SAndroid Build Coastguard Worker found_dependency = true;
812*61046927SAndroid Build Coastguard Worker }
813*61046927SAndroid Build Coastguard Worker }
814*61046927SAndroid Build Coastguard Worker
815*61046927SAndroid Build Coastguard Worker if (is_dependency || !found_dependency) {
816*61046927SAndroid Build Coastguard Worker if (found_dependency)
817*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&hq, candidate.get());
818*61046927SAndroid Build Coastguard Worker else
819*61046927SAndroid Build Coastguard Worker k++;
820*61046927SAndroid Build Coastguard Worker ctx.mv.upwards_skip(up_cursor);
821*61046927SAndroid Build Coastguard Worker continue;
822*61046927SAndroid Build Coastguard Worker }
823*61046927SAndroid Build Coastguard Worker
824*61046927SAndroid Build Coastguard Worker MoveResult res = ctx.mv.upwards_move(up_cursor);
825*61046927SAndroid Build Coastguard Worker if (res == move_fail_ssa || res == move_fail_rar) {
826*61046927SAndroid Build Coastguard Worker /* no need to steal from following VMEM instructions */
827*61046927SAndroid Build Coastguard Worker if (res == move_fail_ssa && (candidate->isVMEM() || candidate->isFlatLike()))
828*61046927SAndroid Build Coastguard Worker break;
829*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&hq, candidate.get());
830*61046927SAndroid Build Coastguard Worker ctx.mv.upwards_skip(up_cursor);
831*61046927SAndroid Build Coastguard Worker continue;
832*61046927SAndroid Build Coastguard Worker } else if (res == move_fail_pressure) {
833*61046927SAndroid Build Coastguard Worker break;
834*61046927SAndroid Build Coastguard Worker }
835*61046927SAndroid Build Coastguard Worker k++;
836*61046927SAndroid Build Coastguard Worker }
837*61046927SAndroid Build Coastguard Worker
838*61046927SAndroid Build Coastguard Worker ctx.last_SMEM_dep_idx = found_dependency ? up_cursor.insert_idx : 0;
839*61046927SAndroid Build Coastguard Worker ctx.last_SMEM_stall = 10 - ctx.num_waves - k;
840*61046927SAndroid Build Coastguard Worker }
841*61046927SAndroid Build Coastguard Worker
842*61046927SAndroid Build Coastguard Worker void
schedule_VMEM(sched_ctx & ctx,Block * block,Instruction * current,int idx)843*61046927SAndroid Build Coastguard Worker schedule_VMEM(sched_ctx& ctx, Block* block, Instruction* current, int idx)
844*61046927SAndroid Build Coastguard Worker {
845*61046927SAndroid Build Coastguard Worker assert(idx != 0);
846*61046927SAndroid Build Coastguard Worker int window_size = VMEM_WINDOW_SIZE;
847*61046927SAndroid Build Coastguard Worker int max_moves = VMEM_MAX_MOVES;
848*61046927SAndroid Build Coastguard Worker int clause_max_grab_dist = VMEM_CLAUSE_MAX_GRAB_DIST;
849*61046927SAndroid Build Coastguard Worker bool only_clauses = false;
850*61046927SAndroid Build Coastguard Worker int16_t k = 0;
851*61046927SAndroid Build Coastguard Worker
852*61046927SAndroid Build Coastguard Worker /* first, check if we have instructions before current to move down */
853*61046927SAndroid Build Coastguard Worker hazard_query indep_hq;
854*61046927SAndroid Build Coastguard Worker hazard_query clause_hq;
855*61046927SAndroid Build Coastguard Worker init_hazard_query(ctx, &indep_hq);
856*61046927SAndroid Build Coastguard Worker init_hazard_query(ctx, &clause_hq);
857*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&indep_hq, current);
858*61046927SAndroid Build Coastguard Worker
859*61046927SAndroid Build Coastguard Worker DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, true);
860*61046927SAndroid Build Coastguard Worker
861*61046927SAndroid Build Coastguard Worker for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int)idx - window_size;
862*61046927SAndroid Build Coastguard Worker candidate_idx--) {
863*61046927SAndroid Build Coastguard Worker assert(candidate_idx == cursor.source_idx);
864*61046927SAndroid Build Coastguard Worker assert(candidate_idx >= 0);
865*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
866*61046927SAndroid Build Coastguard Worker bool is_vmem = candidate->isVMEM() || candidate->isFlatLike();
867*61046927SAndroid Build Coastguard Worker
868*61046927SAndroid Build Coastguard Worker /* break when encountering another VMEM instruction, logical_start or barriers */
869*61046927SAndroid Build Coastguard Worker if (candidate->opcode == aco_opcode::p_logical_start)
870*61046927SAndroid Build Coastguard Worker break;
871*61046927SAndroid Build Coastguard Worker
872*61046927SAndroid Build Coastguard Worker /* break if we'd make the previous SMEM instruction stall */
873*61046927SAndroid Build Coastguard Worker bool can_stall_prev_smem =
874*61046927SAndroid Build Coastguard Worker idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx;
875*61046927SAndroid Build Coastguard Worker if (can_stall_prev_smem && ctx.last_SMEM_stall >= 0)
876*61046927SAndroid Build Coastguard Worker break;
877*61046927SAndroid Build Coastguard Worker
878*61046927SAndroid Build Coastguard Worker bool part_of_clause = false;
879*61046927SAndroid Build Coastguard Worker if (current->isVMEM() == candidate->isVMEM()) {
880*61046927SAndroid Build Coastguard Worker int grab_dist = cursor.insert_idx_clause - candidate_idx;
881*61046927SAndroid Build Coastguard Worker /* We can't easily tell how much this will decrease the def-to-use
882*61046927SAndroid Build Coastguard Worker * distances, so just use how far it will be moved as a heuristic. */
883*61046927SAndroid Build Coastguard Worker part_of_clause =
884*61046927SAndroid Build Coastguard Worker grab_dist < clause_max_grab_dist + k && should_form_clause(current, candidate.get());
885*61046927SAndroid Build Coastguard Worker }
886*61046927SAndroid Build Coastguard Worker
887*61046927SAndroid Build Coastguard Worker /* if current depends on candidate, add additional dependencies and continue */
888*61046927SAndroid Build Coastguard Worker bool can_move_down = !is_vmem || part_of_clause || candidate->definitions.empty();
889*61046927SAndroid Build Coastguard Worker if (only_clauses) {
890*61046927SAndroid Build Coastguard Worker /* In case of high register pressure, only try to form clauses,
891*61046927SAndroid Build Coastguard Worker * and only if the previous clause is not larger
892*61046927SAndroid Build Coastguard Worker * than the current one will be.
893*61046927SAndroid Build Coastguard Worker */
894*61046927SAndroid Build Coastguard Worker if (part_of_clause) {
895*61046927SAndroid Build Coastguard Worker int clause_size = cursor.insert_idx - cursor.insert_idx_clause;
896*61046927SAndroid Build Coastguard Worker int prev_clause_size = 1;
897*61046927SAndroid Build Coastguard Worker while (should_form_clause(current,
898*61046927SAndroid Build Coastguard Worker block->instructions[candidate_idx - prev_clause_size].get()))
899*61046927SAndroid Build Coastguard Worker prev_clause_size++;
900*61046927SAndroid Build Coastguard Worker if (prev_clause_size > clause_size + 1)
901*61046927SAndroid Build Coastguard Worker break;
902*61046927SAndroid Build Coastguard Worker } else {
903*61046927SAndroid Build Coastguard Worker can_move_down = false;
904*61046927SAndroid Build Coastguard Worker }
905*61046927SAndroid Build Coastguard Worker }
906*61046927SAndroid Build Coastguard Worker HazardResult haz =
907*61046927SAndroid Build Coastguard Worker perform_hazard_query(part_of_clause ? &clause_hq : &indep_hq, candidate.get(), false);
908*61046927SAndroid Build Coastguard Worker if (haz == hazard_fail_reorder_ds || haz == hazard_fail_spill ||
909*61046927SAndroid Build Coastguard Worker haz == hazard_fail_reorder_sendmsg || haz == hazard_fail_barrier ||
910*61046927SAndroid Build Coastguard Worker haz == hazard_fail_export)
911*61046927SAndroid Build Coastguard Worker can_move_down = false;
912*61046927SAndroid Build Coastguard Worker else if (haz != hazard_success)
913*61046927SAndroid Build Coastguard Worker break;
914*61046927SAndroid Build Coastguard Worker
915*61046927SAndroid Build Coastguard Worker if (!can_move_down) {
916*61046927SAndroid Build Coastguard Worker if (part_of_clause)
917*61046927SAndroid Build Coastguard Worker break;
918*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&indep_hq, candidate.get());
919*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&clause_hq, candidate.get());
920*61046927SAndroid Build Coastguard Worker ctx.mv.downwards_skip(cursor);
921*61046927SAndroid Build Coastguard Worker continue;
922*61046927SAndroid Build Coastguard Worker }
923*61046927SAndroid Build Coastguard Worker
924*61046927SAndroid Build Coastguard Worker Instruction* candidate_ptr = candidate.get();
925*61046927SAndroid Build Coastguard Worker MoveResult res = ctx.mv.downwards_move(cursor, part_of_clause);
926*61046927SAndroid Build Coastguard Worker if (res == move_fail_ssa || res == move_fail_rar) {
927*61046927SAndroid Build Coastguard Worker if (part_of_clause)
928*61046927SAndroid Build Coastguard Worker break;
929*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&indep_hq, candidate.get());
930*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&clause_hq, candidate.get());
931*61046927SAndroid Build Coastguard Worker ctx.mv.downwards_skip(cursor);
932*61046927SAndroid Build Coastguard Worker continue;
933*61046927SAndroid Build Coastguard Worker } else if (res == move_fail_pressure) {
934*61046927SAndroid Build Coastguard Worker only_clauses = true;
935*61046927SAndroid Build Coastguard Worker if (part_of_clause)
936*61046927SAndroid Build Coastguard Worker break;
937*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&indep_hq, candidate.get());
938*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&clause_hq, candidate.get());
939*61046927SAndroid Build Coastguard Worker ctx.mv.downwards_skip(cursor);
940*61046927SAndroid Build Coastguard Worker continue;
941*61046927SAndroid Build Coastguard Worker }
942*61046927SAndroid Build Coastguard Worker if (part_of_clause)
943*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&indep_hq, candidate_ptr);
944*61046927SAndroid Build Coastguard Worker else
945*61046927SAndroid Build Coastguard Worker k++;
946*61046927SAndroid Build Coastguard Worker if (candidate_idx < ctx.last_SMEM_dep_idx)
947*61046927SAndroid Build Coastguard Worker ctx.last_SMEM_stall++;
948*61046927SAndroid Build Coastguard Worker }
949*61046927SAndroid Build Coastguard Worker
950*61046927SAndroid Build Coastguard Worker /* find the first instruction depending on current or find another VMEM */
951*61046927SAndroid Build Coastguard Worker UpwardsCursor up_cursor = ctx.mv.upwards_init(idx + 1, true);
952*61046927SAndroid Build Coastguard Worker
953*61046927SAndroid Build Coastguard Worker bool found_dependency = false;
954*61046927SAndroid Build Coastguard Worker /* second, check if we have instructions after current to move up */
955*61046927SAndroid Build Coastguard Worker for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int)idx + window_size;
956*61046927SAndroid Build Coastguard Worker candidate_idx++) {
957*61046927SAndroid Build Coastguard Worker assert(candidate_idx == up_cursor.source_idx);
958*61046927SAndroid Build Coastguard Worker assert(candidate_idx < (int)block->instructions.size());
959*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
960*61046927SAndroid Build Coastguard Worker bool is_vmem = candidate->isVMEM() || candidate->isFlatLike();
961*61046927SAndroid Build Coastguard Worker
962*61046927SAndroid Build Coastguard Worker if (candidate->opcode == aco_opcode::p_logical_end)
963*61046927SAndroid Build Coastguard Worker break;
964*61046927SAndroid Build Coastguard Worker
965*61046927SAndroid Build Coastguard Worker /* check if candidate depends on current */
966*61046927SAndroid Build Coastguard Worker bool is_dependency = false;
967*61046927SAndroid Build Coastguard Worker if (found_dependency) {
968*61046927SAndroid Build Coastguard Worker HazardResult haz = perform_hazard_query(&indep_hq, candidate.get(), true);
969*61046927SAndroid Build Coastguard Worker if (haz == hazard_fail_reorder_ds || haz == hazard_fail_spill ||
970*61046927SAndroid Build Coastguard Worker haz == hazard_fail_reorder_vmem_smem || haz == hazard_fail_reorder_sendmsg ||
971*61046927SAndroid Build Coastguard Worker haz == hazard_fail_barrier || haz == hazard_fail_export)
972*61046927SAndroid Build Coastguard Worker is_dependency = true;
973*61046927SAndroid Build Coastguard Worker else if (haz != hazard_success)
974*61046927SAndroid Build Coastguard Worker break;
975*61046927SAndroid Build Coastguard Worker }
976*61046927SAndroid Build Coastguard Worker
977*61046927SAndroid Build Coastguard Worker is_dependency |= !found_dependency && !ctx.mv.upwards_check_deps(up_cursor);
978*61046927SAndroid Build Coastguard Worker if (is_dependency) {
979*61046927SAndroid Build Coastguard Worker if (!found_dependency) {
980*61046927SAndroid Build Coastguard Worker ctx.mv.upwards_update_insert_idx(up_cursor);
981*61046927SAndroid Build Coastguard Worker init_hazard_query(ctx, &indep_hq);
982*61046927SAndroid Build Coastguard Worker found_dependency = true;
983*61046927SAndroid Build Coastguard Worker }
984*61046927SAndroid Build Coastguard Worker } else if (is_vmem) {
985*61046927SAndroid Build Coastguard Worker /* don't move up dependencies of other VMEM instructions */
986*61046927SAndroid Build Coastguard Worker for (const Definition& def : candidate->definitions) {
987*61046927SAndroid Build Coastguard Worker if (def.isTemp())
988*61046927SAndroid Build Coastguard Worker ctx.mv.depends_on[def.tempId()] = true;
989*61046927SAndroid Build Coastguard Worker }
990*61046927SAndroid Build Coastguard Worker }
991*61046927SAndroid Build Coastguard Worker
992*61046927SAndroid Build Coastguard Worker if (is_dependency || !found_dependency) {
993*61046927SAndroid Build Coastguard Worker if (found_dependency)
994*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&indep_hq, candidate.get());
995*61046927SAndroid Build Coastguard Worker else
996*61046927SAndroid Build Coastguard Worker k++;
997*61046927SAndroid Build Coastguard Worker ctx.mv.upwards_skip(up_cursor);
998*61046927SAndroid Build Coastguard Worker continue;
999*61046927SAndroid Build Coastguard Worker }
1000*61046927SAndroid Build Coastguard Worker
1001*61046927SAndroid Build Coastguard Worker MoveResult res = ctx.mv.upwards_move(up_cursor);
1002*61046927SAndroid Build Coastguard Worker if (res == move_fail_ssa || res == move_fail_rar) {
1003*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&indep_hq, candidate.get());
1004*61046927SAndroid Build Coastguard Worker ctx.mv.upwards_skip(up_cursor);
1005*61046927SAndroid Build Coastguard Worker continue;
1006*61046927SAndroid Build Coastguard Worker } else if (res == move_fail_pressure) {
1007*61046927SAndroid Build Coastguard Worker break;
1008*61046927SAndroid Build Coastguard Worker }
1009*61046927SAndroid Build Coastguard Worker k++;
1010*61046927SAndroid Build Coastguard Worker }
1011*61046927SAndroid Build Coastguard Worker }
1012*61046927SAndroid Build Coastguard Worker
1013*61046927SAndroid Build Coastguard Worker void
schedule_LDS(sched_ctx & ctx,Block * block,Instruction * current,int idx)1014*61046927SAndroid Build Coastguard Worker schedule_LDS(sched_ctx& ctx, Block* block, Instruction* current, int idx)
1015*61046927SAndroid Build Coastguard Worker {
1016*61046927SAndroid Build Coastguard Worker assert(idx != 0);
1017*61046927SAndroid Build Coastguard Worker int window_size = LDS_WINDOW_SIZE;
1018*61046927SAndroid Build Coastguard Worker int max_moves = current->isLDSDIR() ? LDSDIR_MAX_MOVES : LDS_MAX_MOVES;
1019*61046927SAndroid Build Coastguard Worker int16_t k = 0;
1020*61046927SAndroid Build Coastguard Worker
1021*61046927SAndroid Build Coastguard Worker /* first, check if we have instructions before current to move down */
1022*61046927SAndroid Build Coastguard Worker hazard_query hq;
1023*61046927SAndroid Build Coastguard Worker init_hazard_query(ctx, &hq);
1024*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&hq, current);
1025*61046927SAndroid Build Coastguard Worker
1026*61046927SAndroid Build Coastguard Worker DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, false);
1027*61046927SAndroid Build Coastguard Worker
1028*61046927SAndroid Build Coastguard Worker for (int i = 0; k < max_moves && i < window_size; i++) {
1029*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& candidate = block->instructions[cursor.source_idx];
1030*61046927SAndroid Build Coastguard Worker bool is_mem = candidate->isVMEM() || candidate->isFlatLike() || candidate->isSMEM();
1031*61046927SAndroid Build Coastguard Worker if (candidate->opcode == aco_opcode::p_logical_start || is_mem)
1032*61046927SAndroid Build Coastguard Worker break;
1033*61046927SAndroid Build Coastguard Worker
1034*61046927SAndroid Build Coastguard Worker if (candidate->isDS() || candidate->isLDSDIR()) {
1035*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&hq, candidate.get());
1036*61046927SAndroid Build Coastguard Worker ctx.mv.downwards_skip(cursor);
1037*61046927SAndroid Build Coastguard Worker continue;
1038*61046927SAndroid Build Coastguard Worker }
1039*61046927SAndroid Build Coastguard Worker
1040*61046927SAndroid Build Coastguard Worker if (perform_hazard_query(&hq, candidate.get(), false) != hazard_success ||
1041*61046927SAndroid Build Coastguard Worker ctx.mv.downwards_move(cursor, false) != move_success)
1042*61046927SAndroid Build Coastguard Worker break;
1043*61046927SAndroid Build Coastguard Worker
1044*61046927SAndroid Build Coastguard Worker k++;
1045*61046927SAndroid Build Coastguard Worker }
1046*61046927SAndroid Build Coastguard Worker
1047*61046927SAndroid Build Coastguard Worker /* second, check if we have instructions after current to move up */
1048*61046927SAndroid Build Coastguard Worker bool found_dependency = false;
1049*61046927SAndroid Build Coastguard Worker int i = 0;
1050*61046927SAndroid Build Coastguard Worker UpwardsCursor up_cursor = ctx.mv.upwards_init(idx + 1, true);
1051*61046927SAndroid Build Coastguard Worker /* find the first instruction depending on current */
1052*61046927SAndroid Build Coastguard Worker for (; k < max_moves && i < window_size; i++) {
1053*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& candidate = block->instructions[up_cursor.source_idx];
1054*61046927SAndroid Build Coastguard Worker bool is_mem = candidate->isVMEM() || candidate->isFlatLike() || candidate->isSMEM();
1055*61046927SAndroid Build Coastguard Worker if (candidate->opcode == aco_opcode::p_logical_end || is_mem)
1056*61046927SAndroid Build Coastguard Worker break;
1057*61046927SAndroid Build Coastguard Worker
1058*61046927SAndroid Build Coastguard Worker /* check if candidate depends on current */
1059*61046927SAndroid Build Coastguard Worker if (!ctx.mv.upwards_check_deps(up_cursor)) {
1060*61046927SAndroid Build Coastguard Worker init_hazard_query(ctx, &hq);
1061*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&hq, candidate.get());
1062*61046927SAndroid Build Coastguard Worker ctx.mv.upwards_update_insert_idx(up_cursor);
1063*61046927SAndroid Build Coastguard Worker ctx.mv.upwards_skip(up_cursor);
1064*61046927SAndroid Build Coastguard Worker found_dependency = true;
1065*61046927SAndroid Build Coastguard Worker i++;
1066*61046927SAndroid Build Coastguard Worker break;
1067*61046927SAndroid Build Coastguard Worker }
1068*61046927SAndroid Build Coastguard Worker
1069*61046927SAndroid Build Coastguard Worker ctx.mv.upwards_skip(up_cursor);
1070*61046927SAndroid Build Coastguard Worker }
1071*61046927SAndroid Build Coastguard Worker
1072*61046927SAndroid Build Coastguard Worker for (; found_dependency && k < max_moves && i < window_size; i++) {
1073*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& candidate = block->instructions[up_cursor.source_idx];
1074*61046927SAndroid Build Coastguard Worker bool is_mem = candidate->isVMEM() || candidate->isFlatLike() || candidate->isSMEM();
1075*61046927SAndroid Build Coastguard Worker if (candidate->opcode == aco_opcode::p_logical_end || is_mem)
1076*61046927SAndroid Build Coastguard Worker break;
1077*61046927SAndroid Build Coastguard Worker
1078*61046927SAndroid Build Coastguard Worker HazardResult haz = perform_hazard_query(&hq, candidate.get(), true);
1079*61046927SAndroid Build Coastguard Worker if (haz == hazard_fail_exec || haz == hazard_fail_unreorderable)
1080*61046927SAndroid Build Coastguard Worker break;
1081*61046927SAndroid Build Coastguard Worker
1082*61046927SAndroid Build Coastguard Worker if (haz != hazard_success || ctx.mv.upwards_move(up_cursor) != move_success) {
1083*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&hq, candidate.get());
1084*61046927SAndroid Build Coastguard Worker ctx.mv.upwards_skip(up_cursor);
1085*61046927SAndroid Build Coastguard Worker } else {
1086*61046927SAndroid Build Coastguard Worker k++;
1087*61046927SAndroid Build Coastguard Worker }
1088*61046927SAndroid Build Coastguard Worker }
1089*61046927SAndroid Build Coastguard Worker }
1090*61046927SAndroid Build Coastguard Worker
1091*61046927SAndroid Build Coastguard Worker void
schedule_position_export(sched_ctx & ctx,Block * block,Instruction * current,int idx)1092*61046927SAndroid Build Coastguard Worker schedule_position_export(sched_ctx& ctx, Block* block, Instruction* current, int idx)
1093*61046927SAndroid Build Coastguard Worker {
1094*61046927SAndroid Build Coastguard Worker assert(idx != 0);
1095*61046927SAndroid Build Coastguard Worker int window_size = POS_EXP_WINDOW_SIZE / ctx.schedule_pos_export_div;
1096*61046927SAndroid Build Coastguard Worker int max_moves = POS_EXP_MAX_MOVES / ctx.schedule_pos_export_div;
1097*61046927SAndroid Build Coastguard Worker int16_t k = 0;
1098*61046927SAndroid Build Coastguard Worker
1099*61046927SAndroid Build Coastguard Worker DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, false);
1100*61046927SAndroid Build Coastguard Worker
1101*61046927SAndroid Build Coastguard Worker hazard_query hq;
1102*61046927SAndroid Build Coastguard Worker init_hazard_query(ctx, &hq);
1103*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&hq, current);
1104*61046927SAndroid Build Coastguard Worker
1105*61046927SAndroid Build Coastguard Worker for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int)idx - window_size;
1106*61046927SAndroid Build Coastguard Worker candidate_idx--) {
1107*61046927SAndroid Build Coastguard Worker assert(candidate_idx >= 0);
1108*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
1109*61046927SAndroid Build Coastguard Worker
1110*61046927SAndroid Build Coastguard Worker if (candidate->opcode == aco_opcode::p_logical_start)
1111*61046927SAndroid Build Coastguard Worker break;
1112*61046927SAndroid Build Coastguard Worker if (candidate->isVMEM() || candidate->isSMEM() || candidate->isFlatLike())
1113*61046927SAndroid Build Coastguard Worker break;
1114*61046927SAndroid Build Coastguard Worker
1115*61046927SAndroid Build Coastguard Worker HazardResult haz = perform_hazard_query(&hq, candidate.get(), false);
1116*61046927SAndroid Build Coastguard Worker if (haz == hazard_fail_exec || haz == hazard_fail_unreorderable)
1117*61046927SAndroid Build Coastguard Worker break;
1118*61046927SAndroid Build Coastguard Worker
1119*61046927SAndroid Build Coastguard Worker if (haz != hazard_success) {
1120*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&hq, candidate.get());
1121*61046927SAndroid Build Coastguard Worker ctx.mv.downwards_skip(cursor);
1122*61046927SAndroid Build Coastguard Worker continue;
1123*61046927SAndroid Build Coastguard Worker }
1124*61046927SAndroid Build Coastguard Worker
1125*61046927SAndroid Build Coastguard Worker MoveResult res = ctx.mv.downwards_move(cursor, false);
1126*61046927SAndroid Build Coastguard Worker if (res == move_fail_ssa || res == move_fail_rar) {
1127*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&hq, candidate.get());
1128*61046927SAndroid Build Coastguard Worker ctx.mv.downwards_skip(cursor);
1129*61046927SAndroid Build Coastguard Worker continue;
1130*61046927SAndroid Build Coastguard Worker } else if (res == move_fail_pressure) {
1131*61046927SAndroid Build Coastguard Worker break;
1132*61046927SAndroid Build Coastguard Worker }
1133*61046927SAndroid Build Coastguard Worker k++;
1134*61046927SAndroid Build Coastguard Worker }
1135*61046927SAndroid Build Coastguard Worker }
1136*61046927SAndroid Build Coastguard Worker
1137*61046927SAndroid Build Coastguard Worker unsigned
schedule_VMEM_store(sched_ctx & ctx,Block * block,Instruction * current,int idx)1138*61046927SAndroid Build Coastguard Worker schedule_VMEM_store(sched_ctx& ctx, Block* block, Instruction* current, int idx)
1139*61046927SAndroid Build Coastguard Worker {
1140*61046927SAndroid Build Coastguard Worker hazard_query hq;
1141*61046927SAndroid Build Coastguard Worker init_hazard_query(ctx, &hq);
1142*61046927SAndroid Build Coastguard Worker
1143*61046927SAndroid Build Coastguard Worker DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, true);
1144*61046927SAndroid Build Coastguard Worker int skip = 0;
1145*61046927SAndroid Build Coastguard Worker
1146*61046927SAndroid Build Coastguard Worker for (int16_t k = 0; k < VMEM_STORE_CLAUSE_MAX_GRAB_DIST;) {
1147*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& candidate = block->instructions[cursor.source_idx];
1148*61046927SAndroid Build Coastguard Worker if (candidate->opcode == aco_opcode::p_logical_start)
1149*61046927SAndroid Build Coastguard Worker break;
1150*61046927SAndroid Build Coastguard Worker
1151*61046927SAndroid Build Coastguard Worker if (!should_form_clause(current, candidate.get())) {
1152*61046927SAndroid Build Coastguard Worker add_to_hazard_query(&hq, candidate.get());
1153*61046927SAndroid Build Coastguard Worker ctx.mv.downwards_skip(cursor);
1154*61046927SAndroid Build Coastguard Worker k += get_likely_cost(candidate.get());
1155*61046927SAndroid Build Coastguard Worker continue;
1156*61046927SAndroid Build Coastguard Worker }
1157*61046927SAndroid Build Coastguard Worker
1158*61046927SAndroid Build Coastguard Worker if (perform_hazard_query(&hq, candidate.get(), false) != hazard_success ||
1159*61046927SAndroid Build Coastguard Worker ctx.mv.downwards_move(cursor, true) != move_success)
1160*61046927SAndroid Build Coastguard Worker break;
1161*61046927SAndroid Build Coastguard Worker
1162*61046927SAndroid Build Coastguard Worker skip++;
1163*61046927SAndroid Build Coastguard Worker }
1164*61046927SAndroid Build Coastguard Worker
1165*61046927SAndroid Build Coastguard Worker return skip;
1166*61046927SAndroid Build Coastguard Worker }
1167*61046927SAndroid Build Coastguard Worker
1168*61046927SAndroid Build Coastguard Worker void
schedule_block(sched_ctx & ctx,Program * program,Block * block)1169*61046927SAndroid Build Coastguard Worker schedule_block(sched_ctx& ctx, Program* program, Block* block)
1170*61046927SAndroid Build Coastguard Worker {
1171*61046927SAndroid Build Coastguard Worker ctx.last_SMEM_dep_idx = 0;
1172*61046927SAndroid Build Coastguard Worker ctx.last_SMEM_stall = INT16_MIN;
1173*61046927SAndroid Build Coastguard Worker ctx.mv.block = block;
1174*61046927SAndroid Build Coastguard Worker
1175*61046927SAndroid Build Coastguard Worker /* go through all instructions and find memory loads */
1176*61046927SAndroid Build Coastguard Worker unsigned num_stores = 0;
1177*61046927SAndroid Build Coastguard Worker for (unsigned idx = 0; idx < block->instructions.size(); idx++) {
1178*61046927SAndroid Build Coastguard Worker Instruction* current = block->instructions[idx].get();
1179*61046927SAndroid Build Coastguard Worker
1180*61046927SAndroid Build Coastguard Worker if (current->opcode == aco_opcode::p_logical_end)
1181*61046927SAndroid Build Coastguard Worker break;
1182*61046927SAndroid Build Coastguard Worker
1183*61046927SAndroid Build Coastguard Worker if (block->kind & block_kind_export_end && current->isEXP() && ctx.schedule_pos_exports) {
1184*61046927SAndroid Build Coastguard Worker unsigned target = current->exp().dest;
1185*61046927SAndroid Build Coastguard Worker if (target >= V_008DFC_SQ_EXP_POS && target < V_008DFC_SQ_EXP_PRIM) {
1186*61046927SAndroid Build Coastguard Worker ctx.mv.current = current;
1187*61046927SAndroid Build Coastguard Worker schedule_position_export(ctx, block, current, idx);
1188*61046927SAndroid Build Coastguard Worker }
1189*61046927SAndroid Build Coastguard Worker }
1190*61046927SAndroid Build Coastguard Worker
1191*61046927SAndroid Build Coastguard Worker if (current->definitions.empty()) {
1192*61046927SAndroid Build Coastguard Worker num_stores += current->isVMEM() || current->isFlatLike() ? 1 : 0;
1193*61046927SAndroid Build Coastguard Worker continue;
1194*61046927SAndroid Build Coastguard Worker }
1195*61046927SAndroid Build Coastguard Worker
1196*61046927SAndroid Build Coastguard Worker if (current->isVMEM() || current->isFlatLike()) {
1197*61046927SAndroid Build Coastguard Worker ctx.mv.current = current;
1198*61046927SAndroid Build Coastguard Worker schedule_VMEM(ctx, block, current, idx);
1199*61046927SAndroid Build Coastguard Worker }
1200*61046927SAndroid Build Coastguard Worker
1201*61046927SAndroid Build Coastguard Worker if (current->isSMEM()) {
1202*61046927SAndroid Build Coastguard Worker ctx.mv.current = current;
1203*61046927SAndroid Build Coastguard Worker schedule_SMEM(ctx, block, current, idx);
1204*61046927SAndroid Build Coastguard Worker }
1205*61046927SAndroid Build Coastguard Worker
1206*61046927SAndroid Build Coastguard Worker if (current->isLDSDIR() || (current->isDS() && !current->ds().gds)) {
1207*61046927SAndroid Build Coastguard Worker ctx.mv.current = current;
1208*61046927SAndroid Build Coastguard Worker schedule_LDS(ctx, block, current, idx);
1209*61046927SAndroid Build Coastguard Worker }
1210*61046927SAndroid Build Coastguard Worker }
1211*61046927SAndroid Build Coastguard Worker
1212*61046927SAndroid Build Coastguard Worker /* GFX11 benefits from creating VMEM store clauses. */
1213*61046927SAndroid Build Coastguard Worker if (num_stores > 1 && program->gfx_level >= GFX11) {
1214*61046927SAndroid Build Coastguard Worker for (int idx = block->instructions.size() - 1; idx >= 0; idx--) {
1215*61046927SAndroid Build Coastguard Worker Instruction* current = block->instructions[idx].get();
1216*61046927SAndroid Build Coastguard Worker if (!current->definitions.empty() || !(current->isVMEM() || current->isFlatLike()))
1217*61046927SAndroid Build Coastguard Worker continue;
1218*61046927SAndroid Build Coastguard Worker
1219*61046927SAndroid Build Coastguard Worker ctx.mv.current = current;
1220*61046927SAndroid Build Coastguard Worker idx -= schedule_VMEM_store(ctx, block, current, idx);
1221*61046927SAndroid Build Coastguard Worker }
1222*61046927SAndroid Build Coastguard Worker }
1223*61046927SAndroid Build Coastguard Worker
1224*61046927SAndroid Build Coastguard Worker /* resummarize the block's register demand */
1225*61046927SAndroid Build Coastguard Worker block->register_demand = block->live_in_demand;
1226*61046927SAndroid Build Coastguard Worker for (const aco_ptr<Instruction>& instr : block->instructions)
1227*61046927SAndroid Build Coastguard Worker block->register_demand.update(instr->register_demand);
1228*61046927SAndroid Build Coastguard Worker }
1229*61046927SAndroid Build Coastguard Worker
1230*61046927SAndroid Build Coastguard Worker } /* end namespace */
1231*61046927SAndroid Build Coastguard Worker
1232*61046927SAndroid Build Coastguard Worker void
schedule_program(Program * program)1233*61046927SAndroid Build Coastguard Worker schedule_program(Program* program)
1234*61046927SAndroid Build Coastguard Worker {
1235*61046927SAndroid Build Coastguard Worker /* don't use program->max_reg_demand because that is affected by max_waves_per_simd */
1236*61046927SAndroid Build Coastguard Worker RegisterDemand demand;
1237*61046927SAndroid Build Coastguard Worker for (Block& block : program->blocks)
1238*61046927SAndroid Build Coastguard Worker demand.update(block.register_demand);
1239*61046927SAndroid Build Coastguard Worker demand.vgpr += program->config->num_shared_vgprs / 2;
1240*61046927SAndroid Build Coastguard Worker
1241*61046927SAndroid Build Coastguard Worker sched_ctx ctx;
1242*61046927SAndroid Build Coastguard Worker ctx.gfx_level = program->gfx_level;
1243*61046927SAndroid Build Coastguard Worker ctx.mv.depends_on.resize(program->peekAllocationId());
1244*61046927SAndroid Build Coastguard Worker ctx.mv.RAR_dependencies.resize(program->peekAllocationId());
1245*61046927SAndroid Build Coastguard Worker ctx.mv.RAR_dependencies_clause.resize(program->peekAllocationId());
1246*61046927SAndroid Build Coastguard Worker /* Allowing the scheduler to reduce the number of waves to as low as 5
1247*61046927SAndroid Build Coastguard Worker * improves performance of Thrones of Britannia significantly and doesn't
1248*61046927SAndroid Build Coastguard Worker * seem to hurt anything else. */
1249*61046927SAndroid Build Coastguard Worker // TODO: account for possible uneven num_waves on GFX10+
1250*61046927SAndroid Build Coastguard Worker unsigned wave_fac = program->dev.physical_vgprs / 256;
1251*61046927SAndroid Build Coastguard Worker if (program->num_waves <= 5 * wave_fac)
1252*61046927SAndroid Build Coastguard Worker ctx.num_waves = program->num_waves;
1253*61046927SAndroid Build Coastguard Worker else if (demand.vgpr >= 29)
1254*61046927SAndroid Build Coastguard Worker ctx.num_waves = 5 * wave_fac;
1255*61046927SAndroid Build Coastguard Worker else if (demand.vgpr >= 25)
1256*61046927SAndroid Build Coastguard Worker ctx.num_waves = 6 * wave_fac;
1257*61046927SAndroid Build Coastguard Worker else
1258*61046927SAndroid Build Coastguard Worker ctx.num_waves = 7 * wave_fac;
1259*61046927SAndroid Build Coastguard Worker ctx.num_waves = std::max<uint16_t>(ctx.num_waves, program->min_waves);
1260*61046927SAndroid Build Coastguard Worker ctx.num_waves = std::min<uint16_t>(ctx.num_waves, program->num_waves);
1261*61046927SAndroid Build Coastguard Worker ctx.num_waves = max_suitable_waves(program, ctx.num_waves);
1262*61046927SAndroid Build Coastguard Worker
1263*61046927SAndroid Build Coastguard Worker /* VMEM_MAX_MOVES and such assume pre-GFX10 wave count */
1264*61046927SAndroid Build Coastguard Worker ctx.num_waves = std::max<uint16_t>(ctx.num_waves / wave_fac, 1);
1265*61046927SAndroid Build Coastguard Worker
1266*61046927SAndroid Build Coastguard Worker assert(ctx.num_waves > 0);
1267*61046927SAndroid Build Coastguard Worker ctx.mv.max_registers = {int16_t(get_addr_vgpr_from_waves(program, ctx.num_waves * wave_fac) - 2),
1268*61046927SAndroid Build Coastguard Worker int16_t(get_addr_sgpr_from_waves(program, ctx.num_waves * wave_fac))};
1269*61046927SAndroid Build Coastguard Worker
1270*61046927SAndroid Build Coastguard Worker /* NGG culling shaders are very sensitive to position export scheduling.
1271*61046927SAndroid Build Coastguard Worker * Schedule less aggressively when early primitive export is used, and
1272*61046927SAndroid Build Coastguard Worker * keep the position export at the very bottom when late primitive export is used.
1273*61046927SAndroid Build Coastguard Worker */
1274*61046927SAndroid Build Coastguard Worker if (program->info.has_ngg_culling && program->stage.num_sw_stages() == 1) {
1275*61046927SAndroid Build Coastguard Worker if (!program->info.has_ngg_early_prim_export)
1276*61046927SAndroid Build Coastguard Worker ctx.schedule_pos_exports = false;
1277*61046927SAndroid Build Coastguard Worker else
1278*61046927SAndroid Build Coastguard Worker ctx.schedule_pos_export_div = 4;
1279*61046927SAndroid Build Coastguard Worker }
1280*61046927SAndroid Build Coastguard Worker
1281*61046927SAndroid Build Coastguard Worker for (Block& block : program->blocks)
1282*61046927SAndroid Build Coastguard Worker schedule_block(ctx, program, &block);
1283*61046927SAndroid Build Coastguard Worker
1284*61046927SAndroid Build Coastguard Worker /* update max_reg_demand and num_waves */
1285*61046927SAndroid Build Coastguard Worker RegisterDemand new_demand;
1286*61046927SAndroid Build Coastguard Worker for (Block& block : program->blocks) {
1287*61046927SAndroid Build Coastguard Worker new_demand.update(block.register_demand);
1288*61046927SAndroid Build Coastguard Worker }
1289*61046927SAndroid Build Coastguard Worker update_vgpr_sgpr_demand(program, new_demand);
1290*61046927SAndroid Build Coastguard Worker
1291*61046927SAndroid Build Coastguard Worker /* Validate live variable information */
1292*61046927SAndroid Build Coastguard Worker if (!validate_live_vars(program))
1293*61046927SAndroid Build Coastguard Worker abort();
1294*61046927SAndroid Build Coastguard Worker }
1295*61046927SAndroid Build Coastguard Worker
1296*61046927SAndroid Build Coastguard Worker } // namespace aco
1297