1 /*
2 * Copyright 2010 Jerome Glisse <[email protected]>
3 * SPDX-License-Identifier: MIT
4 */
5
6 #ifndef R600_ASM_H
7 #define R600_ASM_H
8
9 #include "util/format/u_format.h"
10 #include "util/list.h"
11 #include "amd_family.h"
12 #include "r600_isa.h"
13
14 #include <stdbool.h>
15 #include <stdint.h>
16 #include <stdio.h>
17
18 #ifdef __cplusplus
19 extern "C" {
20 #endif
21
/*
 * Print an error message to stderr, prefixed with "EE", the source file,
 * the line number and the name of the enclosing function.
 *
 * `fmt` must be a string literal because it is concatenated onto the
 * prefix; any further arguments are handed to fprintf() unchanged.
 * The macro expands to the fprintf() call expression itself.
 *
 * Uses the C99 `...` / __VA_ARGS__ spelling instead of a GNU named
 * variadic parameter; `##` still drops the trailing comma when no extra
 * arguments are supplied.
 */
#define R600_ASM_ERR(fmt, ...) \
   fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__)
24
/*
 * Description of one ALU source operand. struct r600_bytecode_alu embeds
 * three of these in its src[] array.
 *
 * NOTE(review): the per-field notes below are read off the field names;
 * the exact encoding lives in the assembler sources - confirm there.
 */
struct r600_bytecode_alu_src {
   unsigned sel;     /* operand selector */
   unsigned chan;    /* channel */
   unsigned neg;     /* presumably the negate modifier */
   unsigned abs;     /* presumably the absolute-value modifier */
   unsigned rel;     /* presumably relative addressing */
   unsigned kc_bank; /* presumably a kcache bank (see r600_bytecode_kcache) */
   unsigned kc_rel;
   uint32_t value;   /* raw 32-bit value attached to the operand */
};
35
/*
 * Description of the destination operand of an ALU instruction
 * (the `dst` member of struct r600_bytecode_alu).
 *
 * NOTE(review): field notes are derived from the names only - confirm
 * against the assembler sources.
 */
struct r600_bytecode_alu_dst {
   unsigned sel;   /* destination selector */
   unsigned chan;  /* channel */
   unsigned clamp; /* presumably a clamp flag */
   unsigned write; /* presumably a write-enable flag */
   unsigned rel;   /* presumably relative addressing */
};
43
44 struct r600_bytecode_alu {
45 struct list_head list;
46 struct r600_bytecode_alu_src src[3];
47 struct r600_bytecode_alu_dst dst;
48 unsigned op;
49 unsigned last;
50 unsigned is_op3;
51 unsigned is_lds_idx_op;
52 unsigned execute_mask;
53 unsigned update_pred;
54 unsigned pred_sel;
55 unsigned bank_swizzle;
56 unsigned bank_swizzle_force;
57 unsigned omod;
58 unsigned index_mode;
59 unsigned lds_idx;
60 };
61
62 struct r600_bytecode_tex {
63 struct list_head list;
64 unsigned op;
65 unsigned inst_mod;
66 unsigned resource_id;
67 unsigned src_gpr;
68 unsigned src_rel;
69 unsigned dst_gpr;
70 unsigned dst_rel;
71 unsigned dst_sel_x;
72 unsigned dst_sel_y;
73 unsigned dst_sel_z;
74 unsigned dst_sel_w;
75 unsigned lod_bias;
76 unsigned coord_type_x;
77 unsigned coord_type_y;
78 unsigned coord_type_z;
79 unsigned coord_type_w;
80 int offset_x;
81 int offset_y;
82 int offset_z;
83 unsigned sampler_id;
84 unsigned src_sel_x;
85 unsigned src_sel_y;
86 unsigned src_sel_z;
87 unsigned src_sel_w;
88 /* indexed samplers/resources only on evergreen/cayman */
89 unsigned sampler_index_mode;
90 unsigned resource_index_mode;
91 };
92
93 struct r600_bytecode_vtx {
94 struct list_head list;
95 unsigned op;
96 unsigned fetch_type;
97 unsigned buffer_id;
98 unsigned src_gpr;
99 unsigned src_sel_x;
100 unsigned mega_fetch_count;
101 unsigned dst_gpr;
102 unsigned dst_sel_x;
103 unsigned dst_sel_y;
104 unsigned dst_sel_z;
105 unsigned dst_sel_w;
106 unsigned use_const_fields;
107 unsigned data_format;
108 unsigned num_format_all;
109 unsigned format_comp_all;
110 unsigned srf_mode_all;
111 unsigned offset;
112 unsigned endian;
113 unsigned buffer_index_mode;
114
115 // READ_SCRATCH fields
116 unsigned uncached;
117 unsigned indexed;
118 unsigned src_sel_y;
119 unsigned src_rel;
120 unsigned elem_size;
121 unsigned array_size;
122 unsigned array_base;
123 unsigned burst_count;
124 unsigned dst_rel;
125 };
126
127 struct r600_bytecode_gds {
128 struct list_head list;
129 unsigned op;
130 unsigned src_gpr;
131 unsigned src_rel;
132 unsigned src_sel_x;
133 unsigned src_sel_y;
134 unsigned src_sel_z;
135 unsigned src_gpr2;
136 unsigned dst_gpr;
137 unsigned dst_rel;
138 unsigned dst_sel_x;
139 unsigned dst_sel_y;
140 unsigned dst_sel_z;
141 unsigned dst_sel_w;
142 unsigned uav_index_mode;
143 unsigned uav_id;
144 unsigned alloc_consume;
145 unsigned bcast_first_req;
146 };
147
/*
 * Description of one output (export / memory write) instruction.
 * Instances are embedded in struct r600_bytecode_cf (`output`) and in
 * the pending_outputs[] array of struct r600_bytecode.
 */
struct r600_bytecode_output {
   unsigned array_base;
   unsigned array_size;
   unsigned comp_mask;
   unsigned type;

   unsigned op;

   unsigned elem_size;
   unsigned gpr;

   /* per-component swizzle */
   unsigned swizzle_x;
   unsigned swizzle_y;
   unsigned swizzle_z;
   unsigned swizzle_w;

   unsigned burst_count;
   unsigned index_gpr;
   unsigned mark; /* used by MEM_SCRATCH */
};
166
/*
 * RAT parameters attached to a control-flow instruction (the `rat`
 * member of struct r600_bytecode_cf).
 */
struct r600_bytecode_rat {
   unsigned id;
   unsigned inst;
   unsigned index_mode;
};
172
/*
 * One kcache entry; struct r600_bytecode_cf holds an array of four of
 * these in its kcache[] member.
 */
struct r600_bytecode_kcache {
   unsigned bank;
   unsigned mode;
   unsigned addr;
   unsigned index_mode;
};
179
180 struct r600_bytecode_cf {
181 struct list_head list;
182
183 unsigned op;
184 unsigned addr;
185 unsigned ndw;
186 unsigned id;
187 unsigned cond;
188 unsigned pop_count;
189 unsigned count;
190 unsigned cf_addr; /* control flow addr */
191 struct r600_bytecode_kcache kcache[4];
192 unsigned r6xx_uses_waterfall;
193 unsigned eg_alu_extended;
194 unsigned barrier;
195 unsigned end_of_program;
196 unsigned mark;
197 unsigned vpm;
198 struct list_head alu;
199 struct list_head tex;
200 struct list_head vtx;
201 struct list_head gds;
202 struct r600_bytecode_output output;
203 struct r600_bytecode_rat rat;
204 struct r600_bytecode_alu *curr_bs_head;
205 struct r600_bytecode_alu *prev_bs_head;
206 struct r600_bytecode_alu *prev2_bs_head;
207 unsigned isa[2];
208 unsigned nlds_read;
209 unsigned nqueue_read;
210 unsigned clause_local_written;
211 };
212
/*
 * Flow-control kinds.
 * NOTE(review): presumably the values stored in
 * r600_cf_stack_entry::type - confirm against the users.
 */
#define FC_NONE      0
#define FC_IF        1
#define FC_LOOP      2
#define FC_REP       3
#define FC_PUSH_VPM  4
#define FC_PUSH_WQM  5
219
/*
 * One entry of the flow-control stack kept in struct r600_bytecode
 * (fc_stack[]).
 */
struct r600_cf_stack_entry {
   int type;
   struct r600_bytecode_cf *start;
   struct r600_bytecode_cf **mid; /* used to store the else point */
   int num_mid; /* presumably the number of entries in `mid` - confirm */
};
226
/* Call-depth limit (32). */
#define SQ_MAX_CALL_DEPTH  0x00000020

/*
 * AR handling modes.
 * NOTE(review): presumably the values of r600_bytecode::ar_handling -
 * confirm against the users.
 */
#define AR_HANDLE_NORMAL   0
#define AR_HANDLE_RV6XX    1 /* except RV670 */
231
/*
 * Bookkeeping for stack usage: the current nesting levels of the
 * different push kinds and loops, plus the resulting size requirement.
 */
struct r600_stack_info {
   /*
    * Current level of non-WQM PUSH operations
    * (PUSH, PUSH_ELSE, ALU_PUSH_BEFORE).
    */
   int push;

   /*
    * Current level of WQM PUSH operations
    * (PUSH, PUSH_ELSE, PUSH_WQM).
    */
   int push_wqm;

   /* Current loop level. */
   int loop;

   /* Required depth. */
   int max_entries;

   /* Subentries per entry. */
   int entry_size;
};
247
248 struct r600_bytecode {
249 enum amd_gfx_level gfx_level;
250 enum radeon_family family;
251 bool has_compressed_msaa_texturing;
252 int type;
253 struct list_head cf;
254 struct r600_bytecode_cf *cf_last;
255 unsigned ndw;
256 unsigned ncf;
257 unsigned nalu_groups;
258 unsigned ngpr;
259 unsigned nstack;
260 unsigned nlds_dw;
261 unsigned nresource;
262 unsigned force_add_cf;
263 uint32_t *bytecode;
264 uint32_t fc_sp;
265 struct r600_cf_stack_entry fc_stack[256];
266 struct r600_stack_info stack;
267 unsigned ar_loaded;
268 unsigned ar_reg;
269 unsigned ar_chan;
270 unsigned ar_handling;
271 unsigned r6xx_nop_after_rel_dst;
272 bool index_loaded[2];
273 unsigned index_reg[2]; /* indexing register CF_INDEX_[01] */
274 unsigned index_reg_chan[2]; /* indexing register channel CF_INDEX_[01] */
275 unsigned debug_id;
276 struct r600_isa* isa;
277 struct r600_bytecode_output pending_outputs[5];
278 int n_pending_outputs;
279 bool need_wait_ack; /* emit a pending WAIT_ACK prior to control flow */
280 bool precise;
281 };
282
/* Prototypes for eg_asm.c */
int eg_bytecode_cf_build(struct r600_bytecode *bc,
                         struct r600_bytecode_cf *cf);
int eg_bytecode_gds_build(struct r600_bytecode *bc,
                          struct r600_bytecode_gds *gds,
                          unsigned id);
int eg_bytecode_alu_build(struct r600_bytecode *bc,
                          struct r600_bytecode_alu *alu,
                          unsigned id);
/* Prototypes for r600_asm.c */

/* Set-up and tear-down of the assembler state. */
void r600_bytecode_init(struct r600_bytecode *bc,
                        enum amd_gfx_level gfx_level,
                        enum radeon_family family,
                        bool has_compressed_msaa_texturing);
void r600_bytecode_clear(struct r600_bytecode *bc);

/* Adding instructions; the description passed in is const. */
int r600_bytecode_add_alu(struct r600_bytecode *bc,
                          const struct r600_bytecode_alu *alu);
int r600_bytecode_add_vtx(struct r600_bytecode *bc,
                          const struct r600_bytecode_vtx *vtx);
int r600_bytecode_add_vtx_tc(struct r600_bytecode *bc,
                             const struct r600_bytecode_vtx *vtx);
int r600_bytecode_add_tex(struct r600_bytecode *bc,
                          const struct r600_bytecode_tex *tex);
int r600_bytecode_add_gds(struct r600_bytecode *bc,
                          const struct r600_bytecode_gds *gds);
int r600_bytecode_add_output(struct r600_bytecode *bc,
                             const struct r600_bytecode_output *output);
int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
                                     const struct r600_bytecode_output *output);

/* ACK handling. */
void r600_bytecode_add_ack(struct r600_bytecode *bc);
int r600_bytecode_wait_acks(struct r600_bytecode *bc);
uint32_t r600_bytecode_write_export_ack_type(struct r600_bytecode *bc,
                                             bool indirect);

int r600_bytecode_build(struct r600_bytecode *bc);
int r600_bytecode_add_cf(struct r600_bytecode *bc);
int r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op);
int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
                               const struct r600_bytecode_alu *alu,
                               unsigned type);
void r600_bytecode_special_constants(uint32_t value, unsigned *sel);
void r600_bytecode_disasm(struct r600_bytecode *bc);
void r600_bytecode_alu_read(struct r600_bytecode *bc,
                            struct r600_bytecode_alu *alu,
                            uint32_t word0, uint32_t word1);
int r600_load_ar(struct r600_bytecode *bc, bool for_src);

int cm_bytecode_add_cf_end(struct r600_bytecode *bc);
326
/* Prototypes for r700_asm.c */
void r700_bytecode_cf_vtx_build(uint32_t *bytecode,
                                const struct r600_bytecode_cf *cf);
int r700_bytecode_alu_build(struct r600_bytecode *bc,
                            struct r600_bytecode_alu *alu,
                            unsigned id);
void r700_bytecode_alu_read(struct r600_bytecode *bc,
                            struct r600_bytecode_alu *alu,
                            uint32_t word0, uint32_t word1);
int r700_bytecode_fetch_mem_build(struct r600_bytecode *bc,
                                  struct r600_bytecode_vtx *mem,
                                  unsigned id);

/* Fill `output` from the two instruction words word0 / word1. */
void r600_bytecode_export_read(struct r600_bytecode *bc,
                               struct r600_bytecode_output *output,
                               uint32_t word0, uint32_t word1);
void eg_bytecode_export_read(struct r600_bytecode *bc,
                             struct r600_bytecode_output *output,
                             uint32_t word0, uint32_t word1);

/*
 * Describe `pformat` through the four out-parameters format,
 * num_format, format_comp and endian.
 */
void r600_vertex_data_type(enum pipe_format pformat,
                           unsigned *format,
                           unsigned *num_format,
                           unsigned *format_comp,
                           unsigned *endian);
346
/*
 * Map an index to its partner inside the pairs (0, 1) and (2, 3):
 * 0 -> 1, 1 -> 0, 2 -> 3, 3 -> 2. Every other value yields 0.
 */
static inline int fp64_switch(int i)
{
   if (i < 0 || i > 3)
      return 0;

   /* Flipping the lowest bit swaps the two members of each pair. */
   return i ^ 1;
}
361
362 #ifdef __cplusplus
363 }
364 #endif
365
366 #endif
367