1 /*
2 * Copyright 2010 Jerome Glisse <[email protected]>
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "r600_asm.h"
7 #include "r600_sq.h"
8 #include "r600_opcodes.h"
9 #include "r600_formats.h"
10 #include "r600d.h"
11 #include "r600d_common.h"
12
13 #include <errno.h>
14 #include <string.h>
15 #include "compiler/shader_enums.h"
16 #include "util/u_memory.h"
17 #include "util/u_math.h"
18
19 #define NUM_OF_CYCLES 3
20 #define NUM_OF_COMPONENTS 4
21
alu_writes(struct r600_bytecode_alu * alu)22 static inline bool alu_writes(struct r600_bytecode_alu *alu)
23 {
24 return alu->dst.write || alu->is_op3;
25 }
26
r600_bytecode_get_num_operands(const struct r600_bytecode_alu * alu)27 static inline unsigned int r600_bytecode_get_num_operands(const struct r600_bytecode_alu *alu)
28 {
29 return r600_isa_alu(alu->op)->src_count;
30 }
31
r600_bytecode_cf(void)32 static struct r600_bytecode_cf *r600_bytecode_cf(void)
33 {
34 struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf);
35
36 if (!cf)
37 return NULL;
38 list_inithead(&cf->list);
39 list_inithead(&cf->alu);
40 list_inithead(&cf->vtx);
41 list_inithead(&cf->tex);
42 list_inithead(&cf->gds);
43 return cf;
44 }
45
r600_bytecode_alu(void)46 static struct r600_bytecode_alu *r600_bytecode_alu(void)
47 {
48 struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu);
49
50 if (!alu)
51 return NULL;
52 list_inithead(&alu->list);
53 return alu;
54 }
55
r600_bytecode_vtx(void)56 static struct r600_bytecode_vtx *r600_bytecode_vtx(void)
57 {
58 struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx);
59
60 if (!vtx)
61 return NULL;
62 list_inithead(&vtx->list);
63 return vtx;
64 }
65
r600_bytecode_tex(void)66 static struct r600_bytecode_tex *r600_bytecode_tex(void)
67 {
68 struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex);
69
70 if (!tex)
71 return NULL;
72 list_inithead(&tex->list);
73 return tex;
74 }
75
r600_bytecode_gds(void)76 static struct r600_bytecode_gds *r600_bytecode_gds(void)
77 {
78 struct r600_bytecode_gds *gds = CALLOC_STRUCT(r600_bytecode_gds);
79
80 if (gds == NULL)
81 return NULL;
82 list_inithead(&gds->list);
83 return gds;
84 }
85
/* Number of stack columns per row ("entry size") for the given chip,
 * derived from its wavefront size as described in the table below. */
static unsigned stack_entry_size(enum radeon_family chip) {
	/* Wavefront size:
	 *       64: R600/RV670/RV770/Cypress/R740/Barts/Turks/Caicos/
	 *           Aruba/Sumo/Sumo2/redwood/juniper
	 *       32: R630/R730/R710/Palm/Cedar
	 *       16: R610/Rs780
	 *
	 * Stack row size:
	 *	Wavefront Size                        16  32  48  64
	 *	Columns per Row (R6xx/R7xx/R8xx only)  8   8   4   4
	 *	Columns per Row (R9xx+)                8   4   4   4 */

	switch (chip) {
	/* FIXME: are some chips missing here? */
	/* wavefront size 16 */
	case CHIP_RV610:
	case CHIP_RS780:
	case CHIP_RV620:
	case CHIP_RS880:
	/* wavefront size 32 */
	case CHIP_RV630:
	case CHIP_RV635:
	case CHIP_RV730:
	case CHIP_RV710:
	case CHIP_PALM:
	case CHIP_CEDAR:
		return 8;

	/* wavefront size 64 */
	default:
		return 4;
	}
}
119
/* Initialize a bytecode stream for the given GPU generation/family,
 * selecting the chip-specific AR-register handling and the workaround for
 * relative destination writes. */
void r600_bytecode_init(struct r600_bytecode *bc,
			enum amd_gfx_level gfx_level,
			enum radeon_family family,
			bool has_compressed_msaa_texturing)
{
	static unsigned next_shader_id = 0;

	bc->debug_id = ++next_shader_id;

	/* Defaults; overridden below for the chips that need workarounds. */
	bc->ar_handling = AR_HANDLE_NORMAL;
	bc->r6xx_nop_after_rel_dst = 0;

	if (gfx_level == R600 &&
	    family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880) {
		bc->ar_handling = AR_HANDLE_RV6XX;

		/* Insert a nop after a relative temp write so that a read in
		 * the following instruction group gets the right value. The
		 * r600 and EG ISA specs both say that read-after-rel-write of a
		 * register in the next instr group is illegal, but apparently
		 * that's not true on all chips (see commit
		 * c96b9834032952492efbd2d1f5511fe225704918).
		 */
		bc->r6xx_nop_after_rel_dst = 1;
	} else if (family == CHIP_RV770) {
		bc->r6xx_nop_after_rel_dst = 1;
	}

	list_inithead(&bc->cf);
	bc->gfx_level = gfx_level;
	bc->family = family;
	bc->has_compressed_msaa_texturing = has_compressed_msaa_texturing;
	bc->stack.entry_size = stack_entry_size(family);
}
155
r600_bytecode_add_cf(struct r600_bytecode * bc)156 int r600_bytecode_add_cf(struct r600_bytecode *bc)
157 {
158 struct r600_bytecode_cf *cf = r600_bytecode_cf();
159
160 if (!cf)
161 return -ENOMEM;
162 list_addtail(&cf->list, &bc->cf);
163 if (bc->cf_last) {
164 cf->id = bc->cf_last->id + 2;
165 if (bc->cf_last->eg_alu_extended) {
166 /* take into account extended alu size */
167 cf->id += 2;
168 bc->ndw += 2;
169 }
170 }
171 bc->cf_last = cf;
172 bc->ncf++;
173 bc->ndw += 2;
174 bc->force_add_cf = 0;
175 bc->ar_loaded = 0;
176 return 0;
177 }
178
/* Emit an export/output CF instruction.  When the new output is contiguous
 * with and fully compatible to the previous export CF, the two are merged
 * into a single burst instead of emitting a new CF.
 * Returns 0 on success or a negative errno from r600_bytecode_add_cf(). */
int r600_bytecode_add_output(struct r600_bytecode *bc,
		const struct r600_bytecode_output *output)
{
	int r;

	/* Track the highest GPR referenced so far. */
	if (output->gpr >= bc->ngpr)
		bc->ngpr = output->gpr + 1;

	/* A burst merge requires matching op (EXPORT may be upgraded to
	 * EXPORT_DONE), type, element size, swizzles and component mask,
	 * and the combined burst must not exceed the hardware limit of 16. */
	if (bc->cf_last && (bc->cf_last->op == output->op ||
		(bc->cf_last->op == CF_OP_EXPORT &&
		output->op == CF_OP_EXPORT_DONE)) &&
		output->type == bc->cf_last->output.type &&
		output->elem_size == bc->cf_last->output.elem_size &&
		output->swizzle_x == bc->cf_last->output.swizzle_x &&
		output->swizzle_y == bc->cf_last->output.swizzle_y &&
		output->swizzle_z == bc->cf_last->output.swizzle_z &&
		output->swizzle_w == bc->cf_last->output.swizzle_w &&
		output->comp_mask == bc->cf_last->output.comp_mask &&
		(output->burst_count + bc->cf_last->output.burst_count) <= 16) {

		/* New output immediately precedes the previous one: extend
		 * the burst downwards (new gpr/array_base become the start). */
		if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
			(output->array_base + output->burst_count) == bc->cf_last->output.array_base) {

			bc->cf_last->op = bc->cf_last->output.op = output->op;
			bc->cf_last->output.gpr = output->gpr;
			bc->cf_last->output.array_base = output->array_base;
			bc->cf_last->output.burst_count += output->burst_count;
			return 0;

		/* New output immediately follows the previous one: extend
		 * the burst upwards (start stays, count grows). */
		} else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
			output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {

			bc->cf_last->op = bc->cf_last->output.op = output->op;
			bc->cf_last->output.burst_count += output->burst_count;
			return 0;
		}
	}

	/* Not mergeable: start a new CF for this output. */
	r = r600_bytecode_add_cf(bc);
	if (r)
		return r;
	bc->cf_last->op = output->op;
	memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output));
	bc->cf_last->barrier = 1;
	return 0;
}
225
r600_bytecode_add_pending_output(struct r600_bytecode * bc,const struct r600_bytecode_output * output)226 int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
227 const struct r600_bytecode_output *output)
228 {
229 assert(bc->n_pending_outputs + 1 < ARRAY_SIZE(bc->pending_outputs));
230 bc->pending_outputs[bc->n_pending_outputs++] = *output;
231
232 return 0;
233 }
234
/* Record that an ack is outstanding; r600_bytecode_wait_acks() will emit
 * the corresponding WAIT_ACK CF instruction when requested. */
void
r600_bytecode_add_ack(struct r600_bytecode *bc)
{
	bc->need_wait_ack = true;
}
240
241 int
r600_bytecode_wait_acks(struct r600_bytecode * bc)242 r600_bytecode_wait_acks(struct r600_bytecode *bc)
243 {
244 /* Store acks are an R700+ feature. */
245 if (bc->gfx_level < R700)
246 return 0;
247
248 if (!bc->need_wait_ack)
249 return 0;
250
251 int ret = r600_bytecode_add_cfinst(bc, CF_OP_WAIT_ACK);
252 if (ret != 0)
253 return ret;
254
255 struct r600_bytecode_cf *cf = bc->cf_last;
256 cf->barrier = 1;
257 /* Request a wait if the number of outstanding acks is > 0 */
258 cf->cf_addr = 0;
259
260 return 0;
261 }
262
263 uint32_t
r600_bytecode_write_export_ack_type(struct r600_bytecode * bc,bool indirect)264 r600_bytecode_write_export_ack_type(struct r600_bytecode *bc, bool indirect)
265 {
266 if (bc->gfx_level >= R700) {
267 if (indirect)
268 return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK_EG;
269 else
270 return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_ACK_EG;
271 } else {
272 if (indirect)
273 return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
274 else
275 return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
276 }
277 }
278
/* alu instructions that can only exist once per group */
is_alu_once_inst(struct r600_bytecode_alu * alu)280 static int is_alu_once_inst(struct r600_bytecode_alu *alu)
281 {
282 return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED) || alu->is_lds_idx_op || alu->op == ALU_OP0_GROUP_BARRIER;
283 }
284
is_alu_reduction_inst(struct r600_bytecode * bc,struct r600_bytecode_alu * alu)285 static int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
286 {
287 return (r600_isa_alu(alu->op)->flags & AF_REPL) &&
288 (r600_isa_alu_slots(bc->isa->hw_class, alu->op) == AF_4V);
289 }
290
is_alu_mova_inst(struct r600_bytecode_alu * alu)291 static int is_alu_mova_inst(struct r600_bytecode_alu *alu)
292 {
293 return r600_isa_alu(alu->op)->flags & AF_MOVA;
294 }
295
alu_uses_rel(struct r600_bytecode_alu * alu)296 static int alu_uses_rel(struct r600_bytecode_alu *alu)
297 {
298 unsigned num_src = r600_bytecode_get_num_operands(alu);
299 unsigned src;
300
301 if (alu->dst.rel) {
302 return 1;
303 }
304
305 for (src = 0; src < num_src; ++src) {
306 if (alu->src[src].rel) {
307 return 1;
308 }
309 }
310 return 0;
311 }
312
is_lds_read(int sel)313 static int is_lds_read(int sel)
314 {
315 return sel == EG_V_SQ_ALU_SRC_LDS_OQ_A_POP || sel == EG_V_SQ_ALU_SRC_LDS_OQ_B_POP;
316 }
317
alu_uses_lds(struct r600_bytecode_alu * alu)318 static int alu_uses_lds(struct r600_bytecode_alu *alu)
319 {
320 unsigned num_src = r600_bytecode_get_num_operands(alu);
321 unsigned src;
322
323 for (src = 0; src < num_src; ++src) {
324 if (is_lds_read(alu->src[src].sel)) {
325 return 1;
326 }
327 }
328 return 0;
329 }
330
is_alu_64bit_inst(struct r600_bytecode_alu * alu)331 static int is_alu_64bit_inst(struct r600_bytecode_alu *alu)
332 {
333 const struct alu_op_info *op = r600_isa_alu(alu->op);
334 return (op->flags & AF_64);
335 }
336
is_alu_vec_unit_inst(struct r600_bytecode * bc,struct r600_bytecode_alu * alu)337 static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
338 {
339 unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
340 return !(slots & AF_S);
341 }
342
is_alu_trans_unit_inst(struct r600_bytecode * bc,struct r600_bytecode_alu * alu)343 static int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
344 {
345 unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
346 return !(slots & AF_V);
347 }
348
349 /* alu instructions that can execute on any unit */
is_alu_any_unit_inst(struct r600_bytecode * bc,struct r600_bytecode_alu * alu)350 static int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
351 {
352 unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
353 return slots == AF_VS;
354 }
355
/* True if this instruction is a NOP (which must never be rescheduled). */
static int is_nop_inst(struct r600_bytecode_alu *alu)
{
	return alu->op == ALU_OP0_NOP;
}
360
/* Walk the instruction group starting at alu_first (up to and including the
 * instruction marked "last") and assign each instruction to a hardware slot:
 * assignment[0..3] are the vector units x/y/z/w (indexed by dst channel),
 * assignment[4] is the trans/scalar unit (absent on Cayman, which has only
 * 4 slots).  Returns 0 on success, -1 if a slot is double-booked. */
static int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first,
		struct r600_bytecode_alu *assignment[5])
{
	struct r600_bytecode_alu *alu;
	unsigned i, chan, trans;
	int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;

	for (i = 0; i < max_slots; i++)
		assignment[i] = NULL;

	for (alu = alu_first; alu; alu = list_entry(alu->list.next, struct r600_bytecode_alu, list)) {
		chan = alu->dst.chan;
		/* Decide vector vs. trans placement:
		 * trans-only ops must go to slot 4; vec-only ops to their channel;
		 * any-unit ops prefer the vector slot unless it is taken. */
		if (max_slots == 4)
			trans = 0;
		else if (is_alu_trans_unit_inst(bc, alu))
			trans = 1;
		else if (is_alu_vec_unit_inst(bc, alu))
			trans = 0;
		else if (assignment[chan])
			trans = 1; /* Assume ALU_INST_PREFER_VECTOR. */
		else
			trans = 0;

		if (trans) {
			if (assignment[4]) {
				assert(0); /* ALU.Trans has already been allocated. */
				return -1;
			}
			assignment[4] = alu;
		} else {
			if (assignment[chan]) {
				assert(0); /* ALU.chan has already been allocated. */
				return -1;
			}
			assignment[chan] = alu;
		}

		if (alu->last)
			break;
	}
	return 0;
}
403
/* Read-port reservation state used while validating a bank swizzle:
 * hw_gpr[cycle][component] holds the GPR sel claimed for that read port
 * (-1 = free); hw_cfile_addr/elem track up to four constant-file (kcache)
 * reads (addr -1 = free). */
struct alu_bank_swizzle {
	int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
	int hw_cfile_addr[4];
	int hw_cfile_elem[4];
};
409
/* For each vector bank-swizzle mode, the GPR read cycle used by
 * sources 0, 1 and 2 respectively. */
static const unsigned cycle_for_bank_swizzle_vec[][3] = {
	[SQ_ALU_VEC_012] = { 0, 1, 2 },
	[SQ_ALU_VEC_021] = { 0, 2, 1 },
	[SQ_ALU_VEC_120] = { 1, 2, 0 },
	[SQ_ALU_VEC_102] = { 1, 0, 2 },
	[SQ_ALU_VEC_201] = { 2, 0, 1 },
	[SQ_ALU_VEC_210] = { 2, 1, 0 }
};
418
/* For each scalar (trans-unit) bank-swizzle mode, the GPR read cycle used
 * by sources 0, 1 and 2 respectively. */
static const unsigned cycle_for_bank_swizzle_scl[][3] = {
	[SQ_ALU_SCL_210] = { 2, 1, 0 },
	[SQ_ALU_SCL_122] = { 1, 2, 2 },
	[SQ_ALU_SCL_212] = { 2, 1, 2 },
	[SQ_ALU_SCL_221] = { 2, 2, 1 }
};
425
init_bank_swizzle(struct alu_bank_swizzle * bs)426 static void init_bank_swizzle(struct alu_bank_swizzle *bs)
427 {
428 int i, cycle, component;
429 /* set up gpr use */
430 for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++)
431 for (component = 0; component < NUM_OF_COMPONENTS; component++)
432 bs->hw_gpr[cycle][component] = -1;
433 for (i = 0; i < 4; i++)
434 bs->hw_cfile_addr[i] = -1;
435 for (i = 0; i < 4; i++)
436 bs->hw_cfile_elem[i] = -1;
437 }
438
reserve_gpr(struct alu_bank_swizzle * bs,unsigned sel,unsigned chan,unsigned cycle)439 static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle)
440 {
441 if (bs->hw_gpr[cycle][chan] == -1)
442 bs->hw_gpr[cycle][chan] = sel;
443 else if (bs->hw_gpr[cycle][chan] != (int)sel) {
444 /* Another scalar operation has already used the GPR read port for the channel. */
445 return -1;
446 }
447 return 0;
448 }
449
reserve_cfile(const struct r600_bytecode * bc,struct alu_bank_swizzle * bs,unsigned sel,unsigned chan)450 static int reserve_cfile(const struct r600_bytecode *bc,
451 struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
452 {
453 int res, num_res = 4;
454 if (bc->gfx_level >= R700) {
455 num_res = 2;
456 chan /= 2;
457 }
458 for (res = 0; res < num_res; ++res) {
459 if (bs->hw_cfile_addr[res] == -1) {
460 bs->hw_cfile_addr[res] = sel;
461 bs->hw_cfile_elem[res] = chan;
462 return 0;
463 } else if (bs->hw_cfile_addr[res] == sel &&
464 bs->hw_cfile_elem[res] == chan)
465 return 0; /* Read for this scalar element already reserved, nothing to do here. */
466 }
467 /* All cfile read ports are used, cannot reference vector element. */
468 return -1;
469 }
470
/* GPRs occupy source-select values 0..127. */
static int is_gpr(unsigned sel)
{
	return sel < 128;
}
475
/* CB constants start at 512, and get translated to a kcache index when ALU
 * clauses are constructed. Note that we handle kcache constants the same way
 * as (the now gone) cfile constants, is that really required? */
static int is_kcache(unsigned sel)
{
	return (sel >= 512 && sel <= 4606) || /* Kcache before translation. */
	       (sel >= 128 && sel <= 191) ||  /* Kcache 0 & 1 after translation. */
	       (sel >= 257 && sel <= 319);    /* Kcache 2 & 3 after translation (EG). */
}
485
is_const(int sel)486 static int is_const(int sel)
487 {
488 return is_kcache(sel) ||
489 (sel >= V_SQ_ALU_SRC_0 &&
490 sel <= V_SQ_ALU_SRC_LITERAL);
491 }
492
/* Validate the read-port usage of one vector-slot instruction under the
 * given bank swizzle, reserving GPR and cfile ports in *bs as it goes.
 * Returns 0 if every read fits, nonzero on a port conflict. */
static int check_vector(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
			struct alu_bank_swizzle *bs, int bank_swizzle)
{
	int r, src, num_src, sel, elem, cycle;

	num_src = r600_bytecode_get_num_operands(alu);
	for (src = 0; src < num_src; src++) {
		sel = alu->src[src].sel;
		elem = alu->src[src].chan;
		if (is_gpr(sel)) {
			cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src];
			if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan)
				/* Nothing to do; special-case optimization,
				 * second source uses first source’s reservation. */
				continue;
			else {
				r = reserve_gpr(bs, sel, elem, cycle);
				if (r)
					return r;
			}
		} else if (is_kcache(sel)) {
			/* The bank is folded into the key so that equal sels in
			 * different banks never alias. */
			r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem);
			if (r)
				return r;
		}
		/* No restrictions on PV, PS, literal or special constants. */
	}
	return 0;
}
522
/* Validate the read-port usage of the trans-slot (scalar) instruction under
 * the given scalar bank swizzle, reserving ports in *bs.  At most two
 * constant reads are allowed, and GPR/PV/PS loads must not land on cycles
 * already consumed by constant loads.  Returns 0 if everything fits, -1 or
 * a reservation error otherwise. */
static int check_scalar(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
			struct alu_bank_swizzle *bs, int bank_swizzle)
{
	int r, src, num_src, const_count, sel, elem, cycle;

	num_src = r600_bytecode_get_num_operands(alu);
	/* First pass: count constants and reserve kcache ports. */
	for (const_count = 0, src = 0; src < num_src; ++src) {
		sel = alu->src[src].sel;
		elem = alu->src[src].chan;
		if (is_const(sel)) { /* Any constant, including literal and inline constants. */
			if (const_count >= 2)
				/* More than two references to a constant in
				 * transcendental operation. */
				return -1;
			else
				const_count++;
		}
		if (is_kcache(sel)) {
			r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem);
			if (r)
				return r;
		}
	}
	/* Second pass: check GPR (and PV/PS) cycles against the constant count. */
	for (src = 0; src < num_src; ++src) {
		sel = alu->src[src].sel;
		elem = alu->src[src].chan;
		if (is_gpr(sel)) {
			cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
			if (cycle < const_count)
				/* Cycle for GPR load conflicts with
				 * constant load in transcendental operation. */
				return -1;
			r = reserve_gpr(bs, sel, elem, cycle);
			if (r)
				return r;
		}
		/* PV PS restrictions */
		if (const_count && (sel == 254 || sel == 255)) {
			cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
			if (cycle < const_count)
				return -1;
		}
	}
	return 0;
}
568
/* Find and commit a bank-swizzle assignment for every instruction in the
 * group so that all GPR/constant read-port constraints are satisfied.
 * Forced swizzles are honored; otherwise the combinations are searched by
 * brute force.  Returns 0 on success, -1 if no combination works. */
static int check_and_set_bank_swizzle(const struct r600_bytecode *bc,
				      struct r600_bytecode_alu *slots[5])
{
	struct alu_bank_swizzle bs;
	int bank_swizzle[5];
	int i, r = 0, forced = 1;
	bool scalar_only = bc->gfx_level == CAYMAN ? false : true;
	int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;
	/* Bound the search so a pathological group cannot loop forever. */
	int max_checks = max_slots * 1000;

	for (i = 0; i < max_slots; i++) {
		if (slots[i]) {
			if (slots[i]->bank_swizzle_force) {
				slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
			} else {
				forced = 0;
			}
		}

		if (i < 4 && slots[i])
			scalar_only = false;
	}
	/* Every occupied slot had a forced swizzle — nothing to search. */
	if (forced)
		return 0;

	/* Just check every possible combination of bank swizzle.
	 * Not very efficient, but works on the first try in most of the cases. */
	for (i = 0; i < 4; i++)
		if (!slots[i] || !slots[i]->bank_swizzle_force || slots[i]->is_lds_idx_op)
			bank_swizzle[i] = SQ_ALU_VEC_012;
		else
			bank_swizzle[i] = slots[i]->bank_swizzle;

	bank_swizzle[4] = SQ_ALU_SCL_210;

	while(bank_swizzle[4] <= SQ_ALU_SCL_221 && max_checks--) {
		init_bank_swizzle(&bs);
		if (scalar_only == false) {
			for (i = 0; i < 4; i++) {
				if (slots[i]) {
					r = check_vector(bc, slots[i], &bs, bank_swizzle[i]);
					if (r)
						break;
				}
			}
		} else
			r = 0;

		if (!r && max_slots == 5 && slots[4]) {
			r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]);
		}
		if (!r) {
			/* Found a valid combination — commit it to the group. */
			for (i = 0; i < max_slots; i++) {
				if (slots[i])
					slots[i]->bank_swizzle = bank_swizzle[i];
			}
			return 0;
		}

		/* Advance to the next combination, odometer-style: bump the
		 * first non-forced slot, carrying into the next on overflow. */
		if (scalar_only) {
			bank_swizzle[4]++;
		} else {
			for (i = 0; i < max_slots; i++) {
				if (!slots[i] || (!slots[i]->bank_swizzle_force && !slots[i]->is_lds_idx_op)) {
					bank_swizzle[i]++;
					if (bank_swizzle[i] <= SQ_ALU_VEC_210)
						break;
					else if (i < max_slots - 1)
						bank_swizzle[i] = SQ_ALU_VEC_012;
					else
						return -1;
				}
			}
		}
	}

	/* Couldn't find a working swizzle. */
	return -1;
}
648
/* Rewrite GPR sources in the current group that refer to results of the
 * previous group so they read the PV (per-channel previous vector) and PS
 * (previous scalar) registers instead, avoiding a GPR read.  Only safe for
 * non-relative, non-64-bit writes with matching predication.
 * Returns 0 on success or the error from assign_alu_units(). */
static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
		struct r600_bytecode_alu *slots[5], struct r600_bytecode_alu *alu_prev)
{
	struct r600_bytecode_alu *prev[5];
	int gpr[5], chan[5];
	int i, j, r, src, num_src;
	int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;

	r = assign_alu_units(bc, alu_prev, prev);
	if (r)
		return r;

	/* Record, per slot of the previous group, which GPR/channel it wrote
	 * (gpr[i] == -1 means that slot produced nothing forwardable). */
	for (i = 0; i < max_slots; ++i) {
		if (prev[i] && alu_writes(prev[i]) && !prev[i]->dst.rel) {

			if (is_alu_64bit_inst(prev[i])) {
				gpr[i] = -1;
				continue;
			}

			gpr[i] = prev[i]->dst.sel;
			/* cube writes more than PV.X */
			if (is_alu_reduction_inst(bc, prev[i]))
				chan[i] = 0;
			else
				chan[i] = prev[i]->dst.chan;
		} else
			gpr[i] = -1;
	}

	/* Rewrite matching sources in the current group. */
	for (i = 0; i < max_slots; ++i) {
		struct r600_bytecode_alu *alu = slots[i];
		if (!alu)
			continue;

		if (is_alu_64bit_inst(alu))
			continue;
		num_src = r600_bytecode_get_num_operands(alu);
		for (src = 0; src < num_src; ++src) {
			if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
				continue;

			/* Trans-slot result is forwarded through PS (pre-Cayman;
			 * Cayman has no trans unit). */
			if (bc->gfx_level < CAYMAN) {
				if (alu->src[src].sel == gpr[4] &&
				    alu->src[src].chan == chan[4] &&
				    alu_prev->pred_sel == alu->pred_sel) {
					alu->src[src].sel = V_SQ_ALU_SRC_PS;
					alu->src[src].chan = 0;
					continue;
				}
			}

			/* Vector-slot results are forwarded through PV.xyzw. */
			for (j = 0; j < 4; ++j) {
				if (alu->src[src].sel == gpr[j] &&
				    alu->src[src].chan == j &&
				    alu_prev->pred_sel == alu->pred_sel) {
					alu->src[src].sel = V_SQ_ALU_SRC_PV;
					alu->src[src].chan = chan[j];
					break;
				}
			}
		}
	}

	return 0;
}
715
r600_bytecode_special_constants(uint32_t value,unsigned * sel)716 void r600_bytecode_special_constants(uint32_t value, unsigned *sel)
717 {
718 switch(value) {
719 case 0:
720 *sel = V_SQ_ALU_SRC_0;
721 break;
722 case 1:
723 *sel = V_SQ_ALU_SRC_1_INT;
724 break;
725 case -1:
726 *sel = V_SQ_ALU_SRC_M_1_INT;
727 break;
728 case 0x3F800000: /* 1.0f */
729 *sel = V_SQ_ALU_SRC_1;
730 break;
731 case 0x3F000000: /* 0.5f */
732 *sel = V_SQ_ALU_SRC_0_5;
733 break;
734 default:
735 *sel = V_SQ_ALU_SRC_LITERAL;
736 break;
737 }
738 }
739
/* compute how many literals are needed */
r600_bytecode_alu_nliterals(struct r600_bytecode_alu * alu,uint32_t literal[4],unsigned * nliteral)741 static int r600_bytecode_alu_nliterals(struct r600_bytecode_alu *alu,
742 uint32_t literal[4], unsigned *nliteral)
743 {
744 unsigned num_src = r600_bytecode_get_num_operands(alu);
745 unsigned i, j;
746
747 for (i = 0; i < num_src; ++i) {
748 if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
749 uint32_t value = alu->src[i].value;
750 unsigned found = 0;
751 for (j = 0; j < *nliteral; ++j) {
752 if (literal[j] == value) {
753 found = 1;
754 break;
755 }
756 }
757 if (!found) {
758 if (*nliteral >= 4)
759 return -EINVAL;
760 literal[(*nliteral)++] = value;
761 }
762 }
763 }
764 return 0;
765 }
766
r600_bytecode_alu_adjust_literals(struct r600_bytecode_alu * alu,uint32_t literal[4],unsigned nliteral)767 static void r600_bytecode_alu_adjust_literals(struct r600_bytecode_alu *alu,
768 uint32_t literal[4], unsigned nliteral)
769 {
770 unsigned num_src = r600_bytecode_get_num_operands(alu);
771 unsigned i, j;
772
773 for (i = 0; i < num_src; ++i) {
774 if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
775 uint32_t value = alu->src[i].value;
776 for (j = 0; j < nliteral; ++j) {
777 if (literal[j] == value) {
778 alu->src[i].chan = j;
779 break;
780 }
781 }
782 }
783 }
784 }
785
merge_inst_groups(struct r600_bytecode * bc,struct r600_bytecode_alu * slots[5],struct r600_bytecode_alu * alu_prev)786 static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu *slots[5],
787 struct r600_bytecode_alu *alu_prev)
788 {
789 struct r600_bytecode_alu *prev[5];
790 struct r600_bytecode_alu *result[5] = { NULL };
791
792 uint8_t interp_xz = 0;
793
794 uint32_t literal[4], prev_literal[4];
795 unsigned nliteral = 0, prev_nliteral = 0;
796
797 int i, j, r, src, num_src;
798 int num_once_inst = 0;
799 int have_mova = 0, have_rel = 0;
800 int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;
801
802 bool has_dot = false;
803
804 r = assign_alu_units(bc, alu_prev, prev);
805 if (r)
806 return r;
807
808 for (i = 0; i < max_slots; ++i) {
809 if (prev[i]) {
810 if (prev[i]->pred_sel)
811 return 0;
812 if (is_alu_once_inst(prev[i]))
813 return 0;
814 has_dot |= prev[i]->op == ALU_OP2_DOT || prev[i]->op == ALU_OP2_DOT_IEEE;
815
816
817 if (prev[i]->op == ALU_OP1_INTERP_LOAD_P0)
818 interp_xz |= 3;
819 if (prev[i]->op == ALU_OP2_INTERP_X)
820 interp_xz |= 1;
821 if (prev[i]->op == ALU_OP2_INTERP_Z)
822 interp_xz |= 2;
823 }
824 if (slots[i]) {
825 if (slots[i]->pred_sel)
826 return 0;
827 if (is_alu_once_inst(slots[i]))
828 return 0;
829 has_dot |= slots[i]->op == ALU_OP2_DOT || slots[i]->op == ALU_OP2_DOT_IEEE;
830 return 0;
831 if (slots[i]->op == ALU_OP1_INTERP_LOAD_P0)
832 interp_xz |= 3;
833 if (slots[i]->op == ALU_OP2_INTERP_X)
834 interp_xz |= 1;
835 if (slots[i]->op == ALU_OP2_INTERP_Z)
836 interp_xz |= 2;
837 }
838 if (interp_xz == 3)
839 return 0;
840 }
841
842 for (i = 0; i < max_slots; ++i) {
843 struct r600_bytecode_alu *alu;
844
845 if (num_once_inst > 0)
846 return 0;
847
848 /* check number of literals */
849 if (prev[i]) {
850 if (r600_bytecode_alu_nliterals(prev[i], literal, &nliteral))
851 return 0;
852 if (r600_bytecode_alu_nliterals(prev[i], prev_literal, &prev_nliteral))
853 return 0;
854 if (is_alu_mova_inst(prev[i])) {
855 if (have_rel)
856 return 0;
857 have_mova = 1;
858 }
859
860 if (alu_uses_rel(prev[i])) {
861 if (have_mova) {
862 return 0;
863 }
864 have_rel = 1;
865 }
866 if (alu_uses_lds(prev[i]))
867 return 0;
868
869 num_once_inst += is_alu_once_inst(prev[i]);
870 }
871 if (slots[i] && r600_bytecode_alu_nliterals(slots[i], literal, &nliteral))
872 return 0;
873
874 /* Let's check used slots. */
875 if (prev[i] && !slots[i]) {
876 result[i] = prev[i];
877 continue;
878 } else if (prev[i] && slots[i]) {
879 if (max_slots == 5 && !has_dot && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
880 /* Trans unit is still free try to use it. */
881 if (is_alu_any_unit_inst(bc, slots[i]) && !alu_uses_lds(slots[i])) {
882 result[i] = prev[i];
883 result[4] = slots[i];
884 } else if (is_alu_any_unit_inst(bc, prev[i])) {
885 if (slots[i]->dst.sel == prev[i]->dst.sel &&
886 alu_writes(slots[i]) &&
887 alu_writes(prev[i]))
888 return 0;
889
890 result[i] = slots[i];
891 result[4] = prev[i];
892 } else
893 return 0;
894 } else
895 return 0;
896 } else if(!slots[i]) {
897 continue;
898 } else {
899 if (max_slots == 5 && slots[i] && prev[4] &&
900 slots[i]->dst.sel == prev[4]->dst.sel &&
901 slots[i]->dst.chan == prev[4]->dst.chan &&
902 alu_writes(slots[i]) &&
903 alu_writes(prev[4]))
904 return 0;
905
906 result[i] = slots[i];
907 }
908
909 alu = slots[i];
910 num_once_inst += is_alu_once_inst(alu);
911
912 /* don't reschedule NOPs */
913 if (is_nop_inst(alu))
914 return 0;
915
916 if (is_alu_mova_inst(alu)) {
917 if (have_rel) {
918 return 0;
919 }
920 have_mova = 1;
921 }
922
923 if (alu_uses_rel(alu)) {
924 if (have_mova) {
925 return 0;
926 }
927 have_rel = 1;
928 }
929
930 if (alu->op == ALU_OP0_SET_CF_IDX0 ||
931 alu->op == ALU_OP0_SET_CF_IDX1)
932 return 0; /* data hazard with MOVA */
933
934 /* Let's check source gprs */
935 num_src = r600_bytecode_get_num_operands(alu);
936 for (src = 0; src < num_src; ++src) {
937
938 /* Constants don't matter. */
939 if (!is_gpr(alu->src[src].sel))
940 continue;
941
942 for (j = 0; j < max_slots; ++j) {
943 if (!prev[j] || !alu_writes(prev[j]))
944 continue;
945
946 /* If it's relative then we can't determine which gpr is really used. */
947 if (prev[j]->dst.chan == alu->src[src].chan &&
948 (prev[j]->dst.sel == alu->src[src].sel ||
949 prev[j]->dst.rel || alu->src[src].rel))
950 return 0;
951 }
952 }
953 }
954
955 /* more than one PRED_ or KILL_ ? */
956 if (num_once_inst > 1)
957 return 0;
958
959 /* check if the result can still be swizzlet */
960 r = check_and_set_bank_swizzle(bc, result);
961 if (r)
962 return 0;
963
964 /* looks like everything worked out right, apply the changes */
965
966 /* undo adding previous literals */
967 bc->cf_last->ndw -= align(prev_nliteral, 2);
968
969 /* sort instructions */
970 for (i = 0; i < max_slots; ++i) {
971 slots[i] = result[i];
972 if (result[i]) {
973 list_del(&result[i]->list);
974 result[i]->last = 0;
975 list_addtail(&result[i]->list, &bc->cf_last->alu);
976 }
977 }
978
979 /* determine new last instruction */
980 list_entry(bc->cf_last->alu.prev, struct r600_bytecode_alu, list)->last = 1;
981
982 /* determine new first instruction */
983 for (i = 0; i < max_slots; ++i) {
984 if (result[i]) {
985 bc->cf_last->curr_bs_head = result[i];
986 break;
987 }
988 }
989
990 bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head;
991 bc->cf_last->prev2_bs_head = NULL;
992
993 return 0;
994 }
995
996 /* we'll keep kcache sets sorted by bank & addr */
/* Reserve the constant-cache line (bank, line) in the kcache[] set array,
 * which is kept sorted by (bank, addr).  A set locks either one line
 * (LOCK_1) or two consecutive lines (LOCK_2); adjacent requests are merged
 * into an existing set where possible.  Returns 0 on success, -ENOMEM when
 * all sets are in use and the line cannot be accommodated. */
static int r600_bytecode_alloc_kcache_line(struct r600_bytecode *bc,
		struct r600_bytecode_kcache *kcache,
		unsigned bank, unsigned line, unsigned index_mode)
{
	int i, kcache_banks = bc->gfx_level >= EVERGREEN ? 4 : 2;

	for (i = 0; i < kcache_banks; i++) {
		if (kcache[i].mode) {
			int d;

			if (kcache[i].bank < bank)
				continue;

			/* Passed the sorted position for (bank, line): insert
			 * a new set here, shifting the rest up. */
			if ((kcache[i].bank == bank && kcache[i].addr > line+1) ||
					kcache[i].bank > bank) {
				/* try to insert new line */
				if (kcache[kcache_banks-1].mode) {
					/* all sets are in use */
					return -ENOMEM;
				}

				memmove(&kcache[i+1],&kcache[i], (kcache_banks-i-1)*sizeof(struct r600_bytecode_kcache));
				kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1;
				kcache[i].bank = bank;
				kcache[i].addr = line;
				kcache[i].index_mode = index_mode;
				return 0;
			}

			d = line - kcache[i].addr;

			if (d == -1) {
				kcache[i].addr--;
				if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_2) {
					/* we are prepending the line to the current set,
					 * discarding the existing second line,
					 * so we'll have to insert line+2 after it */
					line += 2;
					continue;
				} else if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_1) {
					kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2;
					return 0;
				} else {
					/* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
					return -ENOMEM;
				}
			} else if (d == 1) {
				/* Line directly follows this set: widen it to two lines. */
				kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2;
				return 0;
			} else if (d == 0)
				/* Line already covered by this set. */
				return 0;
		} else { /* free kcache set - use it */
			kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1;
			kcache[i].bank = bank;
			kcache[i].addr = line;
			kcache[i].index_mode = index_mode;
			return 0;
		}
	}
	return -ENOMEM;
}
1058
r600_bytecode_alloc_inst_kcache_lines(struct r600_bytecode * bc,struct r600_bytecode_kcache * kcache,struct r600_bytecode_alu * alu)1059 static int r600_bytecode_alloc_inst_kcache_lines(struct r600_bytecode *bc,
1060 struct r600_bytecode_kcache *kcache,
1061 struct r600_bytecode_alu *alu)
1062 {
1063 int i, r;
1064
1065 for (i = 0; i < 3; i++) {
1066 unsigned bank, line, sel = alu->src[i].sel, index_mode;
1067
1068 if (sel < 512)
1069 continue;
1070
1071 bank = alu->src[i].kc_bank;
1072 assert(bank < R600_MAX_ALU_CONST_BUFFERS);
1073 line = (sel-512)>>4;
1074 index_mode = alu->src[i].kc_rel;
1075
1076 if ((r = r600_bytecode_alloc_kcache_line(bc, kcache, bank, line, index_mode)))
1077 return r;
1078 }
1079 return 0;
1080 }
1081
r600_bytecode_assign_kcache_banks(struct r600_bytecode_alu * alu,struct r600_bytecode_kcache * kcache)1082 static int r600_bytecode_assign_kcache_banks(
1083 struct r600_bytecode_alu *alu,
1084 struct r600_bytecode_kcache * kcache)
1085 {
1086 int i, j;
1087
1088 /* Alter the src operands to refer to the kcache. */
1089 for (i = 0; i < 3; ++i) {
1090 static const unsigned int base[] = {128, 160, 256, 288};
1091 unsigned int line, sel = alu->src[i].sel, found = 0;
1092
1093 if (sel < 512)
1094 continue;
1095
1096 sel -= 512;
1097 line = sel>>4;
1098
1099 for (j = 0; j < 4 && !found; ++j) {
1100 switch (kcache[j].mode) {
1101 case V_SQ_CF_KCACHE_NOP:
1102 case V_SQ_CF_KCACHE_LOCK_LOOP_INDEX:
1103 R600_ASM_ERR("unexpected kcache line mode\n");
1104 return -ENOMEM;
1105 default:
1106 if (kcache[j].bank == alu->src[i].kc_bank &&
1107 kcache[j].addr <= line &&
1108 line < kcache[j].addr + kcache[j].mode) {
1109 alu->src[i].sel = sel - (kcache[j].addr<<4);
1110 alu->src[i].sel += base[j];
1111 found=1;
1112 }
1113 }
1114 }
1115 }
1116 return 0;
1117 }
1118
/* Reserve the kcache lines one ALU instruction needs in the current ALU
 * clause; if the clause's four kcache sets cannot hold them, close the
 * clause and retry in a fresh CF of the given type.
 * Returns 0 on success or a negative errno. */
static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc,
		struct r600_bytecode_alu *alu,
		unsigned type)
{
	struct r600_bytecode_kcache kcache_sets[4];
	struct r600_bytecode_kcache *kcache = kcache_sets;
	int r;

	/* Work on a scratch copy so a failed allocation leaves the CF's
	 * committed kcache state untouched. */
	memcpy(kcache, bc->cf_last->kcache, 4 * sizeof(struct r600_bytecode_kcache));

	if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) {
		/* can't alloc, need to start new clause */

		/* Make sure the CF ends with an "last" instruction when
		 * we split an ALU group because of a new CF */
		if (!list_is_empty(&bc->cf_last->alu)) {
			struct r600_bytecode_alu *last_submitted =
				list_last_entry(&bc->cf_last->alu, struct r600_bytecode_alu, list);
			last_submitted->last = 1;
		}

		if ((r = r600_bytecode_add_cf(bc))) {
			return r;
		}
		bc->cf_last->op = type;

		/* retry with the new clause (its kcache sets start empty,
		 * so a single instruction's needs must always fit) */
		kcache = bc->cf_last->kcache;
		if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) {
			/* can't alloc again- should never happen */
			return r;
		}
	} else {
		/* update kcache sets */
		memcpy(bc->cf_last->kcache, kcache, 4 * sizeof(struct r600_bytecode_kcache));
	}

	/* if we actually used more than 2 kcache sets, or have relative indexing - use ALU_EXTENDED on eg+ */
	if (kcache[2].mode != V_SQ_CF_KCACHE_NOP ||
	    kcache[0].index_mode || kcache[1].index_mode || kcache[2].index_mode || kcache[3].index_mode) {
		if (bc->gfx_level < EVERGREEN)
			return -ENOMEM;
		bc->cf_last->eg_alu_extended = 1;
	}

	return 0;
}
1166
insert_nop_r6xx(struct r600_bytecode * bc,int max_slots)1167 static int insert_nop_r6xx(struct r600_bytecode *bc, int max_slots)
1168 {
1169 struct r600_bytecode_alu alu;
1170 int r, i;
1171
1172 for (i = 0; i < max_slots; i++) {
1173 memset(&alu, 0, sizeof(alu));
1174 alu.op = ALU_OP0_NOP;
1175 alu.src[0].chan = i & 3;
1176 alu.dst.chan = i & 3;
1177 alu.last = (i == max_slots - 1);
1178 r = r600_bytecode_add_alu(bc, &alu);
1179 if (r)
1180 return r;
1181 }
1182 return 0;
1183 }
1184
1185 /* load AR register from gpr (bc->ar_reg) with MOVA_INT */
static int load_ar_r6xx(struct r600_bytecode *bc, bool for_src)
{
	struct r600_bytecode_alu mova;
	int r;

	if (bc->ar_loaded)
		return 0;

	/* hack to avoid making MOVA the last instruction in the clause */
	if (bc->cf_last == NULL || (bc->cf_last->ndw >> 1) >= 110) {
		bc->force_add_cf = 1;
	} else if (for_src) {
		insert_nop_r6xx(bc, 4);
		bc->nalu_groups++;
	}

	memset(&mova, 0, sizeof(mova));
	mova.op = ALU_OP1_MOVA_GPR_INT;
	mova.src[0].sel = bc->ar_reg;
	mova.src[0].chan = bc->ar_chan;
	mova.last = 1;
	mova.index_mode = INDEX_MODE_LOOP;

	r = r600_bytecode_add_alu(bc, &mova);
	if (r)
		return r;

	/* no requirement to set uses waterfall on MOVA_GPR_INT */
	bc->ar_loaded = 1;
	return 0;
}
1216
1217 /* load AR register from gpr (bc->ar_reg) with MOVA_INT */
int r600_load_ar(struct r600_bytecode *bc, bool for_src)
{
	struct r600_bytecode_alu mova;
	int r;

	/* r6xx-style AR handling goes through MOVA_GPR_INT instead. */
	if (bc->ar_handling)
		return load_ar_r6xx(bc, for_src);

	if (bc->ar_loaded)
		return 0;

	/* hack to avoid making MOVA the last instruction in the clause */
	if (bc->cf_last == NULL || (bc->cf_last->ndw >> 1) >= 110)
		bc->force_add_cf = 1;

	memset(&mova, 0, sizeof(mova));
	mova.op = ALU_OP1_MOVA_INT;
	mova.src[0].sel = bc->ar_reg;
	mova.src[0].chan = bc->ar_chan;
	mova.last = 1;

	r = r600_bytecode_add_alu(bc, &mova);
	if (r)
		return r;

	/* MOVA_INT requires the r6xx waterfall workaround on this clause. */
	bc->cf_last->r6xx_uses_waterfall = 1;
	bc->ar_loaded = 1;
	return 0;
}
1246
/* Append one ALU instruction to the bytecode stream, opening a new CF
 * clause of the given type when needed (clause-type change, forced
 * split, kcache exhaustion). When the instruction closes an ALU group
 * (alu->last), also runs the per-group passes: slot assignment, group
 * merging, PV/PS substitution, bank-swizzle checking and literal
 * counting. *alu is copied; the caller keeps ownership of it.
 * Returns 0 on success or a negative errno. */
int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
		const struct r600_bytecode_alu *alu, unsigned type)
{
	struct r600_bytecode_alu *nalu = r600_bytecode_alu();
	struct r600_bytecode_alu *lalu;
	int i, r;

	if (!nalu)
		return -ENOMEM;
	memcpy(nalu, alu, sizeof(struct r600_bytecode_alu));

	if (alu->is_op3) {
		/* will fail later since alu does not support it. */
		assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
	}

	if (bc->cf_last != NULL && bc->cf_last->op != type) {
		/* check if we could add it anyway */
		if ((bc->cf_last->op == CF_OP_ALU && type == CF_OP_ALU_PUSH_BEFORE) ||
		    (bc->cf_last->op == CF_OP_ALU_PUSH_BEFORE && type == CF_OP_ALU)) {
			LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
				/* An instruction updating the execute mask
				 * prevents folding into a PUSH_BEFORE clause;
				 * a forced split must have been requested. */
				if (lalu->execute_mask) {
					assert(bc->force_add_cf || !"no force cf");
					bc->force_add_cf = 1;
					break;
				}
				type = CF_OP_ALU_PUSH_BEFORE;
			}
		} else {
			assert(bc->force_add_cf ||!"no force cf");
			bc->force_add_cf = 1;
		}
	}

	/* cf can contains only alu or only vtx or only tex */
	if (bc->cf_last == NULL || bc->force_add_cf) {
		/* close the open group of the clause we are leaving */
		if (bc->cf_last && bc->cf_last->curr_bs_head)
			bc->cf_last->curr_bs_head->last = 1;
		r = r600_bytecode_add_cf(bc);
		if (r) {
			free(nalu);
			return r;
		}
	}
	bc->cf_last->op = type;

	/* Relative kcache access on eg+ requires the index register to have
	 * been loaded beforehand (tracked in index_loaded). */
	if (bc->gfx_level >= EVERGREEN) {
		for (i = 0; i < 3; i++)
			if (nalu->src[i].kc_bank && nalu->src[i].kc_rel)
				assert(bc->index_loaded[nalu->src[i].kc_rel - 1]);
	}

	/* Check AR usage and load it if required */
	/* NOTE(review): r600_load_ar return values are ignored here —
	 * failures would surface later in the build; confirm intentional. */
	for (i = 0; i < 3; i++)
		if (nalu->src[i].rel && !bc->ar_loaded)
			r600_load_ar(bc, true);

	if (nalu->dst.rel && !bc->ar_loaded)
		r600_load_ar(bc, false);

	/* Setup the kcache for this ALU instruction. This will start a new
	 * ALU clause if needed. */
	if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) {
		free(nalu);
		return r;
	}

	if (!bc->cf_last->curr_bs_head) {
		bc->cf_last->curr_bs_head = nalu;
	}
	/* number of gpr == the last gpr used in any alu */
	for (i = 0; i < 3; i++) {
		if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 123) {
			bc->ngpr = nalu->src[i].sel + 1;
		}
		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
			r600_bytecode_special_constants(nalu->src[i].value,
				&nalu->src[i].sel);
	}
	if (nalu->dst.write && nalu->dst.sel >= bc->ngpr && nalu->dst.sel < 123) {
		bc->ngpr = nalu->dst.sel + 1;
	}
	list_addtail(&nalu->list, &bc->cf_last->alu);
	/* each alu use 2 dwords */
	bc->cf_last->ndw += 2;
	bc->ndw += 2;

	/* process cur ALU instructions for bank swizzle */
	if (nalu->last) {
		uint32_t literal[4];
		unsigned nliteral;
		struct r600_bytecode_alu *slots[5];
		/* cayman has 4 ALU slots, the others 5 (incl. trans) */
		int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;
		r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots);
		if (r)
			return r;

		if (bc->cf_last->prev_bs_head) {
			struct r600_bytecode_alu *cur_prev_head = bc->cf_last->prev_bs_head;
			r = merge_inst_groups(bc, slots, cur_prev_head);
			if (r)
				return r;
			/* a successful merge consumed the previous group */
			if (cur_prev_head != bc->cf_last->prev_bs_head)
				bc->nalu_groups--;
		}

		if (bc->cf_last->prev_bs_head) {
			r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head);
			if (r)
				return r;
		}

		r = check_and_set_bank_swizzle(bc, slots);
		if (r)
			return r;

		/* literal dwords follow the group, padded to a multiple of 2 */
		for (i = 0, nliteral = 0; i < max_slots; i++) {
			if (slots[i]) {
				r = r600_bytecode_alu_nliterals(slots[i], literal, &nliteral);
				if (r)
					return r;
			}
		}
		bc->cf_last->ndw += align(nliteral, 2);

		/* rotate the group-head history for the next group */
		bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head;
		bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;
		bc->cf_last->curr_bs_head = NULL;

		bc->nalu_groups++;

		/* r6xx workaround: a group writing through a relative dst
		 * must be followed by a full NOP group */
		if (bc->r6xx_nop_after_rel_dst) {
			for (int i = 0; i < max_slots; ++i) {
				if (slots[i] && slots[i]->dst.rel) {
					insert_nop_r6xx(bc, max_slots);
					bc->nalu_groups++;
					break;
				}
			}
		}
	}

	/* Might need to insert spill write ops after current clause */
	if (nalu->last && bc->n_pending_outputs) {
		while (bc->n_pending_outputs) {
			r = r600_bytecode_add_output(bc, &bc->pending_outputs[--bc->n_pending_outputs]);
			if (r)
				return r;
		}
	}

	return 0;
}
1400
/* Append an ALU instruction to a plain (non-push) CF_OP_ALU clause. */
int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu)
{
	return r600_bytecode_add_alu_type(bc, alu, CF_OP_ALU);
}
1405
r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode * bc)1406 static unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode *bc)
1407 {
1408 switch (bc->gfx_level) {
1409 case R600:
1410 return 8;
1411
1412 case R700:
1413 case EVERGREEN:
1414 case CAYMAN:
1415 return 16;
1416
1417 default:
1418 R600_ASM_ERR("Unknown gfx level %d.\n", bc->gfx_level);
1419 return 8;
1420 }
1421 }
1422
/* True when the last CF clause cannot absorb another vertex fetch. */
static inline bool last_inst_was_not_vtx_fetch(struct r600_bytecode *bc, bool use_tc)
{
	const struct r600_bytecode_cf *last = bc->cf_last;

	if (!(r600_isa_cf(last->op)->flags & CF_FETCH))
		return true;
	if (last->op == CF_OP_GDS)
		return true;
	/* a TEX clause only takes VTX fetches on cayman or via the TC path */
	return bc->gfx_level != CAYMAN && !use_tc && last->op == CF_OP_TEX;
}
1430
/* Append one vertex fetch, optionally routed through the texture cache
 * (use_tc), opening a new fetch clause when the current CF cannot take
 * it. Returns 0 on success or a negative errno. */
static int r600_bytecode_add_vtx_internal(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx,
					  bool use_tc)
{
	struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx();
	int r;

	if (!nvtx)
		return -ENOMEM;
	memcpy(nvtx, vtx, sizeof(*nvtx));

	if (bc->gfx_level >= EVERGREEN) {
		/* relative buffer indexing needs the index register loaded */
		assert(!vtx->buffer_index_mode ||
		       bc->index_loaded[vtx->buffer_index_mode - 1]);
	}

	/* A CF clause holds only one kind of instruction; open a new one
	 * when the current clause cannot take this fetch. */
	if (bc->cf_last == NULL ||
	    last_inst_was_not_vtx_fetch(bc, use_tc) ||
	    bc->force_add_cf) {
		r = r600_bytecode_add_cf(bc);
		if (r) {
			free(nvtx);
			return r;
		}
		if (bc->gfx_level == R600 || bc->gfx_level == R700) {
			bc->cf_last->op = CF_OP_VTX;
		} else if (bc->gfx_level == EVERGREEN) {
			bc->cf_last->op = use_tc ? CF_OP_TEX : CF_OP_VTX;
		} else if (bc->gfx_level == CAYMAN) {
			/* cayman has no dedicated VTX clause */
			bc->cf_last->op = CF_OP_TEX;
		} else {
			R600_ASM_ERR("Unknown gfx level %d.\n", bc->gfx_level);
			free(nvtx);
			return -EINVAL;
		}
	}

	list_addtail(&nvtx->list, &bc->cf_last->vtx);

	/* every fetch instruction occupies 4 dwords */
	bc->cf_last->ndw += 4;
	bc->ndw += 4;
	if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
		bc->force_add_cf = 1;

	bc->ngpr = MAX2(bc->ngpr, vtx->src_gpr + 1);
	bc->ngpr = MAX2(bc->ngpr, vtx->dst_gpr + 1);

	return 0;
}
1488
/* Append a vertex fetch using the regular vertex-cache path. */
int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
{
	return r600_bytecode_add_vtx_internal(bc, vtx, false);
}
1493
/* Append a vertex fetch routed through the texture cache. */
int r600_bytecode_add_vtx_tc(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
{
	return r600_bytecode_add_vtx_internal(bc, vtx, true);
}
1498
/* Append one texture fetch instruction, opening a new CF_OP_TEX clause
 * when required (read-after-write hazard, pending VTX fetches, gradient
 * grouping, clause full). Returns 0 on success or a negative errno. */
int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex)
{
	struct r600_bytecode_tex *ntex = r600_bytecode_tex();
	int r;

	if (!ntex)
		return -ENOMEM;
	memcpy(ntex, tex, sizeof(struct r600_bytecode_tex));

	if (bc->gfx_level >= EVERGREEN) {
		/* relative sampler/resource indexing needs the index
		 * register loaded beforehand */
		assert(!tex->sampler_index_mode ||
		       bc->index_loaded[tex->sampler_index_mode - 1]);
		assert(!tex->resource_index_mode ||
		       bc->index_loaded[tex->resource_index_mode - 1]);
	}

	/* we can't fetch data und use it as texture lookup address in the same TEX clause */
	if (bc->cf_last != NULL &&
	    bc->cf_last->op == CF_OP_TEX) {
		struct r600_bytecode_tex *ttex;
		/* channels the new fetch reads from its source GPR */
		uint8_t use_mask = ((1 << ntex->src_sel_x) |
				    (1 << ntex->src_sel_y) |
				    (1 << ntex->src_sel_z) |
				    (1 << ntex->src_sel_w)) & 0xf;

		LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
			if (ttex->dst_gpr == ntex->src_gpr) {
				/* dst_sel >= 6 means that channel is masked */
				uint8_t write_mask = (ttex->dst_sel_x < 6 ? 1 : 0) |
						     (ttex->dst_sel_y < 6 ? 2 : 0) |
						     (ttex->dst_sel_z < 6 ? 4 : 0) |
						     (ttex->dst_sel_w < 6 ? 8 : 0);
				if (use_mask & write_mask) {
					bc->force_add_cf = 1;
					break;
				}
			}
		}
		/* vtx instrs get inserted after tex, so make sure we aren't moving the tex
		 * before (say) the instr fetching the texcoord.
		 */
		if (!list_is_empty(&bc->cf_last->vtx))
			bc->force_add_cf = 1;

		/* slight hack to make gradients always go into same cf */
		if (ntex->op == FETCH_OP_SET_GRADIENTS_H)
			bc->force_add_cf = 1;
	}

	/* cf can contains only alu or only vtx or only tex */
	if (bc->cf_last == NULL ||
	    bc->cf_last->op != CF_OP_TEX ||
	    bc->force_add_cf) {
		r = r600_bytecode_add_cf(bc);
		if (r) {
			free(ntex);
			return r;
		}
		bc->cf_last->op = CF_OP_TEX;
	}
	if (ntex->src_gpr >= bc->ngpr) {
		bc->ngpr = ntex->src_gpr + 1;
	}
	if (ntex->dst_gpr >= bc->ngpr) {
		bc->ngpr = ntex->dst_gpr + 1;
	}
	list_addtail(&ntex->list, &bc->cf_last->tex);
	/* each texture fetch use 4 dwords */
	bc->cf_last->ndw += 4;
	bc->ndw += 4;
	if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
		bc->force_add_cf = 1;
	return 0;
}
1572
r600_bytecode_add_gds(struct r600_bytecode * bc,const struct r600_bytecode_gds * gds)1573 int r600_bytecode_add_gds(struct r600_bytecode *bc, const struct r600_bytecode_gds *gds)
1574 {
1575 struct r600_bytecode_gds *ngds = r600_bytecode_gds();
1576 int r;
1577
1578 if (ngds == NULL)
1579 return -ENOMEM;
1580 memcpy(ngds, gds, sizeof(struct r600_bytecode_gds));
1581
1582 if (bc->gfx_level >= EVERGREEN) {
1583 assert(!gds->uav_index_mode ||
1584 bc->index_loaded[gds->uav_index_mode - 1]);
1585 }
1586
1587 if (bc->cf_last == NULL ||
1588 bc->cf_last->op != CF_OP_GDS ||
1589 bc->force_add_cf) {
1590 r = r600_bytecode_add_cf(bc);
1591 if (r) {
1592 free(ngds);
1593 return r;
1594 }
1595 bc->cf_last->op = CF_OP_GDS;
1596 }
1597
1598 list_addtail(&ngds->list, &bc->cf_last->gds);
1599 bc->cf_last->ndw += 4; /* each GDS uses 4 dwords */
1600 if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
1601 bc->force_add_cf = 1;
1602 return 0;
1603 }
1604
r600_bytecode_add_cfinst(struct r600_bytecode * bc,unsigned op)1605 int r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op)
1606 {
1607 int r;
1608
1609 /* Emit WAIT_ACK before control flow to ensure pending writes are always acked. */
1610 if (op != CF_OP_WAIT_ACK && op != CF_OP_MEM_SCRATCH)
1611 r600_bytecode_wait_acks(bc);
1612
1613 r = r600_bytecode_add_cf(bc);
1614 if (r)
1615 return r;
1616
1617 bc->cf_last->cond = V_SQ_CF_COND_ACTIVE;
1618 bc->cf_last->op = op;
1619 return 0;
1620 }
1621
/* Terminate a cayman program with an explicit CF_END instruction. */
int cm_bytecode_add_cf_end(struct r600_bytecode *bc)
{
	return r600_bytecode_add_cfinst(bc, CF_OP_CF_END);
}
1626
1627 /* common to all 3 families */
/* Encode one vertex-fetch instruction as 4 dwords at bc->bytecode[id];
 * memory fetches are delegated to the r700 mem-fetch encoder. */
static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id)
{
	if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
		return r700_bytecode_fetch_mem_build(bc, vtx, id);
	/* WORD0: opcode, buffer id, fetch type, source register/channel */
	bc->bytecode[id] = S_SQ_VTX_WORD0_VTX_INST(r600_isa_fetch_opcode(bc->isa->hw_class, vtx->op)) |
			S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
			S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
			S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
			S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x);
	/* mega-fetch fields only exist before cayman */
	if (bc->gfx_level < CAYMAN)
		bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
	id++;
	/* WORD1: destination swizzle and data format description */
	bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) |
				S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) |
				S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) |
				S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) |
				S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) |
				S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) |
				S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) |
				S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) |
				S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) |
				S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
	/* WORD2: offset, endian swap and (eg+) buffer index mode */
	bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)|
				S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian);
	if (bc->gfx_level >= EVERGREEN)
		bc->bytecode[id] |= ((vtx->buffer_index_mode & 0x3) << 21); // S_SQ_VTX_WORD2_BIM(vtx->buffer_index_mode);
	if (bc->gfx_level < CAYMAN)
		bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1);
	id++;
	/* WORD3 is reserved/padding */
	bc->bytecode[id++] = 0;
	return 0;
}
1660
1661 /* common to all 3 families */
/* Encode one texture-fetch instruction as 4 dwords at bc->bytecode[id]. */
static int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id)
{
	/* WORD0: opcode, resource id and source register */
	bc->bytecode[id] = S_SQ_TEX_WORD0_TEX_INST(
					r600_isa_fetch_opcode(bc->isa->hw_class, tex->op)) |
				EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) |
				S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
				S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) |
				S_SQ_TEX_WORD0_SRC_REL(tex->src_rel);
	/* eg+ adds sampler/resource index-mode bits to WORD0 */
	if (bc->gfx_level >= EVERGREEN)
		bc->bytecode[id] |= ((tex->sampler_index_mode & 0x3) << 27) | // S_SQ_TEX_WORD0_SIM(tex->sampler_index_mode);
				((tex->resource_index_mode & 0x3) << 25); // S_SQ_TEX_WORD0_RIM(tex->resource_index_mode)
	id++;
	/* WORD1: destination register/swizzle, LOD bias, coordinate types */
	bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) |
				S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) |
				S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) |
				S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) |
				S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) |
				S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) |
				S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) |
				S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) |
				S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) |
				S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) |
				S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w);
	/* WORD2: texel offsets, sampler id and source swizzle */
	bc->bytecode[id++] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) |
				S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) |
				S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) |
				S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) |
				S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) |
				S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) |
				S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) |
				S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w);
	/* WORD3 is reserved/padding */
	bc->bytecode[id++] = 0;
	return 0;
}
1696
1697 /* r600 only, r700/eg bits in r700_asm.c */
/* Encode one ALU instruction as 2 dwords at bc->bytecode[id] (r600
 * encoding only; r700/eg variants live in r700_asm.c / eg_asm.c). */
static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)
{
	unsigned opcode = r600_isa_alu_opcode(bc->isa->hw_class, alu->op);

	/* don't replace gpr by pv or ps for destination register */
	/* WORD0: both source operands plus index mode, predicate, last bit */
	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
			     S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
			     S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
			     S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
			     S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
			     S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
			     S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
			     S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
			     S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) |
			     S_SQ_ALU_WORD0_PRED_SEL(alu->pred_sel) |
			     S_SQ_ALU_WORD0_LAST(alu->last);

	if (alu->is_op3) {
		/* OP3 encoding carries src2 instead of abs/omod/write bits */
		assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
					S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
					S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
					S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
					S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) |
					S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
					S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
					S_SQ_ALU_WORD1_OP3_ALU_INST(opcode) |
					S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle);
	} else {
		/* OP2 encoding: abs modifiers, write mask, omod, exec-mask
		 * and predicate update bits */
		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
					S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
					S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
					S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
					S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
					S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
					S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
					S_SQ_ALU_WORD1_OP2_ALU_INST(opcode) |
					S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
					S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->execute_mask) |
					S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->update_pred);
	}
	return 0;
}
1743
/* Encode an r600 fetch-clause CF word pair: clause address (in
 * instruction pairs, hence >> 1) and fetch count (ndw/4 instructions). */
static void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf)
{
	*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
	*bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) |
			S_SQ_CF_WORD1_BARRIER(1) |
			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)|
			S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
}
1752
1753 /* common for r600/r700 - eg in eg_asm.c */
/* Encode one CF instruction (2 dwords) at bc->bytecode[cf->id],
 * dispatching on the CF class: native, ALU clause, fetch clause,
 * export, memory export, or plain control flow. r600/r700 only;
 * evergreen lives in eg_asm.c. */
static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
{
	unsigned id = cf->id;
	const struct cf_op_info *cfop = r600_isa_cf(cf->op);
	unsigned opcode = r600_isa_cf_opcode(bc->isa->hw_class, cf->op);


	if (cf->op == CF_NATIVE) {
		/* pre-encoded CF words are copied verbatim */
		bc->bytecode[id++] = cf->isa[0];
		bc->bytecode[id++] = cf->isa[1];
	} else if (cfop->flags & CF_ALU) {
		/* ALU clause: clause address plus kcache lock description */
		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
					S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
					S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
					S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);

		bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(opcode) |
					S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
					S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
					S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
					S_SQ_CF_ALU_WORD1_BARRIER(1) |
					S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->gfx_level == R600 ? cf->r6xx_uses_waterfall : 0) |
					S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
	} else if (cfop->flags & CF_FETCH) {
		if (bc->gfx_level == R700)
			r700_bytecode_cf_vtx_build(&bc->bytecode[id], cf);
		else
			r600_bytecode_cf_vtx_build(&bc->bytecode[id], cf);
	} else if (cfop->flags & CF_EXP) {
		/* export: destination array slot plus source swizzle */
		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
					S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
					S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
					S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) |
					S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr);
		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
					S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
					S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
					S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
					S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
					S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
					S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
					S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
	} else if (cfop->flags & CF_MEM) {
		/* memory export: like CF_EXP but with array size/comp mask
		 * instead of a swizzle */
		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
					S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
					S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
					S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) |
					S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr);
		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
					S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
					S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
					S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program) |
					S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) |
					S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask);
	} else {
		/* plain control flow (jump/loop/etc.): target address,
		 * condition and stack pop count */
		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) |
					S_SQ_CF_WORD1_BARRIER(1) |
					S_SQ_CF_WORD1_COND(cf->cond) |
					S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
					S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
	}
	return 0;
}
1818
/* Final assembly pass: compute the address of every CF block, allocate
 * the flat dword buffer (bc->bytecode) and encode all CF instructions
 * and their ALU/VTX/TEX/GDS clauses into it.
 * Returns 0 on success or a negative errno. */
int r600_bytecode_build(struct r600_bytecode *bc)
{
	struct r600_bytecode_cf *cf;
	struct r600_bytecode_alu *alu;
	struct r600_bytecode_vtx *vtx;
	struct r600_bytecode_tex *tex;
	struct r600_bytecode_gds *gds;
	uint32_t literal[4];
	unsigned nliteral;
	unsigned addr;
	int i, r;

	if (!bc->nstack) { // If not 0, Stack_size already provided by llvm
		if (bc->stack.max_entries)
			bc->nstack = bc->stack.max_entries;
		else if (bc->type == PIPE_SHADER_VERTEX ||
			 bc->type == PIPE_SHADER_TESS_EVAL ||
			 bc->type == PIPE_SHADER_TESS_CTRL)
			bc->nstack = 1;
	}

	/* first path compute addr of each CF block */
	/* addr start after all the CF instructions */
	addr = bc->cf_last->id + 2;
	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
		if (r600_isa_cf(cf->op)->flags & CF_FETCH) {
			/* fetch clauses must start on a 4-dword boundary */
			addr += 3;
			addr &= 0xFFFFFFFCUL;
		}
		cf->addr = addr;
		addr += cf->ndw;
		bc->ndw = cf->addr + cf->ndw;
	}
	free(bc->bytecode);
	bc->bytecode = calloc(4, bc->ndw);
	if (bc->bytecode == NULL)
		return -ENOMEM;
	/* second pass: encode each CF block and its clause contents */
	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
		const struct cf_op_info *cfop = r600_isa_cf(cf->op);
		addr = cf->addr;
		if (bc->gfx_level >= EVERGREEN)
			r = eg_bytecode_cf_build(bc, cf);
		else
			r = r600_bytecode_cf_build(bc, cf);
		if (r)
			return r;
		if (cfop->flags & CF_ALU) {
			nliteral = 0;
			memset(literal, 0, sizeof(literal));
			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
				r = r600_bytecode_alu_nliterals(alu, literal, &nliteral);
				if (r)
					return r;
				r600_bytecode_alu_adjust_literals(alu, literal, nliteral);
				r600_bytecode_assign_kcache_banks(alu, cf->kcache);

				switch(bc->gfx_level) {
				case R600:
					r = r600_bytecode_alu_build(bc, alu, addr);
					break;
				case R700:
					r = r700_bytecode_alu_build(bc, alu, addr);
					break;
				case EVERGREEN:
				case CAYMAN:
					r = eg_bytecode_alu_build(bc, alu, addr);
					break;
				default:
					R600_ASM_ERR("unknown gfx level %d.\n", bc->gfx_level);
					return -EINVAL;
				}
				if (r)
					return r;
				addr += 2;
				if (alu->last) {
					/* literal dwords follow each group,
					 * padded to a multiple of 2 */
					for (i = 0; i < align(nliteral, 2); ++i) {
						bc->bytecode[addr++] = literal[i];
					}
					nliteral = 0;
					memset(literal, 0, sizeof(literal));
				}
			}
		} else if (cf->op == CF_OP_VTX) {
			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
				r = r600_bytecode_vtx_build(bc, vtx, addr);
				if (r)
					return r;
				addr += 4;
			}
		} else if (cf->op == CF_OP_GDS) {
			assert(bc->gfx_level >= EVERGREEN);
			LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) {
				r = eg_bytecode_gds_build(bc, gds, addr);
				if (r)
					return r;
				addr += 4;
			}
		} else if (cf->op == CF_OP_TEX) {
			/* on eg+ a TEX clause may also carry VTX fetches
			 * routed through the texture cache */
			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
				assert(bc->gfx_level >= EVERGREEN);
				r = r600_bytecode_vtx_build(bc, vtx, addr);
				if (r)
					return r;
				addr += 4;
			}
			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
				r = r600_bytecode_tex_build(bc, tex, addr);
				if (r)
					return r;
				addr += 4;
			}
		}
	}
	return 0;
}
1934
r600_bytecode_clear(struct r600_bytecode * bc)1935 void r600_bytecode_clear(struct r600_bytecode *bc)
1936 {
1937 struct r600_bytecode_cf *cf = NULL, *next_cf;
1938
1939 free(bc->bytecode);
1940 bc->bytecode = NULL;
1941
1942 LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
1943 struct r600_bytecode_alu *alu = NULL, *next_alu;
1944 struct r600_bytecode_tex *tex = NULL, *next_tex;
1945 struct r600_bytecode_tex *vtx = NULL, *next_vtx;
1946 struct r600_bytecode_gds *gds = NULL, *next_gds;
1947
1948 LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
1949 free(alu);
1950 }
1951
1952 list_inithead(&cf->alu);
1953
1954 LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) {
1955 free(tex);
1956 }
1957
1958 list_inithead(&cf->tex);
1959
1960 LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) {
1961 free(vtx);
1962 }
1963
1964 list_inithead(&cf->vtx);
1965
1966 LIST_FOR_EACH_ENTRY_SAFE(gds, next_gds, &cf->gds, list) {
1967 free(gds);
1968 }
1969
1970 list_inithead(&cf->gds);
1971
1972 free(cf);
1973 }
1974
1975 list_inithead(&cf->list);
1976 }
1977
/* Print one swizzle component (x, y, z, w, 0, 1 or _) to stderr and
 * return the number of characters written. */
static int print_swizzle(unsigned swz)
{
	static const char swzchars[] = "xyzw01?_";

	/* 6 ('?') is not a valid swizzle value */
	assert(swz < 8 && swz != 6);
	return fprintf(stderr, "%c", swzchars[swz]);
}
1984
/* Print a register/constant selector to stderr, with brackets and the
 * relative-addressing decoration implied by index_mode. Returns the
 * number of characters written. */
static int print_sel(unsigned sel, unsigned rel, unsigned index_mode,
		unsigned need_brackets)
{
	int n = 0;

	/* index modes 5+ on low GPRs denote global addressing ("G") */
	if (rel && index_mode >= 5 && sel < 128)
		n += fprintf(stderr, "G");

	if (rel || need_brackets)
		n += fprintf(stderr, "[");

	n += fprintf(stderr, "%d", sel);

	if (rel) {
		if (index_mode == 0 || index_mode == 6)
			n += fprintf(stderr, "+AR");
		else if (index_mode == 4)
			n += fprintf(stderr, "+AL");
	}

	if (rel || need_brackets)
		n += fprintf(stderr, "]");

	return n;
}
2006
print_dst(struct r600_bytecode_alu * alu)2007 static int print_dst(struct r600_bytecode_alu *alu)
2008 {
2009 int o = 0;
2010 unsigned sel = alu->dst.sel;
2011 char reg_char = 'R';
2012 if (sel >= 128 - 4) { /* clause temporary gpr */
2013 sel -= 128 - 4;
2014 reg_char = 'T';
2015 }
2016
2017 if (alu_writes(alu)) {
2018 o += fprintf(stderr, "%c", reg_char);
2019 o += print_sel(sel, alu->dst.rel, alu->index_mode, 0);
2020 } else {
2021 o += fprintf(stderr, "__");
2022 }
2023 o += fprintf(stderr, ".");
2024 o += print_swizzle(alu->dst.chan);
2025 return o;
2026 }
2027
/* Print ALU source operand @idx of @alu to stderr (register, kcache/constant
 * reference, inline constant, or literal), including negate/abs decoration;
 * returns the number of characters written. */
static int print_src(struct r600_bytecode_alu *alu, unsigned idx)
{
	int o = 0;
	struct r600_bytecode_alu_src *src = &alu->src[idx];
	unsigned sel = src->sel, need_sel = 1, need_chan = 1, need_brackets = 0;

	if (src->neg)
		o += fprintf(stderr,"-");
	if (src->abs)
		o += fprintf(stderr,"|");

	/* Decode the selector ranges.  The order of these tests matters:
	 * the >= 512, >= 448, >= 288, >= 256 checks must come before the
	 * final else so that range falls through only for 192..255
	 * (inline/special operands). */
	if (sel < 128 - 4) {
		/* ordinary GPR */
		o += fprintf(stderr, "R");
	} else if (sel < 128) {
		/* clause-temporary GPRs 124..127 -> T0..T3 */
		o += fprintf(stderr, "T");
		sel -= 128 - 4;
	} else if (sel < 160) {
		o += fprintf(stderr, "KC0");
		need_brackets = 1;
		sel -= 128;
	} else if (sel < 192) {
		o += fprintf(stderr, "KC1");
		need_brackets = 1;
		sel -= 160;
	} else if (sel >= 512) {
		/* direct constant-buffer access; bank number from kc_bank */
		o += fprintf(stderr, "C%d", src->kc_bank);
		need_brackets = 1;
		sel -= 512;
	} else if (sel >= 448) {
		o += fprintf(stderr, "Param");
		sel -= 448;
		need_chan = 0;
	} else if (sel >= 288) {
		o += fprintf(stderr, "KC3");
		need_brackets = 1;
		sel -= 288;
	} else if (sel >= 256) {
		o += fprintf(stderr, "KC2");
		need_brackets = 1;
		sel -= 256;
	} else {
		/* 192..255: inline constants and special hardware operands,
		 * printed symbolically with no numeric selector */
		need_sel = 0;
		need_chan = 0;
		switch (sel) {
		case EG_V_SQ_ALU_SRC_LDS_DIRECT_A:
			o += fprintf(stderr, "LDS_A[0x%08X]", src->value);
			break;
		case EG_V_SQ_ALU_SRC_LDS_DIRECT_B:
			o += fprintf(stderr, "LDS_B[0x%08X]", src->value);
			break;
		case EG_V_SQ_ALU_SRC_LDS_OQ_A:
			o += fprintf(stderr, "LDS_OQ_A");
			need_chan = 1;
			break;
		case EG_V_SQ_ALU_SRC_LDS_OQ_B:
			o += fprintf(stderr, "LDS_OQ_B");
			need_chan = 1;
			break;
		case EG_V_SQ_ALU_SRC_LDS_OQ_A_POP:
			o += fprintf(stderr, "LDS_OQ_A_POP");
			need_chan = 1;
			break;
		case EG_V_SQ_ALU_SRC_LDS_OQ_B_POP:
			o += fprintf(stderr, "LDS_OQ_B_POP");
			need_chan = 1;
			break;
		case EG_V_SQ_ALU_SRC_TIME_LO:
			o += fprintf(stderr, "TIME_LO");
			break;
		case EG_V_SQ_ALU_SRC_TIME_HI:
			o += fprintf(stderr, "TIME_HI");
			break;
		case EG_V_SQ_ALU_SRC_SE_ID:
			o += fprintf(stderr, "SE_ID");
			break;
		case EG_V_SQ_ALU_SRC_SIMD_ID:
			o += fprintf(stderr, "SIMD_ID");
			break;
		case EG_V_SQ_ALU_SRC_HW_WAVE_ID:
			o += fprintf(stderr, "HW_WAVE_ID");
			break;
		case V_SQ_ALU_SRC_PS:
			o += fprintf(stderr, "PS");
			break;
		case V_SQ_ALU_SRC_PV:
			o += fprintf(stderr, "PV");
			need_chan = 1;
			break;
		case V_SQ_ALU_SRC_LITERAL:
		{
			/* literal dword: show raw hex and its float reinterpretation;
			 * memcpy avoids a strict-aliasing violation */
			const uint32_t value_uint32 = src->value;
			float value_float;
			memcpy(&value_float, &value_uint32, sizeof(float));
			o += fprintf(stderr, "[0x%08X %f]", value_uint32, value_float);
		}
			break;
		case V_SQ_ALU_SRC_0_5:
			o += fprintf(stderr, "0.5");
			break;
		case V_SQ_ALU_SRC_M_1_INT:
			o += fprintf(stderr, "-1");
			break;
		case V_SQ_ALU_SRC_1_INT:
			o += fprintf(stderr, "1");
			break;
		case V_SQ_ALU_SRC_1:
			o += fprintf(stderr, "1.0");
			break;
		case V_SQ_ALU_SRC_0:
			o += fprintf(stderr, "0");
			break;
		default:
			o += fprintf(stderr, "??IMM_%d", sel);
			break;
		}
	}

	if (need_sel)
		o += print_sel(sel, src->rel, alu->index_mode, need_brackets);

	if (need_chan) {
		o += fprintf(stderr, ".");
		o += print_swizzle(src->chan);
	}

	/* closing bar for absolute-value decoration */
	if (src->abs)
		o += fprintf(stderr,"|");

	return o;
}
2158
/* Pad stderr output with spaces from column @p up to column @c;
 * returns the number of spaces written (0 if already past @c). */
static int print_indent(int p, int c)
{
	int n = 0;

	for (; p < c; p++)
		n += fprintf(stderr, " ");
	return n;
}
2166
/* Mnemonics for the RAT (Random Access Target) memory-export instructions,
 * indexed by cf->rat.inst when disassembling CF_RAT clauses.  The table
 * layout mirrors the hardware opcode encoding, so the RESERVEDnn entries
 * are placeholders for unused encodings, not usable instructions. */
const char *rat_instr_name[] = {
	"NOP",
	"STORE_TYPED",
	"STORE_RAW",
	"STORE_RAW_FDENORM",
	"CMP_XCHG_INT",
	"CMP_XCHG_FLT",
	"CMP_XCHG_FDENORM",
	"ADD",
	"SUB",
	"RSUB",
	"MIN_INT",
	"MIN_UINT",
	"MAX_INT",
	"MAX_UINT",
	"AND",
	"OR",
	"XOR",
	"MSKOR",
	"INC_UINT",
	"DEC_UINT",
	"RESERVED20",
	"RESERVED21",
	"RESERVED22",
	"RESERVED23",
	"RESERVED24",
	"RESERVED25",
	"RESERVED26",
	"RESERVED27",
	"RESERVED28",
	"RESERVED29",
	"RESERVED30",
	"RESERVED31",
	/* opcodes 32+ are the "_RTN" variants that return the old value */
	"NOP_RTN",
	"RESERVED33",
	"XCHG_RTN",
	"XCHG_FDENORM_RTN",
	"CMPXCHG_INT_RTN",
	"CMPXCHG_FLT_RTN",
	"CMPXCHG_FDENORM_RTN",
	"ADD_RTN",
	"SUB_RTN",
	"RSUB_RTN",
	"MIN_INT_RTN",
	"MIN_UINT_RTN",
	"MAX_INT_RTN",
	"MAX_UINT_RTN",
	"AND_RTN",
	"OR_RTN",
	"XOR_RTN",
	"MSKOR_RTN",
	"INC_UINT_RTN",
	"DEC_UINT_RTN",
};
2221
2222
/* Disassemble the encoded bytecode of @bc to stderr.
 *
 * Walks the CF (control-flow) list and, for each CF entry, the attached
 * ALU/TEX/VTX/GDS instruction lists.  `id` tracks the dword offset into
 * bc->bytecode so each decoded line is printed next to its raw dwords.
 * Output layout is column-aligned via print_indent(). */
void r600_bytecode_disasm(struct r600_bytecode *bc)
{
	const char *index_mode[] = {"CF_INDEX_NONE", "CF_INDEX_0", "CF_INDEX_1"};
	static int index = 0; /* running shader counter across calls */
	struct r600_bytecode_cf *cf = NULL;
	struct r600_bytecode_alu *alu = NULL;
	struct r600_bytecode_vtx *vtx = NULL;
	struct r600_bytecode_tex *tex = NULL;
	struct r600_bytecode_gds *gds = NULL;

	unsigned id, ngr = 0, last;
	uint32_t literal[4];
	unsigned nliteral;
	char chip = '6';

	/* one-character chip tag for the header line */
	switch (bc->gfx_level) {
	case R700:
		chip = '7';
		break;
	case EVERGREEN:
		chip = 'E';
		break;
	case CAYMAN:
		chip = 'C';
		break;
	case R600:
	default:
		chip = '6';
		break;
	}
	fprintf(stderr, "bytecode %d dw -- %d gprs -- %d nstack -------------\n",
	        bc->ndw, bc->ngpr, bc->nstack);
	fprintf(stderr, "shader %d -- %c\n", index++, chip);

	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
		id = cf->id;
		if (cf->op == CF_NATIVE) {
			/* raw pre-encoded CF dwords, nothing to decode */
			fprintf(stderr, "%04d %08X %08X CF_NATIVE\n", id, bc->bytecode[id],
					bc->bytecode[id + 1]);
		} else {
			const struct cf_op_info *cfop = r600_isa_cf(cf->op);
			if (cfop->flags & CF_ALU) {
				/* ALU clause header: dword count, clause address,
				 * kcache bank bindings */
				if (cf->eg_alu_extended) {
					/* extended header occupies an extra dword pair */
					fprintf(stderr, "%04d %08X %08X  %s\n", id, bc->bytecode[id],
							bc->bytecode[id + 1], "ALU_EXT");
					id += 2;
				}
				fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
						bc->bytecode[id + 1], cfop->name);
				fprintf(stderr, "%d @%d ", cf->ndw / 2, cf->addr);
				for (int i = 0; i < 4; ++i) {
					if (cf->kcache[i].mode) {
						/* kcache window: 16 constants per mode unit */
						int c_start = (cf->kcache[i].addr << 4);
						int c_end = c_start + (cf->kcache[i].mode << 4);
						fprintf(stderr, "KC%d[CB%d:%d-%d%s%s] ",
						        i, cf->kcache[i].bank, c_start, c_end,
						        cf->kcache[i].index_mode ? " " : "",
						        cf->kcache[i].index_mode ? index_mode[cf->kcache[i].index_mode] : "");
					}
				}
				fprintf(stderr, "\n");
			} else if (cfop->flags & CF_FETCH) {
				/* TEX/VTX fetch clause header: instr count and address */
				fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
						bc->bytecode[id + 1], cfop->name);
				fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
				if (cf->vpm)
					fprintf(stderr, "VPM ");
				if (cf->end_of_program)
					fprintf(stderr, "EOP ");
				fprintf(stderr, "\n");

			} else if (cfop->flags & CF_EXP) {
				/* export to pixel/position/parameter */
				int o = 0;
				const char *exp_type[] = {"PIXEL", "POS  ", "PARAM"};
				o += fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
						bc->bytecode[id + 1], cfop->name);
				o += print_indent(o, 43);
				o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
				if (cf->output.burst_count > 1) {
					/* burst export: a contiguous range of targets/GPRs */
					o += fprintf(stderr, "%d-%d ", cf->output.array_base,
							cf->output.array_base + cf->output.burst_count - 1);

					o += print_indent(o, 55);
					o += fprintf(stderr, "R%d-%d.", cf->output.gpr,
							cf->output.gpr + cf->output.burst_count - 1);
				} else {
					o += fprintf(stderr, "%d ", cf->output.array_base);
					o += print_indent(o, 55);
					o += fprintf(stderr, "R%d.", cf->output.gpr);
				}

				o += print_swizzle(cf->output.swizzle_x);
				o += print_swizzle(cf->output.swizzle_y);
				o += print_swizzle(cf->output.swizzle_z);
				o += print_swizzle(cf->output.swizzle_w);

				print_indent(o, 67);

				fprintf(stderr, " ES:%X ", cf->output.elem_size);
				if (cf->mark)
					fprintf(stderr, "MARK ");
				if (!cf->barrier)
					fprintf(stderr, "NO_BARRIER ");
				if (cf->end_of_program)
					fprintf(stderr, "EOP ");
				fprintf(stderr, "\n");
			} else if (r600_isa_cf(cf->op)->flags & CF_MEM) {
				/* memory write/scratch/RAT export */
				int o = 0;
				const char *exp_type_r600[] = {"WRITE", "WRITE_IND", "READ",
				                               "READ_IND"};
				const char *exp_type_r700[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
				                               "WRITE_IND_ACK"};

				/* type field encoding changed between r600 and r700 */
				const char **exp_type = bc->gfx_level >= R700 ?
					exp_type_r700 : exp_type_r600;

				o += fprintf(stderr, "%04d %08X %08X  %s ", id,
						bc->bytecode[id], bc->bytecode[id + 1], cfop->name);
				o += print_indent(o, 43);
				o += fprintf(stderr, "%s ", exp_type[cf->output.type]);

				if (r600_isa_cf(cf->op)->flags & CF_RAT) {
					o += fprintf(stderr, "RAT%d", cf->rat.id);
					if (cf->rat.index_mode) {
						o += fprintf(stderr, "[IDX%d]", cf->rat.index_mode - 1);
					}
					assert(ARRAY_SIZE(rat_instr_name) > cf->rat.inst);
					o += fprintf(stderr, " %s ", rat_instr_name[cf->rat.inst]);
				}

				if (cf->output.burst_count > 1) {
					o += fprintf(stderr, "%d-%d ", cf->output.array_base,
							cf->output.array_base + cf->output.burst_count - 1);
					o += print_indent(o, 55);
					o += fprintf(stderr, "R%d-%d.", cf->output.gpr,
							cf->output.gpr + cf->output.burst_count - 1);
				} else {
					o += fprintf(stderr, "%d ", cf->output.array_base);
					o += print_indent(o, 55);
					o += fprintf(stderr, "R%d.", cf->output.gpr);
				}
				/* component mask: '_' for disabled channels */
				for (int i = 0; i < 4; ++i) {
					if (cf->output.comp_mask & (1 << i))
						o += print_swizzle(i);
					else
						o += print_swizzle(7);
				}

				if (cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND ||
				    cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND)
					o += fprintf(stderr, " R%d.xyz", cf->output.index_gpr);

				o += print_indent(o, 67);

				fprintf(stderr, " ES:%i ", cf->output.elem_size);
				if (cf->output.array_size != 0xFFF)
					fprintf(stderr, "AS:%i ", cf->output.array_size);
				if (cf->mark)
					fprintf(stderr, "MARK ");
				if (!cf->barrier)
					fprintf(stderr, "NO_BARRIER ");
				if (cf->end_of_program)
					fprintf(stderr, "EOP ");

				if (cf->output.mark)
					fprintf(stderr, "MARK ");

				fprintf(stderr, "\n");
			} else {
				/* everything else: jumps, loops, pushes/pops, emits */
				fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
						bc->bytecode[id + 1], cfop->name);
				fprintf(stderr, "@%d ", cf->cf_addr);
				if (cf->cond)
					fprintf(stderr, "CND:%X ", cf->cond);
				if (cf->pop_count)
					fprintf(stderr, "POP:%X ", cf->pop_count);
				if (cf->count && (cfop->flags & CF_EMIT))
					fprintf(stderr, "STREAM%d ", cf->count);
				if (cf->vpm)
					fprintf(stderr, "VPM ");
				if (cf->end_of_program)
					fprintf(stderr, "EOP ");
				fprintf(stderr, "\n");
			}
		}

		/* ALU instructions of this clause; `last` starts true so the
		 * first instruction opens a new instruction group */
		id = cf->addr;
		nliteral = 0;
		last = 1;
		int chan_mask = 0;
		LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
			const char chan[] = "xyzwt";
			const char *omod_str[] = {"","*2","*4","/2"};
			const struct alu_op_info *aop = r600_isa_alu(alu->op);
			int o = 0;

			r600_bytecode_alu_nliterals(alu, literal, &nliteral);
			o += fprintf(stderr, " %04d %08X %08X  ", id, bc->bytecode[id], bc->bytecode[id+1]);
			if (last)
				o += fprintf(stderr, "%4d ", ++ngr);
			else
				o += fprintf(stderr, "     ");

			/* 't' marks the trans slot: either the vector slot for this
			 * channel is already taken, or the op is trans-only
			 * (Cayman has no trans unit, so AF_S ops print normally) */
			if ((chan_mask & (1 << alu->dst.chan)) ||
			    ((aop->slots[bc->isa->hw_class] == AF_S) && !(bc->isa->hw_class == ISA_CC_CAYMAN)))
				o += fprintf(stderr, "t:");
			else
				o += fprintf(stderr, "%c:", chan[alu->dst.chan]);
			chan_mask |= 1 << alu->dst.chan;

			o += fprintf(stderr, "%c%c %c ", alu->execute_mask ? 'M':' ',
					alu->update_pred ? 'P':' ',
					alu->pred_sel ? alu->pred_sel==2 ? '0':'1':' ');

			o += fprintf(stderr, "%s%s%s ", aop->name,
					omod_str[alu->omod], alu->dst.clamp ? "_sat":"");

			o += print_indent(o,60);
			if (bc->isa->hw_class == ISA_CC_CAYMAN && alu->op == ALU_OP1_MOVA_INT) {
				/* on Cayman, MOVA_INT's dst.sel selects a special register */
				switch (alu->dst.sel) {
				case 0: fprintf(stderr, "AR"); break;
				case 2: fprintf(stderr, "CF_IDX0"); break;
				case 3: fprintf(stderr, "CF_IDX1"); break;
				}
			} else {
				o += print_dst(alu);
			}
			for (int i = 0; i < aop->src_count; ++i) {
				o += fprintf(stderr, i == 0 ? ",  ": ", ");
				o += print_src(alu, i);
			}

			if (alu->bank_swizzle) {
				o += print_indent(o,75);
				o += fprintf(stderr, "  BS:%d", alu->bank_swizzle);
			}

			fprintf(stderr, "\n");
			id += 2;

			if (alu->last) {
				/* literal dwords follow the last instruction of a group,
				 * padded to an even dword count */
				for (unsigned i = 0; i < nliteral; i++, id++) {
					float *f = (float*)(bc->bytecode + id);
					o = fprintf(stderr, " %04d %08X", id, bc->bytecode[id]);
					print_indent(o, 60);
					fprintf(stderr, "         %f (%d)\n", *f, *(bc->bytecode + id));
				}
				id += nliteral & 1;
				nliteral = 0;
				chan_mask = 0;
			}
			last = alu->last;
		}

		/* texture fetch instructions (4 dwords each) */
		LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
			int o = 0;
			o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
					bc->bytecode[id + 1], bc->bytecode[id + 2]);

			o += fprintf(stderr, "%s ", r600_isa_fetch(tex->op)->name);

			o += print_indent(o, 50);

			o += fprintf(stderr, "R%d.", tex->dst_gpr);
			o += print_swizzle(tex->dst_sel_x);
			o += print_swizzle(tex->dst_sel_y);
			o += print_swizzle(tex->dst_sel_z);
			o += print_swizzle(tex->dst_sel_w);

			o += fprintf(stderr, ", R%d.", tex->src_gpr);
			o += print_swizzle(tex->src_sel_x);
			o += print_swizzle(tex->src_sel_y);
			o += print_swizzle(tex->src_sel_z);
			o += print_swizzle(tex->src_sel_w);

			o += fprintf(stderr, ",  RID:%d ", tex->resource_id);
			if (tex->resource_index_mode)
				fprintf(stderr, "RQ_%s", index_mode[tex->resource_index_mode]);

			o += fprintf(stderr, ", SID:%d  ", tex->sampler_id);

			if (tex->sampler_index_mode)
				fprintf(stderr, "SQ_%s ", index_mode[tex->sampler_index_mode]);



			if (tex->lod_bias)
				fprintf(stderr, "LB:%d ", tex->lod_bias);

			/* coordinate type per axis: Normalized vs Unnormalized */
			fprintf(stderr, "CT:%c%c%c%c ",
				tex->coord_type_x ? 'N' : 'U',
				tex->coord_type_y ? 'N' : 'U',
				tex->coord_type_z ? 'N' : 'U',
				tex->coord_type_w ? 'N' : 'U');

			if (tex->offset_x)
				fprintf(stderr, "OX:%d ", tex->offset_x);
			if (tex->offset_y)
				fprintf(stderr, "OY:%d ", tex->offset_y);
			if (tex->offset_z)
				fprintf(stderr, "OZ:%d ", tex->offset_z);

			id += 4;
			fprintf(stderr, "\n");
		}

		/* vertex/memory fetch instructions (4 dwords each) */
		LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
			int o = 0;
			const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
			o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
					bc->bytecode[id + 1], bc->bytecode[id + 2]);

			o += fprintf(stderr, "%s ", r600_isa_fetch(vtx->op)->name);

			o += print_indent(o, 50);

			o += fprintf(stderr, "R%d.", vtx->dst_gpr);
			o += print_swizzle(vtx->dst_sel_x);
			o += print_swizzle(vtx->dst_sel_y);
			o += print_swizzle(vtx->dst_sel_z);
			o += print_swizzle(vtx->dst_sel_w);

			o += fprintf(stderr, ", R%d.", vtx->src_gpr);
			o += print_swizzle(vtx->src_sel_x);
			/* memory ops carry a second source component */
			if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
				o += print_swizzle(vtx->src_sel_y);

			if (vtx->offset)
				fprintf(stderr, " +%db", vtx->offset);

			o += print_indent(o, 55);

			fprintf(stderr, ",  RID:%d ", vtx->buffer_id);

			fprintf(stderr, "%s ", fetch_type[vtx->fetch_type]);

			if (bc->gfx_level < CAYMAN && vtx->mega_fetch_count)
				fprintf(stderr, "MFC:%d ", vtx->mega_fetch_count);

			if (bc->gfx_level >= EVERGREEN && vtx->buffer_index_mode)
				fprintf(stderr, "SQ_%s ", index_mode[vtx->buffer_index_mode]);

			if (r600_isa_fetch(vtx->op)->flags & FF_MEM) {
				if (vtx->uncached)
					fprintf(stderr, "UNCACHED ");
				if (vtx->indexed)
					fprintf(stderr, "INDEXED:%d ", vtx->indexed);

				fprintf(stderr, "ELEM_SIZE:%d ", vtx->elem_size);
				if (vtx->burst_count)
					fprintf(stderr, "BURST_COUNT:%d ", vtx->burst_count);
				fprintf(stderr, "ARRAY_BASE:%d ", vtx->array_base);
				fprintf(stderr, "ARRAY_SIZE:%d ", vtx->array_size);
			}

			fprintf(stderr, "UCF:%d ", vtx->use_const_fields);
			fprintf(stderr, "FMT(DTA:%d ", vtx->data_format);
			fprintf(stderr, "NUM:%d ", vtx->num_format_all);
			fprintf(stderr, "COMP:%d ", vtx->format_comp_all);
			fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all);

			id += 4;
		}

		/* GDS (global data share) instructions (4 dwords each) */
		LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) {
			UNUSED int o = 0;
			o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
					bc->bytecode[id + 1], bc->bytecode[id + 2]);

			o += fprintf(stderr, "%s ", r600_isa_fetch(gds->op)->name);

			/* TF_WRITE has no destination and no second source GPR */
			if (gds->op != FETCH_OP_TF_WRITE) {
				o += fprintf(stderr, "R%d.", gds->dst_gpr);
				o += print_swizzle(gds->dst_sel_x);
				o += print_swizzle(gds->dst_sel_y);
				o += print_swizzle(gds->dst_sel_z);
				o += print_swizzle(gds->dst_sel_w);
			}

			o += fprintf(stderr, ", R%d.", gds->src_gpr);
			o += print_swizzle(gds->src_sel_x);
			o += print_swizzle(gds->src_sel_y);
			o += print_swizzle(gds->src_sel_z);

			if (gds->op != FETCH_OP_TF_WRITE) {
				o += fprintf(stderr, ", R%d.", gds->src_gpr2);
			}
			if (gds->alloc_consume) {
				o += fprintf(stderr, " UAV: %d", gds->uav_id);
				if (gds->uav_index_mode)
					o += fprintf(stderr, "[%s]", index_mode[gds->uav_index_mode]);
			}
			fprintf(stderr, "\n");
			id += 4;
		}
	}

	fprintf(stderr, "--------------------------------------\n");
}
2622
r600_vertex_data_type(enum pipe_format pformat,unsigned * format,unsigned * num_format,unsigned * format_comp,unsigned * endian)2623 void r600_vertex_data_type(enum pipe_format pformat,
2624 unsigned *format,
2625 unsigned *num_format, unsigned *format_comp, unsigned *endian)
2626 {
2627 const struct util_format_description *desc;
2628 unsigned i;
2629
2630 *format = 0;
2631 *num_format = 0;
2632 *format_comp = 0;
2633 *endian = ENDIAN_NONE;
2634
2635 if (pformat == PIPE_FORMAT_R11G11B10_FLOAT) {
2636 *format = FMT_10_11_11_FLOAT;
2637 *endian = r600_endian_swap(32);
2638 return;
2639 }
2640
2641 if (pformat == PIPE_FORMAT_B5G6R5_UNORM) {
2642 *format = FMT_5_6_5;
2643 *endian = r600_endian_swap(16);
2644 return;
2645 }
2646
2647 if (pformat == PIPE_FORMAT_B5G5R5A1_UNORM) {
2648 *format = FMT_1_5_5_5;
2649 *endian = r600_endian_swap(16);
2650 return;
2651 }
2652
2653 if (pformat == PIPE_FORMAT_A1B5G5R5_UNORM) {
2654 *format = FMT_5_5_5_1;
2655 return;
2656 }
2657
2658 desc = util_format_description(pformat);
2659 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
2660 goto out_unknown;
2661 }
2662
2663 i = util_format_get_first_non_void_channel(pformat);
2664
2665 *endian = r600_endian_swap(desc->channel[i].size);
2666
2667 switch (desc->channel[i].type) {
2668 /* Half-floats, floats, ints */
2669 case UTIL_FORMAT_TYPE_FLOAT:
2670 switch (desc->channel[i].size) {
2671 case 16:
2672 switch (desc->nr_channels) {
2673 case 1:
2674 *format = FMT_16_FLOAT;
2675 break;
2676 case 2:
2677 *format = FMT_16_16_FLOAT;
2678 break;
2679 case 3:
2680 case 4:
2681 *format = FMT_16_16_16_16_FLOAT;
2682 break;
2683 }
2684 break;
2685 case 32:
2686 switch (desc->nr_channels) {
2687 case 1:
2688 *format = FMT_32_FLOAT;
2689 break;
2690 case 2:
2691 *format = FMT_32_32_FLOAT;
2692 break;
2693 case 3:
2694 *format = FMT_32_32_32_FLOAT;
2695 break;
2696 case 4:
2697 *format = FMT_32_32_32_32_FLOAT;
2698 break;
2699 }
2700 break;
2701 default:
2702 goto out_unknown;
2703 }
2704 break;
2705 /* Unsigned ints */
2706 case UTIL_FORMAT_TYPE_UNSIGNED:
2707 /* Signed ints */
2708 case UTIL_FORMAT_TYPE_SIGNED:
2709 switch (desc->channel[i].size) {
2710 case 4:
2711 switch (desc->nr_channels) {
2712 case 2:
2713 *format = FMT_4_4;
2714 break;
2715 case 4:
2716 *format = FMT_4_4_4_4;
2717 break;
2718 }
2719 break;
2720 case 8:
2721 switch (desc->nr_channels) {
2722 case 1:
2723 *format = FMT_8;
2724 break;
2725 case 2:
2726 *format = FMT_8_8;
2727 break;
2728 case 3:
2729 case 4:
2730 *format = FMT_8_8_8_8;
2731 break;
2732 }
2733 break;
2734 case 10:
2735 if (desc->nr_channels != 4)
2736 goto out_unknown;
2737
2738 *format = FMT_2_10_10_10;
2739 break;
2740 case 16:
2741 switch (desc->nr_channels) {
2742 case 1:
2743 *format = FMT_16;
2744 break;
2745 case 2:
2746 *format = FMT_16_16;
2747 break;
2748 case 3:
2749 case 4:
2750 *format = FMT_16_16_16_16;
2751 break;
2752 }
2753 break;
2754 case 32:
2755 switch (desc->nr_channels) {
2756 case 1:
2757 *format = FMT_32;
2758 break;
2759 case 2:
2760 *format = FMT_32_32;
2761 break;
2762 case 3:
2763 *format = FMT_32_32_32;
2764 break;
2765 case 4:
2766 *format = FMT_32_32_32_32;
2767 break;
2768 }
2769 break;
2770 default:
2771 goto out_unknown;
2772 }
2773 break;
2774 default:
2775 goto out_unknown;
2776 }
2777
2778 if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2779 *format_comp = 1;
2780 }
2781
2782 *num_format = 0;
2783 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED ||
2784 desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2785 if (!desc->channel[i].normalized) {
2786 if (desc->channel[i].pure_integer)
2787 *num_format = 1;
2788 else
2789 *num_format = 2;
2790 }
2791 }
2792 return;
2793 out_unknown:
2794 R600_ASM_ERR("unsupported vertex format %s\n", util_format_name(pformat));
2795 }
2796
/* Decode one encoded ALU instruction (two hardware dwords) into @alu.
 * The encoding bit in WORD1 selects between the OP2 and OP3 layouts;
 * only OP2 carries omod/write-mask/predicate-update/abs fields, while
 * OP3 instead carries the third source operand. */
void r600_bytecode_alu_read(struct r600_bytecode *bc,
		struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1)
{
	/* WORD0: src0/src1 operands plus index mode, predicate select, last */
	alu->src[0].sel = G_SQ_ALU_WORD0_SRC0_SEL(word0);
	alu->src[0].rel = G_SQ_ALU_WORD0_SRC0_REL(word0);
	alu->src[0].chan = G_SQ_ALU_WORD0_SRC0_CHAN(word0);
	alu->src[0].neg = G_SQ_ALU_WORD0_SRC0_NEG(word0);
	alu->src[1].sel = G_SQ_ALU_WORD0_SRC1_SEL(word0);
	alu->src[1].rel = G_SQ_ALU_WORD0_SRC1_REL(word0);
	alu->src[1].chan = G_SQ_ALU_WORD0_SRC1_CHAN(word0);
	alu->src[1].neg = G_SQ_ALU_WORD0_SRC1_NEG(word0);
	alu->index_mode = G_SQ_ALU_WORD0_INDEX_MODE(word0);
	alu->pred_sel = G_SQ_ALU_WORD0_PRED_SEL(word0);
	alu->last = G_SQ_ALU_WORD0_LAST(word0);

	/* WORD1: destination, bank swizzle, and the op-specific fields */
	alu->bank_swizzle = G_SQ_ALU_WORD1_BANK_SWIZZLE(word1);
	if (alu->bank_swizzle)
		/* preserve an explicit swizzle so re-encoding keeps it */
		alu->bank_swizzle_force = alu->bank_swizzle;
	alu->dst.sel = G_SQ_ALU_WORD1_DST_GPR(word1);
	alu->dst.rel = G_SQ_ALU_WORD1_DST_REL(word1);
	alu->dst.chan = G_SQ_ALU_WORD1_DST_CHAN(word1);
	alu->dst.clamp = G_SQ_ALU_WORD1_CLAMP(word1);
	if (G_SQ_ALU_WORD1_ENCODING(word1)) /*ALU_DWORD1_OP3*/
	{
		alu->is_op3 = 1;
		alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1);
		alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1);
		alu->src[2].chan = G_SQ_ALU_WORD1_OP3_SRC2_CHAN(word1);
		alu->src[2].neg = G_SQ_ALU_WORD1_OP3_SRC2_NEG(word1);
		alu->op = r600_isa_alu_by_opcode(bc->isa,
				G_SQ_ALU_WORD1_OP3_ALU_INST(word1), /* is_op3 = */ 1);

	}
	else /*ALU_DWORD1_OP2*/
	{
		alu->src[0].abs = G_SQ_ALU_WORD1_OP2_SRC0_ABS(word1);
		alu->src[1].abs = G_SQ_ALU_WORD1_OP2_SRC1_ABS(word1);
		alu->op = r600_isa_alu_by_opcode(bc->isa,
				G_SQ_ALU_WORD1_OP2_ALU_INST(word1), /* is_op3 = */ 0);
		alu->omod = G_SQ_ALU_WORD1_OP2_OMOD(word1);
		alu->dst.write = G_SQ_ALU_WORD1_OP2_WRITE_MASK(word1);
		alu->update_pred = G_SQ_ALU_WORD1_OP2_UPDATE_PRED(word1);
		alu->execute_mask =
			G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1);
	}
}
2845
#if 0
/* Compiled out: reverse decoder for CF export words, the counterpart of
 * the export encoder.  Kept for reference/debugging only. */
void r600_bytecode_export_read(struct r600_bytecode *bc,
		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
{
	output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0);
	output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
	output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
	output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);

	output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
	output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
	output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
	output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
	output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
	output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
	output->op = r600_isa_cf_by_opcode(bc->isa,
			G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1), 0);
	output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
	output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
	output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
}
#endif
2868