1 /*
2 * Copyright 2009 Nicolai Hähnle <[email protected]>
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "radeon_compiler.h"
7
8 #include <stdarg.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12
13 #include "util/u_debug.h"
14 #include "pipe/p_state.h"
15 #include "radeon_dataflow.h"
16 #include "radeon_program.h"
17 #include "radeon_program_pair.h"
18 #include "radeon_regalloc.h"
19 #include "radeon_compiler_util.h"
20
21
rc_init(struct radeon_compiler * c,const struct rc_regalloc_state * rs)22 void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs)
23 {
24 memset(c, 0, sizeof(*c));
25
26 memory_pool_init(&c->Pool);
27 c->Program.Instructions.Prev = &c->Program.Instructions;
28 c->Program.Instructions.Next = &c->Program.Instructions;
29 c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
30 c->regalloc_state = rs;
31 c->max_temp_index = -1;
32 }
33
rc_destroy(struct radeon_compiler * c)34 void rc_destroy(struct radeon_compiler * c)
35 {
36 rc_constants_destroy(&c->Program.Constants);
37 memory_pool_destroy(&c->Pool);
38 free(c->ErrorMsg);
39 }
40
rc_debug(struct radeon_compiler * c,const char * fmt,...)41 void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
42 {
43 va_list ap;
44
45 if (!(c->Debug & RC_DBG_LOG))
46 return;
47
48 va_start(ap, fmt);
49 vfprintf(stderr, fmt, ap);
50 va_end(ap);
51 }
52
rc_error(struct radeon_compiler * c,const char * fmt,...)53 void rc_error(struct radeon_compiler * c, const char * fmt, ...)
54 {
55 va_list ap;
56
57 c->Error = 1;
58
59 if (!c->ErrorMsg) {
60 /* Only remember the first error */
61 char buf[1024];
62 int written;
63
64 va_start(ap, fmt);
65 written = vsnprintf(buf, sizeof(buf), fmt, ap);
66 va_end(ap);
67
68 if (written < sizeof(buf)) {
69 c->ErrorMsg = strdup(buf);
70 } else {
71 c->ErrorMsg = malloc(written + 1);
72
73 va_start(ap, fmt);
74 vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
75 va_end(ap);
76 }
77 }
78
79 if (c->Debug & RC_DBG_LOG) {
80 fprintf(stderr, "r300compiler error: ");
81
82 va_start(ap, fmt);
83 vfprintf(stderr, fmt, ap);
84 va_end(ap);
85 }
86 }
87
/**
 * Report a failed internal assertion through rc_error().
 *
 * The message names the source location (file, line) and the text of the
 * assertion.  Unconditionally returns 1.
 */
int rc_if_fail_helper(struct radeon_compiler *c, const char *file, int line,
		      const char *assertion)
{
	rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);

	return 1;
}
93
rc_mark_unused_channels(struct radeon_compiler * c,void * user)94 void rc_mark_unused_channels(struct radeon_compiler * c, void *user)
95 {
96 unsigned int srcmasks[3];
97
98 for(struct rc_instruction * inst = c->Program.Instructions.Next;
99 inst != &c->Program.Instructions;
100 inst = inst->Next) {
101
102 rc_compute_sources_for_writemask(inst, inst->U.I.DstReg.WriteMask, srcmasks);
103
104 for(unsigned int src = 0; src < 3; ++src) {
105 for(unsigned int chan = 0; chan < 4; ++chan) {
106 if (!GET_BIT(srcmasks[src], chan))
107 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
108 }
109 }
110 }
111 }
112
113 /**
114 * Recompute c->Program.InputsRead and c->Program.OutputsWritten
115 * based on which inputs and outputs are actually referenced
116 * in program instructions.
117 */
rc_calculate_inputs_outputs(struct radeon_compiler * c)118 void rc_calculate_inputs_outputs(struct radeon_compiler * c)
119 {
120 struct rc_instruction *inst;
121
122 c->Program.InputsRead = 0;
123 c->Program.OutputsWritten = 0;
124
125 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
126 {
127 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
128 int i;
129
130 for (i = 0; i < opcode->NumSrcRegs; ++i) {
131 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
132 c->Program.InputsRead |= 1U << inst->U.I.SrcReg[i].Index;
133 }
134
135 if (opcode->HasDstReg) {
136 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
137 c->Program.OutputsWritten |= 1U << inst->U.I.DstReg.Index;
138 }
139 }
140 }
141
142 /**
143 * Rewrite the program such that a given output is duplicated.
144 */
rc_copy_output(struct radeon_compiler * c,unsigned output,unsigned dup_output)145 void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
146 {
147 unsigned tempreg = rc_find_free_temporary(c);
148 struct rc_instruction * inst;
149 struct rc_instruction * insert_pos = c->Program.Instructions.Prev;
150 struct rc_instruction * last_write_inst = NULL;
151 unsigned branch_depth = 0;
152 unsigned loop_depth = 0;
153 bool emit_after_control_flow = false;
154 unsigned num_writes = 0;
155
156 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
157 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
158
159 if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
160 loop_depth++;
161 if (inst->U.I.Opcode == RC_OPCODE_IF)
162 branch_depth++;
163 if ((inst->U.I.Opcode == RC_OPCODE_ENDLOOP && loop_depth--) ||
164 (inst->U.I.Opcode == RC_OPCODE_ENDIF && branch_depth--))
165 if (emit_after_control_flow && loop_depth == 0 && branch_depth == 0) {
166 insert_pos = inst;
167 emit_after_control_flow = false;
168 }
169
170 if (opcode->HasDstReg) {
171 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
172 num_writes++;
173 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
174 inst->U.I.DstReg.Index = tempreg;
175 insert_pos = inst;
176 last_write_inst = inst;
177 if (loop_depth != 0 && branch_depth != 0)
178 emit_after_control_flow = true;
179 }
180 }
181 }
182
183 /* If there is only a single write, just duplicate the whole instruction instead.
184 * We can do this even when the single write was is a control flow.
185 */
186 if (num_writes == 1) {
187 last_write_inst->U.I.DstReg.File = RC_FILE_OUTPUT;
188 last_write_inst->U.I.DstReg.Index = output;
189
190 inst = rc_insert_new_instruction(c, last_write_inst);
191 struct rc_instruction * prev = inst->Prev;
192 struct rc_instruction * next = inst->Next;
193 memcpy(inst, last_write_inst, sizeof(struct rc_instruction));
194 inst->Prev = prev;
195 inst->Next = next;
196 inst->U.I.DstReg.Index = dup_output;
197 } else {
198 inst = rc_insert_new_instruction(c, insert_pos);
199 inst->U.I.Opcode = RC_OPCODE_MOV;
200 inst->U.I.DstReg.File = RC_FILE_OUTPUT;
201 inst->U.I.DstReg.Index = output;
202
203 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
204 inst->U.I.SrcReg[0].Index = tempreg;
205 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
206
207 inst = rc_insert_new_instruction(c, inst);
208 inst->U.I.Opcode = RC_OPCODE_MOV;
209 inst->U.I.DstReg.File = RC_FILE_OUTPUT;
210 inst->U.I.DstReg.Index = dup_output;
211
212 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
213 inst->U.I.SrcReg[0].Index = tempreg;
214 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
215 }
216
217 c->Program.OutputsWritten |= 1U << dup_output;
218 }
219
220
221 /**
222 * Introduce standard code fragment to deal with fragment.position.
223 */
rc_transform_fragment_wpos(struct radeon_compiler * c,unsigned wpos,unsigned new_input,int full_vtransform)224 void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
225 int full_vtransform)
226 {
227 unsigned tempregi = rc_find_free_temporary(c);
228 struct rc_instruction * inst_rcp;
229 struct rc_instruction * inst_mul;
230 struct rc_instruction * inst_mad;
231 struct rc_instruction * inst;
232
233 c->Program.InputsRead &= ~(1U << wpos);
234 c->Program.InputsRead |= 1U << new_input;
235
236 /* perspective divide */
237 inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
238 inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
239
240 inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
241 inst_rcp->U.I.DstReg.Index = tempregi;
242 inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
243
244 inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
245 inst_rcp->U.I.SrcReg[0].Index = new_input;
246 inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
247
248 inst_mul = rc_insert_new_instruction(c, inst_rcp);
249 inst_mul->U.I.Opcode = RC_OPCODE_MUL;
250
251 inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
252 inst_mul->U.I.DstReg.Index = tempregi;
253 inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
254
255 inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
256 inst_mul->U.I.SrcReg[0].Index = new_input;
257
258 inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
259 inst_mul->U.I.SrcReg[1].Index = tempregi;
260 inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
261
262 /* viewport transformation */
263 inst_mad = rc_insert_new_instruction(c, inst_mul);
264 inst_mad->U.I.Opcode = RC_OPCODE_MAD;
265
266 inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
267 inst_mad->U.I.DstReg.Index = tempregi;
268 inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
269
270 inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
271 inst_mad->U.I.SrcReg[0].Index = tempregi;
272 inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
273
274 inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
275 inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
276
277 inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
278 inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
279
280 if (full_vtransform) {
281 inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
282 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
283 } else {
284 inst_mad->U.I.SrcReg[1].Index =
285 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
286 }
287
288 for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
289 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
290 unsigned i;
291
292 for(i = 0; i < opcode->NumSrcRegs; i++) {
293 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
294 inst->U.I.SrcReg[i].Index == wpos) {
295 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
296 inst->U.I.SrcReg[i].Index = tempregi;
297 }
298 }
299 }
300 }
301
302
303 /**
304 * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
305 * Gallium and OpenGL define it the other way around.
306 *
307 * So let's just negate FACE at the beginning of the shader and rewrite the rest
308 * of the shader to read from the newly allocated temporary.
309 */
rc_transform_fragment_face(struct radeon_compiler * c,unsigned face)310 void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
311 {
312 unsigned tempregi = rc_find_free_temporary(c);
313 struct rc_instruction *inst_add;
314 struct rc_instruction *inst;
315
316 /* perspective divide */
317 inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
318 inst_add->U.I.Opcode = RC_OPCODE_ADD;
319
320 inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
321 inst_add->U.I.DstReg.Index = tempregi;
322 inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
323
324 inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
325 inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
326
327 inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
328 inst_add->U.I.SrcReg[1].Index = face;
329 inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
330 inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
331
332 for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
333 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
334 unsigned i;
335
336 for(i = 0; i < opcode->NumSrcRegs; i++) {
337 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
338 inst->U.I.SrcReg[i].Index == face) {
339 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
340 inst->U.I.SrcReg[i].Index = tempregi;
341 }
342 }
343 }
344 }
345
/**
 * Register-read callback used by rc_get_stats().
 *
 * userdata is the struct rc_program_stats being filled in:
 *  - temporaries: track the highest index seen in num_temp_regs,
 *  - inline literals: count every occurrence,
 *  - constants: track the highest index + 1 in num_consts.
 * The instruction and channel mask are not looked at.
 */
static void reg_count_callback(void *userdata, struct rc_instruction *inst,
			       rc_register_file file, unsigned int index, unsigned int mask)
{
	struct rc_program_stats *stats = userdata;

	switch (file) {
	case RC_FILE_TEMPORARY:
		if ((int)index > stats->num_temp_regs)
			stats->num_temp_regs = index;
		break;
	case RC_FILE_INLINE:
		stats->num_inline_literals++;
		break;
	case RC_FILE_CONSTANT:
		stats->num_consts = MAX2(stats->num_consts, index + 1);
		break;
	default:
		break;
	}
}
357
rc_get_stats(struct radeon_compiler * c,struct rc_program_stats * s)358 void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
359 {
360 struct rc_instruction * tmp;
361 memset(s, 0, sizeof(*s));
362 unsigned ip = 0;
363 int last_begintex = -1;
364
365 for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
366 tmp = tmp->Next, ip++){
367 const struct rc_opcode_info * info;
368 rc_for_all_reads_mask(tmp, reg_count_callback, s);
369 if (tmp->Type == RC_INSTRUCTION_NORMAL) {
370 info = rc_get_opcode_info(tmp->U.I.Opcode);
371 if (info->Opcode == RC_OPCODE_BEGIN_TEX) {
372 /* The R5xx docs mention ~30 cycles in section 8.3.1
373 * The only case when we don't want to add the cycles
374 * penalty is when the texblock contains only kil.
375 */
376 const struct rc_opcode_info *next_op
377 = rc_get_opcode_info(tmp->Next->U.I.Opcode);
378 struct rc_instruction *second_next_instr = tmp->Next->Next;
379 const struct rc_opcode_info *second_next_op;
380 if (second_next_instr->Type == RC_INSTRUCTION_NORMAL) {
381 second_next_op = rc_get_opcode_info(second_next_instr->U.I.Opcode);
382 } else {
383 second_next_op = rc_get_opcode_info(second_next_instr->U.P.RGB.Opcode);
384 }
385 if (next_op->Opcode != RC_OPCODE_KIL ||
386 (second_next_instr->Type == RC_INSTRUCTION_NORMAL &&
387 second_next_op->HasTexture)) {
388 s->num_cycles += 30;
389 last_begintex = ip;
390 }
391 continue;
392 }
393 if (info->Opcode == RC_OPCODE_MAD &&
394 rc_inst_has_three_diff_temp_srcs(tmp))
395 s->num_cycles++;
396 } else {
397 if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
398 s->num_presub_ops++;
399 if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
400 s->num_presub_ops++;
401 /* Assuming alpha will never be a flow control or
402 * a tex instruction. */
403 if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
404 s->num_alpha_insts++;
405 if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
406 s->num_rgb_insts++;
407 if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 &&
408 tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
409 s->num_omod_ops++;
410 }
411 if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 &&
412 tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
413 s->num_omod_ops++;
414 }
415 if (tmp->U.P.Nop)
416 s->num_cycles++;
417 /* SemWait has effect only on R500, the more instructions we can put
418 * between the tex block and the first texture semaphore, the better.
419 */
420 if (tmp->U.P.SemWait && c->is_r500 && last_begintex != -1) {
421 s->num_cycles -= MIN2(30, ip - last_begintex);
422 last_begintex = -1;
423 }
424 info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
425 }
426 if (info->IsFlowControl) {
427 s->num_fc_insts++;
428 if (info->Opcode == RC_OPCODE_BGNLOOP)
429 s->num_loops++;
430 }
431 /* VS flow control was already translated to the predicate instructions */
432 if (c->type == RC_VERTEX_PROGRAM)
433 if (strstr(info->Name, "PRED") != NULL)
434 s->num_pred_insts++;
435
436 if (info->HasTexture)
437 s->num_tex_insts++;
438 s->num_insts++;
439 s->num_cycles++;
440 }
441 /* Increment here because the reg_count_callback store the max
442 * temporary reg index in s->nun_temp_regs. */
443 s->num_temp_regs++;
444 }
445
print_stats(struct radeon_compiler * c)446 static void print_stats(struct radeon_compiler * c)
447 {
448 struct rc_program_stats s;
449
450 rc_get_stats(c, &s);
451
452 /* Note that we print some dummy values for instruction categories that
453 * only the FS has, because shader-db's report.py wants all shaders to
454 * have the same set.
455 */
456 util_debug_message(c->debug, SHADER_INFO,
457 "%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol, "
458 "%u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits, %u cycles",
459 c->type == RC_VERTEX_PROGRAM ? "VS" : "FS",
460 s.num_insts, s.num_rgb_insts, s.num_alpha_insts, s.num_pred_insts,
461 s.num_fc_insts, s.num_loops, s.num_tex_insts, s.num_presub_ops,
462 s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals,
463 s.num_cycles);
464 }
465
466 static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
467 "Vertex Program",
468 "Fragment Program"
469 };
470
rc_run_compiler_passes(struct radeon_compiler * c,struct radeon_compiler_pass * list)471 bool rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
472 {
473 for (unsigned i = 0; list[i].name; i++) {
474 if (list[i].predicate) {
475 list[i].run(c, list[i].user);
476
477 if (c->Error)
478 return false;
479
480 if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
481 fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
482 rc_print_program(&c->Program);
483 }
484 }
485 }
486 return true;
487 }
488
489 /* Executes a list of compiler passes given in the parameter 'list'. */
rc_run_compiler(struct radeon_compiler * c,struct radeon_compiler_pass * list)490 void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
491 {
492 if (c->Debug & RC_DBG_LOG) {
493 fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
494 rc_print_program(&c->Program);
495 }
496
497 if(rc_run_compiler_passes(c, list)) {
498 print_stats(c);
499 }
500 }
501
rc_validate_final_shader(struct radeon_compiler * c,void * user)502 void rc_validate_final_shader(struct radeon_compiler *c, void *user)
503 {
504 /* Check the number of constants. */
505 if (c->Program.Constants.Count > c->max_constants) {
506 rc_error(c, "Too many constants. Max: %i, Got: %i\n",
507 c->max_constants, c->Program.Constants.Count);
508 }
509 }
510