/*
 * Copyright 2009 Nicolai Haehnle.
 * Copyright 2012 Advanced Micro Devices, Inc.
 * Authors:
 * Nicolai Haehnle
 * Tom Stellard <[email protected]>
 * SPDX-License-Identifier: MIT
 */
9
10 #include "radeon_dataflow.h"
11
12 #include "radeon_code.h"
13 #include "radeon_compiler.h"
14 #include "radeon_compiler_util.h"
15 #include "radeon_swizzle.h"
16
get_swizzle_split(struct radeon_compiler * c,struct rc_swizzle_split * split,struct rc_instruction * inst,unsigned src,unsigned * usemask)17 static unsigned int get_swizzle_split(struct radeon_compiler * c,
18 struct rc_swizzle_split * split, struct rc_instruction * inst,
19 unsigned src, unsigned * usemask)
20 {
21 *usemask = 0;
22 for(unsigned int chan = 0; chan < 4; ++chan) {
23 if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
24 *usemask |= 1 << chan;
25 }
26
27 c->SwizzleCaps->Split(inst->U.I.SrcReg[src], *usemask, split);
28 return split->NumPhases;
29 }
30
rewrite_source(struct radeon_compiler * c,struct rc_instruction * inst,unsigned src)31 static void rewrite_source(struct radeon_compiler * c,
32 struct rc_instruction * inst, unsigned src)
33 {
34 struct rc_swizzle_split split;
35 unsigned int tempreg = rc_find_free_temporary(c);
36 unsigned int usemask;
37
38 get_swizzle_split(c, &split, inst, src, &usemask);
39
40 for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
41 struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
42 unsigned int masked_negate;
43
44 mov->U.I.Opcode = RC_OPCODE_MOV;
45 mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
46 mov->U.I.DstReg.Index = tempreg;
47 mov->U.I.DstReg.WriteMask = split.Phase[phase];
48 mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
49 mov->U.I.PreSub = inst->U.I.PreSub;
50
51 for(unsigned int chan = 0; chan < 4; ++chan) {
52 if (!GET_BIT(split.Phase[phase], chan))
53 SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
54 }
55
56 masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
57 if (masked_negate == 0)
58 mov->U.I.SrcReg[0].Negate = 0;
59 else if (masked_negate == split.Phase[phase])
60 mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
61
62 }
63
64 inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
65 inst->U.I.SrcReg[src].Index = tempreg;
66 inst->U.I.SrcReg[src].Swizzle = 0;
67 inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
68 inst->U.I.SrcReg[src].Abs = 0;
69 for(unsigned int chan = 0; chan < 4; ++chan) {
70 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
71 GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
72 }
73 }
74
75 /**
76 * This function will attempt to rewrite non-native swizzles that read from
77 * immediate registers by rearranging the immediates to allow the
78 * instruction to use native swizzles.
79 */
try_rewrite_constant(struct radeon_compiler * c,struct rc_src_register * reg)80 static unsigned try_rewrite_constant(struct radeon_compiler *c,
81 struct rc_src_register *reg)
82 {
83 unsigned new_swizzle, chan, swz0, swz1, swz2, swz3, found_swizzle, swz;
84 unsigned all_inline = 0;
85 bool w_inline_constant = false;
86 float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f};
87
88 if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) {
89 /* The register does not contain immediates, but if all
90 * the swizzles are inline constants, we can still rewrite
91 * it. */
92
93 new_swizzle = RC_SWIZZLE_XYZW;
94 for (chan = 0 ; chan < 4; chan++) {
95 unsigned swz = GET_SWZ(reg->Swizzle, chan);
96 if (swz <= RC_SWIZZLE_W) {
97 return 0;
98 }
99 if (swz == RC_SWIZZLE_UNUSED) {
100 SET_SWZ(new_swizzle, chan, RC_SWIZZLE_UNUSED);
101 }
102 }
103 all_inline = 1;
104 } else {
105 new_swizzle = reg->Swizzle;
106 }
107
108 swz = RC_SWIZZLE_UNUSED;
109 found_swizzle = 1;
110 /* Check if all channels have the same swizzle. If they do we can skip
111 * the search for a native swizzle. We only need to check the first
112 * three channels, because any swizzle is legal in the fourth channel.
113 */
114 for (chan = 0; chan < 3; chan++) {
115 unsigned chan_swz = GET_SWZ(reg->Swizzle, chan);
116 if (chan_swz == RC_SWIZZLE_UNUSED) {
117 continue;
118 }
119 if (swz == RC_SWIZZLE_UNUSED) {
120 swz = chan_swz;
121 } else if (swz != chan_swz) {
122 found_swizzle = 0;
123 break;
124 }
125 }
126
127 /* Find a legal swizzle */
128
129 /* This loop attempts to find a native swizzle where all the
130 * channels are different. */
131 while (!found_swizzle && !all_inline) {
132 swz0 = GET_SWZ(new_swizzle, 0);
133 swz1 = GET_SWZ(new_swizzle, 1);
134 swz2 = GET_SWZ(new_swizzle, 2);
135
136 /* Swizzle .W. is never legal. */
137 if (swz1 == RC_SWIZZLE_W ||
138 swz1 == RC_SWIZZLE_UNUSED ||
139 swz1 == RC_SWIZZLE_ZERO ||
140 swz1 == RC_SWIZZLE_HALF ||
141 swz1 == RC_SWIZZLE_ONE) {
142 /* We chose Z, because there are two non-repeating
143 * swizzle combinations of the form .Z. There are
144 * only one combination each for .X. and .Y. */
145 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
146 continue;
147 }
148
149 if (swz2 == RC_SWIZZLE_UNUSED) {
150 /* We choose Y, because there are two non-repeating
151 * swizzle combinations of the form ..Y */
152 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
153 continue;
154 }
155
156 switch (swz0) {
157 /* X.. */
158 case RC_SWIZZLE_X:
159 /* Legal swizzles that start with X: XYZ, XXX */
160 switch (swz1) {
161 /* XX. */
162 case RC_SWIZZLE_X:
163 /* The new swizzle will be:
164 * ZXY (XX. => ZX. => ZXY) */
165 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
166 break;
167 /* XY. */
168 case RC_SWIZZLE_Y:
169 /* The new swizzle is XYZ */
170 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z);
171 found_swizzle = 1;
172 break;
173 /* XZ. */
174 case RC_SWIZZLE_Z:
175 /* XZZ */
176 if (swz2 == RC_SWIZZLE_Z) {
177 /* The new swizzle is XYZ */
178 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y);
179 found_swizzle = 1;
180 } else { /* XZ[^Z] */
181 /* The new swizzle will be:
182 * YZX (XZ. => YZ. => YZX) */
183 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y);
184 }
185 break;
186 /* XW. Should have already been handled. */
187 case RC_SWIZZLE_W:
188 assert(0);
189 break;
190 }
191 break;
192 /* Y.. */
193 case RC_SWIZZLE_Y:
194 /* Legal swizzles that start with Y: YYY, YZX */
195 switch (swz1) {
196 /* YY. */
197 case RC_SWIZZLE_Y:
198 /* The new swizzle will be:
199 * XYZ (YY. => XY. => XYZ) */
200 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
201 break;
202 /* YZ. */
203 case RC_SWIZZLE_Z:
204 /* The new swizzle is YZX */
205 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X);
206 found_swizzle = 1;
207 break;
208 /* YX. */
209 case RC_SWIZZLE_X:
210 /* YXX */
211 if (swz2 == RC_SWIZZLE_X) {
212 /*The new swizzle is YZX */
213 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
214 found_swizzle = 1;
215 } else { /* YX[^X] */
216 /* The new swizzle will be:
217 * ZXY (YX. => ZX. -> ZXY) */
218 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
219 }
220 break;
221 /* YW. Should have already been handled. */
222 case RC_SWIZZLE_W:
223 assert(0);
224 break;
225 }
226 break;
227 /* Z.. */
228 case RC_SWIZZLE_Z:
229 /* Legal swizzles that start with Z: ZZZ, ZXY */
230 switch (swz1) {
231 /* ZZ. */
232 case RC_SWIZZLE_Z:
233 /* The new swizzle will be:
234 * WZY (ZZ. => WZ. => WZY) */
235 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W);
236 break;
237 /* ZX. */
238 case RC_SWIZZLE_X:
239 /* The new swizzle is ZXY */
240 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
241 found_swizzle = 1;
242 break;
243 /* ZY. */
244 case RC_SWIZZLE_Y:
245 /* ZYY */
246 if (swz2 == RC_SWIZZLE_Y) {
247 /* The new swizzle is ZXY */
248 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X);
249 found_swizzle = 1;
250 } else { /* ZY[^Y] */
251 /* The new swizzle will be:
252 * XYZ (ZY. => XY. => XYZ) */
253 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
254 }
255 break;
256 /* ZW. Should have already been handled. */
257 case RC_SWIZZLE_W:
258 assert(0);
259 break;
260 }
261 break;
262
263 /* W.. */
264 case RC_SWIZZLE_W:
265 /* Legal swizzles that start with X: WWW, WZY */
266 switch (swz1) {
267 /* WW. Should have already been handled. */
268 case RC_SWIZZLE_W:
269 assert(0);
270 break;
271 /* WZ. */
272 case RC_SWIZZLE_Z:
273 /* The new swizzle will be WZY */
274 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
275 found_swizzle = 1;
276 break;
277 /* WX. */
278 case RC_SWIZZLE_X:
279 /* WY. */
280 case RC_SWIZZLE_Y:
281 /* W[XY]Y */
282 if (swz2 == RC_SWIZZLE_Y) {
283 /* The new swizzle will be WZY */
284 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
285 found_swizzle = 1;
286 } else { /* W[XY][^Y] */
287 /* The new swizzle will be:
288 * ZXY (WX. => XX. => ZX. => ZXY) or
289 * XYZ (WY. => XY. => XYZ)
290 */
291 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
292 }
293 break;
294 }
295 break;
296 /* U.. 0.. 1.. H..*/
297 case RC_SWIZZLE_UNUSED:
298 case RC_SWIZZLE_ZERO:
299 case RC_SWIZZLE_ONE:
300 case RC_SWIZZLE_HALF:
301 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
302 break;
303 }
304 }
305
306 /* Handle the swizzle in the w channel. */
307 swz3 = GET_SWZ(reg->Swizzle, 3);
308
309 /* We can skip this if the swizzle in channel w is an inline constant. */
310 if (is_swizzle_inline_constant(swz3)) {
311 w_inline_constant = true;
312 } else {
313 for (chan = 0; chan < 3; chan++) {
314 unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
315 unsigned new_swz = GET_SWZ(new_swizzle, chan);
316 /* If the swizzle in the w channel is the same as the
317 * swizzle in any other channels, we need to rewrite it.
318 * For example:
319 * reg->Swizzle == XWZW
320 * new_swizzle == XYZX
321 * Since the swizzle in the y channel is being
322 * rewritten from W -> Y we need to change the swizzle
323 * in the w channel from W -> Y as well.
324 */
325 if (old_swz == swz3) {
326 SET_SWZ(new_swizzle, 3,
327 GET_SWZ(new_swizzle, chan));
328 break;
329 }
330
331 /* The swizzle in channel w will be overwritten by one
332 * of the new swizzles. */
333 if (new_swz == swz3) {
334 /* Find an unused swizzle */
335 unsigned i;
336 unsigned used = 0;
337 for (i = 0; i < 3; i++) {
338 used |= 1 << GET_SWZ(new_swizzle, i);
339 }
340 for (i = 0; i < 4; i++) {
341 if (used & (1 << i)) {
342 continue;
343 }
344 SET_SWZ(new_swizzle, 3, i);
345 }
346 }
347 }
348 }
349
350 for (chan = 0; chan < 4; chan++) {
351 unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
352 unsigned new_swz = GET_SWZ(new_swizzle, chan);
353
354 if (old_swz == RC_SWIZZLE_UNUSED) {
355 continue;
356 }
357
358 /* We don't need to change the swizzle in channel w if it is
359 * an inline constant. These are always legal in the w channel.
360 *
361 * Swizzles with a value > RC_SWIZZLE_W are inline constants.
362 */
363 if (chan == 3 && w_inline_constant) {
364 continue;
365 }
366
367 if (new_swz > RC_SWIZZLE_W) {
368 rc_error(c, "Bad swizzle in try_rewrite_constant()");
369 new_swz = RC_SWIZZLE_X;
370 }
371
372 switch (old_swz) {
373 case RC_SWIZZLE_ZERO:
374 imms[new_swz] = 0.0f;
375 break;
376 case RC_SWIZZLE_HALF:
377 if (reg->Negate & (1 << chan)) {
378 imms[new_swz] = -0.5f;
379 } else {
380 imms[new_swz] = 0.5f;
381 }
382 break;
383 case RC_SWIZZLE_ONE:
384 if (reg->Negate & (1 << chan)) {
385 imms[new_swz] = -1.0f;
386 } else {
387 imms[new_swz] = 1.0f;
388 }
389 break;
390 default:
391 imms[new_swz] = rc_get_constant_value(c, reg->Index,
392 reg->Swizzle, reg->Negate, chan);
393 }
394 SET_SWZ(reg->Swizzle, chan, new_swz);
395 }
396 reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants,
397 imms);
398 /* We need to set the register file to CONSTANT in case we are
399 * converting a non-constant register with constant swizzles (e.g.
400 * ONE, ZERO, HALF).
401 */
402 reg->File = RC_FILE_CONSTANT;
403 reg->Negate = w_inline_constant ? reg->Negate & (1 << 3) : 0;
404 return 1;
405 }
406
407 /**
408 * Set all channels not specified by writemaks to unused.
409 */
clear_channels(struct rc_instruction * inst,unsigned writemask)410 static void clear_channels(struct rc_instruction * inst, unsigned writemask)
411 {
412 inst->U.I.DstReg.WriteMask = writemask;
413 for (unsigned chan = 0; chan < 4; chan++) {
414 if (writemask & (1 << chan))
415 continue;
416
417 const struct rc_opcode_info * opcode =
418 rc_get_opcode_info(inst->U.I.Opcode);
419 for (unsigned src = 0; src < opcode->NumSrcRegs; src++) {
420 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
421 }
422 }
423 /* TODO: We could in theory add constant swizzles back as well,
424 * they will be all legal when we have just a single channel,
425 * to save some sources and help the pair scheduling later. */
426 }
427
try_splitting_single_channel(struct radeon_compiler * c,struct rc_instruction * inst)428 static bool try_splitting_single_channel(struct radeon_compiler * c,
429 struct rc_instruction * inst)
430 {
431 for (unsigned chan = 0; chan < 3; chan++) {
432 struct rc_instruction * new_inst;
433 new_inst = rc_insert_new_instruction(c, inst);
434 memcpy(&new_inst->U.I, &inst->U.I, sizeof(struct rc_sub_instruction));
435 clear_channels(new_inst, inst->U.I.DstReg.WriteMask ^ (1 << chan));
436
437 const struct rc_opcode_info * opcode =
438 rc_get_opcode_info(new_inst->U.I.Opcode);
439 bool valid_swizzles = true;
440
441 for (unsigned src = 0; src < opcode->NumSrcRegs; ++src) {
442 struct rc_src_register *reg = &new_inst->U.I.SrcReg[src];
443
444 if (!c->SwizzleCaps->IsNative(new_inst->U.I.Opcode, *reg))
445 valid_swizzles = false;
446 }
447
448 if (!valid_swizzles) {
449 rc_remove_instruction(new_inst);
450 } else {
451 clear_channels(inst, 1 << chan);
452 return true;
453 }
454 }
455 return false;
456 }
457
try_splitting_instruction(struct radeon_compiler * c,struct rc_instruction * inst)458 static bool try_splitting_instruction(struct radeon_compiler * c,
459 struct rc_instruction * inst)
460 {
461 /* Adding more output instructions in FS is bad for performance. */
462 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
463 return false;
464
465 /* When only single channel of the swizzle is wrong, like xwzw,
466 * it is best to just split the single channel out.
467 */
468 if (inst->U.I.DstReg.WriteMask == RC_MASK_XYZW ||
469 inst->U.I.DstReg.WriteMask == RC_MASK_XYZ) {
470 if (try_splitting_single_channel(c, inst))
471 return true;
472 }
473
474 for (unsigned chan = 0; chan < 3; chan++) {
475 if (!(inst->U.I.DstReg.WriteMask & (1 << chan)))
476 continue;
477
478 unsigned next_chan;
479 for (next_chan = chan + 1; next_chan < 4; next_chan++) {
480 if (!(inst->U.I.DstReg.WriteMask & (1 << next_chan)))
481 continue;
482
483 /* We don't want to split the last used x/y/z channel and the
484 * w channel. Pair scheduling might be able to put it back
485 * together, but we don't trust it that much.
486 *
487 * Next is W already, rewrite the original inst and we are done.
488 */
489 if (next_chan == 3) {
490 clear_channels(inst, (1 << chan) | (1 << next_chan));
491 return true;
492 }
493
494 struct rc_instruction * new_inst;
495 new_inst = rc_insert_new_instruction(c, inst->Prev);
496 memcpy(&new_inst->U.I, &inst->U.I, sizeof(struct rc_sub_instruction));
497 clear_channels(new_inst, 1 << chan);
498 break;
499 }
500
501 /* No next chan */
502 if (next_chan == 4) {
503 clear_channels(inst, 1 << chan);
504 return true;
505 }
506 }
507 assert(0 && "Unreachable\n");
508 return false;
509 }
510
rc_dataflow_swizzles(struct radeon_compiler * c,void * user)511 void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
512 {
513 struct rc_instruction * inst;
514
515 for(inst = c->Program.Instructions.Next;
516 inst != &c->Program.Instructions;
517 inst = inst->Next) {
518 const struct rc_opcode_info * opcode =
519 rc_get_opcode_info(inst->U.I.Opcode);
520 unsigned src, usemask;
521 unsigned total_splits = 0;
522 struct rc_swizzle_split split;
523
524 /* If multiple sources needs splitting or some source needs to split
525 * too many times, it is actually better to just split the whole ALU
526 * instruction to separate channels instead of inserting extra movs.
527 */
528 for (src = 0; src < opcode->NumSrcRegs; ++src) {
529 /* Don't count invalid swizzles from immediates, we can just
530 * insert new immediates with the correct order later.
531 */
532 if (rc_src_reg_is_immediate(c, inst->U.I.SrcReg[src].File,
533 inst->U.I.SrcReg[src].Index)
534 && c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS) {
535 total_splits++;
536 } else {
537 total_splits += get_swizzle_split(c, &split, inst,
538 src, &usemask);
539 }
540 }
541
542 /* Even if there is only a single split, i.e., two extra movs, this still
543 * accounts to three instructions, the same as when we split
544 * the original instruction right away.
545 */
546 if (total_splits > opcode->NumSrcRegs && opcode->IsComponentwise) {
547 if (try_splitting_instruction(c, inst))
548 continue;
549 }
550
551 /* For texturing or non-componentwise opcodes we do the old way
552 * of adding extra movs.
553 */
554 for(src = 0; src < opcode->NumSrcRegs; ++src) {
555 struct rc_src_register *reg = &inst->U.I.SrcReg[src];
556 if (c->SwizzleCaps->IsNative(inst->U.I.Opcode, *reg)) {
557 continue;
558 }
559 if (!c->is_r500 &&
560 c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS &&
561 (!opcode->HasTexture && inst->U.I.Opcode != RC_OPCODE_KIL) &&
562 try_rewrite_constant(c, reg)) {
563 continue;
564 }
565 rewrite_source(c, inst, src);
566 }
567 }
568 if (c->Debug & RC_DBG_LOG)
569 rc_constants_print(&c->Program.Constants, NULL);
570 }
571