xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2009 Nicolai Haehnle.
3  * Copyright 2012 Advanced Micro Devices, Inc.
4  * Authors:
5  *   Nicolai Haehnle
6  *   Tom Stellard <[email protected]>
7  * SPDX-License-Identifier: MIT
8  */
9 
10 #include "radeon_dataflow.h"
11 
12 #include "radeon_code.h"
13 #include "radeon_compiler.h"
14 #include "radeon_compiler_util.h"
15 #include "radeon_swizzle.h"
16 
get_swizzle_split(struct radeon_compiler * c,struct rc_swizzle_split * split,struct rc_instruction * inst,unsigned src,unsigned * usemask)17 static unsigned int get_swizzle_split(struct radeon_compiler * c,
18 		struct rc_swizzle_split * split, struct rc_instruction * inst,
19 		unsigned src, unsigned * usemask)
20 {
21 	*usemask = 0;
22 	for(unsigned int chan = 0; chan < 4; ++chan) {
23 		if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
24 			*usemask |= 1 << chan;
25 	}
26 
27 	c->SwizzleCaps->Split(inst->U.I.SrcReg[src], *usemask, split);
28 	return split->NumPhases;
29 }
30 
rewrite_source(struct radeon_compiler * c,struct rc_instruction * inst,unsigned src)31 static void rewrite_source(struct radeon_compiler * c,
32 		struct rc_instruction * inst, unsigned src)
33 {
34 	struct rc_swizzle_split split;
35 	unsigned int tempreg = rc_find_free_temporary(c);
36 	unsigned int usemask;
37 
38 	get_swizzle_split(c, &split, inst, src, &usemask);
39 
40 	for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
41 		struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
42 		unsigned int masked_negate;
43 
44 		mov->U.I.Opcode = RC_OPCODE_MOV;
45 		mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
46 		mov->U.I.DstReg.Index = tempreg;
47 		mov->U.I.DstReg.WriteMask = split.Phase[phase];
48 		mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
49 		mov->U.I.PreSub = inst->U.I.PreSub;
50 
51 		for(unsigned int chan = 0; chan < 4; ++chan) {
52 			if (!GET_BIT(split.Phase[phase], chan))
53 				SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
54 		}
55 
56 		masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
57 		if (masked_negate == 0)
58 			mov->U.I.SrcReg[0].Negate = 0;
59 		else if (masked_negate == split.Phase[phase])
60 			mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
61 
62 	}
63 
64 	inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
65 	inst->U.I.SrcReg[src].Index = tempreg;
66 	inst->U.I.SrcReg[src].Swizzle = 0;
67 	inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
68 	inst->U.I.SrcReg[src].Abs = 0;
69 	for(unsigned int chan = 0; chan < 4; ++chan) {
70 		SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
71 				GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
72 	}
73 }
74 
75 /**
76  * This function will attempt to rewrite non-native swizzles that read from
77  * immediate registers by rearranging the immediates to allow the
78  * instruction to use native swizzles.
79  */
try_rewrite_constant(struct radeon_compiler * c,struct rc_src_register * reg)80 static unsigned try_rewrite_constant(struct radeon_compiler *c,
81 					struct rc_src_register *reg)
82 {
83 	unsigned new_swizzle, chan, swz0, swz1, swz2, swz3, found_swizzle, swz;
84 	unsigned all_inline = 0;
85 	bool w_inline_constant = false;
86 	float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f};
87 
88 	if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) {
89 		/* The register does not contain immediates, but if all
90 		 * the swizzles are inline constants, we can still rewrite
91 		 * it. */
92 
93 		new_swizzle = RC_SWIZZLE_XYZW;
94 		for (chan = 0 ; chan < 4; chan++) {
95 			unsigned swz = GET_SWZ(reg->Swizzle, chan);
96 			if (swz <= RC_SWIZZLE_W) {
97 				return 0;
98 			}
99 			if (swz == RC_SWIZZLE_UNUSED) {
100 				SET_SWZ(new_swizzle, chan, RC_SWIZZLE_UNUSED);
101 			}
102 		}
103 		all_inline = 1;
104 	} else {
105 		new_swizzle = reg->Swizzle;
106 	}
107 
108 	swz = RC_SWIZZLE_UNUSED;
109 	found_swizzle = 1;
110 	/* Check if all channels have the same swizzle.  If they do we can skip
111 	 * the search for a native swizzle.  We only need to check the first
112 	 * three channels, because any swizzle is legal in the fourth channel.
113 	 */
114 	for (chan = 0; chan < 3; chan++) {
115 		unsigned chan_swz = GET_SWZ(reg->Swizzle, chan);
116 		if (chan_swz == RC_SWIZZLE_UNUSED) {
117 			continue;
118 		}
119 		if (swz == RC_SWIZZLE_UNUSED) {
120 			swz = chan_swz;
121 		} else if (swz != chan_swz) {
122 			found_swizzle = 0;
123 			break;
124 		}
125 	}
126 
127 	/* Find a legal swizzle */
128 
129 	/* This loop attempts to find a native swizzle where all the
130 	 * channels are different. */
131 	while (!found_swizzle && !all_inline) {
132 		swz0 = GET_SWZ(new_swizzle, 0);
133 		swz1 = GET_SWZ(new_swizzle, 1);
134 		swz2 = GET_SWZ(new_swizzle, 2);
135 
136 		/* Swizzle .W. is never legal. */
137 		if (swz1 == RC_SWIZZLE_W ||
138 			swz1 == RC_SWIZZLE_UNUSED ||
139 			swz1 == RC_SWIZZLE_ZERO ||
140 			swz1 == RC_SWIZZLE_HALF ||
141 			swz1 == RC_SWIZZLE_ONE) {
142 			/* We chose Z, because there are two non-repeating
143 			 * swizzle combinations of the form .Z. There are
144 			 * only one combination each for .X. and .Y. */
145 			SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
146 			continue;
147 		}
148 
149 		if (swz2 == RC_SWIZZLE_UNUSED) {
150 			/* We choose Y, because there are two non-repeating
151 			 * swizzle combinations of the form ..Y */
152 			SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
153 			continue;
154 		}
155 
156 		switch (swz0) {
157 		/* X.. */
158 		case RC_SWIZZLE_X:
159 			/* Legal swizzles that start with X: XYZ, XXX */
160 			switch (swz1) {
161 			/* XX. */
162 			case RC_SWIZZLE_X:
163 				/*  The new swizzle will be:
164 				 *  ZXY (XX. => ZX. => ZXY) */
165 				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
166 				break;
167 			/* XY. */
168 			case RC_SWIZZLE_Y:
169 				/* The new swizzle is XYZ */
170 				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z);
171 				found_swizzle = 1;
172 				break;
173 			/* XZ. */
174 			case RC_SWIZZLE_Z:
175 				/* XZZ */
176 				if (swz2 == RC_SWIZZLE_Z) {
177 					/* The new swizzle is XYZ */
178 					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y);
179 					found_swizzle = 1;
180 				} else { /* XZ[^Z] */
181 					/* The new swizzle will be:
182 					 * YZX (XZ. => YZ. => YZX) */
183 					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y);
184 				}
185 				break;
186 			/* XW. Should have already been handled. */
187 			case RC_SWIZZLE_W:
188 				assert(0);
189 				break;
190 			}
191 			break;
192 		/* Y.. */
193 		case RC_SWIZZLE_Y:
194 			/* Legal swizzles that start with Y: YYY, YZX */
195 			switch (swz1) {
196 			/* YY. */
197 			case RC_SWIZZLE_Y:
198 				/* The new swizzle will be:
199 				 * XYZ (YY. => XY. => XYZ) */
200 				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
201 				break;
202 			/* YZ. */
203 			case RC_SWIZZLE_Z:
204 				/* The new swizzle is YZX */
205 				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X);
206 				found_swizzle = 1;
207 				break;
208 			/* YX. */
209 			case RC_SWIZZLE_X:
210 				/* YXX */
211 				if (swz2 == RC_SWIZZLE_X) {
212 					/*The new swizzle is YZX */
213 					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
214 					found_swizzle = 1;
215 				} else { /* YX[^X] */
216 					/* The new swizzle will be:
217 					 * ZXY (YX. => ZX. -> ZXY) */
218 					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
219 				}
220 				break;
221 			/* YW. Should have already been handled. */
222 			case RC_SWIZZLE_W:
223 				assert(0);
224 				break;
225 			}
226 			break;
227 		/* Z.. */
228 		case RC_SWIZZLE_Z:
229 			/* Legal swizzles that start with Z: ZZZ, ZXY */
230 			switch (swz1) {
231 			/* ZZ. */
232 			case RC_SWIZZLE_Z:
233 				/* The new swizzle will be:
234 				 * WZY (ZZ. => WZ. => WZY) */
235 				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W);
236 				break;
237 			/* ZX. */
238 			case RC_SWIZZLE_X:
239 				/* The new swizzle is ZXY */
240 				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
241 				found_swizzle = 1;
242 				break;
243 			/* ZY. */
244 			case RC_SWIZZLE_Y:
245 				/* ZYY */
246 				if (swz2 == RC_SWIZZLE_Y) {
247 					/* The new swizzle is ZXY */
248 					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X);
249 					found_swizzle = 1;
250 				} else { /* ZY[^Y] */
251 					/* The new swizzle will be:
252 					 * XYZ (ZY. => XY. => XYZ) */
253 					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
254 				}
255 				break;
256 			/* ZW. Should have already been handled. */
257 			case RC_SWIZZLE_W:
258 				assert(0);
259 				break;
260 			}
261 			break;
262 
263 		/* W.. */
264 		case RC_SWIZZLE_W:
265 			/* Legal swizzles that start with X: WWW, WZY */
266 			switch (swz1) {
267 			/* WW. Should have already been handled. */
268 			case RC_SWIZZLE_W:
269 				assert(0);
270 				break;
271 			/* WZ. */
272 			case RC_SWIZZLE_Z:
273 				/* The new swizzle will be WZY */
274 				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
275 				found_swizzle = 1;
276 				break;
277 			/* WX. */
278 			case RC_SWIZZLE_X:
279 			/* WY. */
280 			case RC_SWIZZLE_Y:
281 				/* W[XY]Y */
282 				if (swz2 == RC_SWIZZLE_Y) {
283 					/* The new swizzle will be WZY */
284 					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
285 					found_swizzle = 1;
286 				} else { /* W[XY][^Y] */
287 					/* The new swizzle will be:
288 					 * ZXY (WX. => XX. => ZX. => ZXY) or
289 					 * XYZ (WY. => XY. => XYZ)
290 					 */
291 					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
292 				}
293 				break;
294 			}
295 			break;
296 		/* U.. 0.. 1.. H..*/
297 		case RC_SWIZZLE_UNUSED:
298 		case RC_SWIZZLE_ZERO:
299 		case RC_SWIZZLE_ONE:
300 		case RC_SWIZZLE_HALF:
301 			SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
302 			break;
303 		}
304 	}
305 
306 	/* Handle the swizzle in the w channel. */
307 	swz3 = GET_SWZ(reg->Swizzle, 3);
308 
309 	/* We can skip this if the swizzle in channel w is an inline constant. */
310 	if (is_swizzle_inline_constant(swz3)) {
311 		w_inline_constant = true;
312 	} else {
313 		for (chan = 0; chan < 3; chan++) {
314 			unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
315 			unsigned new_swz = GET_SWZ(new_swizzle, chan);
316 			/* If the swizzle in the w channel is the same as the
317 			 * swizzle in any other channels, we need to rewrite it.
318 			 * For example:
319 			 * reg->Swizzle == XWZW
320 			 * new_swizzle  == XYZX
321 			 * Since the swizzle in the y channel is being
322 			 * rewritten from W -> Y we need to change the swizzle
323 			 * in the w channel from W -> Y as well.
324 			 */
325 			if (old_swz == swz3) {
326 				SET_SWZ(new_swizzle, 3,
327 						GET_SWZ(new_swizzle, chan));
328 				break;
329 			}
330 
331 			/* The swizzle in channel w will be overwritten by one
332 			 * of the new swizzles. */
333 			if (new_swz == swz3) {
334 				/* Find an unused swizzle */
335 				unsigned i;
336 				unsigned used = 0;
337 				for (i = 0; i < 3; i++) {
338 					used |= 1 << GET_SWZ(new_swizzle, i);
339 				}
340 				for (i = 0; i < 4; i++) {
341 					if (used & (1 << i)) {
342 						continue;
343 					}
344 					SET_SWZ(new_swizzle, 3, i);
345 				}
346 			}
347 		}
348 	}
349 
350 	for (chan = 0; chan < 4; chan++) {
351 		unsigned old_swz = GET_SWZ(reg->Swizzle, chan);
352 		unsigned new_swz = GET_SWZ(new_swizzle, chan);
353 
354 		if (old_swz == RC_SWIZZLE_UNUSED) {
355 			continue;
356 		}
357 
358 		/* We don't need to change the swizzle in channel w if it is
359 		 * an inline constant.  These are always legal in the w channel.
360 		 *
361 		 * Swizzles with a value > RC_SWIZZLE_W are inline constants.
362 		 */
363 		if (chan == 3 && w_inline_constant) {
364 			continue;
365 		}
366 
367 		if (new_swz > RC_SWIZZLE_W) {
368 			rc_error(c, "Bad swizzle in try_rewrite_constant()");
369 			new_swz = RC_SWIZZLE_X;
370 		}
371 
372 		switch (old_swz) {
373 		case RC_SWIZZLE_ZERO:
374 			imms[new_swz] = 0.0f;
375 			break;
376 		case RC_SWIZZLE_HALF:
377 			if (reg->Negate & (1 << chan)) {
378 				imms[new_swz] = -0.5f;
379 			} else {
380 				imms[new_swz] = 0.5f;
381 			}
382 			break;
383 		case RC_SWIZZLE_ONE:
384 			if (reg->Negate & (1 << chan)) {
385 				imms[new_swz] = -1.0f;
386 			} else {
387 				imms[new_swz] = 1.0f;
388 			}
389 			break;
390 		default:
391 			imms[new_swz] = rc_get_constant_value(c, reg->Index,
392 					reg->Swizzle, reg->Negate, chan);
393 		}
394 		SET_SWZ(reg->Swizzle, chan, new_swz);
395 	}
396 	reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants,
397 							imms);
398 	/* We need to set the register file to CONSTANT in case we are
399 	 * converting a non-constant register with constant swizzles (e.g.
400 	 * ONE, ZERO, HALF).
401 	 */
402 	reg->File = RC_FILE_CONSTANT;
403 	reg->Negate = w_inline_constant ? reg->Negate & (1 << 3) : 0;
404 	return 1;
405 }
406 
407 /**
408  * Set all channels not specified by writemaks to unused.
409  */
clear_channels(struct rc_instruction * inst,unsigned writemask)410 static void clear_channels(struct rc_instruction * inst, unsigned writemask)
411 {
412 	inst->U.I.DstReg.WriteMask = writemask;
413 	for (unsigned chan = 0; chan < 4; chan++) {
414 		if (writemask & (1 << chan))
415 			continue;
416 
417 		const struct rc_opcode_info * opcode =
418 					rc_get_opcode_info(inst->U.I.Opcode);
419 		for (unsigned src = 0; src < opcode->NumSrcRegs; src++) {
420 			SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
421 		}
422 	}
423 	/* TODO: We could in theory add constant swizzles back as well,
424 	 * they will be all legal when we have just a single channel,
425 	 * to save some sources and help the pair scheduling later. */
426 }
427 
try_splitting_single_channel(struct radeon_compiler * c,struct rc_instruction * inst)428 static bool try_splitting_single_channel(struct radeon_compiler * c,
429 						struct rc_instruction * inst)
430 {
431 	for (unsigned chan = 0; chan < 3; chan++) {
432 		struct rc_instruction * new_inst;
433 		new_inst = rc_insert_new_instruction(c, inst);
434 		memcpy(&new_inst->U.I, &inst->U.I, sizeof(struct rc_sub_instruction));
435 		clear_channels(new_inst, inst->U.I.DstReg.WriteMask ^ (1 << chan));
436 
437 		const struct rc_opcode_info * opcode =
438 			rc_get_opcode_info(new_inst->U.I.Opcode);
439 		bool valid_swizzles = true;
440 
441 		for (unsigned src = 0; src < opcode->NumSrcRegs; ++src) {
442 			struct rc_src_register *reg = &new_inst->U.I.SrcReg[src];
443 
444 			if (!c->SwizzleCaps->IsNative(new_inst->U.I.Opcode, *reg))
445 				valid_swizzles = false;
446 		}
447 
448 		if (!valid_swizzles) {
449 			rc_remove_instruction(new_inst);
450 		} else {
451 			clear_channels(inst, 1 << chan);
452 			return true;
453 		}
454 	}
455 	return false;
456 }
457 
try_splitting_instruction(struct radeon_compiler * c,struct rc_instruction * inst)458 static bool try_splitting_instruction(struct radeon_compiler * c,
459 					struct rc_instruction * inst)
460 {
461 	/* Adding more output instructions in FS is bad for performance. */
462 	if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
463 		return false;
464 
465 	/* When only single channel of the swizzle is wrong, like xwzw,
466 	 * it is best to just split the single channel out.
467 	 */
468 	if (inst->U.I.DstReg.WriteMask == RC_MASK_XYZW ||
469 		inst->U.I.DstReg.WriteMask == RC_MASK_XYZ) {
470 		if (try_splitting_single_channel(c, inst))
471 			return true;
472 	}
473 
474 	for (unsigned chan = 0; chan < 3; chan++) {
475 		if (!(inst->U.I.DstReg.WriteMask & (1 << chan)))
476 			continue;
477 
478 		unsigned next_chan;
479 		for (next_chan = chan + 1; next_chan < 4; next_chan++) {
480 			if (!(inst->U.I.DstReg.WriteMask & (1 << next_chan)))
481 				continue;
482 
483 			/* We don't want to split the last used x/y/z channel and the
484 			 * w channel. Pair scheduling might be able to put it back
485 			 * together, but we don't trust it that much.
486 			 *
487 			 * Next is W already, rewrite the original inst and we are done.
488 			 */
489 			if (next_chan == 3) {
490 				clear_channels(inst, (1 << chan) | (1 << next_chan));
491 				return true;
492 			}
493 
494 			struct rc_instruction * new_inst;
495 			new_inst = rc_insert_new_instruction(c, inst->Prev);
496 			memcpy(&new_inst->U.I, &inst->U.I, sizeof(struct rc_sub_instruction));
497 			clear_channels(new_inst, 1 << chan);
498 			break;
499 		}
500 
501 		/* No next chan */
502 		if (next_chan == 4) {
503 			clear_channels(inst, 1 << chan);
504 			return true;
505 		}
506 	}
507 	assert(0 && "Unreachable\n");
508 	return false;
509 }
510 
rc_dataflow_swizzles(struct radeon_compiler * c,void * user)511 void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
512 {
513 	struct rc_instruction * inst;
514 
515 	for(inst = c->Program.Instructions.Next;
516 					inst != &c->Program.Instructions;
517 					inst = inst->Next) {
518 		const struct rc_opcode_info * opcode =
519 					rc_get_opcode_info(inst->U.I.Opcode);
520 		unsigned src, usemask;
521 		unsigned total_splits = 0;
522 		struct rc_swizzle_split split;
523 
524 		/* If multiple sources needs splitting or some source needs to split
525 		 * too many times, it is actually better to just split the whole ALU
526 		 * instruction to separate channels instead of inserting extra movs.
527 		 */
528 		for (src = 0; src < opcode->NumSrcRegs; ++src) {
529 			/* Don't count invalid swizzles from immediates, we can just
530 			 * insert new immediates with the correct order later.
531 			 */
532 			if (rc_src_reg_is_immediate(c, inst->U.I.SrcReg[src].File,
533 							inst->U.I.SrcReg[src].Index)
534 				&& c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS) {
535 				total_splits++;
536 			} else {
537 				total_splits += get_swizzle_split(c, &split, inst,
538 									src, &usemask);
539 			}
540 		}
541 
542 		/* Even if there is only a single split, i.e., two extra movs, this still
543 		 * accounts to three instructions, the same as when we split
544 		 * the original instruction right away.
545 		 */
546 		if (total_splits > opcode->NumSrcRegs && opcode->IsComponentwise) {
547 			if (try_splitting_instruction(c, inst))
548 				continue;
549 		}
550 
551 		/* For texturing or non-componentwise opcodes we do the old way
552 		 * of adding extra movs.
553 		 */
554 		for(src = 0; src < opcode->NumSrcRegs; ++src) {
555 			struct rc_src_register *reg = &inst->U.I.SrcReg[src];
556 			if (c->SwizzleCaps->IsNative(inst->U.I.Opcode, *reg)) {
557 				continue;
558 			}
559 			if (!c->is_r500 &&
560 			    c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS &&
561 			    (!opcode->HasTexture && inst->U.I.Opcode != RC_OPCODE_KIL) &&
562 			    try_rewrite_constant(c, reg)) {
563 				continue;
564 			}
565 			rewrite_source(c, inst, src);
566 		}
567 	}
568 	if (c->Debug & RC_DBG_LOG)
569 		rc_constants_print(&c->Program.Constants, NULL);
570 }
571