xref: /aosp_15_r20/external/mesa3d/src/imagination/rogue/rogue_info.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "rogue.h"
25 
26 /**
27  * \file rogue_info.c
28  *
29  * \brief Contains information and definitions for defined types and structures.
30  */
31 
32 /* TODO: Adjust according to core configurations. */
33 /* TODO: Remaining restrictions, e.g. some registers are only
34  * usable by a particular instruction (vertex output) etc. */
35 #define S(n) BITFIELD64_BIT(ROGUE_IO_S##n - 1)
36 const rogue_reg_info rogue_reg_infos[ROGUE_REG_CLASS_COUNT] = {
37    [ROGUE_REG_CLASS_INVALID] = { .name = "!INVALID!", .str = "!INVALID!", },
38    [ROGUE_REG_CLASS_SSA] = { .name = "ssa", .str = "R", },
39    [ROGUE_REG_CLASS_TEMP] = { .name = "temp", .str = "r", .num = 248, },
40    [ROGUE_REG_CLASS_COEFF] = { .name = "coeff", .str = "cf", .num = 4096, .supported_io_srcs = S(0) | S(2) | S(3), },
41    [ROGUE_REG_CLASS_SHARED] = { .name = "shared", .str = "sh", .num = 4096, .supported_io_srcs = S(0) | S(2) | S(3), },
42    [ROGUE_REG_CLASS_SPECIAL] = { .name = "special", .str = "sr", .num = 240, }, /* TODO NEXT: Only S1, S2, S4. */
43    [ROGUE_REG_CLASS_INTERNAL] = { .name = "internal", .str = "i", .num = 8, },
44    [ROGUE_REG_CLASS_CONST] = { .name = "const", .str = "sc", .num = 240, },
45    [ROGUE_REG_CLASS_PIXOUT] = { .name = "pixout", .str = "po", .num = 8, .supported_io_srcs = S(0) | S(2) | S(3), },
46    [ROGUE_REG_CLASS_VTXIN] = { .name = "vtxin", .str = "vi", .num = 248, },
47    [ROGUE_REG_CLASS_VTXOUT] = { .name = "vtxout", .str = "vo", .num = 256, },
48 };
49 #undef S
50 
51 const rogue_regalloc_info regalloc_info[ROGUE_REGALLOC_CLASS_COUNT] = {
52    [ROGUE_REGALLOC_CLASS_TEMP_1] = { .class = ROGUE_REG_CLASS_TEMP, .stride = 1, },
53    [ROGUE_REGALLOC_CLASS_TEMP_2] = { .class = ROGUE_REG_CLASS_TEMP, .stride = 2, },
54    [ROGUE_REGALLOC_CLASS_TEMP_4] = { .class = ROGUE_REG_CLASS_TEMP, .stride = 4, },
55 };
56 
57 const rogue_reg_dst_info rogue_reg_dst_infos[ROGUE_REG_DST_VARIANTS] = {
58    {
59       .num_dsts = 1,
60       .bank_bits = { 1 },
61       .index_bits = { 6 },
62       .bytes = 1,
63    },
64    {
65       .num_dsts = 1,
66       .bank_bits = { 3 },
67       .index_bits = { 11 },
68       .bytes = 2,
69    },
70    {
71       .num_dsts = 2,
72       .bank_bits = { 1, 1 },
73       .index_bits = { 7, 6 },
74       .bytes = 2,
75    },
76    {
77       .num_dsts = 2,
78       .bank_bits = { 3, 3 },
79       .index_bits = { 8, 8 },
80       .bytes = 3,
81    },
82    {
83       .num_dsts = 2,
84       .bank_bits = { 3, 3 },
85       .index_bits = { 11, 11 },
86       .bytes = 4,
87    },
88 };
89 
90 const rogue_reg_src_info rogue_reg_lower_src_infos[ROGUE_REG_SRC_VARIANTS] = {
91    {
92       .num_srcs = 1,
93       .mux_bits = 0,
94       .bank_bits = { 1 },
95       .index_bits = { 6 },
96       .bytes = 1,
97    },
98    {
99       .num_srcs = 1,
100       .mux_bits = 2,
101       .bank_bits = { 3 },
102       .index_bits = { 11 },
103       .bytes = 3,
104    },
105    {
106       .num_srcs = 2,
107       .mux_bits = 0,
108       .bank_bits = { 1, 1 },
109       .index_bits = { 6, 5 },
110       .bytes = 2,
111    },
112    {
113       .num_srcs = 2,
114       .mux_bits = 2,
115       .bank_bits = { 2, 2 },
116       .index_bits = { 7, 7 },
117       .bytes = 3,
118    },
119    {
120       .num_srcs = 2,
121       .mux_bits = 3,
122       .bank_bits = { 3, 2 },
123       .index_bits = { 11, 8 },
124       .bytes = 4,
125    },
126    {
127       .num_srcs = 3,
128       .mux_bits = 2,
129       .bank_bits = { 2, 2, 2 },
130       .index_bits = { 7, 7, 6 },
131       .bytes = 4,
132    },
133    {
134       .num_srcs = 3,
135       .mux_bits = 3,
136       .bank_bits = { 3, 2, 3 },
137       .index_bits = { 8, 8, 8 },
138       .bytes = 5,
139    },
140    {
141       .num_srcs = 3,
142       .mux_bits = 3,
143       .bank_bits = { 3, 2, 3 },
144       .index_bits = { 11, 8, 11 },
145       .bytes = 6,
146    },
147 };
148 
149 const rogue_reg_src_info rogue_reg_upper_src_infos[ROGUE_REG_SRC_VARIANTS] = {
150    {
151       .num_srcs = 1,
152       .bank_bits = { 1 },
153       .index_bits = { 6 },
154       .bytes = 1,
155    },
156    {
157       .num_srcs = 1,
158       .bank_bits = { 3 },
159       .index_bits = { 11 },
160       .bytes = 3,
161    },
162    {
163       .num_srcs = 2,
164       .bank_bits = { 1, 1 },
165       .index_bits = { 6, 5 },
166       .bytes = 2,
167    },
168    {
169       .num_srcs = 2,
170       .bank_bits = { 2, 2 },
171       .index_bits = { 7, 7 },
172       .bytes = 3,
173    },
174    {
175       .num_srcs = 2,
176       .bank_bits = { 3, 2 },
177       .index_bits = { 11, 8 },
178       .bytes = 4,
179    },
180    {
181       .num_srcs = 3,
182       .bank_bits = { 2, 2, 2 },
183       .index_bits = { 7, 7, 6 },
184       .bytes = 4,
185    },
186    {
187       .num_srcs = 3,
188       .bank_bits = { 3, 2, 2 },
189       .index_bits = { 8, 8, 8 },
190       .bytes = 5,
191    },
192    {
193       .num_srcs = 3,
194       .bank_bits = { 3, 2, 2 },
195       .index_bits = { 11, 8, 8 },
196       .bytes = 6,
197    },
198 };
199 
200 #define OM(op_mod) BITFIELD64_BIT(ROGUE_ALU_OP_MOD_##op_mod)
201 const rogue_alu_op_mod_info rogue_alu_op_mod_infos[ROGUE_ALU_OP_MOD_COUNT] = {
202    [ROGUE_ALU_OP_MOD_LP] = { .str = "lp", },
203    [ROGUE_ALU_OP_MOD_SAT] = { .str = "sat", },
204    [ROGUE_ALU_OP_MOD_SCALE] = { .str = "scale", },
205    [ROGUE_ALU_OP_MOD_ROUNDZERO] = { .str = "roundzero", },
206 
207    [ROGUE_ALU_OP_MOD_Z] = { .str = "z", .exclude = OM(GZ) | OM(GEZ) | OM(C) | OM(E) | OM(G) | OM(GE) | OM(NE) | OM(L) | OM(LE) },
208    [ROGUE_ALU_OP_MOD_GZ] = { .str = "gz", .exclude = OM(Z) | OM(GEZ) | OM(C) | OM(E) | OM(G) | OM(GE) | OM(NE) | OM(L) | OM(LE) },
209    [ROGUE_ALU_OP_MOD_GEZ] = { .str = "gez", .exclude = OM(Z) | OM(GZ) | OM(C) | OM(E) | OM(G) | OM(GE) | OM(NE) | OM(L) | OM(LE) },
210    [ROGUE_ALU_OP_MOD_C] = { .str = "c", .exclude = OM(Z) | OM(GZ) | OM(GEZ) | OM(E) | OM(G) | OM(GE) | OM(NE) | OM(L) | OM(LE) },
211    [ROGUE_ALU_OP_MOD_E] = { .str = "e", .exclude = OM(Z) | OM(GZ) | OM(GEZ) | OM(C) | OM(G) | OM(GE) | OM(NE) | OM(L) | OM(LE) },
212    [ROGUE_ALU_OP_MOD_G] = { .str = "g", .exclude = OM(Z) | OM(GZ) | OM(GEZ) | OM(C) | OM(E) | OM(GE) | OM(NE) | OM(L) | OM(LE) },
213    [ROGUE_ALU_OP_MOD_GE] = { .str = "ge", .exclude = OM(Z) | OM(GZ) | OM(GEZ) | OM(C) | OM(E) | OM(G) | OM(NE) | OM(L) | OM(LE) },
214    [ROGUE_ALU_OP_MOD_NE] = { .str = "ne", .exclude = OM(Z) | OM(GZ) | OM(GEZ) | OM(C) | OM(E) | OM(G) | OM(GE) | OM(L) | OM(LE) },
215    [ROGUE_ALU_OP_MOD_L] = { .str = "l", .exclude = OM(Z) | OM(GZ) | OM(GEZ) | OM(C) | OM(E) | OM(G) | OM(GE) | OM(NE) | OM(LE) },
216    [ROGUE_ALU_OP_MOD_LE] = { .str = "le", .exclude = OM(Z) | OM(GZ) | OM(GEZ) | OM(C) | OM(E) | OM(G) | OM(GE) | OM(NE) | OM(L) },
217 
218    [ROGUE_ALU_OP_MOD_F32] = { .str = "f32", .exclude = OM(U16) | OM(S16) | OM(U8) | OM(S8) | OM(U32) | OM(S32) },
219    [ROGUE_ALU_OP_MOD_U16] = { .str = "u16", .exclude = OM(F32) | OM(S16) | OM(U8) | OM(S8) | OM(U32) | OM(S32) },
220    [ROGUE_ALU_OP_MOD_S16] = { .str = "s16", .exclude = OM(F32) | OM(U16) | OM(U8) | OM(S8) | OM(U32) | OM(S32) },
221    [ROGUE_ALU_OP_MOD_U8] = { .str = "u8", .exclude = OM(F32) | OM(U16) | OM(S16) | OM(S8) | OM(U32) | OM(S32) },
222    [ROGUE_ALU_OP_MOD_S8] = { .str = "s8", .exclude = OM(F32) | OM(U16) | OM(S16) | OM(U8) | OM(U32) | OM(S32) },
223    [ROGUE_ALU_OP_MOD_U32] = { .str = "u32", .exclude = OM(F32) | OM(U16) | OM(S16) | OM(U8) | OM(S8) | OM(S32) },
224    [ROGUE_ALU_OP_MOD_S32] = { .str = "s32", .exclude = OM(F32) | OM(U16) | OM(S16) | OM(U8) | OM(S8) | OM(U32) },
225 };
226 #undef OM
227 
228 const rogue_alu_dst_mod_info rogue_alu_dst_mod_infos[ROGUE_ALU_DST_MOD_COUNT] = {
229    [ROGUE_ALU_DST_MOD_E0] = { .str = "e0", },
230    [ROGUE_ALU_DST_MOD_E1] = { .str = "e1", },
231    [ROGUE_ALU_DST_MOD_E2] = { .str = "e2", },
232    [ROGUE_ALU_DST_MOD_E3] = { .str = "e3", },
233 };
234 
235 const rogue_alu_src_mod_info rogue_alu_src_mod_infos[ROGUE_ALU_SRC_MOD_COUNT] = {
236    [ROGUE_ALU_SRC_MOD_FLR] = { .str = "flr", },
237    [ROGUE_ALU_SRC_MOD_ABS] = { .str = "abs", },
238    [ROGUE_ALU_SRC_MOD_NEG] = { .str = "neg", },
239    [ROGUE_ALU_SRC_MOD_E0] = { .str = "e0", },
240    [ROGUE_ALU_SRC_MOD_E1] = { .str = "e1", },
241    [ROGUE_ALU_SRC_MOD_E2] = { .str = "e2", },
242    [ROGUE_ALU_SRC_MOD_E3] = { .str = "e3", },
243 };
244 
245 #define OM(op_mod) BITFIELD64_BIT(ROGUE_CTRL_OP_MOD_##op_mod)
246 const rogue_ctrl_op_mod_info rogue_ctrl_op_mod_infos[ROGUE_CTRL_OP_MOD_COUNT] = {
247    [ROGUE_CTRL_OP_MOD_LINK] = { .str = "link", },
248    [ROGUE_CTRL_OP_MOD_ALLINST] = { .str = "allinst", .exclude = OM(ANYINST) },
249    [ROGUE_CTRL_OP_MOD_ANYINST] = { .str = "anyinst", .exclude = OM(ALLINST) },
250    [ROGUE_CTRL_OP_MOD_END] = { .str = "end", },
251 };
252 #undef OM
253 
254 #define OM(op_mod) BITFIELD64_BIT(ROGUE_CTRL_OP_MOD_##op_mod)
255 #define T(type) BITFIELD64_BIT(ROGUE_REF_TYPE_##type - 1)
256 const rogue_ctrl_op_info rogue_ctrl_op_infos[ROGUE_CTRL_OP_COUNT] = {
257 	[ROGUE_CTRL_OP_INVALID] = { .str = "!INVALID!", },
258 	[ROGUE_CTRL_OP_END] = { .str = "end", .ends_block = true, },
259 	[ROGUE_CTRL_OP_NOP] = { .str = "nop",
260 		.supported_op_mods = OM(END),
261 	},
262 	[ROGUE_CTRL_OP_WOP] = { .str = "wop", },
263 	[ROGUE_CTRL_OP_BR] = { .str = "br", .has_target = true, .ends_block = true,
264 		.supported_op_mods = OM(LINK) | OM(ALLINST) | OM(ANYINST),
265    },
266 	[ROGUE_CTRL_OP_BA] = { .str = "ba", .ends_block = true, .num_srcs = 1,
267 		.supported_op_mods = OM(LINK) | OM(ALLINST) | OM(ANYINST),
268       .supported_src_types = { [0] = T(VAL), },
269    },
270 	[ROGUE_CTRL_OP_WDF] = { .str = "wdf", .num_srcs = 1,
271       .supported_src_types = { [0] = T(DRC), },
272    },
273 };
274 #undef T
275 #undef OM
276 
277 #define IO(io) ROGUE_IO_##io
278 #define OM(op_mod) BITFIELD64_BIT(ROGUE_BACKEND_OP_MOD_##op_mod)
279 #define T(type) BITFIELD64_BIT(ROGUE_REF_TYPE_##type - 1)
280 #define B(n) BITFIELD64_BIT(n)
281 const rogue_backend_op_info rogue_backend_op_infos[ROGUE_BACKEND_OP_COUNT] = {
282 	[ROGUE_BACKEND_OP_INVALID] = { .str = "!INVALID!", },
283    [ROGUE_BACKEND_OP_UVSW_WRITE] = { .str = "uvsw.write", .num_dsts = 1, .num_srcs = 1,
284       .phase_io = { .src[0] = IO(W0), },
285       .supported_dst_types = { [0] = T(REG), },
286       .supported_src_types = { [0] = T(REG), },
287    },
288    [ROGUE_BACKEND_OP_UVSW_EMIT] = { .str = "uvsw.emit", },
289    [ROGUE_BACKEND_OP_UVSW_ENDTASK] = { .str = "uvsw.endtask", },
290 
291    [ROGUE_BACKEND_OP_UVSW_EMITTHENENDTASK] = { .str = "uvsw.emitthenendtask", },
292    [ROGUE_BACKEND_OP_UVSW_WRITETHENEMITTHENENDTASK] = { .str = "uvsw.writethenemitthenendtask", .num_dsts = 1, .num_srcs = 1,
293       .phase_io = { .src[0] = IO(W0), },
294       .supported_dst_types = { [0] = T(REG), },
295       .supported_src_types = { [0] = T(REG), },
296    },
297    [ROGUE_BACKEND_OP_IDF] = { .str = "idf", .num_srcs = 2,
298       .phase_io = { .src[1] = IO(S0), },
299       .supported_src_types = { [0] = T(DRC), [1] = T(REGARRAY), },
300       .src_stride = {
301          [1] = 1,
302       },
303    },
304 
305    [ROGUE_BACKEND_OP_EMITPIX] = { .str = "emitpix", .num_srcs = 2,
306       .phase_io = { .src[0] = IO(S0), .src[1] = IO(S2), },
307       .supported_op_mods = OM(FREEP),
308       .supported_src_types = { [0] = T(REG), [1] = T(REG), },
309    },
310    /* .src[1] and .src[2] can actually be S0-5. */
311    [ROGUE_BACKEND_OP_LD] = { .str = "ld", .num_dsts = 1, .num_srcs = 3,
312       .phase_io = { .dst[0] = IO(S3), .src[2] = IO(S0), },
313       .supported_op_mods = OM(BYPASS) | OM(FORCELINEFILL) | OM(SLCBYPASS) | OM(SLCNOALLOC),
314       .supported_dst_types = { [0] = T(REG) | T(REGARRAY), },
315       .supported_src_types = {
316          [0] = T(DRC),
317          [1] = T(VAL),
318          [2] = T(REGARRAY),
319       },
320       .dst_stride = {
321          [0] = ~0U,
322       },
323       .src_stride = {
324          [2] = 1,
325       },
326    },
327    /* .src[0] and .src[4] can actually be S0-5. */
328    [ROGUE_BACKEND_OP_ST] = { .str = "st", .num_srcs = 6,
329       .phase_io = { .src[0] = IO(S3), .src[4] = IO(S0), },
330       .supported_op_mods = OM(TILED) | OM(WRITETHROUGH) | OM(WRITEBACK) | OM(LAZYWRITEBACK) |
331          OM(SLCBYPASS) | OM(SLCWRITEBACK) | OM(SLCWRITETHROUGH) | OM(SLCNOALLOC),
332       .supported_src_types = {
333          [0] = T(REG) | T(REGARRAY),
334          [1] = T(VAL),
335          [2] = T(DRC),
336          [3] = T(VAL),
337          [4] = T(REGARRAY),
338          [5] = T(IO),
339       },
340       .src_stride = {
341          [4] = 1,
342       },
343    },
344 	[ROGUE_BACKEND_OP_FITR_PIXEL] = { .str = "fitr.pixel", .num_dsts = 1, .num_srcs = 3,
345       .phase_io = { .dst[0] = IO(S3), .src[1] = IO(S0), },
346       .supported_op_mods = OM(SAT),
347       .supported_dst_types = { [0] = T(REG) | T(REGARRAY), },
348       .supported_src_types = {
349          [0] = T(DRC),
350          [1] = T(REGARRAY),
351          [2] = T(VAL),
352       },
353       .dst_stride = {
354          [0] = ~0U,
355       },
356       .src_stride = {
357          [1] = ~0U,
358       },
359    },
360 	[ROGUE_BACKEND_OP_FITRP_PIXEL] = { .str = "fitrp.pixel", .num_dsts = 1, .num_srcs = 4,
361       .phase_io = { .dst[0] = IO(S3), .src[1] = IO(S0), .src[2] = IO(S2), },
362       .supported_op_mods = OM(SAT),
363       .supported_dst_types = { [0] = T(REG), },
364       .supported_src_types = {
365          [0] = T(DRC),
366          [1] = T(REGARRAY),
367          [2] = T(REGARRAY),
368          [3] = T(VAL),
369       },
370       .src_stride = {
371          [1] = 3,
372          [2] = ~0U,
373       },
374    },
375 	[ROGUE_BACKEND_OP_SMP1D] = { .str = "smp1d", .num_dsts = 1, .num_srcs = 6,
376       .phase_io = { .dst[0] = IO(S4), .src[1] = IO(S0), .src[2] = IO(S1), .src[3] = IO(S2), },
377       .supported_op_mods = OM(PROJ) | OM(FCNORM) | OM(NNCOORDS) | OM(BIAS) | OM(REPLACE) |
378          OM(GRADIENT) | OM(PPLOD) | OM(TAO) | OM(SOO) | OM(SNO) | OM(WRT) | OM(DATA) |
379          OM(INFO) | OM(BOTH) | OM(BYPASS) | OM(FORCELINEFILL) | OM(WRITETHROUGH) |
380          OM(WRITEBACK) | OM(LAZYWRITEBACK) | OM(SLCBYPASS) | OM(SLCWRITEBACK) |
381          OM(SLCWRITETHROUGH) | OM(SLCNOALLOC) | OM(ARRAY) | OM(INTEGER) | OM(SCHEDSWAP) |
382          OM(F16),
383       .supported_dst_types = { [0] = T(REG) | T(REGARRAY), },
384       .supported_src_types = {
385          [0] = T(DRC),
386          [1] = T(REGARRAY),
387          [2] = T(REG) | T(REGARRAY),
388          [3] = T(REGARRAY),
389          [4] = T(REGARRAY) | T(IO),
390          [5] = T(VAL),
391       },
392       /* TODO: This may depend on the other options set. */
393       .src_stride = {
394          [1] = 3,
395          [2] = ~0U,
396          [3] = 3,
397          [4] = 1,
398       },
399       .dst_stride = {
400          [0] = ~0U,
401       },
402    },
403 	[ROGUE_BACKEND_OP_SMP2D] = { .str = "smp2d", .num_dsts = 1, .num_srcs = 6,
404       .phase_io = { .dst[0] = IO(S4), .src[1] = IO(S0), .src[2] = IO(S1), .src[3] = IO(S2), },
405       .supported_op_mods = OM(PROJ) | OM(FCNORM) | OM(NNCOORDS) | OM(BIAS) | OM(REPLACE) |
406          OM(GRADIENT) | OM(PPLOD) | OM(TAO) | OM(SOO) | OM(SNO) | OM(WRT) | OM(DATA) |
407          OM(INFO) | OM(BOTH) | OM(BYPASS) | OM(FORCELINEFILL) | OM(WRITETHROUGH) |
408          OM(WRITEBACK) | OM(LAZYWRITEBACK) | OM(SLCBYPASS) | OM(SLCWRITEBACK) |
409          OM(SLCWRITETHROUGH) | OM(SLCNOALLOC) | OM(ARRAY) | OM(INTEGER) | OM(SCHEDSWAP) |
410          OM(F16),
411       .supported_dst_types = { [0] = T(REG) | T(REGARRAY), },
412       .supported_src_types = {
413          [0] = T(DRC),
414          [1] = T(REGARRAY),
415          [2] = T(REG) | T(REGARRAY),
416          [3] = T(REGARRAY),
417          [4] = T(REGARRAY) | T(IO),
418          [5] = T(VAL),
419       },
420       /* TODO: This may depend on the other options set. */
421       .src_stride = {
422          [1] = 3,
423          [2] = ~0U,
424          [3] = 3,
425          [4] = 1,
426       },
427       .dst_stride = {
428          [0] = ~0U,
429       },
430    },
431 	[ROGUE_BACKEND_OP_SMP3D] = { .str = "smp3d", .num_dsts = 1, .num_srcs = 6,
432       .phase_io = { .dst[0] = IO(S4), .src[1] = IO(S0), .src[2] = IO(S1), .src[3] = IO(S2), },
433       .supported_op_mods = OM(PROJ) | OM(FCNORM) | OM(NNCOORDS) | OM(BIAS) | OM(REPLACE) |
434          OM(GRADIENT) | OM(PPLOD) | OM(TAO) | OM(SOO) | OM(SNO) | OM(WRT) | OM(DATA) |
435          OM(INFO) | OM(BOTH) | OM(BYPASS) | OM(FORCELINEFILL) | OM(WRITETHROUGH) |
436          OM(WRITEBACK) | OM(LAZYWRITEBACK) | OM(SLCBYPASS) | OM(SLCWRITEBACK) |
437          OM(SLCWRITETHROUGH) | OM(SLCNOALLOC) | OM(ARRAY) | OM(INTEGER) | OM(SCHEDSWAP) |
438          OM(F16),
439       .supported_dst_types = { [0] = T(REG) | T(REGARRAY), },
440       .supported_src_types = {
441          [0] = T(DRC),
442          [1] = T(REGARRAY),
443          [2] = T(REG) | T(REGARRAY),
444          [3] = T(REGARRAY),
445          [4] = T(REGARRAY) | T(IO),
446          [5] = T(VAL),
447       },
448       /* TODO: This may depend on the other options set. */
449       .src_stride = {
450          [1] = 3,
451          [2] = ~0U,
452          [3] = 3,
453          [4] = 1,
454       },
455       .dst_stride = {
456          [0] = ~0U,
457       },
458    },
459 };
460 #undef B
461 #undef T
462 #undef OM
463 #undef IO
464 
465 #define OM(op_mod) BITFIELD64_BIT(ROGUE_BACKEND_OP_MOD_##op_mod)
466 const rogue_backend_op_mod_info rogue_backend_op_mod_infos[ROGUE_BACKEND_OP_MOD_COUNT] = {
467    [ROGUE_BACKEND_OP_MOD_PROJ]  = { .str = "proj", },
468    [ROGUE_BACKEND_OP_MOD_FCNORM]  = { .str = "fcnorm", },
469    [ROGUE_BACKEND_OP_MOD_NNCOORDS]  = { .str = "nncoords", },
470    [ROGUE_BACKEND_OP_MOD_BIAS]  = { .str = "bias", .exclude = OM(REPLACE) | OM(GRADIENT) },
471    [ROGUE_BACKEND_OP_MOD_REPLACE]  = { .str = "replace", .exclude = OM(BIAS) | OM(GRADIENT) },
472    [ROGUE_BACKEND_OP_MOD_GRADIENT]  = { .str = "gradient", .exclude = OM(BIAS) | OM(REPLACE) },
473    [ROGUE_BACKEND_OP_MOD_PPLOD]  = { .str = "pplod", .require = OM(BIAS) | OM(REPLACE) },
474    [ROGUE_BACKEND_OP_MOD_TAO]  = { .str = "tao", },
475    [ROGUE_BACKEND_OP_MOD_SOO]  = { .str = "soo", },
476    [ROGUE_BACKEND_OP_MOD_SNO]  = { .str = "sno", },
477    [ROGUE_BACKEND_OP_MOD_WRT]  = { .str = "wrt", },
478    [ROGUE_BACKEND_OP_MOD_DATA]  = { .str = "data", .exclude = OM(INFO) | OM(BOTH) },
479    [ROGUE_BACKEND_OP_MOD_INFO]  = { .str = "info", .exclude = OM(DATA) | OM(BOTH) },
480    [ROGUE_BACKEND_OP_MOD_BOTH]  = { .str = "both", .exclude = OM(DATA) | OM(INFO) },
481    [ROGUE_BACKEND_OP_MOD_TILED] = { .str = "tiled", },
482    [ROGUE_BACKEND_OP_MOD_BYPASS]  = { .str = "bypass", .exclude = OM(FORCELINEFILL) | OM(WRITETHROUGH) | OM(WRITEBACK) | OM(LAZYWRITEBACK) },
483    [ROGUE_BACKEND_OP_MOD_FORCELINEFILL]  = { .str = "forcelinefill", .exclude = OM(BYPASS) | OM(WRITETHROUGH) | OM(WRITEBACK) | OM(LAZYWRITEBACK) },
484    [ROGUE_BACKEND_OP_MOD_WRITETHROUGH]  = { .str = "writethrough", .exclude = OM(BYPASS) | OM(FORCELINEFILL) | OM(WRITEBACK) | OM(LAZYWRITEBACK) },
485    [ROGUE_BACKEND_OP_MOD_WRITEBACK]  = { .str = "writeback", .exclude = OM(BYPASS) | OM(FORCELINEFILL) | OM(WRITETHROUGH) | OM(LAZYWRITEBACK) },
486    [ROGUE_BACKEND_OP_MOD_LAZYWRITEBACK]  = { .str = "lazywriteback", .exclude = OM(BYPASS) | OM(FORCELINEFILL) | OM(WRITETHROUGH) | OM(WRITEBACK) },
487    [ROGUE_BACKEND_OP_MOD_SLCBYPASS]  = { .str = "slcbypass", .exclude = OM(SLCWRITEBACK) | OM(SLCWRITETHROUGH) | OM(SLCNOALLOC) },
488    [ROGUE_BACKEND_OP_MOD_SLCWRITEBACK]  = { .str = "slcwriteback", .exclude = OM(SLCBYPASS) | OM(SLCWRITETHROUGH) | OM(SLCNOALLOC) },
489    [ROGUE_BACKEND_OP_MOD_SLCWRITETHROUGH]  = { .str = "slcwritethrough", .exclude = OM(SLCBYPASS) | OM(SLCWRITEBACK) | OM(SLCNOALLOC) },
490    [ROGUE_BACKEND_OP_MOD_SLCNOALLOC]  = { .str = "slcnoalloc", .exclude = OM(SLCBYPASS) | OM(SLCWRITEBACK) | OM(SLCWRITETHROUGH) },
491    [ROGUE_BACKEND_OP_MOD_ARRAY]  = { .str = "array", },
492    [ROGUE_BACKEND_OP_MOD_INTEGER]  = { .str = "integer", },
493    [ROGUE_BACKEND_OP_MOD_SCHEDSWAP]  = { .str = "schedswap", },
494    [ROGUE_BACKEND_OP_MOD_F16]  = { .str = "f16", },
495    [ROGUE_BACKEND_OP_MOD_SAT]  = { .str = "sat", },
496    [ROGUE_BACKEND_OP_MOD_FREEP] = { .str = "freep", },
497 };
498 #undef OM
499 
500 #define OM(op_mod) BITFIELD64_BIT(ROGUE_BITWISE_OP_MOD_##op_mod)
501 const rogue_bitwise_op_mod_info
502    rogue_bitwise_op_mod_infos[ROGUE_BITWISE_OP_MOD_COUNT] = {
503       [ROGUE_BITWISE_OP_MOD_TWB] = { .str = "twb",
504                                      .exclude = OM(PWB) | OM(MTB) | OM(FTB) },
505       [ROGUE_BITWISE_OP_MOD_PWB] = { .str = "pwb",
506                                      .exclude = OM(TWB) | OM(MTB) | OM(FTB) },
507       [ROGUE_BITWISE_OP_MOD_MTB] = { .str = "mtb",
508                                      .exclude = OM(TWB) | OM(PWB) | OM(FTB) },
509       [ROGUE_BITWISE_OP_MOD_FTB] = { .str = "ftb",
510                                      .exclude = OM(TWB) | OM(PWB) | OM(MTB) },
511    };
512 #undef OM
513 
514 #define P(type) BITFIELD64_BIT(ROGUE_INSTR_PHASE_##type)
515 #define PH(type) ROGUE_INSTR_PHASE_##type
516 #define IO(io) ROGUE_IO_##io
517 #define T(type) BITFIELD64_BIT(ROGUE_REF_TYPE_##type - 1)
518 const rogue_bitwise_op_info rogue_bitwise_op_infos[ROGUE_BITWISE_OP_COUNT] = {
519    [ROGUE_BITWISE_OP_INVALID] = { .str = "", },
520    [ROGUE_BITWISE_OP_BYP0] = { .str = "byp", .num_dsts = 2, .num_srcs = 2,
521       .supported_phases = P(0_BITMASK),
522       .phase_io[PH(0_BITMASK)] = { .dst[1] = IO(FT1), },
523       .supported_dst_types = {
524          [0] = T(REG) | T(REGARRAY) | T(IO),
525          [1] = T(REG) | T(REGARRAY) | T(IO),
526       },
527       .supported_src_types = {
528          [0] = T(REG) | T(REGARRAY) | T(IO),
529          [1] = T(REG) | T(REGARRAY) | T(IO) | T(VAL),
530       },
531    },
532 };
533 #undef T
534 #undef IO
535 #undef PH
536 #undef P
537 
538 const rogue_io_info rogue_io_infos[ROGUE_IO_COUNT] = {
539    [ROGUE_IO_INVALID] = { .str = "!INVALID!", },
540    [ROGUE_IO_S0] = { .str = "s0", },
541    [ROGUE_IO_S1] = { .str = "s1", },
542    [ROGUE_IO_S2] = { .str = "s2", },
543    [ROGUE_IO_S3] = { .str = "s3", },
544    [ROGUE_IO_S4] = { .str = "s4", },
545    [ROGUE_IO_S5] = { .str = "s5", },
546    [ROGUE_IO_W0] = { .str = "w0", },
547    [ROGUE_IO_W1] = { .str = "w1", },
548    [ROGUE_IO_IS0] = { .str = "is0", },
549    [ROGUE_IO_IS1] = { .str = "is1", },
550    [ROGUE_IO_IS2] = { .str = "is2", },
551    [ROGUE_IO_IS3] = { .str = "is3", },
552    [ROGUE_IO_IS4] = { .str = "is4/w0", },
553    [ROGUE_IO_IS5] = { .str = "is5/w1", },
554    [ROGUE_IO_FT0] = { .str = "ft0", },
555    [ROGUE_IO_FT1] = { .str = "ft1", },
556    [ROGUE_IO_FT2] = { .str = "ft2", },
557    [ROGUE_IO_FTE] = { .str = "fte", },
558    [ROGUE_IO_FT3] = { .str = "ft3", },
559    [ROGUE_IO_FT4] = { .str = "ft4", },
560    [ROGUE_IO_FT5] = { .str = "ft5", },
561    [ROGUE_IO_FTT] = { .str = "ftt", },
562    [ROGUE_IO_P0] = { .str = "p0", },
563    [ROGUE_IO_NONE] = { .str = "_", },
564 };
565 
566 #define SM(src_mod) BITFIELD64_BIT(ROGUE_ALU_SRC_MOD_##src_mod)
567 #define DM(dst_mod) BITFIELD64_BIT(ROGUE_ALU_DST_MOD_##dst_mod)
568 #define OM(op_mod) BITFIELD64_BIT(ROGUE_ALU_OP_MOD_##op_mod)
569 #define P(type) BITFIELD64_BIT(ROGUE_INSTR_PHASE_##type)
570 #define PH(type) ROGUE_INSTR_PHASE_##type
571 #define IO(io) ROGUE_IO_##io
572 #define T(type) BITFIELD64_BIT(ROGUE_REF_TYPE_##type - 1)
573 #define B(n) BITFIELD64_BIT(n)
574 const rogue_alu_op_info rogue_alu_op_infos[ROGUE_ALU_OP_COUNT] = {
575    [ROGUE_ALU_OP_INVALID] = { .str = "!INVALID!", },
576    [ROGUE_ALU_OP_MBYP] = { .str = "mbyp", .num_dsts = 1, .num_srcs = 1,
577       .supported_phases = P(0),
578       .phase_io[PH(0)] = { .dst[0] = IO(FT0), .src[0] = IO(S0), },
579       .supported_src_mods = {
580          [0] = SM(ABS) | SM(NEG),
581       },
582       .supported_dst_types = { [0] = T(REG) | T(REGARRAY) | T(IO), },
583       .supported_src_types = {
584          [0] = T(REG) | T(REGARRAY),
585       },
586    },
587    [ROGUE_ALU_OP_FADD] = { .str = "fadd", .num_dsts = 1, .num_srcs = 2,
588       .supported_phases = P(0),
589       .phase_io[PH(0)] = { .dst[0] = IO(FT0), .src[0] = IO(S0), .src[1] = IO(S1), },
590       .supported_op_mods = OM(LP) | OM(SAT),
591       .supported_src_mods = {
592          [0] = SM(FLR) | SM(ABS) | SM(NEG),
593          [1] = SM(ABS),
594       },
595    },
596    [ROGUE_ALU_OP_FMUL] = { .str = "fmul", .num_dsts = 1, .num_srcs = 2,
597       .supported_phases = P(0),
598       .phase_io[PH(0)] = { .dst[0] = IO(FT0), .src[0] = IO(S0), .src[1] = IO(S1), },
599       .supported_op_mods = OM(LP) | OM(SAT),
600       .supported_src_mods = {
601          [0] = SM(FLR) | SM(ABS) | SM(NEG),
602          [1] = SM(ABS),
603       },
604       .supported_dst_types = { [0] = T(REG), },
605       .supported_src_types = {
606          [0] = T(REG),
607          [1] = T(REG),
608       },
609    },
610    [ROGUE_ALU_OP_FMAD] = { .str = "fmad", .num_dsts = 1, .num_srcs = 3,
611       .supported_phases = P(0),
612       .phase_io[PH(0)] = { .dst[0] = IO(FT0), .src[0] = IO(S0), .src[1] = IO(S1), .src[2] = IO(S2), },
613       .supported_op_mods = OM(LP) | OM(SAT),
614       .supported_src_mods = {
615          [0] = SM(ABS) | SM(NEG),
616          [1] = SM(ABS) | SM(NEG),
617          [2] = SM(FLR) | SM(ABS) | SM(NEG),
618       },
619       .supported_dst_types = { [0] = T(REG), },
620       .supported_src_types = {
621          [0] = T(REG),
622          [1] = T(REG),
623          [2] = T(REG),
624       },
625    },
626    /* TODO NEXT!: Validate - can/must only select element if non-32-bit type, element has to be same for both args if both args present, 16-bit must be 0 or 1, 32-bit must be 0-3 (can't have no element set)
627     * Also validate number of sources provided/nulled out based on test op */
628    [ROGUE_ALU_OP_TST] = { .str = "tst", .num_dsts = 2, .num_srcs = 2,
629       .supported_phases = P(2_TST),
630       .phase_io[PH(2_TST)] = { .src[0] = IO(IS1), .src[1] = IO(IS2), },
631       .supported_op_mods = OM(Z) | OM(GZ) | OM(GEZ) | OM(C) | OM(E) | OM(G) | OM(GE) | OM(NE) | OM(L) | OM(LE) |
632          OM(F32) | OM(U16) | OM(S16) | OM(U8) | OM(S8) | OM(U32) | OM(S32),
633       .supported_src_mods = {
634          [0] = SM(E0) | SM(E1) | SM(E2) | SM(E3),
635          [1] = SM(E0) | SM(E1) | SM(E2) | SM(E3),
636       },
637       .supported_dst_types = { [0] = T(IO), [1] = T(IO), }, /* FTT and either P0 or NONE */
638       .supported_src_types = {
639          [0] = T(REG) | T(IO),
640          [1] = T(REG) | T(IO),
641       },
642    },
643    /* TODO: Support fully. */
644    [ROGUE_ALU_OP_MOVC] = { .str = "movc", .num_dsts = 2, .num_srcs = 3,
645       .supported_phases = P(2_MOV),
646       .phase_io[PH(2_MOV)] = { .dst[0] = IO(W0), .src[1] = IO(FTE), },
647       .supported_dst_mods = {
648          [0] = DM(E0) | DM(E1) | DM(E2) | DM(E3),
649       },
650       .supported_dst_types = { [0] = T(REG) | T(REGARRAY), [1] = T(REG) | T(REGARRAY) | T(IO), },
651       .supported_src_types = {
652          [0] = T(IO),
653          [1] = T(REG) | T(REGARRAY) | T(IO),
654          [2] = T(REG) | T(REGARRAY) | T(IO),
655       },
656    },
657    [ROGUE_ALU_OP_ADD64] = { .str = "add64", .num_dsts = 3, .num_srcs = 5,
658       .supported_phases = P(0),
659       .phase_io[PH(0)] = { .dst[0] = IO(FT0), .dst[1] = IO(FTE), .src[0] = IO(S0), .src[1] = IO(S1), .src[2] = IO(S2), .src[3] = IO(IS0), },
660       .supported_src_mods = {
661          [0] = SM(ABS) | SM(NEG),
662          [1] = SM(ABS) | SM(NEG),
663          [2] = SM(ABS) | SM(NEG),
664          [3] = SM(ABS) | SM(NEG),
665       },
666       .supported_dst_types = { [0] = T(REG) | T(REGARRAY), [1] = T(REG) | T(REGARRAY) | T(IO), [2] = T(IO) },
667       .supported_src_types = {
668          [0] = T(REG) | T(REGARRAY),
669          [1] = T(REG) | T(REGARRAY),
670          [2] = T(REG) | T(REGARRAY) | T(IMM),
671          [3] = T(REG) | T(REGARRAY)| T(IO) | T(IMM),
672          [4] = T(IO),
673       },
674    },
675    [ROGUE_ALU_OP_PCK_U8888] = { .str = "pck.u8888", .num_dsts = 1, .num_srcs = 1,
676       .supported_phases = P(2_PCK),
677       .phase_io[PH(2_PCK)] = { .dst[0] = IO(FT2), .src[0] = IO(IS3), },
678       .supported_op_mods = OM(SCALE) | OM(ROUNDZERO),
679       .supported_dst_types = { [0] = T(REG), },
680       .supported_src_types = {
681          [0] = T(REGARRAY),
682       },
683       .src_repeat_mask = B(0),
684    },
685    [ROGUE_ALU_OP_MOV] = { .str = "mov", .num_dsts = 1, .num_srcs = 1,
686       .supported_dst_types = { [0] = T(REG) | T(REGARRAY), },
687       .supported_src_types = {
688          [0] = T(REG) | T(REGARRAY) | T(IMM),
689       },
690    },
691    [ROGUE_ALU_OP_CMOV] = { .str = "cmov", .num_dsts = 1, .num_srcs = 3,
692       .supported_dst_types = { [0] = T(REG), },
693       .supported_src_types = {
694          [0] = T(IO),
695          [1] = T(REG),
696          [2] = T(REG),
697       },
698    },
699    [ROGUE_ALU_OP_FABS] = { .str = "fabs", .num_dsts = 1, .num_srcs = 1, },
700    [ROGUE_ALU_OP_FNEG] = { .str = "fneg", .num_dsts = 1, .num_srcs = 1, },
701    [ROGUE_ALU_OP_FNABS] = { .str = "fnabs", .num_dsts = 1, .num_srcs = 1, },
702 
703    [ROGUE_ALU_OP_FMAX] = { .str = "fmax", .num_dsts = 1, .num_srcs = 2, }, /* TODO */
704    [ROGUE_ALU_OP_FMIN] = { .str = "fmin", .num_dsts = 1, .num_srcs = 2, }, /* TODO */
705 };
706 #undef B
707 #undef T
708 #undef IO
709 #undef PH
710 #undef P
711 #undef OM
712 #undef DM
713 #undef SM
714 
715 const char *rogue_exec_cond_str[ROGUE_EXEC_COND_COUNT] = {
716    [ROGUE_EXEC_COND_INVALID] = "!INVALID!",
717    [ROGUE_EXEC_COND_PE_TRUE] = "if(pe)",
718    [ROGUE_EXEC_COND_P0_TRUE] = "if(p0)",
719    [ROGUE_EXEC_COND_PE_ANY] = "any(pe)",
720    [ROGUE_EXEC_COND_P0_FALSE] = "if(!p0)",
721 };
722 
723 const char *rogue_instr_type_str[ROGUE_INSTR_TYPE_COUNT] = {
724    [ROGUE_INSTR_TYPE_INVALID] = "!INVALID!",
725 
726    [ROGUE_INSTR_TYPE_ALU] = "alu",
727    /* [ROGUE_INSTR_TYPE_CMPLX] = "cmplx", */
728    [ROGUE_INSTR_TYPE_BACKEND] = "backend",
729    [ROGUE_INSTR_TYPE_CTRL] = "ctrl",
730    [ROGUE_INSTR_TYPE_BITWISE] = "bitwise",
731    /* [ROGUE_INSTR_TYPE_F16SOP] = "f16sop", */
732 };
733 
734 const char *const rogue_alu_str[ROGUE_ALU_COUNT] = {
735    [ROGUE_ALU_INVALID] = "!INVALID!",
736    [ROGUE_ALU_MAIN] = "main",
737    [ROGUE_ALU_BITWISE] = "bitwise",
738    [ROGUE_ALU_CONTROL] = "control",
739 };
740 
741 const char *const rogue_instr_phase_str[ROGUE_ALU_COUNT][ROGUE_INSTR_PHASE_COUNT] = {
742    /** Main/ALU (and backend) instructions. */
743    [ROGUE_ALU_MAIN] = {
744       [ROGUE_INSTR_PHASE_0] = "p0",
745       [ROGUE_INSTR_PHASE_1] = "p1",
746       [ROGUE_INSTR_PHASE_2_PCK] = "p2pck",
747       [ROGUE_INSTR_PHASE_2_TST] = "p2tst",
748       [ROGUE_INSTR_PHASE_2_MOV] = "p2mov",
749       [ROGUE_INSTR_PHASE_BACKEND] = "backend",
750    },
751 
752    /** Bitwise instructions. */
753    [ROGUE_ALU_BITWISE] = {
754       [ROGUE_INSTR_PHASE_0_BITMASK] = "p0bm",
755       [ROGUE_INSTR_PHASE_0_SHIFT1] = "p0shf1",
756       [ROGUE_INSTR_PHASE_0_COUNT] = "p0cnt",
757       [ROGUE_INSTR_PHASE_1_LOGICAL] = "p1log",
758       [ROGUE_INSTR_PHASE_2_SHIFT2] = "p2shf2",
759       [ROGUE_INSTR_PHASE_2_TEST] = "p2tst",
760    },
761 
762    /** Control instructions (no co-issuing). */
763    [ROGUE_ALU_CONTROL] = {
764       [ROGUE_INSTR_PHASE_CTRL] = "ctrl",
765    },
766 };
767