1 /* 2 * Copyright © 2022 Imagination Technologies Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24 #include "rogue.h" 25 26 /** 27 * \file rogue_info.c 28 * 29 * \brief Contains information and definitions for defined types and structures. 30 */ 31 32 /* TODO: Adjust according to core configurations. */ 33 /* TODO: Remaining restrictions, e.g. some registers are only 34 * usable by a particular instruction (vertex output) etc. */ 35 #define S(n) BITFIELD64_BIT(ROGUE_IO_S##n - 1) 36 const rogue_reg_info rogue_reg_infos[ROGUE_REG_CLASS_COUNT] = { 37 [ROGUE_REG_CLASS_INVALID] = { .name = "!INVALID!", .str = "!INVALID!", }, 38 [ROGUE_REG_CLASS_SSA] = { .name = "ssa", .str = "R", }, 39 [ROGUE_REG_CLASS_TEMP] = { .name = "temp", .str = "r", .num = 248, }, 40 [ROGUE_REG_CLASS_COEFF] = { .name = "coeff", .str = "cf", .num = 4096, .supported_io_srcs = S(0) | S(2) | S(3), }, 41 [ROGUE_REG_CLASS_SHARED] = { .name = "shared", .str = "sh", .num = 4096, .supported_io_srcs = S(0) | S(2) | S(3), }, 42 [ROGUE_REG_CLASS_SPECIAL] = { .name = "special", .str = "sr", .num = 240, }, /* TODO NEXT: Only S1, S2, S4. */ 43 [ROGUE_REG_CLASS_INTERNAL] = { .name = "internal", .str = "i", .num = 8, }, 44 [ROGUE_REG_CLASS_CONST] = { .name = "const", .str = "sc", .num = 240, }, 45 [ROGUE_REG_CLASS_PIXOUT] = { .name = "pixout", .str = "po", .num = 8, .supported_io_srcs = S(0) | S(2) | S(3), }, 46 [ROGUE_REG_CLASS_VTXIN] = { .name = "vtxin", .str = "vi", .num = 248, }, 47 [ROGUE_REG_CLASS_VTXOUT] = { .name = "vtxout", .str = "vo", .num = 256, }, 48 }; 49 #undef S 50 51 const rogue_regalloc_info regalloc_info[ROGUE_REGALLOC_CLASS_COUNT] = { 52 [ROGUE_REGALLOC_CLASS_TEMP_1] = { .class = ROGUE_REG_CLASS_TEMP, .stride = 1, }, 53 [ROGUE_REGALLOC_CLASS_TEMP_2] = { .class = ROGUE_REG_CLASS_TEMP, .stride = 2, }, 54 [ROGUE_REGALLOC_CLASS_TEMP_4] = { .class = ROGUE_REG_CLASS_TEMP, .stride = 4, }, 55 }; 56 57 const rogue_reg_dst_info rogue_reg_dst_infos[ROGUE_REG_DST_VARIANTS] = { 58 { 59 .num_dsts = 1, 60 .bank_bits = { 1 }, 61 .index_bits = { 6 }, 62 .bytes = 1, 63 }, 64 { 65 .num_dsts = 1, 66 .bank_bits = { 3 }, 67 .index_bits = { 11 }, 68 .bytes = 2, 69 }, 70 { 71 .num_dsts = 2, 72 .bank_bits = { 1, 1 }, 73 .index_bits = { 7, 6 }, 74 .bytes = 2, 75 }, 76 { 77 .num_dsts = 2, 78 .bank_bits = { 3, 3 }, 79 .index_bits = { 8, 8 }, 80 .bytes = 3, 81 }, 82 { 83 .num_dsts = 2, 84 .bank_bits = { 3, 3 }, 85 .index_bits = { 11, 11 }, 86 .bytes = 4, 87 }, 88 }; 89 90 const rogue_reg_src_info rogue_reg_lower_src_infos[ROGUE_REG_SRC_VARIANTS] = { 91 { 92 .num_srcs = 1, 93 .mux_bits = 0, 94 .bank_bits = { 1 }, 95 .index_bits = { 6 }, 96 .bytes = 1, 97 }, 98 { 99 .num_srcs = 1, 100 .mux_bits = 2, 101 .bank_bits = { 3 }, 102 .index_bits = { 11 }, 103 .bytes = 3, 104 }, 105 { 106 .num_srcs = 2, 107 .mux_bits = 0, 108 .bank_bits = { 1, 1 }, 109 .index_bits = { 6, 5 }, 110 .bytes = 2, 111 }, 112 { 113 .num_srcs = 2, 114 .mux_bits = 2, 115 .bank_bits = { 2, 2 }, 116 .index_bits = { 7, 7 }, 117 .bytes = 3, 118 }, 119 { 120 .num_srcs = 2, 121 .mux_bits = 3, 122 .bank_bits = { 3, 2 }, 123 .index_bits = { 11, 8 }, 124 .bytes = 4, 125 }, 126 { 127 .num_srcs = 3, 128 .mux_bits = 2, 129 .bank_bits = { 2, 2, 2 }, 130 .index_bits = { 7, 7, 6 }, 131 .bytes = 4, 132 }, 133 { 134 .num_srcs = 3, 135 .mux_bits = 3, 136 .bank_bits = { 3, 2, 3 }, 137 .index_bits = { 8, 8, 8 }, 138 .bytes = 5, 139 }, 140 { 141 .num_srcs = 3, 142 .mux_bits = 3, 143 .bank_bits = { 3, 2, 3 }, 144 .index_bits = { 11, 8, 11 }, 145 .bytes = 6, 146 }, 147 }; 148 149 const rogue_reg_src_info rogue_reg_upper_src_infos[ROGUE_REG_SRC_VARIANTS] = { 150 { 151 .num_srcs = 1, 152 .bank_bits = { 1 }, 153 .index_bits = { 6 }, 154 .bytes = 1, 155 }, 156 { 157 .num_srcs = 1, 158 .bank_bits = { 3 }, 159 .index_bits = { 11 }, 160 .bytes = 3, 161 }, 162 { 163 .num_srcs = 2, 164 .bank_bits = { 1, 1 }, 165 .index_bits = { 6, 5 }, 166 .bytes = 2, 167 }, 168 { 169 .num_srcs = 2, 170 .bank_bits = { 2, 2 }, 171 .index_bits = { 7, 7 }, 172 .bytes = 3, 173 }, 174 { 175 .num_srcs = 2, 176 .bank_bits = { 3, 2 }, 177 .index_bits = { 11, 8 }, 178 .bytes = 4, 179 }, 180 { 181 .num_srcs = 3, 182 .bank_bits = { 2, 2, 2 }, 183 .index_bits = { 7, 7, 6 }, 184 .bytes = 4, 185 }, 186 { 187 .num_srcs = 3, 188 .bank_bits = { 3, 2, 2 }, 189 .index_bits = { 8, 8, 8 }, 190 .bytes = 5, 191 }, 192 { 193 .num_srcs = 3, 194 .bank_bits = { 3, 2, 2 }, 195 .index_bits = { 11, 8, 8 }, 196 .bytes = 6, 197 }, 198 }; 199 200 #define OM(op_mod) BITFIELD64_BIT(ROGUE_ALU_OP_MOD_##op_mod) 201 const rogue_alu_op_mod_info rogue_alu_op_mod_infos[ROGUE_ALU_OP_MOD_COUNT] = { 202 [ROGUE_ALU_OP_MOD_LP] = { .str = "lp", }, 203 [ROGUE_ALU_OP_MOD_SAT] = { .str = "sat", }, 204 [ROGUE_ALU_OP_MOD_SCALE] = { .str = "scale", }, 205 [ROGUE_ALU_OP_MOD_ROUNDZERO] = { .str = "roundzero", }, 206 207 [ROGUE_ALU_OP_MOD_Z] = { .str = "z", .exclude = OM(GZ) | OM(GEZ) | OM(C) | OM(E) | OM(G) | OM(GE) | OM(NE) | OM(L) | OM(LE) }, 208 [ROGUE_ALU_OP_MOD_GZ] = { .str = "gz", .exclude = OM(Z) | OM(GEZ) | OM(C) | OM(E) | OM(G) | OM(GE) | OM(NE) | OM(L) | OM(LE) }, 209 [ROGUE_ALU_OP_MOD_GEZ] = { .str = "gez", .exclude = OM(Z) | OM(GZ) | OM(C) | OM(E) | OM(G) | OM(GE) | OM(NE) | OM(L) | OM(LE) }, 210 [ROGUE_ALU_OP_MOD_C] = { .str = "c", .exclude = OM(Z) | OM(GZ) | OM(GEZ) | OM(E) | OM(G) | OM(GE) | OM(NE) | OM(L) | OM(LE) }, 211 [ROGUE_ALU_OP_MOD_E] = { .str = "e", .exclude = OM(Z) | OM(GZ) | OM(GEZ) | OM(C) | OM(G) | OM(GE) | OM(NE) | OM(L) | OM(LE) }, 212 [ROGUE_ALU_OP_MOD_G] = { .str = "g", .exclude = OM(Z) | OM(GZ) | OM(GEZ) | OM(C) | OM(E) | OM(GE) | OM(NE) | OM(L) | OM(LE) }, 213 [ROGUE_ALU_OP_MOD_GE] = { .str = "ge", .exclude = OM(Z) | OM(GZ) | OM(GEZ) | OM(C) | OM(E) | OM(G) | OM(NE) | OM(L) | OM(LE) }, 214 [ROGUE_ALU_OP_MOD_NE] = { .str = "ne", .exclude = OM(Z) | OM(GZ) | OM(GEZ) | OM(C) | OM(E) | OM(G) | OM(GE) | OM(L) | OM(LE) }, 215 [ROGUE_ALU_OP_MOD_L] = { .str = "l", .exclude = OM(Z) | OM(GZ) | OM(GEZ) | OM(C) | OM(E) | OM(G) | OM(GE) | OM(NE) | OM(LE) }, 216 [ROGUE_ALU_OP_MOD_LE] = { .str = "le", .exclude = OM(Z) | OM(GZ) | OM(GEZ) | OM(C) | OM(E) | OM(G) | OM(GE) | OM(NE) | OM(L) }, 217 218 [ROGUE_ALU_OP_MOD_F32] = { .str = "f32", .exclude = OM(U16) | OM(S16) | OM(U8) | OM(S8) | OM(U32) | OM(S32) }, 219 [ROGUE_ALU_OP_MOD_U16] = { .str = "u16", .exclude = OM(F32) | OM(S16) | OM(U8) | OM(S8) | OM(U32) | OM(S32) }, 220 [ROGUE_ALU_OP_MOD_S16] = { .str = "s16", .exclude = OM(F32) | OM(U16) | OM(U8) | OM(S8) | OM(U32) | OM(S32) }, 221 [ROGUE_ALU_OP_MOD_U8] = { .str = "u8", .exclude = OM(F32) | OM(U16) | OM(S16) | OM(S8) | OM(U32) | OM(S32) }, 222 [ROGUE_ALU_OP_MOD_S8] = { .str = "s8", .exclude = OM(F32) | OM(U16) | OM(S16) | OM(U8) | OM(U32) | OM(S32) }, 223 [ROGUE_ALU_OP_MOD_U32] = { .str = "u32", .exclude = OM(F32) | OM(U16) | OM(S16) | OM(U8) | OM(S8) | OM(S32) }, 224 [ROGUE_ALU_OP_MOD_S32] = { .str = "s32", .exclude = OM(F32) | OM(U16) | OM(S16) | OM(U8) | OM(S8) | OM(U32) }, 225 }; 226 #undef OM 227 228 const rogue_alu_dst_mod_info rogue_alu_dst_mod_infos[ROGUE_ALU_DST_MOD_COUNT] = { 229 [ROGUE_ALU_DST_MOD_E0] = { .str = "e0", }, 230 [ROGUE_ALU_DST_MOD_E1] = { .str = "e1", }, 231 [ROGUE_ALU_DST_MOD_E2] = { .str = "e2", }, 232 [ROGUE_ALU_DST_MOD_E3] = { .str = "e3", }, 233 }; 234 235 const rogue_alu_src_mod_info rogue_alu_src_mod_infos[ROGUE_ALU_SRC_MOD_COUNT] = { 236 [ROGUE_ALU_SRC_MOD_FLR] = { .str = "flr", }, 237 [ROGUE_ALU_SRC_MOD_ABS] = { .str = "abs", }, 238 [ROGUE_ALU_SRC_MOD_NEG] = { .str = "neg", }, 239 [ROGUE_ALU_SRC_MOD_E0] = { .str = "e0", }, 240 [ROGUE_ALU_SRC_MOD_E1] = { .str = "e1", }, 241 [ROGUE_ALU_SRC_MOD_E2] = { .str = "e2", }, 242 [ROGUE_ALU_SRC_MOD_E3] = { .str = "e3", }, 243 }; 244 245 #define OM(op_mod) BITFIELD64_BIT(ROGUE_CTRL_OP_MOD_##op_mod) 246 const rogue_ctrl_op_mod_info rogue_ctrl_op_mod_infos[ROGUE_CTRL_OP_MOD_COUNT] = { 247 [ROGUE_CTRL_OP_MOD_LINK] = { .str = "link", }, 248 [ROGUE_CTRL_OP_MOD_ALLINST] = { .str = "allinst", .exclude = OM(ANYINST) }, 249 [ROGUE_CTRL_OP_MOD_ANYINST] = { .str = "anyinst", .exclude = OM(ALLINST) }, 250 [ROGUE_CTRL_OP_MOD_END] = { .str = "end", }, 251 }; 252 #undef OM 253 254 #define OM(op_mod) BITFIELD64_BIT(ROGUE_CTRL_OP_MOD_##op_mod) 255 #define T(type) BITFIELD64_BIT(ROGUE_REF_TYPE_##type - 1) 256 const rogue_ctrl_op_info rogue_ctrl_op_infos[ROGUE_CTRL_OP_COUNT] = { 257 [ROGUE_CTRL_OP_INVALID] = { .str = "!INVALID!", }, 258 [ROGUE_CTRL_OP_END] = { .str = "end", .ends_block = true, }, 259 [ROGUE_CTRL_OP_NOP] = { .str = "nop", 260 .supported_op_mods = OM(END), 261 }, 262 [ROGUE_CTRL_OP_WOP] = { .str = "wop", }, 263 [ROGUE_CTRL_OP_BR] = { .str = "br", .has_target = true, .ends_block = true, 264 .supported_op_mods = OM(LINK) | OM(ALLINST) | OM(ANYINST), 265 }, 266 [ROGUE_CTRL_OP_BA] = { .str = "ba", .ends_block = true, .num_srcs = 1, 267 .supported_op_mods = OM(LINK) | OM(ALLINST) | OM(ANYINST), 268 .supported_src_types = { [0] = T(VAL), }, 269 }, 270 [ROGUE_CTRL_OP_WDF] = { .str = "wdf", .num_srcs = 1, 271 .supported_src_types = { [0] = T(DRC), }, 272 }, 273 }; 274 #undef T 275 #undef OM 276 277 #define IO(io) ROGUE_IO_##io 278 #define OM(op_mod) BITFIELD64_BIT(ROGUE_BACKEND_OP_MOD_##op_mod) 279 #define T(type) BITFIELD64_BIT(ROGUE_REF_TYPE_##type - 1) 280 #define B(n) BITFIELD64_BIT(n) 281 const rogue_backend_op_info rogue_backend_op_infos[ROGUE_BACKEND_OP_COUNT] = { 282 [ROGUE_BACKEND_OP_INVALID] = { .str = "!INVALID!", }, 283 [ROGUE_BACKEND_OP_UVSW_WRITE] = { .str = "uvsw.write", .num_dsts = 1, .num_srcs = 1, 284 .phase_io = { .src[0] = IO(W0), }, 285 .supported_dst_types = { [0] = T(REG), }, 286 .supported_src_types = { [0] = T(REG), }, 287 }, 288 [ROGUE_BACKEND_OP_UVSW_EMIT] = { .str = "uvsw.emit", }, 289 [ROGUE_BACKEND_OP_UVSW_ENDTASK] = { .str = "uvsw.endtask", }, 290 291 [ROGUE_BACKEND_OP_UVSW_EMITTHENENDTASK] = { .str = "uvsw.emitthenendtask", }, 292 [ROGUE_BACKEND_OP_UVSW_WRITETHENEMITTHENENDTASK] = { .str = "uvsw.writethenemitthenendtask", .num_dsts = 1, .num_srcs = 1, 293 .phase_io = { .src[0] = IO(W0), }, 294 .supported_dst_types = { [0] = T(REG), }, 295 .supported_src_types = { [0] = T(REG), }, 296 }, 297 [ROGUE_BACKEND_OP_IDF] = { .str = "idf", .num_srcs = 2, 298 .phase_io = { .src[1] = IO(S0), }, 299 .supported_src_types = { [0] = T(DRC), [1] = T(REGARRAY), }, 300 .src_stride = { 301 [1] = 1, 302 }, 303 }, 304 305 [ROGUE_BACKEND_OP_EMITPIX] = { .str = "emitpix", .num_srcs = 2, 306 .phase_io = { .src[0] = IO(S0), .src[1] = IO(S2), }, 307 .supported_op_mods = OM(FREEP), 308 .supported_src_types = { [0] = T(REG), [1] = T(REG), }, 309 }, 310 /* .src[1] and .src[2] can actually be S0-5. */ 311 [ROGUE_BACKEND_OP_LD] = { .str = "ld", .num_dsts = 1, .num_srcs = 3, 312 .phase_io = { .dst[0] = IO(S3), .src[2] = IO(S0), }, 313 .supported_op_mods = OM(BYPASS) | OM(FORCELINEFILL) | OM(SLCBYPASS) | OM(SLCNOALLOC), 314 .supported_dst_types = { [0] = T(REG) | T(REGARRAY), }, 315 .supported_src_types = { 316 [0] = T(DRC), 317 [1] = T(VAL), 318 [2] = T(REGARRAY), 319 }, 320 .dst_stride = { 321 [0] = ~0U, 322 }, 323 .src_stride = { 324 [2] = 1, 325 }, 326 }, 327 /* .src[0] and .src[4] can actually be S0-5. */ 328 [ROGUE_BACKEND_OP_ST] = { .str = "st", .num_srcs = 6, 329 .phase_io = { .src[0] = IO(S3), .src[4] = IO(S0), }, 330 .supported_op_mods = OM(TILED) | OM(WRITETHROUGH) | OM(WRITEBACK) | OM(LAZYWRITEBACK) | 331 OM(SLCBYPASS) | OM(SLCWRITEBACK) | OM(SLCWRITETHROUGH) | OM(SLCNOALLOC), 332 .supported_src_types = { 333 [0] = T(REG) | T(REGARRAY), 334 [1] = T(VAL), 335 [2] = T(DRC), 336 [3] = T(VAL), 337 [4] = T(REGARRAY), 338 [5] = T(IO), 339 }, 340 .src_stride = { 341 [4] = 1, 342 }, 343 }, 344 [ROGUE_BACKEND_OP_FITR_PIXEL] = { .str = "fitr.pixel", .num_dsts = 1, .num_srcs = 3, 345 .phase_io = { .dst[0] = IO(S3), .src[1] = IO(S0), }, 346 .supported_op_mods = OM(SAT), 347 .supported_dst_types = { [0] = T(REG) | T(REGARRAY), }, 348 .supported_src_types = { 349 [0] = T(DRC), 350 [1] = T(REGARRAY), 351 [2] = T(VAL), 352 }, 353 .dst_stride = { 354 [0] = ~0U, 355 }, 356 .src_stride = { 357 [1] = ~0U, 358 }, 359 }, 360 [ROGUE_BACKEND_OP_FITRP_PIXEL] = { .str = "fitrp.pixel", .num_dsts = 1, .num_srcs = 4, 361 .phase_io = { .dst[0] = IO(S3), .src[1] = IO(S0), .src[2] = IO(S2), }, 362 .supported_op_mods = OM(SAT), 363 .supported_dst_types = { [0] = T(REG), }, 364 .supported_src_types = { 365 [0] = T(DRC), 366 [1] = T(REGARRAY), 367 [2] = T(REGARRAY), 368 [3] = T(VAL), 369 }, 370 .src_stride = { 371 [1] = 3, 372 [2] = ~0U, 373 }, 374 }, 375 [ROGUE_BACKEND_OP_SMP1D] = { .str = "smp1d", .num_dsts = 1, .num_srcs = 6, 376 .phase_io = { .dst[0] = IO(S4), .src[1] = IO(S0), .src[2] = IO(S1), .src[3] = IO(S2), }, 377 .supported_op_mods = OM(PROJ) | OM(FCNORM) | OM(NNCOORDS) | OM(BIAS) | OM(REPLACE) | 378 OM(GRADIENT) | OM(PPLOD) | OM(TAO) | OM(SOO) | OM(SNO) | OM(WRT) | OM(DATA) | 379 OM(INFO) | OM(BOTH) | OM(BYPASS) | OM(FORCELINEFILL) | OM(WRITETHROUGH) | 380 OM(WRITEBACK) | OM(LAZYWRITEBACK) | OM(SLCBYPASS) | OM(SLCWRITEBACK) | 381 OM(SLCWRITETHROUGH) | OM(SLCNOALLOC) | OM(ARRAY) | OM(INTEGER) | OM(SCHEDSWAP) | 382 OM(F16), 383 .supported_dst_types = { [0] = T(REG) | T(REGARRAY), }, 384 .supported_src_types = { 385 [0] = T(DRC), 386 [1] = T(REGARRAY), 387 [2] = T(REG) | T(REGARRAY), 388 [3] = T(REGARRAY), 389 [4] = T(REGARRAY) | T(IO), 390 [5] = T(VAL), 391 }, 392 /* TODO: This may depend on the other options set. */ 393 .src_stride = { 394 [1] = 3, 395 [2] = ~0U, 396 [3] = 3, 397 [4] = 1, 398 }, 399 .dst_stride = { 400 [0] = ~0U, 401 }, 402 }, 403 [ROGUE_BACKEND_OP_SMP2D] = { .str = "smp2d", .num_dsts = 1, .num_srcs = 6, 404 .phase_io = { .dst[0] = IO(S4), .src[1] = IO(S0), .src[2] = IO(S1), .src[3] = IO(S2), }, 405 .supported_op_mods = OM(PROJ) | OM(FCNORM) | OM(NNCOORDS) | OM(BIAS) | OM(REPLACE) | 406 OM(GRADIENT) | OM(PPLOD) | OM(TAO) | OM(SOO) | OM(SNO) | OM(WRT) | OM(DATA) | 407 OM(INFO) | OM(BOTH) | OM(BYPASS) | OM(FORCELINEFILL) | OM(WRITETHROUGH) | 408 OM(WRITEBACK) | OM(LAZYWRITEBACK) | OM(SLCBYPASS) | OM(SLCWRITEBACK) | 409 OM(SLCWRITETHROUGH) | OM(SLCNOALLOC) | OM(ARRAY) | OM(INTEGER) | OM(SCHEDSWAP) | 410 OM(F16), 411 .supported_dst_types = { [0] = T(REG) | T(REGARRAY), }, 412 .supported_src_types = { 413 [0] = T(DRC), 414 [1] = T(REGARRAY), 415 [2] = T(REG) | T(REGARRAY), 416 [3] = T(REGARRAY), 417 [4] = T(REGARRAY) | T(IO), 418 [5] = T(VAL), 419 }, 420 /* TODO: This may depend on the other options set. */ 421 .src_stride = { 422 [1] = 3, 423 [2] = ~0U, 424 [3] = 3, 425 [4] = 1, 426 }, 427 .dst_stride = { 428 [0] = ~0U, 429 }, 430 }, 431 [ROGUE_BACKEND_OP_SMP3D] = { .str = "smp3d", .num_dsts = 1, .num_srcs = 6, 432 .phase_io = { .dst[0] = IO(S4), .src[1] = IO(S0), .src[2] = IO(S1), .src[3] = IO(S2), }, 433 .supported_op_mods = OM(PROJ) | OM(FCNORM) | OM(NNCOORDS) | OM(BIAS) | OM(REPLACE) | 434 OM(GRADIENT) | OM(PPLOD) | OM(TAO) | OM(SOO) | OM(SNO) | OM(WRT) | OM(DATA) | 435 OM(INFO) | OM(BOTH) | OM(BYPASS) | OM(FORCELINEFILL) | OM(WRITETHROUGH) | 436 OM(WRITEBACK) | OM(LAZYWRITEBACK) | OM(SLCBYPASS) | OM(SLCWRITEBACK) | 437 OM(SLCWRITETHROUGH) | OM(SLCNOALLOC) | OM(ARRAY) | OM(INTEGER) | OM(SCHEDSWAP) | 438 OM(F16), 439 .supported_dst_types = { [0] = T(REG) | T(REGARRAY), }, 440 .supported_src_types = { 441 [0] = T(DRC), 442 [1] = T(REGARRAY), 443 [2] = T(REG) | T(REGARRAY), 444 [3] = T(REGARRAY), 445 [4] = T(REGARRAY) | T(IO), 446 [5] = T(VAL), 447 }, 448 /* TODO: This may depend on the other options set. */ 449 .src_stride = { 450 [1] = 3, 451 [2] = ~0U, 452 [3] = 3, 453 [4] = 1, 454 }, 455 .dst_stride = { 456 [0] = ~0U, 457 }, 458 }, 459 }; 460 #undef B 461 #undef T 462 #undef OM 463 #undef IO 464 465 #define OM(op_mod) BITFIELD64_BIT(ROGUE_BACKEND_OP_MOD_##op_mod) 466 const rogue_backend_op_mod_info rogue_backend_op_mod_infos[ROGUE_BACKEND_OP_MOD_COUNT] = { 467 [ROGUE_BACKEND_OP_MOD_PROJ] = { .str = "proj", }, 468 [ROGUE_BACKEND_OP_MOD_FCNORM] = { .str = "fcnorm", }, 469 [ROGUE_BACKEND_OP_MOD_NNCOORDS] = { .str = "nncoords", }, 470 [ROGUE_BACKEND_OP_MOD_BIAS] = { .str = "bias", .exclude = OM(REPLACE) | OM(GRADIENT) }, 471 [ROGUE_BACKEND_OP_MOD_REPLACE] = { .str = "replace", .exclude = OM(BIAS) | OM(GRADIENT) }, 472 [ROGUE_BACKEND_OP_MOD_GRADIENT] = { .str = "gradient", .exclude = OM(BIAS) | OM(REPLACE) }, 473 [ROGUE_BACKEND_OP_MOD_PPLOD] = { .str = "pplod", .require = OM(BIAS) | OM(REPLACE) }, 474 [ROGUE_BACKEND_OP_MOD_TAO] = { .str = "tao", }, 475 [ROGUE_BACKEND_OP_MOD_SOO] = { .str = "soo", }, 476 [ROGUE_BACKEND_OP_MOD_SNO] = { .str = "sno", }, 477 [ROGUE_BACKEND_OP_MOD_WRT] = { .str = "wrt", }, 478 [ROGUE_BACKEND_OP_MOD_DATA] = { .str = "data", .exclude = OM(INFO) | OM(BOTH) }, 479 [ROGUE_BACKEND_OP_MOD_INFO] = { .str = "info", .exclude = OM(DATA) | OM(BOTH) }, 480 [ROGUE_BACKEND_OP_MOD_BOTH] = { .str = "both", .exclude = OM(DATA) | OM(INFO) }, 481 [ROGUE_BACKEND_OP_MOD_TILED] = { .str = "tiled", }, 482 [ROGUE_BACKEND_OP_MOD_BYPASS] = { .str = "bypass", .exclude = OM(FORCELINEFILL) | OM(WRITETHROUGH) | OM(WRITEBACK) | OM(LAZYWRITEBACK) }, 483 [ROGUE_BACKEND_OP_MOD_FORCELINEFILL] = { .str = "forcelinefill", .exclude = OM(BYPASS) | OM(WRITETHROUGH) | OM(WRITEBACK) | OM(LAZYWRITEBACK) }, 484 [ROGUE_BACKEND_OP_MOD_WRITETHROUGH] = { .str = "writethrough", .exclude = OM(BYPASS) | OM(FORCELINEFILL) | OM(WRITEBACK) | OM(LAZYWRITEBACK) }, 485 [ROGUE_BACKEND_OP_MOD_WRITEBACK] = { .str = "writeback", .exclude = OM(BYPASS) | OM(FORCELINEFILL) | OM(WRITETHROUGH) | OM(LAZYWRITEBACK) }, 486 [ROGUE_BACKEND_OP_MOD_LAZYWRITEBACK] = { .str = "lazywriteback", .exclude = OM(BYPASS) | OM(FORCELINEFILL) | OM(WRITETHROUGH) | OM(WRITEBACK) }, 487 [ROGUE_BACKEND_OP_MOD_SLCBYPASS] = { .str = "slcbypass", .exclude = OM(SLCWRITEBACK) | OM(SLCWRITETHROUGH) | OM(SLCNOALLOC) }, 488 [ROGUE_BACKEND_OP_MOD_SLCWRITEBACK] = { .str = "slcwriteback", .exclude = OM(SLCBYPASS) | OM(SLCWRITETHROUGH) | OM(SLCNOALLOC) }, 489 [ROGUE_BACKEND_OP_MOD_SLCWRITETHROUGH] = { .str = "slcwritethrough", .exclude = OM(SLCBYPASS) | OM(SLCWRITEBACK) | OM(SLCNOALLOC) }, 490 [ROGUE_BACKEND_OP_MOD_SLCNOALLOC] = { .str = "slcnoalloc", .exclude = OM(SLCBYPASS) | OM(SLCWRITEBACK) | OM(SLCWRITETHROUGH) }, 491 [ROGUE_BACKEND_OP_MOD_ARRAY] = { .str = "array", }, 492 [ROGUE_BACKEND_OP_MOD_INTEGER] = { .str = "integer", }, 493 [ROGUE_BACKEND_OP_MOD_SCHEDSWAP] = { .str = "schedswap", }, 494 [ROGUE_BACKEND_OP_MOD_F16] = { .str = "f16", }, 495 [ROGUE_BACKEND_OP_MOD_SAT] = { .str = "sat", }, 496 [ROGUE_BACKEND_OP_MOD_FREEP] = { .str = "freep", }, 497 }; 498 #undef OM 499 500 #define OM(op_mod) BITFIELD64_BIT(ROGUE_BITWISE_OP_MOD_##op_mod) 501 const rogue_bitwise_op_mod_info 502 rogue_bitwise_op_mod_infos[ROGUE_BITWISE_OP_MOD_COUNT] = { 503 [ROGUE_BITWISE_OP_MOD_TWB] = { .str = "twb", 504 .exclude = OM(PWB) | OM(MTB) | OM(FTB) }, 505 [ROGUE_BITWISE_OP_MOD_PWB] = { .str = "pwb", 506 .exclude = OM(TWB) | OM(MTB) | OM(FTB) }, 507 [ROGUE_BITWISE_OP_MOD_MTB] = { .str = "mtb", 508 .exclude = OM(TWB) | OM(PWB) | OM(FTB) }, 509 [ROGUE_BITWISE_OP_MOD_FTB] = { .str = "ftb", 510 .exclude = OM(TWB) | OM(PWB) | OM(MTB) }, 511 }; 512 #undef OM 513 514 #define P(type) BITFIELD64_BIT(ROGUE_INSTR_PHASE_##type) 515 #define PH(type) ROGUE_INSTR_PHASE_##type 516 #define IO(io) ROGUE_IO_##io 517 #define T(type) BITFIELD64_BIT(ROGUE_REF_TYPE_##type - 1) 518 const rogue_bitwise_op_info rogue_bitwise_op_infos[ROGUE_BITWISE_OP_COUNT] = { 519 [ROGUE_BITWISE_OP_INVALID] = { .str = "", }, 520 [ROGUE_BITWISE_OP_BYP0] = { .str = "byp", .num_dsts = 2, .num_srcs = 2, 521 .supported_phases = P(0_BITMASK), 522 .phase_io[PH(0_BITMASK)] = { .dst[1] = IO(FT1), }, 523 .supported_dst_types = { 524 [0] = T(REG) | T(REGARRAY) | T(IO), 525 [1] = T(REG) | T(REGARRAY) | T(IO), 526 }, 527 .supported_src_types = { 528 [0] = T(REG) | T(REGARRAY) | T(IO), 529 [1] = T(REG) | T(REGARRAY) | T(IO) | T(VAL), 530 }, 531 }, 532 }; 533 #undef T 534 #undef IO 535 #undef PH 536 #undef P 537 538 const rogue_io_info rogue_io_infos[ROGUE_IO_COUNT] = { 539 [ROGUE_IO_INVALID] = { .str = "!INVALID!", }, 540 [ROGUE_IO_S0] = { .str = "s0", }, 541 [ROGUE_IO_S1] = { .str = "s1", }, 542 [ROGUE_IO_S2] = { .str = "s2", }, 543 [ROGUE_IO_S3] = { .str = "s3", }, 544 [ROGUE_IO_S4] = { .str = "s4", }, 545 [ROGUE_IO_S5] = { .str = "s5", }, 546 [ROGUE_IO_W0] = { .str = "w0", }, 547 [ROGUE_IO_W1] = { .str = "w1", }, 548 [ROGUE_IO_IS0] = { .str = "is0", }, 549 [ROGUE_IO_IS1] = { .str = "is1", }, 550 [ROGUE_IO_IS2] = { .str = "is2", }, 551 [ROGUE_IO_IS3] = { .str = "is3", }, 552 [ROGUE_IO_IS4] = { .str = "is4/w0", }, 553 [ROGUE_IO_IS5] = { .str = "is5/w1", }, 554 [ROGUE_IO_FT0] = { .str = "ft0", }, 555 [ROGUE_IO_FT1] = { .str = "ft1", }, 556 [ROGUE_IO_FT2] = { .str = "ft2", }, 557 [ROGUE_IO_FTE] = { .str = "fte", }, 558 [ROGUE_IO_FT3] = { .str = "ft3", }, 559 [ROGUE_IO_FT4] = { .str = "ft4", }, 560 [ROGUE_IO_FT5] = { .str = "ft5", }, 561 [ROGUE_IO_FTT] = { .str = "ftt", }, 562 [ROGUE_IO_P0] = { .str = "p0", }, 563 [ROGUE_IO_NONE] = { .str = "_", }, 564 }; 565 566 #define SM(src_mod) BITFIELD64_BIT(ROGUE_ALU_SRC_MOD_##src_mod) 567 #define DM(dst_mod) BITFIELD64_BIT(ROGUE_ALU_DST_MOD_##dst_mod) 568 #define OM(op_mod) BITFIELD64_BIT(ROGUE_ALU_OP_MOD_##op_mod) 569 #define P(type) BITFIELD64_BIT(ROGUE_INSTR_PHASE_##type) 570 #define PH(type) ROGUE_INSTR_PHASE_##type 571 #define IO(io) ROGUE_IO_##io 572 #define T(type) BITFIELD64_BIT(ROGUE_REF_TYPE_##type - 1) 573 #define B(n) BITFIELD64_BIT(n) 574 const rogue_alu_op_info rogue_alu_op_infos[ROGUE_ALU_OP_COUNT] = { 575 [ROGUE_ALU_OP_INVALID] = { .str = "!INVALID!", }, 576 [ROGUE_ALU_OP_MBYP] = { .str = "mbyp", .num_dsts = 1, .num_srcs = 1, 577 .supported_phases = P(0), 578 .phase_io[PH(0)] = { .dst[0] = IO(FT0), .src[0] = IO(S0), }, 579 .supported_src_mods = { 580 [0] = SM(ABS) | SM(NEG), 581 }, 582 .supported_dst_types = { [0] = T(REG) | T(REGARRAY) | T(IO), }, 583 .supported_src_types = { 584 [0] = T(REG) | T(REGARRAY), 585 }, 586 }, 587 [ROGUE_ALU_OP_FADD] = { .str = "fadd", .num_dsts = 1, .num_srcs = 2, 588 .supported_phases = P(0), 589 .phase_io[PH(0)] = { .dst[0] = IO(FT0), .src[0] = IO(S0), .src[1] = IO(S1), }, 590 .supported_op_mods = OM(LP) | OM(SAT), 591 .supported_src_mods = { 592 [0] = SM(FLR) | SM(ABS) | SM(NEG), 593 [1] = SM(ABS), 594 }, 595 }, 596 [ROGUE_ALU_OP_FMUL] = { .str = "fmul", .num_dsts = 1, .num_srcs = 2, 597 .supported_phases = P(0), 598 .phase_io[PH(0)] = { .dst[0] = IO(FT0), .src[0] = IO(S0), .src[1] = IO(S1), }, 599 .supported_op_mods = OM(LP) | OM(SAT), 600 .supported_src_mods = { 601 [0] = SM(FLR) | SM(ABS) | SM(NEG), 602 [1] = SM(ABS), 603 }, 604 .supported_dst_types = { [0] = T(REG), }, 605 .supported_src_types = { 606 [0] = T(REG), 607 [1] = T(REG), 608 }, 609 }, 610 [ROGUE_ALU_OP_FMAD] = { .str = "fmad", .num_dsts = 1, .num_srcs = 3, 611 .supported_phases = P(0), 612 .phase_io[PH(0)] = { .dst[0] = IO(FT0), .src[0] = IO(S0), .src[1] = IO(S1), .src[2] = IO(S2), }, 613 .supported_op_mods = OM(LP) | OM(SAT), 614 .supported_src_mods = { 615 [0] = SM(ABS) | SM(NEG), 616 [1] = SM(ABS) | SM(NEG), 617 [2] = SM(FLR) | SM(ABS) | SM(NEG), 618 }, 619 .supported_dst_types = { [0] = T(REG), }, 620 .supported_src_types = { 621 [0] = T(REG), 622 [1] = T(REG), 623 [2] = T(REG), 624 }, 625 }, 626 /* TODO NEXT!: Validate - can/must only select element if non-32-bit type, element has to be same for both args if both args present, 16-bit must be 0 or 1, 32-bit must be 0-3 (can't have no element set) 627 * Also validate number of sources provided/nulled out based on test op */ 628 [ROGUE_ALU_OP_TST] = { .str = "tst", .num_dsts = 2, .num_srcs = 2, 629 .supported_phases = P(2_TST), 630 .phase_io[PH(2_TST)] = { .src[0] = IO(IS1), .src[1] = IO(IS2), }, 631 .supported_op_mods = OM(Z) | OM(GZ) | OM(GEZ) | OM(C) | OM(E) | OM(G) | OM(GE) | OM(NE) | OM(L) | OM(LE) | 632 OM(F32) | OM(U16) | OM(S16) | OM(U8) | OM(S8) | OM(U32) | OM(S32), 633 .supported_src_mods = { 634 [0] = SM(E0) | SM(E1) | SM(E2) | SM(E3), 635 [1] = SM(E0) | SM(E1) | SM(E2) | SM(E3), 636 }, 637 .supported_dst_types = { [0] = T(IO), [1] = T(IO), }, /* FTT and either P0 or NONE */ 638 .supported_src_types = { 639 [0] = T(REG) | T(IO), 640 [1] = T(REG) | T(IO), 641 }, 642 }, 643 /* TODO: Support fully. */ 644 [ROGUE_ALU_OP_MOVC] = { .str = "movc", .num_dsts = 2, .num_srcs = 3, 645 .supported_phases = P(2_MOV), 646 .phase_io[PH(2_MOV)] = { .dst[0] = IO(W0), .src[1] = IO(FTE), }, 647 .supported_dst_mods = { 648 [0] = DM(E0) | DM(E1) | DM(E2) | DM(E3), 649 }, 650 .supported_dst_types = { [0] = T(REG) | T(REGARRAY), [1] = T(REG) | T(REGARRAY) | T(IO), }, 651 .supported_src_types = { 652 [0] = T(IO), 653 [1] = T(REG) | T(REGARRAY) | T(IO), 654 [2] = T(REG) | T(REGARRAY) | T(IO), 655 }, 656 }, 657 [ROGUE_ALU_OP_ADD64] = { .str = "add64", .num_dsts = 3, .num_srcs = 5, 658 .supported_phases = P(0), 659 .phase_io[PH(0)] = { .dst[0] = IO(FT0), .dst[1] = IO(FTE), .src[0] = IO(S0), .src[1] = IO(S1), .src[2] = IO(S2), .src[3] = IO(IS0), }, 660 .supported_src_mods = { 661 [0] = SM(ABS) | SM(NEG), 662 [1] = SM(ABS) | SM(NEG), 663 [2] = SM(ABS) | SM(NEG), 664 [3] = SM(ABS) | SM(NEG), 665 }, 666 .supported_dst_types = { [0] = T(REG) | T(REGARRAY), [1] = T(REG) | T(REGARRAY) | T(IO), [2] = T(IO) }, 667 .supported_src_types = { 668 [0] = T(REG) | T(REGARRAY), 669 [1] = T(REG) | T(REGARRAY), 670 [2] = T(REG) | T(REGARRAY) | T(IMM), 671 [3] = T(REG) | T(REGARRAY)| T(IO) | T(IMM), 672 [4] = T(IO), 673 }, 674 }, 675 [ROGUE_ALU_OP_PCK_U8888] = { .str = "pck.u8888", .num_dsts = 1, .num_srcs = 1, 676 .supported_phases = P(2_PCK), 677 .phase_io[PH(2_PCK)] = { .dst[0] = IO(FT2), .src[0] = IO(IS3), }, 678 .supported_op_mods = OM(SCALE) | OM(ROUNDZERO), 679 .supported_dst_types = { [0] = T(REG), }, 680 .supported_src_types = { 681 [0] = T(REGARRAY), 682 }, 683 .src_repeat_mask = B(0), 684 }, 685 [ROGUE_ALU_OP_MOV] = { .str = "mov", .num_dsts = 1, .num_srcs = 1, 686 .supported_dst_types = { [0] = T(REG) | T(REGARRAY), }, 687 .supported_src_types = { 688 [0] = T(REG) | T(REGARRAY) | T(IMM), 689 }, 690 }, 691 [ROGUE_ALU_OP_CMOV] = { .str = "cmov", .num_dsts = 1, .num_srcs = 3, 692 .supported_dst_types = { [0] = T(REG), }, 693 .supported_src_types = { 694 [0] = T(IO), 695 [1] = T(REG), 696 [2] = T(REG), 697 }, 698 }, 699 [ROGUE_ALU_OP_FABS] = { .str = "fabs", .num_dsts = 1, .num_srcs = 1, }, 700 [ROGUE_ALU_OP_FNEG] = { .str = "fneg", .num_dsts = 1, .num_srcs = 1, }, 701 [ROGUE_ALU_OP_FNABS] = { .str = "fnabs", .num_dsts = 1, .num_srcs = 1, }, 702 703 [ROGUE_ALU_OP_FMAX] = { .str = "fmax", .num_dsts = 1, .num_srcs = 2, }, /* TODO */ 704 [ROGUE_ALU_OP_FMIN] = { .str = "fmin", .num_dsts = 1, .num_srcs = 2, }, /* TODO */ 705 }; 706 #undef B 707 #undef T 708 #undef IO 709 #undef PH 710 #undef P 711 #undef OM 712 #undef DM 713 #undef SM 714 715 const char *rogue_exec_cond_str[ROGUE_EXEC_COND_COUNT] = { 716 [ROGUE_EXEC_COND_INVALID] = "!INVALID!", 717 [ROGUE_EXEC_COND_PE_TRUE] = "if(pe)", 718 [ROGUE_EXEC_COND_P0_TRUE] = "if(p0)", 719 [ROGUE_EXEC_COND_PE_ANY] = "any(pe)", 720 [ROGUE_EXEC_COND_P0_FALSE] = "if(!p0)", 721 }; 722 723 const char *rogue_instr_type_str[ROGUE_INSTR_TYPE_COUNT] = { 724 [ROGUE_INSTR_TYPE_INVALID] = "!INVALID!", 725 726 [ROGUE_INSTR_TYPE_ALU] = "alu", 727 /* [ROGUE_INSTR_TYPE_CMPLX] = "cmplx", */ 728 [ROGUE_INSTR_TYPE_BACKEND] = "backend", 729 [ROGUE_INSTR_TYPE_CTRL] = "ctrl", 730 [ROGUE_INSTR_TYPE_BITWISE] = "bitwise", 731 /* [ROGUE_INSTR_TYPE_F16SOP] = "f16sop", */ 732 }; 733 734 const char *const rogue_alu_str[ROGUE_ALU_COUNT] = { 735 [ROGUE_ALU_INVALID] = "!INVALID!", 736 [ROGUE_ALU_MAIN] = "main", 737 [ROGUE_ALU_BITWISE] = "bitwise", 738 [ROGUE_ALU_CONTROL] = "control", 739 }; 740 741 const char *const rogue_instr_phase_str[ROGUE_ALU_COUNT][ROGUE_INSTR_PHASE_COUNT] = { 742 /** Main/ALU (and backend) instructions. */ 743 [ROGUE_ALU_MAIN] = { 744 [ROGUE_INSTR_PHASE_0] = "p0", 745 [ROGUE_INSTR_PHASE_1] = "p1", 746 [ROGUE_INSTR_PHASE_2_PCK] = "p2pck", 747 [ROGUE_INSTR_PHASE_2_TST] = "p2tst", 748 [ROGUE_INSTR_PHASE_2_MOV] = "p2mov", 749 [ROGUE_INSTR_PHASE_BACKEND] = "backend", 750 }, 751 752 /** Bitwise instructions. */ 753 [ROGUE_ALU_BITWISE] = { 754 [ROGUE_INSTR_PHASE_0_BITMASK] = "p0bm", 755 [ROGUE_INSTR_PHASE_0_SHIFT1] = "p0shf1", 756 [ROGUE_INSTR_PHASE_0_COUNT] = "p0cnt", 757 [ROGUE_INSTR_PHASE_1_LOGICAL] = "p1log", 758 [ROGUE_INSTR_PHASE_2_SHIFT2] = "p2shf2", 759 [ROGUE_INSTR_PHASE_2_TEST] = "p2tst", 760 }, 761 762 /** Control instructions (no co-issuing). */ 763 [ROGUE_ALU_CONTROL] = { 764 [ROGUE_INSTR_PHASE_CTRL] = "ctrl", 765 }, 766 }; 767