1 /* -*- mesa-c++ -*- 2 * Copyright 2018-2019 Collabora LTD 3 * Author: Gert Wollny <[email protected]> 4 * SPDX-License-Identifier: MIT 5 */ 6 7 #ifndef r600_sfn_alu_defines_h 8 #define r600_sfn_alu_defines_h 9 10 #include "../r600_isa.h" 11 12 #include <bitset> 13 #include <map> 14 15 namespace r600 { 16 17 // We sacrifice 123 for dummy dests 18 static const int g_registers_end = 123; 19 static const int g_clause_local_start = 124; 20 static const int g_clause_local_end = 128; 21 22 /* ALU op2 instructions 17:7 top three bits always zero. */ 23 enum EAluOp { 24 op2_add = 0, 25 op2_mul = 1, 26 op2_mul_ieee = 2, 27 op2_max = 3, 28 op2_min = 4, 29 op2_max_dx10 = 5, 30 op2_min_dx10 = 6, 31 op2_sete = 8, 32 op2_setgt = 9, 33 op2_setge = 10, 34 op2_setne = 11, 35 op2_sete_dx10 = 12, 36 op2_setgt_dx10 = 13, 37 op2_setge_dx10 = 14, 38 op2_setne_dx10 = 15, 39 op1_fract = 16, 40 op1_trunc = 17, 41 op1_ceil = 18, 42 op1_rndne = 19, 43 op1_floor = 20, 44 op2_ashr_int = 21, 45 op2_lshr_int = 22, 46 op2_lshl_int = 23, 47 op1_mov = 25, 48 op0_nop = 26, 49 op2_mul_64 = 27, 50 op1_flt64_to_flt32 = 28, 51 op1_flt32_to_flt64 = 29, 52 op2_pred_setgt_uint = 30, 53 op2_pred_setge_uint = 31, 54 op2_pred_sete = 32, 55 op2_pred_setgt = 33, 56 op2_pred_setge = 34, 57 op2_pred_setne = 35, 58 op1_pred_set_inv = 36, 59 op2_pred_set_pop = 37, 60 op0_pred_set_clr = 38, 61 op1_pred_set_restore = 39, 62 op2_pred_sete_push = 40, 63 op2_pred_setgt_push = 41, 64 op2_pred_setge_push = 42, 65 op2_pred_setne_push = 43, 66 op2_kille = 44, 67 op2_killgt = 45, 68 op2_killge = 46, 69 op2_killne = 47, 70 op2_and_int = 48, 71 op2_or_int = 49, 72 op2_xor_int = 50, 73 op1_not_int = 51, 74 op2_add_int = 52, 75 op2_sub_int = 53, 76 op2_max_int = 54, 77 op2_min_int = 55, 78 op2_max_uint = 56, 79 op2_min_uint = 57, 80 op2_sete_int = 58, 81 op2_setgt_int = 59, 82 op2_setge_int = 60, 83 op2_setne_int = 61, 84 op2_setgt_uint = 62, 85 op2_setge_uint = 63, 86 op2_killgt_uint = 64, 87 op2_killge_uint = 65, 88 op2_prede_int = 66, 89 op2_pred_setgt_int = 67, 90 op2_pred_setge_int = 68, 91 op2_pred_setne_int = 69, 92 op2_kille_int = 70, 93 op2_killgt_int = 71, 94 op2_killge_int = 72, 95 op2_killne_int = 73, 96 op2_pred_sete_push_int = 74, 97 op2_pred_setgt_push_int = 75, 98 op2_pred_setge_push_int = 76, 99 op2_pred_setne_push_int = 77, 100 op2_pred_setlt_push_int = 78, 101 op2_pred_setle_push_int = 79, 102 op1_flt_to_int = 80, 103 op1_bfrev_int = 81, 104 op2_addc_uint = 82, 105 op2_subb_uint = 83, 106 op0_group_barrier = 84, 107 op0_group_seq_begin = 85, 108 op0_group_seq_end = 86, 109 op2_set_mode = 87, 110 op1_set_cf_idx0 = 88, 111 op1_set_cf_idx1 = 89, 112 op2_set_lds_size = 90, 113 op1_exp_ieee = 129, 114 op1_log_clamped = 130, 115 op1_log_ieee = 131, 116 op1_recip_clamped = 132, 117 op1_recip_ff = 133, 118 op1_recip_ieee = 134, 119 op1_recipsqrt_clamped = 135, 120 op1_recipsqrt_ff = 136, 121 op1_recipsqrt_ieee1 = 137, 122 op1_sqrt_ieee = 138, 123 op1_sin = 141, 124 op1_cos = 142, 125 op2_mullo_int = 143, 126 op2_mulhi_int = 144, 127 op2_mullo_uint = 145, 128 op2_mulhi_uint = 146, 129 op1_recip_int = 147, 130 op1_recip_uint = 148, 131 op1_recip_64 = 149, 132 op1_recip_clamped_64 = 150, 133 op1_recipsqrt_64 = 151, 134 op1_recipsqrt_clamped_64 = 152, 135 op1_sqrt_64 = 153, 136 op1_flt_to_uint = 154, 137 op1_int_to_flt = 155, 138 op1_uint_to_flt = 156, 139 op2_bfm_int = 160, 140 op1_flt32_to_flt16 = 162, 141 op1_flt16_to_flt32 = 163, 142 op1_ubyte0_flt = 164, 143 op1_ubyte1_flt = 165, 144 op1_ubyte2_flt = 166, 145 op1_ubyte3_flt = 167, 146 op1_bcnt_int = 170, 147 op1_ffbh_uint = 171, 148 op1_ffbl_int = 172, 149 op1_ffbh_int = 173, 150 op1_flt_to_uint4 = 174, 151 op2_dot_ieee = 175, 152 op1_flt_to_int_rpi = 176, 153 op1_flt_to_int_floor = 177, 154 op2_mulhi_uint24 = 178, 155 op1_mbcnt_32hi_int = 179, 156 op1_offset_to_flt = 180, 157 op2_mul_uint24 = 181, 158 op1_bcnt_accum_prev_int = 182, 159 op1_mbcnt_32lo_accum_prev_int = 183, 160 op2_sete_64 = 184, 161 op2_setne_64 = 185, 162 op2_setgt_64 = 186, 163 op2_setge_64 = 187, 164 op2_min_64 = 188, 165 op2_max_64 = 189, 166 op2_dot4 = 190, 167 op2_dot4_ieee = 191, 168 op2_cube = 192, 169 op1_max4 = 193, 170 op1_frexp_64 = 196, 171 op1_ldexp_64 = 197, 172 op1_fract_64 = 198, 173 op2_pred_setgt_64 = 199, 174 op2_pred_sete_64 = 198, 175 op2_pred_setge_64 = 201, 176 OP2V_MUL_64 = 202, 177 op2_add_64 = 203, 178 op1_mova_int = 204, 179 op1v_flt64_to_flt32 = 205, 180 op1v_flt32_to_flt64 = 206, 181 op2_sad_accum_prev_uint = 207, 182 op2_dot = 208, 183 op1_mul_prev = 209, 184 op1_mul_ieee_prev = 210, 185 op1_add_prev = 211, 186 op2_muladd_prev = 212, 187 op2_muladd_ieee_prev = 213, 188 op2_interp_xy = 214, 189 op2_interp_zw = 215, 190 op2_interp_x = 216, 191 op2_interp_z = 217, 192 op0_store_flags = 218, 193 op1_load_store_flags = 219, 194 op0_lds_1a = 220, 195 op0_lds_1a1d = 221, 196 op0_lds_2a = 223, 197 op1_interp_load_p0 = 224, 198 op1_interp_load_p10 = 125, 199 op1_interp_load_p20 = 126, 200 // op 3 all left shift 6 201 op3_bfe_uint = 4 << 6, 202 op3_bfe_int = 5 << 6, 203 op3_bfi_int = 6 << 6, 204 op3_fma = 7 << 6, 205 op3_cndne_64 = 9 << 6, 206 op3_fma_64 = 10 << 6, 207 op3_lerp_uint = 11 << 6, 208 op3_bit_align_int = 12 << 6, 209 op3_byte_align_int = 13 << 6, 210 op3_sad_accum_uint = 14 << 6, 211 op3_sad_accum_hi_uint = 15 << 6, 212 op3_muladd_uint24 = 16 << 6, 213 op3_lds_idx_op = 17 << 6, 214 op3_muladd = 20 << 6, 215 op3_muladd_m2 = 21 << 6, 216 op3_muladd_m4 = 22 << 6, 217 op3_muladd_d2 = 23 << 6, 218 op3_muladd_ieee = 24 << 6, 219 op3_cnde = 25 << 6, 220 op3_cndgt = 26 << 6, 221 op3_cndge = 27 << 6, 222 op3_cnde_int = 28 << 6, 223 op3_cndgt_int = 29 << 6, 224 op3_cndge_int = 30 << 6, 225 op3_mul_lit = 31 << 6, 226 op_invalid = 0xffff 227 }; 228 229 enum AluModifiers { 230 alu_src0_rel, 231 alu_src1_rel, 232 alu_src2_rel, 233 alu_dst_clamp, 234 alu_dst_rel, 235 alu_last_instr, 236 alu_update_exec, 237 alu_update_pred, 238 alu_write, 239 alu_op3, 240 alu_is_trans, 241 alu_is_cayman_trans, 242 alu_is_lds, 243 alu_lds_group_start, 244 alu_lds_group_end, 245 alu_lds_address, 246 alu_no_schedule_bias, 247 alu_64bit_op, 248 alu_flag_none, 249 alu_flag_count 250 }; 251 252 enum AluDstModifiers { 253 omod_off = 0, 254 omod_mul2 = 1, 255 omod_mul4 = 2, 256 omod_divl2 = 3 257 }; 258 259 enum AluPredSel { 260 pred_off = 0, 261 pred_zero = 2, 262 pred_one = 3 263 }; 264 265 enum AluBankSwizzle { 266 alu_vec_012 = 0, 267 sq_alu_scl_201 = 0, 268 alu_vec_021 = 1, 269 sq_alu_scl_122 = 1, 270 alu_vec_120 = 2, 271 sq_alu_scl_212 = 2, 272 alu_vec_102 = 3, 273 sq_alu_scl_221 = 3, 274 alu_vec_201 = 4, 275 sq_alu_scl_unknown = 4, 276 alu_vec_210 = 5, 277 alu_vec_unknown = 6 278 }; 279 280 inline AluBankSwizzle 281 operator++(AluBankSwizzle& x) 282 { 283 x = static_cast<AluBankSwizzle>(x + 1); 284 return x; 285 } 286 287 using AluOpFlags = std::bitset<alu_flag_count>; 288 289 struct AluOp { 290 static constexpr int x = 1; 291 static constexpr int y = 2; 292 static constexpr int z = 4; 293 static constexpr int w = 8; 294 static constexpr int v = 15; 295 static constexpr int t = 16; 296 static constexpr int a = 31; 297 AluOpAluOp298 AluOp(int ns, bool src_mod, bool clamp, bool fp64, uint8_t um_r600, 299 uint8_t um_r700, uint8_t um_eg, const char *n): 300 nsrc(ns), 301 can_srcmod(src_mod), 302 can_clamp(clamp), 303 is_fp64(fp64), 304 name(n) 305 { 306 unit_mask[0] = um_r600; 307 unit_mask[1] = um_r700; 308 unit_mask[2] = um_eg; 309 } 310 can_channelAluOp311 bool can_channel(int flags, r600_chip_class unit_type) const 312 { 313 assert(unit_type < 3); 314 return flags & unit_mask[unit_type]; 315 } 316 317 int nsrc : 4; 318 int can_srcmod : 1; 319 int can_clamp : 1; 320 int is_fp64 : 1; 321 uint8_t unit_mask[3]; 322 const char *name; 323 }; 324 325 extern const std::map<EAluOp, AluOp> alu_ops; 326 327 enum AluInlineConstants { 328 ALU_SRC_LDS_OQ_A = 219, 329 ALU_SRC_LDS_OQ_B = 220, 330 ALU_SRC_LDS_OQ_A_POP = 221, 331 ALU_SRC_LDS_OQ_B_POP = 222, 332 ALU_SRC_LDS_DIRECT_A = 223, 333 ALU_SRC_LDS_DIRECT_B = 224, 334 ALU_SRC_TIME_HI = 227, 335 ALU_SRC_TIME_LO = 228, 336 ALU_SRC_MASK_HI = 229, 337 ALU_SRC_MASK_LO = 230, 338 ALU_SRC_HW_WAVE_ID = 231, 339 ALU_SRC_SIMD_ID = 232, 340 ALU_SRC_SE_ID = 233, 341 ALU_SRC_HW_THREADGRP_ID = 234, 342 ALU_SRC_WAVE_ID_IN_GRP = 235, 343 ALU_SRC_NUM_THREADGRP_WAVES = 236, 344 ALU_SRC_HW_ALU_ODD = 237, 345 ALU_SRC_LOOP_IDX = 238, 346 ALU_SRC_PARAM_BASE_ADDR = 240, 347 ALU_SRC_NEW_PRIM_MASK = 241, 348 ALU_SRC_PRIM_MASK_HI = 242, 349 ALU_SRC_PRIM_MASK_LO = 243, 350 ALU_SRC_1_DBL_L = 244, 351 ALU_SRC_1_DBL_M = 245, 352 ALU_SRC_0_5_DBL_L = 246, 353 ALU_SRC_0_5_DBL_M = 247, 354 ALU_SRC_0 = 248, 355 ALU_SRC_1 = 249, 356 ALU_SRC_1_INT = 250, 357 ALU_SRC_M_1_INT = 251, 358 ALU_SRC_0_5 = 252, 359 ALU_SRC_LITERAL = 253, 360 ALU_SRC_PV = 254, 361 ALU_SRC_PS = 255, 362 ALU_SRC_PARAM_BASE = 0x1C0, 363 ALU_SRC_UNKNOWN 364 }; 365 366 struct AluInlineConstantDescr { 367 bool use_chan; 368 const char *descr; 369 }; 370 371 extern const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const; 372 373 #define LDSOP2(X) LDS_##X = LDS_OP2_LDS_##X 374 375 enum ESDOp { 376 DS_OP_ADD = 0, 377 DS_OP_SUB = 1, 378 DS_OP_RSUB = 2, 379 DS_OP_INC = 3, 380 DS_OP_DEC = 4, 381 DS_OP_MIN_INT = 5, 382 DS_OP_MAX_INT = 6, 383 DS_OP_MIN_UINT = 7, 384 DS_OP_MAX_UINT = 8, 385 DS_OP_AND = 9, 386 DS_OP_OR = 10, 387 DS_OP_XOR = 11, 388 DS_OP_MSKOR = 12, 389 DS_OP_WRITE = 13, 390 DS_OP_WRITE_REL = 14, 391 DS_OP_WRITE2 = 15, 392 DS_OP_CMP_STORE = 16, 393 DS_OP_CMP_STORE_SPF = 17, 394 DS_OP_BYTE_WRITE = 18, 395 DS_OP_SHORT_WRITE = 19, 396 DS_OP_ADD_RET = 32, 397 DS_OP_SUB_RET = 33, 398 DS_OP_RSUB_RET = 34, 399 DS_OP_INC_RET = 35, 400 DS_OP_DEC_RET = 36, 401 DS_OP_MIN_INT_RET = 37, 402 DS_OP_MAX_INT_RET = 38, 403 DS_OP_MIN_UINT_RET = 39, 404 DS_OP_MAX_UINT_RET = 40, 405 DS_OP_AND_RET = 41, 406 DS_OP_OR_RET = 42, 407 DS_OP_XOR_RET = 43, 408 DS_OP_MSKOR_RET = 44, 409 DS_OP_XCHG_RET = 45, 410 DS_OP_XCHG_REL_RET = 46, 411 DS_OP_XCHG2_RET = 47, 412 DS_OP_CMP_XCHG_RET = 48, 413 DS_OP_CMP_XCHG_SPF_RET = 49, 414 DS_OP_READ_RET = 50, 415 DS_OP_READ_REL_RET = 51, 416 DS_OP_READ2_RET = 52, 417 DS_OP_READWRITE_RET = 53, 418 DS_OP_BYTE_READ_RET = 54, 419 DS_OP_UBYTE_READ_RET = 55, 420 DS_OP_SHORT_READ_RET = 56, 421 DS_OP_USHORT_READ_RET = 57, 422 DS_OP_ATOMIC_ORDERED_ALLOC_RET = 63, 423 DS_OP_INVALID = 64, 424 LDSOP2(ADD_RET), 425 LDSOP2(ADD), 426 LDSOP2(AND_RET), 427 LDSOP2(AND), 428 LDSOP2(WRITE), 429 LDSOP2(OR_RET), 430 LDSOP2(OR), 431 LDSOP2(MAX_INT_RET), 432 LDSOP2(MAX_INT), 433 LDSOP2(MAX_UINT_RET), 434 LDSOP2(MAX_UINT), 435 LDSOP2(MIN_INT_RET), 436 LDSOP2(MIN_INT), 437 LDSOP2(MIN_UINT_RET), 438 LDSOP2(MIN_UINT), 439 LDSOP2(XOR_RET), 440 LDSOP2(XOR), 441 LDSOP2(XCHG_RET), 442 LDS_CMP_XCHG_RET = LDS_OP3_LDS_CMP_XCHG_RET, 443 LDS_WRITE_REL = LDS_OP3_LDS_WRITE_REL 444 }; 445 446 #undef LDSOP2 447 448 struct LDSOp { 449 int nsrc; 450 const char *name; 451 }; 452 453 extern const std::map<ESDOp, LDSOp> lds_ops; 454 455 struct KCacheLine { 456 int bank{0}; 457 int addr{0}; 458 int len{0}; 459 int index_mode{0}; 460 enum KCacheLockMode { 461 free, 462 lock_1, 463 lock_2 464 } mode{free}; 465 }; 466 467 } // namespace r600 468 469 #endif // ALU_DEFINES_H 470