1 /* 2 * Copyright © 2022 Valve Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 */ 6 #include "helpers.h" 7 8 using namespace aco; 9 10 BEGIN_TEST(insert_waitcnt.ds_ordered_count) 11 if (!setup_cs(NULL, GFX10_3)) 12 return; 13 14 Operand def0(PhysReg(256), v1); 15 Operand def1(PhysReg(257), v1); 16 Operand def2(PhysReg(258), v1); 17 Operand gds_base(PhysReg(259), v1); 18 Operand chan_counter(PhysReg(260), v1); 19 Operand m(m0, s1); 20 21 Instruction* ds_instr; 22 //>> ds_ordered_count %0:v[0], %0:v[3], %0:m0 offset0:3072 gds storage:gds semantics:volatile 23 //! s_waitcnt lgkmcnt(0) 24 ds_instr = bld.ds(aco_opcode::ds_ordered_count, def0, gds_base, m, 3072u, 0u, true); 25 ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile); 26 27 //! ds_add_rtn_u32 %0:v[1], %0:v[3], %0:v[4], %0:m0 gds storage:gds semantics:volatile,atomic,rmw 28 ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, def1, gds_base, chan_counter, m, 0u, 0u, true); 29 ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw); 30 31 //! s_waitcnt lgkmcnt(0) 32 //! ds_ordered_count %0:v[2], %0:v[3], %0:m0 offset0:3840 gds storage:gds semantics:volatile 33 ds_instr = bld.ds(aco_opcode::ds_ordered_count, def2, gds_base, m, 3840u, 0u, true); 34 ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile); 35 36 finish_waitcnt_test(); 37 END_TEST 38 39 BEGIN_TEST(insert_waitcnt.clause) 40 if (!setup_cs(NULL, GFX11)) 41 return; 42 43 Definition def_v4(PhysReg(260), v1); 44 Definition def_v5(PhysReg(261), v1); 45 Definition def_v6(PhysReg(262), v1); 46 Definition def_v7(PhysReg(263), v1); 47 Operand op_v0(PhysReg(256), v1); 48 Operand op_v4(PhysReg(260), v1); 49 Operand op_v5(PhysReg(261), v1); 50 Operand op_v6(PhysReg(262), v1); 51 Operand op_v7(PhysReg(263), v1); 52 Operand desc0(PhysReg(0), s4); 53 54 //>> p_unit_test 0 55 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 56 57 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 58 //! v1: %0:v[5] = buffer_load_dword %0:s[0-3], %0:v[0], 0 59 //! v1: %0:v[6] = buffer_load_dword %0:s[0-3], %0:v[0], 0 60 //! v1: %0:v[7] = buffer_load_dword %0:s[0-3], %0:v[0], 0 61 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); 62 bld.mubuf(aco_opcode::buffer_load_dword, def_v5, desc0, op_v0, Operand::zero(), 0, false); 63 bld.mubuf(aco_opcode::buffer_load_dword, def_v6, desc0, op_v0, Operand::zero(), 0, false); 64 bld.mubuf(aco_opcode::buffer_load_dword, def_v7, desc0, op_v0, Operand::zero(), 0, false); 65 //! s_waitcnt vmcnt(0) 66 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[4], 0 67 //! v1: %0:v[5] = buffer_load_dword %0:s[0-3], %0:v[5], 0 68 //! v1: %0:v[6] = buffer_load_dword %0:s[0-3], %0:v[6], 0 69 //! v1: %0:v[7] = buffer_load_dword %0:s[0-3], %0:v[7], 0 70 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v4, Operand::zero(), 0, false); 71 bld.mubuf(aco_opcode::buffer_load_dword, def_v5, desc0, op_v5, Operand::zero(), 0, false); 72 bld.mubuf(aco_opcode::buffer_load_dword, def_v6, desc0, op_v6, Operand::zero(), 0, false); 73 bld.mubuf(aco_opcode::buffer_load_dword, def_v7, desc0, op_v7, Operand::zero(), 0, false); 74 //! s_waitcnt vmcnt(0) 75 //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[4] 76 //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[5] 77 //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[6] 78 //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[7] 79 bld.mubuf(aco_opcode::buffer_store_dword, desc0, op_v0, Operand::zero(), op_v4, 0, false); 80 bld.mubuf(aco_opcode::buffer_store_dword, desc0, op_v0, Operand::zero(), op_v5, 0, false); 81 bld.mubuf(aco_opcode::buffer_store_dword, desc0, op_v0, Operand::zero(), op_v6, 0, false); 82 bld.mubuf(aco_opcode::buffer_store_dword, desc0, op_v0, Operand::zero(), op_v7, 0, false); 83 84 //>> p_unit_test 1 85 bld.reset(program->create_and_insert_block()); 86 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); 87 88 //! s4: %0:s[4-7] = s_load_dwordx4 %0:s[0-1], 0 89 bld.smem(aco_opcode::s_load_dwordx4, Definition(PhysReg(4), s4), Operand(PhysReg(0), s2), 90 Operand::zero()); 91 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 92 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); 93 //! s_waitcnt lgkmcnt(0) vmcnt(0) 94 //! v1: %0:v[5] = buffer_load_dword %0:s[4-7], %0:v[4], 0 95 bld.mubuf(aco_opcode::buffer_load_dword, def_v5, Operand(PhysReg(4), s4), op_v4, Operand::zero(), 96 0, false); 97 98 //>> p_unit_test 2 99 bld.reset(program->create_and_insert_block()); 100 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); 101 102 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 103 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); 104 //! v_nop 105 bld.vop1(aco_opcode::v_nop); 106 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 107 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); 108 //! s_waitcnt vmcnt(0) 109 //! v1: %0:v[5] = buffer_load_dword %0:s[0-3], %0:v[4], 0 110 bld.mubuf(aco_opcode::buffer_load_dword, def_v5, desc0, op_v4, Operand::zero(), 0, false); 111 112 finish_waitcnt_test(); 113 END_TEST 114 115 BEGIN_TEST(insert_waitcnt.waw.mixed_vmem_lds.vmem) 116 if (!setup_cs(NULL, GFX10)) 117 return; 118 119 Definition def_v4(PhysReg(260), v1); 120 Operand op_v0(PhysReg(256), v1); 121 Operand desc0(PhysReg(0), s4); 122 123 //>> BB0 124 //! /* logical preds: / linear preds: / kind: top-level, */ 125 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 126 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); 127 128 //>> BB1 129 //! /* logical preds: / linear preds: / kind: */ 130 //! v1: %0:v[4] = ds_read_b32 %0:v[0] 131 bld.reset(program->create_and_insert_block()); 132 bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0); 133 134 bld.reset(program->create_and_insert_block()); 135 program->blocks[2].linear_preds.push_back(0); 136 program->blocks[2].linear_preds.push_back(1); 137 program->blocks[2].logical_preds.push_back(0); 138 program->blocks[2].logical_preds.push_back(1); 139 140 //>> BB2 141 //! /* logical preds: BB0, BB1, / linear preds: BB0, BB1, / kind: uniform, */ 142 //! s_waitcnt lgkmcnt(0) 143 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 144 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); 145 146 finish_waitcnt_test(); 147 END_TEST 148 149 BEGIN_TEST(insert_waitcnt.waw.mixed_vmem_lds.lds) 150 if (!setup_cs(NULL, GFX10)) 151 return; 152 153 Definition def_v4(PhysReg(260), v1); 154 Operand op_v0(PhysReg(256), v1); 155 Operand desc0(PhysReg(0), s4); 156 157 //>> BB0 158 //! /* logical preds: / linear preds: / kind: top-level, */ 159 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 160 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc0, op_v0, Operand::zero(), 0, false); 161 162 //>> BB1 163 //! /* logical preds: / linear preds: / kind: */ 164 //! v1: %0:v[4] = ds_read_b32 %0:v[0] 165 bld.reset(program->create_and_insert_block()); 166 bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0); 167 168 bld.reset(program->create_and_insert_block()); 169 program->blocks[2].linear_preds.push_back(0); 170 program->blocks[2].linear_preds.push_back(1); 171 program->blocks[2].logical_preds.push_back(0); 172 program->blocks[2].logical_preds.push_back(1); 173 174 //>> BB2 175 //! /* logical preds: BB0, BB1, / linear preds: BB0, BB1, / kind: uniform, */ 176 //! s_waitcnt vmcnt(0) 177 //! v1: %0:v[4] = ds_read_b32 %0:v[0] 178 bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0); 179 180 finish_waitcnt_test(); 181 END_TEST 182 183 BEGIN_TEST(insert_waitcnt.waw.vmem_types) 184 for (amd_gfx_level gfx : {GFX11, GFX12}) { 185 if (!setup_cs(NULL, gfx)) 186 continue; 187 188 Definition def_v4(PhysReg(260), v1); 189 Operand op_v0(PhysReg(256), v1); 190 Operand desc_s4(PhysReg(0), s4); 191 Operand desc_s8(PhysReg(8), s8); 192 193 //>> p_unit_test 0 194 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 195 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 196 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0)); 197 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 198 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 199 200 //>> p_unit_test 1 201 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 202 //~gfx11! s_waitcnt vmcnt(0) 203 //~gfx12! s_wait_loadcnt imm:0 204 //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 205 bld.reset(program->create_and_insert_block()); 206 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); 207 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 208 bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); 209 210 //>> p_unit_test 2 211 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 212 //~gfx11! s_waitcnt vmcnt(0) 213 //~gfx12! s_wait_loadcnt imm:0 214 //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d 215 bld.reset(program->create_and_insert_block()); 216 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); 217 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 218 bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), 219 Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); 220 221 //>> p_unit_test 3 222 //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 223 //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 224 bld.reset(program->create_and_insert_block()); 225 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); 226 bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); 227 bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); 228 229 //>> p_unit_test 4 230 //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 231 //~gfx11! s_waitcnt vmcnt(0) 232 //~gfx12! s_wait_samplecnt imm:0 233 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 234 bld.reset(program->create_and_insert_block()); 235 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); 236 bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); 237 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 238 239 //>> p_unit_test 5 240 //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 241 //~gfx11! s_waitcnt vmcnt(0) 242 //~gfx12! s_wait_samplecnt imm:0 243 //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d 244 bld.reset(program->create_and_insert_block()); 245 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); 246 bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); 247 bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), 248 Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); 249 250 //>> p_unit_test 6 251 //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d 252 //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d 253 bld.reset(program->create_and_insert_block()); 254 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6)); 255 bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), 256 Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); 257 bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), 258 Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); 259 260 //>> p_unit_test 7 261 //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d 262 //~gfx11! s_waitcnt vmcnt(0) 263 //~gfx12! s_wait_bvhcnt imm:0 264 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 265 bld.reset(program->create_and_insert_block()); 266 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7)); 267 bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), 268 Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); 269 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 270 271 //>> p_unit_test 8 272 //! v1: %0:v[4] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d 273 //~gfx11! s_waitcnt vmcnt(0) 274 //~gfx12! s_wait_bvhcnt imm:0 275 //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 276 bld.reset(program->create_and_insert_block()); 277 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8)); 278 bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v4, desc_s4, Operand(s4), Operand(v1), 279 Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))); 280 bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); 281 282 //>> BB9 283 //! /* logical preds: / linear preds: / kind: */ 284 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 285 bld.reset(program->create_and_insert_block()); 286 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 287 288 //>> BB10 289 //! /* logical preds: / linear preds: / kind: */ 290 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 291 bld.reset(program->create_and_insert_block()); 292 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 293 294 bld.reset(program->create_and_insert_block()); 295 program->blocks[11].linear_preds.push_back(9); 296 program->blocks[11].linear_preds.push_back(10); 297 program->blocks[11].logical_preds.push_back(9); 298 program->blocks[11].logical_preds.push_back(10); 299 300 //>> BB11 301 //! /* logical preds: BB9, BB10, / linear preds: BB9, BB10, / kind: uniform, */ 302 //! p_unit_test 9 303 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 304 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9)); 305 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 306 307 //>> BB12 308 //! /* logical preds: / linear preds: / kind: */ 309 //! v1: %0:v[4] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 310 bld.reset(program->create_and_insert_block()); 311 bld.mimg(aco_opcode::image_sample, def_v4, desc_s8, desc_s4, Operand(v1), op_v0); 312 313 //>> BB13 314 //! /* logical preds: / linear preds: / kind: */ 315 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 316 bld.reset(program->create_and_insert_block()); 317 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 318 319 bld.reset(program->create_and_insert_block()); 320 program->blocks[14].linear_preds.push_back(12); 321 program->blocks[14].linear_preds.push_back(13); 322 program->blocks[14].logical_preds.push_back(12); 323 program->blocks[14].logical_preds.push_back(13); 324 325 //>> BB14 326 //! /* logical preds: BB12, BB13, / linear preds: BB12, BB13, / kind: uniform, */ 327 //! p_unit_test 10 328 //~gfx11! s_waitcnt vmcnt(0) 329 //~gfx12! s_wait_samplecnt imm:0 330 //! v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 331 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10)); 332 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 333 334 finish_waitcnt_test(); 335 } 336 END_TEST 337 338 BEGIN_TEST(insert_waitcnt.vmem) 339 if (!setup_cs(NULL, GFX12)) 340 return; 341 342 Definition def_v4(PhysReg(260), v1); 343 Definition def_v5(PhysReg(261), v1); 344 Definition def_v6(PhysReg(262), v1); 345 Definition def_v7(PhysReg(263), v1); 346 Definition def_v8(PhysReg(264), v1); 347 Definition def_v9(PhysReg(265), v1); 348 Operand op_v0(PhysReg(256), v1); 349 Operand op_v4(PhysReg(260), v1); 350 Operand op_v5(PhysReg(261), v1); 351 Operand op_v6(PhysReg(262), v1); 352 Operand op_v7(PhysReg(263), v1); 353 Operand op_v8(PhysReg(264), v1); 354 Operand op_v9(PhysReg(265), v1); 355 Operand desc_s4(PhysReg(0), s4); 356 Operand desc_s8(PhysReg(8), s8); 357 358 //>> v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 359 //! v1: %0:v[5] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 360 //! v1: %0:v[6] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d unrm r128 361 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 362 bld.mimg(aco_opcode::image_sample, def_v5, desc_s8, desc_s4, Operand(v1), op_v0); 363 Instruction* instr = 364 bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v6, desc_s4, Operand(s4), Operand(v1), 365 Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))) 366 .instr; 367 instr->mimg().unrm = true; 368 instr->mimg().r128 = true; 369 370 //! v1: %0:v[7] = image_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d 371 //! v1: %0:v[8] = image_sample %0:s[8-15], %0:s[0-3], v1: undef, %0:v[0] 1d 372 //! v1: %0:v[9] = image_bvh64_intersect_ray %0:s[0-3], s4: undef, v1: undef, %0:v[16-26] 1d unrm r128 373 bld.mimg(aco_opcode::image_load, def_v7, desc_s8, Operand(s4), Operand(v1), op_v0, 0x1); 374 bld.mimg(aco_opcode::image_sample, def_v8, desc_s8, desc_s4, Operand(v1), op_v0); 375 instr = bld.mimg(aco_opcode::image_bvh64_intersect_ray, def_v9, desc_s4, Operand(s4), 376 Operand(v1), Operand(PhysReg(272), RegClass::get(RegType::vgpr, 11 * 4))) 377 .instr; 378 instr->mimg().unrm = true; 379 instr->mimg().r128 = true; 380 381 //! s_wait_loadcnt imm:1 382 //! p_unit_test 0, %0:v[4] 383 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0), op_v4); 384 //! s_wait_samplecnt imm:1 385 //! p_unit_test 1, %0:v[5] 386 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1), op_v5); 387 //! s_wait_bvhcnt imm:1 388 //! p_unit_test 2, %0:v[6] 389 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2), op_v6); 390 //! s_wait_loadcnt imm:0 391 //! p_unit_test 3, %0:v[7] 392 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3), op_v7); 393 //! s_wait_samplecnt imm:0 394 //! p_unit_test 4, %0:v[8] 395 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4), op_v8); 396 //! s_wait_bvhcnt imm:0 397 //! p_unit_test 5, %0:v[9] 398 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5), op_v9); 399 400 /* Despite not using a sampler, this uses samplecnt. */ 401 //! v1: %0:v[5] = image_msaa_load %0:s[8-15], s4: undef, v1: undef, %0:v[0] 1d 402 //! s_wait_samplecnt imm:0 403 //! p_unit_test 6, %0:v[5] 404 bld.mimg(aco_opcode::image_msaa_load, def_v5, desc_s8, Operand(s4), Operand(v1), op_v0); 405 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6), op_v5); 406 407 finish_waitcnt_test(); 408 END_TEST 409 410 BEGIN_TEST(insert_waitcnt.lds_smem) 411 for (amd_gfx_level gfx : {GFX11, GFX12}) { 412 if (!setup_cs(NULL, gfx)) 413 continue; 414 415 Definition def_v4(PhysReg(260), v1); 416 Definition def_v5(PhysReg(261), v1); 417 Definition def_s4(PhysReg(4), s1); 418 Definition def_s5(PhysReg(5), s1); 419 Operand op_s0(PhysReg(0), s1); 420 Operand op_s4(PhysReg(4), s1); 421 Operand op_s5(PhysReg(5), s1); 422 Operand op_v0(PhysReg(256), v1); 423 Operand op_v4(PhysReg(260), v1); 424 Operand op_v5(PhysReg(261), v1); 425 Operand desc_s4(PhysReg(0), s4); 426 427 //>> v1: %0:v[4] = ds_read_b32 %0:v[0] 428 //! s1: %0:s[4] = s_buffer_load_dword %0:s[0-3], %0:s[0] 429 //! v1: %0:v[5] = ds_read_b32 %0:v[0] 430 //! s1: %0:s[5] = s_buffer_load_dword %0:s[0-3], %0:s[0] 431 bld.ds(aco_opcode::ds_read_b32, def_v4, op_v0); 432 bld.smem(aco_opcode::s_buffer_load_dword, def_s4, desc_s4, op_s0); 433 bld.ds(aco_opcode::ds_read_b32, def_v5, op_v0); 434 bld.smem(aco_opcode::s_buffer_load_dword, def_s5, desc_s4, op_s0); 435 436 //~gfx11! s_waitcnt lgkmcnt(1) 437 //~gfx12! s_wait_dscnt imm:1 438 //! p_unit_test 0, %0:v[4] 439 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0), op_v4); 440 //~gfx11! s_waitcnt lgkmcnt(0) 441 //~gfx12! s_wait_kmcnt imm:0 442 //! p_unit_test 1, %0:s[4] 443 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1), op_s4); 444 //~gfx12! s_wait_dscnt imm:0 445 //! p_unit_test 2, %0:v[5] 446 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2), op_v5); 447 //! p_unit_test 3, %0:s[5] 448 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3), op_s5); 449 450 finish_waitcnt_test(); 451 } 452 END_TEST 453 454 BEGIN_TEST(insert_waitcnt.sendmsg_smem) 455 for (amd_gfx_level gfx : {GFX11, GFX12}) { 456 if (!setup_cs(NULL, gfx)) 457 continue; 458 459 Definition def_s4(PhysReg(4), s1); 460 Definition def_s5(PhysReg(5), s1); 461 Definition def_s6(PhysReg(6), s1); 462 Definition def_s7(PhysReg(7), s1); 463 Operand op_s0(PhysReg(0), s1); 464 Operand op_s4(PhysReg(4), s1); 465 Operand op_s5(PhysReg(5), s1); 466 Operand op_s6(PhysReg(6), s1); 467 Operand op_s7(PhysReg(7), s1); 468 Operand desc_s4(PhysReg(0), s4); 469 470 //>> s1: %0:s[4] = s_sendmsg_rtn_b32 3 sendmsg(rtn_get_realtime) 471 //! s1: %0:s[5] = s_buffer_load_dword %0:s[0-3], %0:s[0] 472 //! s1: %0:s[6] = s_sendmsg_rtn_b32 3 sendmsg(rtn_get_realtime) 473 //! s1: %0:s[7] = s_buffer_load_dword %0:s[0-3], %0:s[0] 474 bld.sop1(aco_opcode::s_sendmsg_rtn_b32, def_s4, Operand::c32(sendmsg_rtn_get_realtime)); 475 bld.smem(aco_opcode::s_buffer_load_dword, def_s5, desc_s4, op_s0); 476 bld.sop1(aco_opcode::s_sendmsg_rtn_b32, def_s6, Operand::c32(sendmsg_rtn_get_realtime)); 477 bld.smem(aco_opcode::s_buffer_load_dword, def_s7, desc_s4, op_s0); 478 479 //~gfx12! s_wait_kmcnt imm:1 480 //~gfx11! s_waitcnt lgkmcnt(1) 481 //! p_unit_test 0, %0:s[4] 482 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0), op_s4); 483 //~gfx12! s_wait_kmcnt imm:0 484 //~gfx11! s_waitcnt lgkmcnt(0) 485 //! p_unit_test 1, %0:s[5] 486 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1), op_s5); 487 //! p_unit_test 2, %0:s[6] 488 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2), op_s6); 489 //! p_unit_test 3, %0:s[7] 490 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3), op_s7); 491 492 finish_waitcnt_test(); 493 } 494 END_TEST 495 496 BEGIN_TEST(insert_waitcnt.vmem_ds) 497 if (!setup_cs(NULL, GFX12)) 498 return; 499 500 Definition def_v4(PhysReg(260), v1); 501 Definition def_v5(PhysReg(261), v1); 502 Operand op_v0(PhysReg(256), v1); 503 Operand op_v1(PhysReg(257), v1); 504 Operand op_v4(PhysReg(260), v1); 505 Operand op_v5(PhysReg(261), v1); 506 Operand desc_s4(PhysReg(0), s4); 507 508 program->workgroup_size = 128; 509 program->wgp_mode = true; 510 511 //>> v1: %0:v[4] = buffer_load_dword %0:s[0-3], %0:v[0], 0 512 //! v1: %0:v[5] = ds_read_b32 %0:v[0] 513 bld.mubuf(aco_opcode::buffer_load_dword, def_v4, desc_s4, op_v0, Operand::zero(), 0, false); 514 bld.ds(aco_opcode::ds_read_b32, def_v5, op_v0); 515 516 //! s_wait_loadcnt_dscnt dscnt(0) loadcnt(0) 517 //! p_unit_test 0, %0:v[4], %0:v[5] 518 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0), op_v4, op_v5); 519 520 //! buffer_store_dword %0:s[0-3], %0:v[0], 0, %0:v[1] storage:buffer 521 //! v1: %0:v[5] = ds_write_b32 %0:v[0], %0:v[1] storage:shared 522 Instruction* instr = 523 bld.mubuf(aco_opcode::buffer_store_dword, desc_s4, op_v0, Operand::zero(), op_v1, 0, false) 524 .instr; 525 instr->mubuf().sync = memory_sync_info(storage_buffer); 526 instr = bld.ds(aco_opcode::ds_write_b32, def_v5, op_v0, op_v1).instr; 527 instr->ds().sync = memory_sync_info(storage_shared); 528 529 //! s_wait_storecnt_dscnt dscnt(0) storecnt(0) 530 bld.barrier(aco_opcode::p_barrier, 531 memory_sync_info(storage_buffer | storage_shared, semantic_acqrel, scope_workgroup)); 532 533 finish_waitcnt_test(); 534 END_TEST 535