1 /* -*- mesa-c++ -*-
2 * Copyright 2022 Collabora LTD
3 * Author: Gert Wollny <[email protected]>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "sfn_instr_tex.h"
8
9 #include "nir_builder.h"
10 #include "sfn_debug.h"
11 #include "sfn_instr_alu.h"
12 #include "sfn_instr_fetch.h"
13 #include "sfn_nir.h"
14
15 namespace r600 {
16
17 using std::string;
18
TexInstr(Opcode op,const RegisterVec4 & dest,const RegisterVec4::Swizzle & dest_swizzle,const RegisterVec4 & src,unsigned resource_id,PRegister resource_offs,int sampler_id,PRegister sampler_offset)19 TexInstr::TexInstr(Opcode op,
20 const RegisterVec4& dest,
21 const RegisterVec4::Swizzle& dest_swizzle,
22 const RegisterVec4& src,
23 unsigned resource_id,
24 PRegister resource_offs,
25 int sampler_id, PRegister sampler_offset):
26 InstrWithVectorResult(dest, dest_swizzle, resource_id, resource_offs),
27 m_opcode(op),
28 m_src(src),
29 m_inst_mode(0),
30 m_sampler(this, sampler_id, sampler_offset)
31 {
32 memset(m_coord_offset, 0, sizeof(m_coord_offset));
33 m_src.add_use(this);
34 }
35
36 void
accept(ConstInstrVisitor & visitor) const37 TexInstr::accept(ConstInstrVisitor& visitor) const
38 {
39 visitor.visit(*this);
40 }
41
42 void
accept(InstrVisitor & visitor)43 TexInstr::accept(InstrVisitor& visitor)
44 {
45 visitor.visit(this);
46 }
47
48 void
set_offset(unsigned index,int32_t val)49 TexInstr::set_offset(unsigned index, int32_t val)
50 {
51 assert(index < 3);
52 m_coord_offset[index] = val;
53 }
54
55 int
get_offset(unsigned index) const56 TexInstr::get_offset(unsigned index) const
57 {
58 assert(index < 3);
59 return m_coord_offset[index] << 1;
60 }
61
62 void
set_gather_comp(int cmp)63 TexInstr::set_gather_comp(int cmp)
64 {
65 m_inst_mode = cmp;
66 }
67
68 bool
is_equal_to(const TexInstr & lhs) const69 TexInstr::is_equal_to(const TexInstr& lhs) const
70 {
71 if (m_opcode != lhs.m_opcode)
72 return false;
73
74 if (!comp_dest(lhs.dst(), lhs.all_dest_swizzle()))
75 return false;
76
77 if (m_src != lhs.m_src)
78 return false;
79
80 if (resource_offset() && lhs.resource_offset()) {
81 if (!resource_offset()->equal_to(*lhs.resource_offset()))
82 return false;
83 } else if ((resource_offset() && !lhs.resource_offset()) ||
84 (!resource_offset() && lhs.resource_offset()))
85 return false;
86
87 if (sampler_offset() && lhs.sampler_offset()) {
88 if (!sampler_offset()->equal_to(*lhs.sampler_offset()))
89 return false;
90 } else if ((sampler_offset() && !lhs.sampler_offset()) ||
91 (!sampler_offset() && lhs.sampler_offset()))
92 return false;
93
94 if (m_tex_flags != lhs.m_tex_flags)
95 return false;
96
97 for (int i = 0; i < 3; ++i) {
98 if (m_coord_offset[i] != lhs.m_coord_offset[i])
99 return false;
100 }
101
102 return m_inst_mode == lhs.m_inst_mode &&
103 resource_id() == lhs.resource_id() &&
104 resource_index_mode() == lhs.resource_index_mode() &&
105 sampler_id() == lhs.sampler_id() &&
106 sampler_index_mode() == lhs.sampler_index_mode();
107 }
108
109 bool
propagate_death()110 TexInstr::propagate_death()
111 {
112 m_src.del_use(this);
113 return true;
114 }
115
forward_set_blockid(int id,int index)116 void TexInstr::forward_set_blockid(int id, int index)
117 {
118 for (auto p : m_prepare_instr)
119 p->set_blockid(id, index);
120 }
121
122 bool
do_ready() const123 TexInstr::do_ready() const
124 {
125 for (auto p : m_prepare_instr)
126 if (!p->ready())
127 return false;
128
129 for (auto p : required_instr())
130 if (!p->is_scheduled() && !p->is_dead()) {
131 return false;
132 }
133
134 if (resource_offset() && !resource_offset()->ready(block_id(), index()))
135 return false;
136 return m_src.ready(block_id(), index());
137 }
138
139 void
do_print(std::ostream & os) const140 TexInstr::do_print(std::ostream& os) const
141 {
142
143 for (auto& p : prepare_instr()) {
144 os << *p << "\n";
145 }
146
147 os << "TEX " << opname(m_opcode) << " ";
148 print_dest(os);
149
150 os << " : ";
151 m_src.print(os);
152
153 os << " RID:" << resource_id();
154 if (resource_offset())
155 os << " RO:" << *resource_offset();
156
157 os << " SID:" << sampler_id();
158 if (sampler_offset())
159 os << " SO:" << *sampler_offset();
160
161 if (m_coord_offset[0])
162 os << " OX:" << m_coord_offset[0];
163 if (m_coord_offset[1])
164 os << " OY:" << m_coord_offset[1];
165 if (m_coord_offset[2])
166 os << " OZ:" << m_coord_offset[2];
167
168 if (m_inst_mode || is_gather(m_opcode))
169 os << " MODE:" << m_inst_mode;
170
171 os << " ";
172 os << (m_tex_flags.test(x_unnormalized) ? "U" : "N");
173 os << (m_tex_flags.test(y_unnormalized) ? "U" : "N");
174 os << (m_tex_flags.test(z_unnormalized) ? "U" : "N");
175 os << (m_tex_flags.test(w_unnormalized) ? "U" : "N");
176 }
177
178 const char *
opname(Opcode op)179 TexInstr::opname(Opcode op)
180 {
181 switch (op) {
182 case ld:
183 return "LD";
184 case get_resinfo:
185 return "GET_TEXTURE_RESINFO";
186 case get_nsamples:
187 return "GET_NUMBER_OF_SAMPLES";
188 case get_tex_lod:
189 return "GET_LOD";
190 case get_gradient_h:
191 return "GET_GRADIENTS_H";
192 case get_gradient_v:
193 return "GET_GRADIENTS_V";
194 case set_offsets:
195 return "SET_TEXTURE_OFFSETS";
196 case keep_gradients:
197 return "KEEP_GRADIENTS";
198 case set_gradient_h:
199 return "SET_GRADIENTS_H";
200 case set_gradient_v:
201 return "SET_GRADIENTS_V";
202 case sample:
203 return "SAMPLE";
204 case sample_l:
205 return "SAMPLE_L";
206 case sample_lb:
207 return "SAMPLE_LB";
208 case sample_lz:
209 return "SAMPLE_LZ";
210 case sample_g:
211 return "SAMPLE_G";
212 case sample_g_lb:
213 return "SAMPLE_G_L";
214 case gather4:
215 return "GATHER4";
216 case gather4_o:
217 return "GATHER4_O";
218 case sample_c:
219 return "SAMPLE_C";
220 case sample_c_l:
221 return "SAMPLE_C_L";
222 case sample_c_lb:
223 return "SAMPLE_C_LB";
224 case sample_c_lz:
225 return "SAMPLE_C_LZ";
226 case sample_c_g:
227 return "SAMPLE_C_G";
228 case sample_c_g_lb:
229 return "SAMPLE_C_G_L";
230 case gather4_c:
231 return "GATHER4_C";
232 case gather4_c_o:
233 return "OP_GATHER4_C_O";
234 default:
235 return "ERROR";
236 }
237 }
238
239 const std::map<TexInstr::Opcode, std::string> TexInstr::s_opcode_map = {
240 {ld, "LD" },
241 {get_resinfo, "GET_TEXTURE_RESINFO" },
242 {get_nsamples, "GET_NUMBER_OF_SAMPLES"},
243 {get_tex_lod, "GET_LOD" },
244 {get_gradient_h, "GET_GRADIENTS_H" },
245 {get_gradient_v, "GET_GRADIENTS_V" },
246 {set_offsets, "SET_TEXTURE_OFFSETS" },
247 {keep_gradients, "KEEP_GRADIENTS" },
248 {set_gradient_h, "SET_GRADIENTS_H" },
249 {set_gradient_v, "SET_GRADIENTS_V" },
250 {sample, "SAMPLE" },
251 {sample_l, "SAMPLE_L" },
252 {sample_lb, "SAMPLE_LB" },
253 {sample_lz, "SAMPLE_LZ" },
254 {sample_g, "SAMPLE_G" },
255 {sample_g_lb, "SAMPLE_G_L" },
256 {gather4, "GATHER4" },
257 {gather4_o, "GATHER4_O" },
258 {sample_c, "SAMPLE_C" },
259 {sample_c_l, "SAMPLE_C_L" },
260 {sample_c_lb, "SAMPLE_C_LB" },
261 {sample_c_lz, "SAMPLE_C_LZ" },
262 {sample_c_g, "SAMPLE_C_G" },
263 {sample_c_g_lb, "SAMPLE_C_G_L" },
264 {gather4_c, "GATHER4_C" },
265 {gather4_c_o, "OP_GATHER4_C_O" },
266 {unknown, "ERROR" }
267 };
268
269 bool
is_gather(Opcode op)270 TexInstr::is_gather(Opcode op)
271 {
272 return op == gather4 || op == gather4_c || op == gather4_o || op == gather4_c_o;
273 }
274
275 TexInstr::Opcode
op_from_string(const std::string & s)276 TexInstr::op_from_string(const std::string& s)
277 {
278 for (auto& [op, str] : s_opcode_map) {
279 if (s == str)
280 return op;
281 }
282 return unknown;
283 }
284
285 Instr::Pointer
from_string(std::istream & is,ValueFactory & value_fctory)286 TexInstr::from_string(std::istream& is, ValueFactory& value_fctory)
287 {
288 string opstr;
289 string deststr;
290 is >> opstr >> deststr;
291
292 auto opcode = TexInstr::op_from_string(opstr);
293
294 RegisterVec4::Swizzle dest_swz;
295
296 auto dest = value_fctory.dest_vec4_from_string(deststr, dest_swz, pin_group);
297
298 char dummy;
299 is >> dummy;
300 assert(dummy == ':');
301
302 string srcstr;
303 is >> srcstr;
304
305 auto src = value_fctory.src_vec4_from_string(srcstr);
306
307 string res_id_str;
308 string sampler_id_str;
309
310 is >> res_id_str >> sampler_id_str;
311
312 int res_id = int_from_string_with_prefix(res_id_str, "RID:");
313 int sampler_id = int_from_string_with_prefix(sampler_id_str, "SID:");
314
315 auto tex = new TexInstr(opcode, dest, dest_swz, src, res_id, nullptr,
316 sampler_id, nullptr);
317
318 while (!is.eof() && is.good()) {
319 std::string next_token;
320 is >> next_token;
321
322 if (next_token.empty())
323 break;
324
325 if (next_token[0] == 'U' || next_token[0] == 'N') {
326 tex->read_tex_coord_normalitazion(next_token);
327 } else {
328 tex->set_tex_param(next_token);
329 }
330 }
331
332 return tex;
333 }
334
335 void
read_tex_coord_normalitazion(const std::string & flags)336 TexInstr::read_tex_coord_normalitazion(const std::string& flags)
337 {
338 assert(flags.length() == 4);
339 if (flags[0] == 'U')
340 set_tex_flag(x_unnormalized);
341 if (flags[1] == 'U')
342 set_tex_flag(y_unnormalized);
343 if (flags[2] == 'U')
344 set_tex_flag(z_unnormalized);
345 if (flags[3] == 'U')
346 set_tex_flag(w_unnormalized);
347 }
348
349 void
set_tex_param(const std::string & token)350 TexInstr::set_tex_param(const std::string& token)
351 {
352 if (token.substr(0, 3) == "OX:")
353 set_offset(0, int_from_string_with_prefix(token, "OX:"));
354 else if (token.substr(0, 3) == "OY:")
355 set_offset(1, int_from_string_with_prefix(token, "OY:"));
356 else if (token.substr(0, 3) == "OZ:")
357 set_offset(2, int_from_string_with_prefix(token, "OZ:"));
358 else if (token.substr(0, 5) == "MODE:")
359 set_inst_mode(int_from_string_with_prefix(token, "MODE:"));
360 else if (token.substr(0, 3) == "SO:")
361 set_sampler_offset(VirtualValue::from_string(token.substr(3))->as_register());
362 else if (token.substr(0, 3) == "RO:")
363 set_resource_offset(VirtualValue::from_string(token.substr(3))->as_register());
364 else {
365 std::cerr << "Token '" << token << "': ";
366 unreachable("Unknown token in tex param");
367 }
368 }
369
370 bool
from_nir(nir_tex_instr * tex,Shader & shader)371 TexInstr::from_nir(nir_tex_instr *tex, Shader& shader)
372 {
373 Inputs src(*tex, shader.value_factory());
374
375 if (nir_tex_instr_src_index(tex, nir_tex_src_backend1) != -1)
376 return emit_lowered_tex(tex, src, shader);
377
378 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
379 switch (tex->op) {
380 case nir_texop_txs:
381 return emit_tex_txs(tex, src, {0, 1, 2, 3}, shader);
382 case nir_texop_txf:
383 return emit_buf_txf(tex, src, shader);
384 default:
385 return false;
386 }
387 } else {
388 switch (tex->op) {
389 case nir_texop_txs:
390 return emit_tex_txs(tex, src, {0, 1, 2, 3}, shader);
391 case nir_texop_lod:
392 return emit_tex_lod(tex, src, shader);
393 case nir_texop_query_levels:
394 return emit_tex_txs(tex, src, {3, 7, 7, 7}, shader);
395 case nir_texop_texture_samples:
396 return emit_tex_texture_samples(tex, src, shader);
397 default:
398 return false;
399 }
400 }
401 return true;
402 }
403
404 bool
replace_source(PRegister old_src,PVirtualValue new_src)405 TexInstr::replace_source(PRegister old_src, PVirtualValue new_src)
406 {
407 if (old_src->pin() != pin_free)
408 return false;
409
410 if (!new_src->as_register())
411 return false;
412
413 bool success = false;
414 for (int i = 0; i < 4; ++i) {
415 if (m_src[i]->equal_to(*old_src)) {
416 m_src.set_value(i, new_src->as_register());
417 success = true;
418 }
419 }
420 m_src.validate();
421 if (success) {
422 old_src->del_use(this);
423 new_src->as_register()->add_use(this);
424 }
425 return success;
426 }
427
update_indirect_addr(PRegister old_reg,PRegister addr)428 void TexInstr::update_indirect_addr(PRegister old_reg, PRegister addr)
429 {
430 if (resource_offset() && old_reg->equal_to(*resource_offset()))
431 set_resource_offset(addr);
432 else if (sampler_offset() && old_reg->equal_to(*sampler_offset()))
433 set_sampler_offset(addr);
434
435 for (auto& p : m_prepare_instr)
436 p->update_indirect_addr(old_reg, addr);
437 }
438
439 uint8_t
allowed_src_chan_mask() const440 TexInstr::allowed_src_chan_mask() const
441 {
442 return m_src.free_chan_mask();
443 }
444
445 struct SamplerId {
446 int id;
447 bool indirect;
448 };
449
450 SamplerId
get_sampler_id(int sampler_id,const nir_variable * deref)451 get_sampler_id(int sampler_id, const nir_variable *deref)
452 {
453 SamplerId result = {sampler_id, false};
454
455 if (deref) {
456 assert(glsl_type_is_sampler(deref->type));
457 result.id = deref->data.binding;
458 }
459 return result;
460 }
461
462 void
emit_set_gradients(nir_tex_instr * tex,int texture_id,Inputs & src,TexInstr * irt,Shader & shader)463 TexInstr::emit_set_gradients(
464 nir_tex_instr *tex, int texture_id, Inputs& src, TexInstr *irt, Shader& shader)
465 {
466 TexInstr *grad[2] = {nullptr, nullptr};
467 RegisterVec4 empty_dst(0, false, {0, 0, 0, 0}, pin_group);
468 grad[0] = new TexInstr(set_gradient_h,
469 empty_dst,
470 {7, 7, 7, 7},
471 src.ddx,
472 texture_id,
473 src.texture_offset);
474 grad[0]->set_rect_coordinate_flags(tex);
475 grad[0]->set_always_keep();
476
477 grad[1] = new TexInstr(set_gradient_v,
478 empty_dst,
479 {7, 7, 7, 7},
480 src.ddy,
481 texture_id,
482 src.texture_offset);
483 grad[1]->set_rect_coordinate_flags(tex);
484 grad[1]->set_always_keep();
485 irt->add_prepare_instr(grad[0]);
486 irt->add_prepare_instr(grad[1]);
487 if (shader.last_txd())
488 irt->add_required_instr(shader.last_txd());
489 shader.set_last_txd(irt);
490 }
491
492 void
emit_set_offsets(nir_tex_instr * tex,int texture_id,Inputs & src,TexInstr * irt,Shader & shader)493 TexInstr::emit_set_offsets(nir_tex_instr *tex, int texture_id, Inputs& src, TexInstr *irt, Shader& shader)
494 {
495 RegisterVec4::Swizzle swizzle = {4, 4, 4, 4};
496 int src_components = tex->coord_components;
497 if (tex->is_array)
498 --src_components;
499
500 for (int i = 0; i < src_components; ++i)
501 swizzle[i] = i;
502
503 auto ofs = shader.value_factory().src_vec4(*src.offset, pin_group, swizzle);
504 RegisterVec4 empty_dst(0, false, {0, 0, 0, 0}, pin_group);
505
506 auto set_ofs = new TexInstr(TexInstr::set_offsets,
507 empty_dst,
508 {7, 7, 7, 7},
509 ofs,
510 texture_id + R600_MAX_CONST_BUFFERS,
511 src.texture_offset);
512 set_ofs->set_always_keep();
513 irt->add_prepare_instr(set_ofs);
514 }
515
516 bool
emit_lowered_tex(nir_tex_instr * tex,Inputs & src,Shader & shader)517 TexInstr::emit_lowered_tex(nir_tex_instr *tex, Inputs& src, Shader& shader)
518 {
519 assert(src.backend1);
520 assert(src.backend2);
521
522 auto& vf = shader.value_factory();
523 sfn_log << SfnLog::instr << "emit '" << *reinterpret_cast<nir_instr *>(tex) << "' ("
524 << __func__ << ")\n";
525
526 auto params = nir_src_as_const_value(*src.backend2);
527 int32_t coord_mask = params[0].i32;
528 int32_t flags = params[1].i32;
529 int32_t inst_mode = params[2].i32;
530 uint32_t dst_swz_packed = params[3].u32;
531
532 auto dst = vf.dest_vec4(tex->def, pin_group);
533
534 RegisterVec4::Swizzle src_swizzle = {0};
535 for (int i = 0; i < 4; ++i)
536 src_swizzle[i] = (coord_mask & (1 << i)) ? i : 7;
537
538 auto src_coord = vf.src_vec4(*src.backend1, pin_group, src_swizzle);
539
540 RegisterVec4::Swizzle dst_swz = {0, 1, 2, 3};
541 if (dst_swz_packed) {
542 for (int i = 0; i < 4; ++i) {
543 dst_swz[i] = (dst_swz_packed >> (8 * i)) & 0xff;
544 }
545 }
546
547 int texture_id = tex->texture_index + R600_MAX_CONST_BUFFERS;
548 auto irt = new TexInstr(src.opcode,
549 dst,
550 dst_swz,
551 src_coord,
552 texture_id,
553 src.texture_offset,
554 tex->sampler_index,
555 src.sampler_offset);
556
557 if (tex->op == nir_texop_txd)
558 emit_set_gradients(tex, texture_id, src, irt, shader);
559
560 if (!irt->set_coord_offsets(src.offset)) {
561 assert(tex->op == nir_texop_tg4);
562 emit_set_offsets(tex, texture_id, src, irt, shader);
563 }
564
565 for (const auto f : TexFlags) {
566 if (flags & (1 << f))
567 irt->set_tex_flag(f);
568 }
569
570 irt->set_inst_mode(inst_mode);
571
572 shader.emit_instruction(irt);
573 return true;
574 }
575
576 bool
emit_buf_txf(nir_tex_instr * tex,Inputs & src,Shader & shader)577 TexInstr::emit_buf_txf(nir_tex_instr *tex, Inputs& src, Shader& shader)
578 {
579 auto& vf = shader.value_factory();
580 auto dst = vf.dest_vec4(tex->def, pin_group);
581
582 PRegister tex_offset = nullptr;
583 if (src.sampler_offset)
584 tex_offset = shader.emit_load_to_register(src.sampler_offset);
585
586 auto *real_dst = &dst;
587 RegisterVec4 tmp = vf.temp_vec4(pin_group);
588
589 if (shader.chip_class() < ISA_CC_EVERGREEN) {
590 real_dst = &tmp;
591 }
592
593 auto ir = new LoadFromBuffer(*real_dst,
594 {0, 1, 2, 3},
595 src.coord[0],
596 0,
597 tex->texture_index + R600_MAX_CONST_BUFFERS,
598 tex_offset,
599 fmt_invalid);
600 ir->set_fetch_flag(FetchInstr::use_const_field);
601 shader.emit_instruction(ir);
602 shader.set_flag(Shader::sh_uses_tex_buffer);
603
604 if (shader.chip_class() < ISA_CC_EVERGREEN) {
605 auto tmp_w = vf.temp_register();
606 int buf_sel = (512 + R600_BUFFER_INFO_OFFSET / 16) + 2 * tex->texture_index;
607 AluInstr *ir = nullptr;
608 for (int i = 0; i < 4; ++i) {
609 auto d = i < 3 ? dst[i] : tmp_w;
610 ir = new AluInstr(op2_and_int,
611 d,
612 tmp[i],
613 vf.uniform(buf_sel, i, R600_BUFFER_INFO_CONST_BUFFER),
614 AluInstr::write);
615 shader.emit_instruction(ir);
616 }
617
618 ir->set_alu_flag(alu_last_instr);
619 shader.emit_instruction(
620 new AluInstr(op2_or_int,
621 dst[3],
622 tmp_w,
623 vf.uniform(buf_sel + 1, 0, R600_BUFFER_INFO_CONST_BUFFER),
624 AluInstr::last_write));
625 }
626
627 return true;
628 }
629
630 bool
emit_tex_texture_samples(nir_tex_instr * instr,Inputs & src,Shader & shader)631 TexInstr::emit_tex_texture_samples(nir_tex_instr *instr, Inputs& src, Shader& shader)
632 {
633 RegisterVec4 dest = shader.value_factory().dest_vec4(instr->def, pin_chan);
634 RegisterVec4 help{
635 0, true, {4, 4, 4, 4}
636 };
637
638 int res_id = R600_MAX_CONST_BUFFERS + instr->texture_index;
639
640 // Fishy: should the zero be instr->sampler_index?
641 auto ir =
642 new TexInstr(src.opcode, dest, {3, 7, 7, 7}, help, res_id, src.texture_offset);
643 shader.emit_instruction(ir);
644 return true;
645 }
646
647 bool
emit_tex_txs(nir_tex_instr * tex,Inputs & src,RegisterVec4::Swizzle dest_swz,Shader & shader)648 TexInstr::emit_tex_txs(nir_tex_instr *tex,
649 Inputs& src,
650 RegisterVec4::Swizzle dest_swz,
651 Shader& shader)
652 {
653 auto& vf = shader.value_factory();
654
655 auto dest = vf.dest_vec4(tex->def, pin_group);
656
657 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
658 if (shader.chip_class() >= ISA_CC_EVERGREEN) {
659 shader.emit_instruction(new QueryBufferSizeInstr(
660 dest, {0, 7, 7, 7}, tex->texture_index + R600_MAX_CONST_BUFFERS));
661 } else {
662 int id = 2 * tex->texture_index + (512 + R600_BUFFER_INFO_OFFSET / 16) + 1;
663 auto src = vf.uniform(id, 1, R600_BUFFER_INFO_CONST_BUFFER);
664 shader.emit_instruction(
665 new AluInstr(op1_mov, dest[0], src, AluInstr::last_write));
666 shader.set_flag(Shader::sh_uses_tex_buffer);
667 }
668 } else {
669
670 auto src_lod = vf.temp_register();
671 shader.emit_instruction(
672 new AluInstr(op1_mov, src_lod, src.lod, AluInstr::last_write));
673
674 RegisterVec4 src_coord(src_lod, src_lod, src_lod, src_lod, pin_free);
675
676 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
677 dest_swz[2] = 7;
678
679 auto ir = new TexInstr(get_resinfo,
680 dest,
681 dest_swz,
682 src_coord,
683 tex->texture_index + R600_MAX_CONST_BUFFERS,
684 src.texture_offset);
685
686 ir->set_dest_swizzle(dest_swz);
687 shader.emit_instruction(ir);
688
689 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
690 auto src_loc = vf.uniform(512 + R600_BUFFER_INFO_OFFSET / 16 + (tex->texture_index >> 2),
691 tex->texture_index & 3,
692 R600_BUFFER_INFO_CONST_BUFFER);
693
694 auto alu = new AluInstr(op1_mov, dest[2], src_loc, AluInstr::last_write);
695 shader.emit_instruction(alu);
696 shader.set_flag(Shader::sh_txs_cube_array_comp);
697 }
698 }
699
700 return true;
701 }
702
703 auto
prepare_source(nir_tex_instr * tex,const Inputs & inputs,Shader & shader)704 TexInstr::prepare_source(nir_tex_instr *tex, const Inputs& inputs, Shader& shader)
705 -> RegisterVec4
706 {
707 RegisterVec4::Swizzle target{7, 7, 7, 7};
708 PVirtualValue src[4]{nullptr, nullptr, nullptr, nullptr};
709
710 for (unsigned i = 0; i < tex->coord_components; ++i) {
711 target[i] = i;
712 src[i] = inputs.coord[i];
713 }
714
715 // array index always goes into z
716 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
717 target[2] = 1;
718 target[1] = 7;
719 src[2] = inputs.coord[1];
720 }
721
722 /* With txl and txb shadow goes into z and lod or bias go into w */
723 if (tex->op == nir_texop_txl || tex->op == nir_texop_txb) {
724 target[3] = 3;
725 src[3] = tex->op == nir_texop_txl ? inputs.lod : inputs.bias;
726 if (tex->is_shadow) {
727 target[2] = 2;
728 src[2] = inputs.comperator;
729 }
730 } else if (tex->is_shadow) {
731 /* Other ops have shadow in w */
732 target[3] = 3;
733 src[3] = inputs.comperator;
734 }
735
736 auto src_coord = shader.value_factory().temp_vec4(pin_group, target);
737
738 AluInstr *ir = nullptr;
739 for (int i = 0; i < 4; ++i) {
740 if (target[i] > 3)
741 continue;
742
743 auto op = tex->is_array && i == 2 ? op1_rndne : op1_mov;
744
745 ir = new AluInstr(op, src_coord[i], src[i], AluInstr::write);
746 shader.emit_instruction(ir);
747 }
748
749 if (ir)
750 ir->set_alu_flag(alu_last_instr);
751
752 return src_coord;
753 }
754
Inputs(const nir_tex_instr & instr,ValueFactory & vf)755 TexInstr::Inputs::Inputs(const nir_tex_instr& instr, ValueFactory& vf):
756 sampler_deref(nullptr),
757 texture_deref(nullptr),
758 bias(nullptr),
759 comperator(nullptr),
760 lod(nullptr),
761 offset(nullptr),
762 gather_comp(nullptr),
763 ms_index(nullptr),
764 texture_offset(nullptr),
765 sampler_offset(nullptr),
766 backend1(nullptr),
767 backend2(nullptr),
768 opcode(ld)
769 {
770 // sfn_log << SfnLog::tex << "Get Inputs with " << instr.coord_components
771 // << " components\n";
772
773 unsigned grad_components = instr.coord_components;
774 if (instr.is_array && !instr.array_is_lowered_cube)
775 --grad_components;
776
777 for (unsigned i = 0; i < instr.num_srcs; ++i) {
778 switch (instr.src[i].src_type) {
779 case nir_tex_src_bias:
780 bias = vf.src(instr.src[i], 0);
781 break;
782
783 case nir_tex_src_coord: {
784 coord = vf.src_vec4(instr.src[i].src,
785 pin_none,
786 swizzle_from_ncomps(instr.coord_components));
787 } break;
788 case nir_tex_src_comparator:
789 comperator = vf.src(instr.src[i], 0);
790 break;
791 case nir_tex_src_ddx:
792 ddx = vf.src_vec4(instr.src[i].src,
793 pin_group,
794 swizzle_from_ncomps(grad_components));
795 break;
796 case nir_tex_src_ddy:
797 ddy = vf.src_vec4(instr.src[i].src,
798 pin_group,
799 swizzle_from_ncomps(grad_components));
800 break;
801 case nir_tex_src_lod:
802 lod = vf.src(instr.src[i].src, 0);
803 break;
804 case nir_tex_src_offset:
805 offset = &instr.src[i].src;
806 break;
807 /* case nir_tex_src_sampler_deref:
808 sampler_deref = get_deref_location(instr.src[i].src);
809 break;
810 case nir_tex_src_texture_deref:
811 texture_deref = get_deref_location(instr.src[i].src);
812 break;
813 */
814 case nir_tex_src_ms_index:
815 ms_index = vf.src(instr.src[i], 0);
816 break;
817 case nir_tex_src_texture_offset:
818 texture_offset = vf.src(instr.src[i], 0)->as_register();
819 break;
820 case nir_tex_src_sampler_offset:
821 sampler_offset = vf.src(instr.src[i], 0)->as_register();
822 break;
823 case nir_tex_src_backend1:
824 backend1 = &instr.src[i].src;
825 break;
826 case nir_tex_src_backend2:
827 backend2 = &instr.src[i].src;
828 break;
829 case nir_tex_src_plane:
830 case nir_tex_src_projector:
831 case nir_tex_src_min_lod:
832 default:
833 unreachable("unsupported texture input type");
834 }
835 }
836
837 opcode = get_opcode(instr);
838 }
839
840 auto
get_opcode(const nir_tex_instr & instr)841 TexInstr::Inputs::get_opcode(const nir_tex_instr& instr) -> Opcode
842 {
843 switch (instr.op) {
844 case nir_texop_tex:
845 return instr.is_shadow ? sample_c : sample;
846 case nir_texop_txf:
847 return ld;
848 case nir_texop_txb:
849 return instr.is_shadow ? sample_c_lb : sample_lb;
850 case nir_texop_txl:
851 return instr.is_shadow ? sample_c_l : sample_l;
852 case nir_texop_txs:
853 return get_resinfo;
854 case nir_texop_lod:
855 return get_resinfo;
856 case nir_texop_txd:
857 return instr.is_shadow ? sample_c_g : sample_g;
858 case nir_texop_tg4: {
859 auto var_offset = offset && nir_src_as_const_value(*offset) == nullptr;
860 return instr.is_shadow ? (var_offset ? gather4_c_o : gather4_c)
861 : (var_offset ? gather4_o : gather4);
862 }
863 case nir_texop_txf_ms:
864 return ld;
865 case nir_texop_query_levels:
866 return get_resinfo;
867 case nir_texop_texture_samples:
868 return TexInstr::get_nsamples;
869 default:
870 unreachable("unsupported texture input opcode");
871 }
872 }
873
874 bool
emit_tex_lod(nir_tex_instr * tex,Inputs & src,Shader & shader)875 TexInstr::emit_tex_lod(nir_tex_instr *tex, Inputs& src, Shader& shader)
876 {
877 auto& vf = shader.value_factory();
878
879 auto dst = shader.value_factory().dest_vec4(tex->def, pin_group);
880
881 auto swizzle = src.swizzle_from_ncomps(tex->coord_components);
882
883 auto src_coord = vf.temp_vec4(pin_group, swizzle);
884
885 AluInstr *ir = nullptr;
886 for (unsigned i = 0; i < tex->coord_components; ++i) {
887 ir = new AluInstr(op1_mov, src_coord[i], src.coord[i], AluInstr::write);
888 shader.emit_instruction(ir);
889 }
890 if (ir)
891 ir->set_alu_flag(alu_last_instr);
892
893 auto irt = new TexInstr(TexInstr::get_tex_lod,
894 dst,
895 {1, 0, 7, 7},
896 src_coord,
897 tex->texture_index + R600_MAX_CONST_BUFFERS,
898 src.texture_offset);
899
900 shader.emit_instruction(irt);
901 return true;
902 }
903
904 RegisterVec4::Swizzle
swizzle_from_ncomps(int comps) const905 TexInstr::Inputs::swizzle_from_ncomps(int comps) const
906 {
907 RegisterVec4::Swizzle swz;
908 for (int i = 0; i < 4; ++i)
909 swz[i] = i < comps ? i : 7;
910 return swz;
911 }
912
913 bool
set_coord_offsets(nir_src * offset)914 TexInstr::set_coord_offsets(nir_src *offset)
915 {
916 if (!offset)
917 return true;
918
919 auto literal = nir_src_as_const_value(*offset);
920 if (!literal)
921 return false;
922
923 for (int i = 0; i < offset->ssa->num_components; ++i)
924 set_offset(i, literal[i].i32);
925 return true;
926 }
927
928 void
set_rect_coordinate_flags(nir_tex_instr * instr)929 TexInstr::set_rect_coordinate_flags(nir_tex_instr *instr)
930 {
931 if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
932 set_tex_flag(x_unnormalized);
933 set_tex_flag(y_unnormalized);
934 }
935 }
936
937 class LowerTexToBackend : public NirLowerInstruction {
938 public:
939 LowerTexToBackend(amd_gfx_level chip_class);
940
941 private:
942 bool filter(const nir_instr *instr) const override;
943 nir_def *lower(nir_instr *instr) override;
944
945 nir_def *lower_tex(nir_tex_instr *tex);
946 nir_def *lower_txf(nir_tex_instr *tex);
947 nir_def *lower_tg4(nir_tex_instr *tex);
948 nir_def *lower_txf_ms(nir_tex_instr *tex);
949 nir_def *lower_txf_ms_direct(nir_tex_instr *tex);
950
951 nir_def *
952 prepare_coord(nir_tex_instr *tex, int& unnormalized_mask, int& used_coord_mask);
953 int get_src_coords(nir_tex_instr *tex,
954 std::array<nir_def *, 4>& coord,
955 bool round_array_index);
956 nir_def *prep_src(std::array<nir_def *, 4>& coord, int& used_coord_mask);
957 nir_def *
958 finalize(nir_tex_instr *tex, nir_def *backend1, nir_def *backend2);
959
960 nir_def *get_undef();
961
962 amd_gfx_level m_chip_class;
963 nir_def *m_undef {nullptr};
964 };
965
966 bool
r600_nir_lower_tex_to_backend(nir_shader * shader,amd_gfx_level chip_class)967 r600_nir_lower_tex_to_backend(nir_shader *shader, amd_gfx_level chip_class)
968 {
969 return LowerTexToBackend(chip_class).run(shader);
970 }
971
LowerTexToBackend(amd_gfx_level chip_class)972 LowerTexToBackend::LowerTexToBackend(amd_gfx_level chip_class):
973 m_chip_class(chip_class)
974 {
975 }
976
977 bool
filter(const nir_instr * instr) const978 LowerTexToBackend::filter(const nir_instr *instr) const
979 {
980 if (instr->type != nir_instr_type_tex)
981 return false;
982
983 auto tex = nir_instr_as_tex(instr);
984 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
985 return false;
986 switch (tex->op) {
987 case nir_texop_tex:
988 case nir_texop_txb:
989 case nir_texop_txl:
990 case nir_texop_txf:
991 case nir_texop_txd:
992 case nir_texop_tg4:
993 case nir_texop_txf_ms:
994 break;
995 default:
996 return false;
997 }
998
999 return nir_tex_instr_src_index(tex, nir_tex_src_backend1) == -1;
1000 }
1001
get_undef()1002 nir_def *LowerTexToBackend::get_undef()
1003 {
1004 if (!m_undef)
1005 m_undef = nir_undef(b, 1, 32);
1006 return m_undef;
1007 }
1008
1009 nir_def *
lower(nir_instr * instr)1010 LowerTexToBackend::lower(nir_instr *instr)
1011 {
1012 b->cursor = nir_before_instr(instr);
1013
1014 auto tex = nir_instr_as_tex(instr);
1015 switch (tex->op) {
1016 case nir_texop_tex:
1017 case nir_texop_txb:
1018 case nir_texop_txl:
1019 case nir_texop_txd:
1020 return lower_tex(tex);
1021 case nir_texop_txf:
1022 return lower_txf(tex);
1023 case nir_texop_tg4:
1024 return lower_tg4(tex);
1025 case nir_texop_txf_ms:
1026 if (m_chip_class < EVERGREEN)
1027 return lower_txf_ms_direct(tex);
1028 else
1029 return lower_txf_ms(tex);
1030 default:
1031 return nullptr;
1032 }
1033 }
1034
1035 nir_def *
lower_tex(nir_tex_instr * tex)1036 LowerTexToBackend::lower_tex(nir_tex_instr *tex)
1037 {
1038 int unnormalized_mask = 0;
1039 int used_coord_mask = 0;
1040
1041 nir_def *backend1 = prepare_coord(tex, unnormalized_mask, used_coord_mask);
1042
1043 nir_def *backend2 = nir_imm_ivec4(b, used_coord_mask, unnormalized_mask, 0, 0);
1044
1045 return finalize(tex, backend1, backend2);
1046 }
1047
1048 nir_def *
lower_txf(nir_tex_instr * tex)1049 LowerTexToBackend::lower_txf(nir_tex_instr *tex)
1050 {
1051 std::array<nir_def *, 4> new_coord = {nullptr, nullptr, nullptr, nullptr};
1052
1053 get_src_coords(tex, new_coord, false);
1054
1055 int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
1056 new_coord[3] = tex->src[lod_idx].src.ssa;
1057
1058 int used_coord_mask = 0;
1059 nir_def *backend1 = prep_src(new_coord, used_coord_mask);
1060 nir_def *backend2 =
1061 nir_imm_ivec4(b, used_coord_mask, tex->is_array ? 0x4 : 0, 0, 0);
1062
1063 return finalize(tex, backend1, backend2);
1064 }
1065
1066 nir_def *
lower_tg4(nir_tex_instr * tex)1067 LowerTexToBackend::lower_tg4(nir_tex_instr *tex)
1068 {
1069 std::array<nir_def *, 4> new_coord = {nullptr, nullptr, nullptr, nullptr};
1070
1071 get_src_coords(tex, new_coord, false);
1072 uint32_t dest_swizzle =
1073 m_chip_class <= EVERGREEN ? 1 | (2 << 8) | (0 << 16) | (3 << 24) : 0;
1074
1075 int used_coord_mask = 0;
1076 int unnormalized_mask = 0;
1077 nir_def *backend1 = prepare_coord(tex, unnormalized_mask, used_coord_mask);
1078
1079 nir_def *backend2 =
1080 nir_imm_ivec4(b, used_coord_mask, unnormalized_mask, tex->component, dest_swizzle);
1081 return finalize(tex, backend1, backend2);
1082 }
1083
1084 nir_def *
lower_txf_ms(nir_tex_instr * tex)1085 LowerTexToBackend::lower_txf_ms(nir_tex_instr *tex)
1086 {
1087 std::array<nir_def *, 4> new_coord = {nullptr, nullptr, nullptr, nullptr};
1088
1089 get_src_coords(tex, new_coord, false);
1090
1091 int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
1092 new_coord[3] = tex->src[ms_index].src.ssa;
1093
1094 int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
1095 if (offset_index >= 0) {
1096 auto offset = tex->src[offset_index].src.ssa;
1097 for (int i = 0; i < offset->num_components; ++i) {
1098 new_coord[i] = nir_iadd(b, new_coord[i], nir_channel(b, offset, i));
1099 }
1100 }
1101
1102 auto fetch_sample = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
1103 nir_def_init(&fetch_sample->instr, &fetch_sample->def, 4, 32);
1104
1105 int used_coord_mask = 0;
1106 nir_def *backend1 = prep_src(new_coord, used_coord_mask);
1107 nir_def *backend2 = nir_imm_ivec4(b, used_coord_mask, 0xf, 1, 0);
1108
1109 nir_builder_instr_insert(b, &fetch_sample->instr);
1110 finalize(fetch_sample, backend1, backend2);
1111
1112 new_coord[3] = nir_iand_imm(b,
1113 nir_ushr(b,
1114 nir_channel(b, &fetch_sample->def, 0),
1115 nir_ishl_imm(b, new_coord[3], 2)),
1116 15);
1117
1118 nir_def *backend1b = prep_src(new_coord, used_coord_mask);
1119 nir_def *backend2b = nir_imm_ivec4(b, used_coord_mask, 0, 0, 0);
1120 return finalize(tex, backend1b, backend2b);
1121 }
1122
1123 nir_def *
lower_txf_ms_direct(nir_tex_instr * tex)1124 LowerTexToBackend::lower_txf_ms_direct(nir_tex_instr *tex)
1125 {
1126 std::array<nir_def *, 4> new_coord = {nullptr, nullptr, nullptr, nullptr};
1127
1128 get_src_coords(tex, new_coord, false);
1129
1130 int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
1131 new_coord[3] = tex->src[ms_index].src.ssa;
1132
1133 int used_coord_mask = 0;
1134 nir_def *backend1 = prep_src(new_coord, used_coord_mask);
1135 nir_def *backend2 = nir_imm_ivec4(b, used_coord_mask, 0, 0, 0);
1136
1137 return finalize(tex, backend1, backend2);
1138 }
1139
1140 nir_def *
finalize(nir_tex_instr * tex,nir_def * backend1,nir_def * backend2)1141 LowerTexToBackend::finalize(nir_tex_instr *tex,
1142 nir_def *backend1,
1143 nir_def *backend2)
1144 {
1145 nir_tex_instr_add_src(tex, nir_tex_src_backend1, backend1);
1146 nir_tex_instr_add_src(tex, nir_tex_src_backend2, backend2);
1147
1148 static const nir_tex_src_type cleanup[] = {nir_tex_src_coord,
1149 nir_tex_src_lod,
1150 nir_tex_src_bias,
1151 nir_tex_src_comparator,
1152 nir_tex_src_ms_index};
1153
1154 for (const auto type : cleanup) {
1155 int pos = nir_tex_instr_src_index(tex, type);
1156 if (pos >= 0)
1157 nir_tex_instr_remove_src(tex, pos);
1158 }
1159 return NIR_LOWER_INSTR_PROGRESS;
1160 }
1161
1162 nir_def *
prep_src(std::array<nir_def *,4> & coord,int & used_coord_mask)1163 LowerTexToBackend::prep_src(std::array<nir_def *, 4>& coord, int& used_coord_mask)
1164 {
1165 int max_coord = 0;
1166 for (int i = 0; i < 4; ++i) {
1167 if (coord[i]) {
1168 used_coord_mask |= 1 << i;
1169 max_coord = i;
1170 } else
1171 coord[i] = get_undef();
1172 }
1173
1174 return nir_vec(b, coord.data(), max_coord + 1);
1175 }
1176
1177 nir_def *
prepare_coord(nir_tex_instr * tex,int & unnormalized_mask,int & used_coord_mask)1178 LowerTexToBackend::prepare_coord(nir_tex_instr *tex,
1179 int& unnormalized_mask,
1180 int& used_coord_mask)
1181 {
1182 std::array<nir_def *, 4> new_coord = {nullptr, nullptr, nullptr, nullptr};
1183
1184 unnormalized_mask = get_src_coords(tex, new_coord, true);
1185 used_coord_mask = 0;
1186
1187 int comp_idx =
1188 tex->is_shadow ? nir_tex_instr_src_index(tex, nir_tex_src_comparator) : -1;
1189
1190 if (tex->op == nir_texop_txl || tex->op == nir_texop_txb) {
1191 int idx = tex->op == nir_texop_txl ? nir_tex_instr_src_index(tex, nir_tex_src_lod)
1192 : nir_tex_instr_src_index(tex, nir_tex_src_bias);
1193 assert(idx != -1);
1194 new_coord[3] = tex->src[idx].src.ssa;
1195
1196 if (comp_idx >= 0)
1197 new_coord[2] = tex->src[comp_idx].src.ssa;
1198 } else if (comp_idx >= 0) {
1199 new_coord[3] = tex->src[comp_idx].src.ssa;
1200 }
1201 return prep_src(new_coord, used_coord_mask);
1202 }
1203
1204 int
get_src_coords(nir_tex_instr * tex,std::array<nir_def *,4> & coord,bool round_array_index)1205 LowerTexToBackend::get_src_coords(nir_tex_instr *tex,
1206 std::array<nir_def *, 4>& coord,
1207 bool round_array_index)
1208 {
1209 int unnormalized_mask = 0;
1210 auto coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
1211 assert(coord_idx != -1);
1212 auto old_coord = tex->src[coord_idx];
1213
1214 coord = {nir_channel(b, old_coord.src.ssa, 0), nullptr, nullptr, nullptr};
1215
1216 if (tex->coord_components > 1) {
1217 if (tex->is_array && tex->sampler_dim == GLSL_SAMPLER_DIM_1D)
1218 coord[2] = nir_channel(b, old_coord.src.ssa, 1);
1219 else
1220 coord[1] = nir_channel(b, old_coord.src.ssa, 1);
1221 }
1222
1223 if (tex->coord_components > 2) {
1224 coord[2] = nir_channel(b, old_coord.src.ssa, 2);
1225 }
1226 if (tex->is_array) {
1227 unnormalized_mask |= 0x4;
1228 if (round_array_index)
1229 coord[2] = nir_fround_even(b, coord[2]);
1230 }
1231
1232 if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
1233 unnormalized_mask |= 0x3;
1234 }
1235
1236 return unnormalized_mask;
1237 }
1238
1239 } // namespace r600
1240