xref: /aosp_15_r20/external/mesa3d/src/nouveau/compiler/nak_nir_lower_fs_inputs.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2023 Collabora, Ltd.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "nak_private.h"
7 #include "nir_builder.h"
8 
9 /** Load a flat FS input */
10 static nir_def *
load_fs_input(nir_builder * b,unsigned num_components,uint32_t addr,UNUSED const struct nak_compiler * nak)11 load_fs_input(nir_builder *b, unsigned num_components, uint32_t addr,
12               UNUSED const struct nak_compiler *nak)
13 {
14    const struct nak_nir_ipa_flags flags = {
15       .interp_mode = NAK_INTERP_MODE_CONSTANT,
16       .interp_freq = NAK_INTERP_FREQ_CONSTANT,
17       .interp_loc = NAK_INTERP_LOC_DEFAULT,
18    };
19    uint32_t flags_u32;
20    memcpy(&flags_u32, &flags, sizeof(flags_u32));
21 
22    nir_def *comps[NIR_MAX_VEC_COMPONENTS];
23    for (unsigned c = 0; c < num_components; c++) {
24       comps[c] = nir_ipa_nv(b, nir_imm_float(b, 0), nir_imm_int(b, 0),
25                             .base = addr + c * 4, .flags = flags_u32);
26    }
27    return nir_vec(b, comps, num_components);
28 }
29 
30 static nir_def *
load_frag_w(nir_builder * b,enum nak_interp_loc interp_loc,nir_def * offset)31 load_frag_w(nir_builder *b, enum nak_interp_loc interp_loc, nir_def *offset)
32 {
33    if (offset == NULL)
34       offset = nir_imm_int(b, 0);
35 
36    const uint16_t w_addr =
37       nak_sysval_attr_addr(SYSTEM_VALUE_FRAG_COORD) + 12;
38 
39    const struct nak_nir_ipa_flags flags = {
40       .interp_mode = NAK_INTERP_MODE_SCREEN_LINEAR,
41       .interp_freq = NAK_INTERP_FREQ_PASS,
42       .interp_loc = interp_loc,
43    };
44    uint32_t flags_u32;
45    memcpy(&flags_u32, &flags, sizeof(flags_u32));
46 
47    return nir_ipa_nv(b, nir_imm_float(b, 0), offset,
48                      .base = w_addr, .flags = flags_u32);
49 }
50 
51 static nir_def *
interp_fs_input(nir_builder * b,unsigned num_components,uint32_t addr,enum nak_interp_mode interp_mode,enum nak_interp_loc interp_loc,nir_def * inv_w,nir_def * offset,const struct nak_compiler * nak)52 interp_fs_input(nir_builder *b, unsigned num_components, uint32_t addr,
53                 enum nak_interp_mode interp_mode,
54                 enum nak_interp_loc interp_loc,
55                 nir_def *inv_w, nir_def *offset,
56                 const struct nak_compiler *nak)
57 {
58    if (offset == NULL)
59       offset = nir_imm_int(b, 0);
60 
61    if (nak->sm >= 70) {
62       const struct nak_nir_ipa_flags flags = {
63          .interp_mode = interp_mode,
64          .interp_freq = NAK_INTERP_FREQ_PASS,
65          .interp_loc = interp_loc,
66       };
67       uint32_t flags_u32;
68       memcpy(&flags_u32, &flags, sizeof(flags_u32));
69 
70       nir_def *comps[NIR_MAX_VEC_COMPONENTS];
71       for (unsigned c = 0; c < num_components; c++) {
72          comps[c] = nir_ipa_nv(b, nir_imm_float(b, 0), offset,
73                                .base = addr + c * 4,
74                                .flags = flags_u32);
75          if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
76             comps[c] = nir_fmul(b, comps[c], inv_w);
77       }
78       return nir_vec(b, comps, num_components);
79    } else if (nak->sm >= 50) {
80       struct nak_nir_ipa_flags flags = {
81          .interp_mode = interp_mode,
82          .interp_freq = NAK_INTERP_FREQ_PASS,
83          .interp_loc = interp_loc,
84       };
85 
86       if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
87          flags.interp_freq = NAK_INTERP_FREQ_PASS_MUL_W;
88       else
89          inv_w = nir_imm_float(b, 0);
90 
91       uint32_t flags_u32;
92       memcpy(&flags_u32, &flags, sizeof(flags_u32));
93 
94       nir_def *comps[NIR_MAX_VEC_COMPONENTS];
95       for (unsigned c = 0; c < num_components; c++) {
96          comps[c] = nir_ipa_nv(b, inv_w, offset,
97                                .base = addr + c * 4,
98                                .flags = flags_u32);
99       }
100       return nir_vec(b, comps, num_components);
101    } else {
102       unreachable("Figure out input interpolation on Kepler");
103    }
104 }
105 
106 static nir_def *
load_sample_pos_u4_at(nir_builder * b,nir_def * sample_id,const struct nak_fs_key * fs_key)107 load_sample_pos_u4_at(nir_builder *b, nir_def *sample_id,
108                       const struct nak_fs_key *fs_key)
109 {
110    nir_def *loc = nir_ldc_nv(b, 1, 8,
111                              nir_imm_int(b, fs_key->sample_info_cb),
112                              nir_iadd_imm(b, sample_id,
113                                           fs_key->sample_locations_offset),
114                              .align_mul = 1, .align_offset = 0);
115 
116    /* The rest of these calculations are in 32-bit */
117    loc = nir_u2u32(b, loc);
118    nir_def *loc_x_u4 = nir_iand_imm(b, loc, 0xf);
119    nir_def *loc_y_u4 = nir_iand_imm(b, nir_ushr_imm(b, loc, 4), 0xf);
120    return nir_vec2(b, loc_x_u4, loc_y_u4);
121 }
122 
123 static nir_def *
load_pass_sample_mask_at(nir_builder * b,nir_def * sample_id,const struct nak_fs_key * fs_key)124 load_pass_sample_mask_at(nir_builder *b, nir_def *sample_id,
125                          const struct nak_fs_key *fs_key)
126 {
127    nir_def *offset =
128       nir_imul_imm(b, sample_id, sizeof(struct nak_sample_mask));
129    offset = nir_iadd_imm(b, offset, fs_key->sample_masks_offset);
130 
131    return nir_ldc_nv(b, 1, 8 * sizeof(struct nak_sample_mask),
132                      nir_imm_int(b, fs_key->sample_info_cb), offset,
133                      .align_mul = sizeof(struct nak_sample_mask),
134                      .align_offset = 0);
135 }
136 
137 static nir_def *
load_sample_pos_at(nir_builder * b,nir_def * sample_id,const struct nak_fs_key * fs_key)138 load_sample_pos_at(nir_builder *b, nir_def *sample_id,
139                    const struct nak_fs_key *fs_key)
140 {
141    nir_def *loc_u4 = load_sample_pos_u4_at(b, sample_id, fs_key);
142    nir_def *result = nir_fmul_imm(b, nir_i2f32(b, loc_u4), 1.0 / 16.0);
143 
144    return result;
145 }
146 
147 static nir_def *
load_barycentric_offset(nir_builder * b,nir_intrinsic_instr * bary,const struct nak_fs_key * fs_key)148 load_barycentric_offset(nir_builder *b, nir_intrinsic_instr *bary,
149                         const struct nak_fs_key *fs_key)
150 {
151    nir_def *offset_s12;
152 
153    if (bary->intrinsic == nir_intrinsic_load_barycentric_coord_at_sample ||
154        bary->intrinsic == nir_intrinsic_load_barycentric_at_sample) {
155       nir_def *sample_id = bary->src[0].ssa;
156       nir_def *offset_u4 = load_sample_pos_u4_at(b, sample_id, fs_key);
157       /* The sample position we loaded is a u4 from the upper-left and the
158        * sample position wanted by ipa.offset is s12
159        */
160       offset_s12 = nir_iadd_imm(b, nir_ishl_imm(b, offset_u4, 8), -2048);
161    } else {
162       nir_def *offset_f = bary->src[0].ssa;
163 
164       offset_f = nir_fclamp(b, offset_f, nir_imm_float(b, -0.5),
165                             nir_imm_float(b, 0.437500));
166       offset_s12 = nir_f2i32(b, nir_fmul_imm(b, offset_f, 4096.0));
167    }
168 
169    return nir_prmt_nv(b, nir_imm_int(b, 0x5410),
170                          nir_channel(b, offset_s12, 0),
171                          nir_channel(b, offset_s12, 1));
172 }
173 
/** State handed to lower_fs_input_intrin() through the pass's data pointer */
struct lower_fs_input_ctx {
   /* Compiler description; consulted for the SM version when interpolating */
   const struct nak_compiler *nak;

   /* Fragment shader key; the callback tolerates NULL when testing
    * force_sample_shading, but sample position and mask lookups dereference it
    */
   const struct nak_fs_key *fs_key;
};
178 
179 static uint16_t
fs_input_intrin_addr(nir_intrinsic_instr * intrin)180 fs_input_intrin_addr(nir_intrinsic_instr *intrin)
181 {
182    const nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
183    return nak_varying_attr_addr(sem.location) +
184           nir_src_as_uint(*nir_get_io_offset_src(intrin)) * 16 +
185           nir_intrinsic_component(intrin) * 4;
186 }
187 
188 static bool
lower_fs_input_intrin(nir_builder * b,nir_intrinsic_instr * intrin,void * data)189 lower_fs_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
190 {
191    const struct lower_fs_input_ctx *ctx = data;
192 
193    b->cursor = nir_before_instr(&intrin->instr);
194 
195    nir_def *res;
196    switch (intrin->intrinsic) {
197    case nir_intrinsic_load_barycentric_pixel: {
198       if (!(ctx->fs_key && ctx->fs_key->force_sample_shading))
199          return false;
200 
201       intrin->intrinsic = nir_intrinsic_load_barycentric_sample;
202       return true;
203    }
204 
205    case nir_intrinsic_load_frag_coord:
206    case nir_intrinsic_load_point_coord: {
207       const enum nak_interp_loc interp_loc =
208          b->shader->info.fs.uses_sample_shading ? NAK_INTERP_LOC_CENTROID
209                                                 : NAK_INTERP_LOC_DEFAULT;
210       const uint32_t addr =
211          intrin->intrinsic == nir_intrinsic_load_point_coord ?
212          nak_sysval_attr_addr(SYSTEM_VALUE_POINT_COORD) :
213          nak_sysval_attr_addr(SYSTEM_VALUE_FRAG_COORD);
214 
215       res = interp_fs_input(b, intrin->def.num_components, addr,
216                             NAK_INTERP_MODE_SCREEN_LINEAR,
217                             interp_loc, NULL, NULL,
218                             ctx->nak);
219       break;
220    }
221 
222    case nir_intrinsic_load_front_face:
223    case nir_intrinsic_load_layer_id: {
224       assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
225       const gl_system_value sysval =
226          nir_system_value_from_intrinsic(intrin->intrinsic);
227       const uint32_t addr = nak_sysval_attr_addr(sysval);
228 
229       res = load_fs_input(b, intrin->def.num_components, addr, ctx->nak);
230       if (intrin->def.bit_size == 1)
231          res = nir_i2b(b, res);
232       break;
233    }
234 
235    case nir_intrinsic_load_input: {
236       const uint16_t addr = fs_input_intrin_addr(intrin);
237       res = load_fs_input(b, intrin->def.num_components, addr, ctx->nak);
238       break;
239    }
240 
241    case nir_intrinsic_load_barycentric_coord_pixel:
242    case nir_intrinsic_load_barycentric_coord_centroid:
243    case nir_intrinsic_load_barycentric_coord_sample:
244    case nir_intrinsic_load_barycentric_coord_at_sample:
245    case nir_intrinsic_load_barycentric_coord_at_offset: {
246       uint32_t addr;
247       enum nak_interp_mode interp_mode;
248       if (nir_intrinsic_interp_mode(intrin) == INTERP_MODE_NOPERSPECTIVE) {
249          addr = NAK_ATTR_BARY_COORD_NO_PERSP;
250          interp_mode = NAK_INTERP_MODE_SCREEN_LINEAR;
251       } else {
252          addr = NAK_ATTR_BARY_COORD;
253          interp_mode = NAK_INTERP_MODE_PERSPECTIVE;
254       }
255 
256       nir_def *offset = NULL;
257       enum nak_interp_loc interp_loc;
258       switch (intrin->intrinsic) {
259       case nir_intrinsic_load_barycentric_coord_at_sample:
260       case nir_intrinsic_load_barycentric_coord_at_offset:
261          interp_loc = NAK_INTERP_LOC_OFFSET;
262          offset = load_barycentric_offset(b, intrin, ctx->fs_key);
263          break;
264       case nir_intrinsic_load_barycentric_coord_centroid:
265       case nir_intrinsic_load_barycentric_coord_sample:
266          interp_loc = NAK_INTERP_LOC_CENTROID;
267          break;
268       case nir_intrinsic_load_barycentric_coord_pixel:
269          interp_loc = NAK_INTERP_LOC_DEFAULT;
270          break;
271       default:
272          unreachable("Unknown intrinsic");
273       }
274 
275       nir_def *inv_w = NULL;
276       if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
277          inv_w = nir_frcp(b, load_frag_w(b, interp_loc, offset));
278 
279       res = interp_fs_input(b, intrin->def.num_components,
280                             addr, interp_mode, interp_loc,
281                             inv_w, offset, ctx->nak);
282       break;
283    }
284 
285    case nir_intrinsic_load_interpolated_input: {
286       const uint16_t addr = fs_input_intrin_addr(intrin);
287       nir_intrinsic_instr *bary = nir_src_as_intrinsic(intrin->src[0]);
288 
289       enum nak_interp_mode interp_mode;
290       if (nir_intrinsic_interp_mode(bary) == INTERP_MODE_SMOOTH ||
291           nir_intrinsic_interp_mode(bary) == INTERP_MODE_NONE)
292          interp_mode = NAK_INTERP_MODE_PERSPECTIVE;
293       else
294          interp_mode = NAK_INTERP_MODE_SCREEN_LINEAR;
295 
296       nir_def *offset = NULL;
297       enum nak_interp_loc interp_loc;
298       switch (bary->intrinsic) {
299       case nir_intrinsic_load_barycentric_at_offset:
300       case nir_intrinsic_load_barycentric_at_sample: {
301          interp_loc = NAK_INTERP_LOC_OFFSET;
302          offset = load_barycentric_offset(b, bary, ctx->fs_key);
303          break;
304       }
305 
306       case nir_intrinsic_load_barycentric_centroid:
307       case nir_intrinsic_load_barycentric_sample:
308          interp_loc = NAK_INTERP_LOC_CENTROID;
309          break;
310 
311       case nir_intrinsic_load_barycentric_pixel:
312          interp_loc = NAK_INTERP_LOC_DEFAULT;
313          break;
314 
315       default:
316          unreachable("Unsupported barycentric");
317       }
318 
319       nir_def *inv_w = NULL;
320       if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
321          inv_w = nir_frcp(b, load_frag_w(b, interp_loc, offset));
322 
323       res = interp_fs_input(b, intrin->def.num_components,
324                             addr, interp_mode, interp_loc,
325                             inv_w, offset, ctx->nak);
326       break;
327    }
328 
329    case nir_intrinsic_load_sample_mask_in:
330       b->cursor = nir_after_instr(&intrin->instr);
331 
332       /* pixld.covmask returns the coverage mask for the entire pixel being
333        * shaded, not the set of samples covered by the current FS invocation.
334        * We need to mask off excess samples in order to get the GL/Vulkan
335        * behavior.
336        */
337       if (b->shader->info.fs.uses_sample_shading) {
338          /* Mask off just the current sample */
339          nir_def *sample = nir_load_sample_id(b);
340          nir_def *mask = nir_ishl(b, nir_imm_int(b, 1), sample);
341          mask = nir_iand(b, &intrin->def, mask);
342          nir_def_rewrite_uses_after(&intrin->def, mask, mask->parent_instr);
343 
344          return true;
345       } else if (ctx->fs_key && ctx->fs_key->force_sample_shading) {
346          /* In this case we don't know up-front how many passes will be run so
347           * we need to take the per-pass sample mask from the driver and AND
348           * that with the coverage mask.
349           */
350          nir_def *sample = nir_load_sample_id(b);
351          nir_def *mask = load_pass_sample_mask_at(b, sample, ctx->fs_key);
352          mask = nir_iand(b, &intrin->def, nir_u2u32(b, mask));
353          nir_def_rewrite_uses_after(&intrin->def, mask, mask->parent_instr);
354 
355          return true;
356       } else {
357          /* We're always executing single-pass so just use the sample mask as
358           * given by the hardware.
359           */
360          return false;
361       }
362       break;
363 
364    case nir_intrinsic_load_sample_pos:
365       res = load_sample_pos_at(b, nir_load_sample_id(b), ctx->fs_key);
366       break;
367 
368    case nir_intrinsic_load_input_vertex: {
369       const uint16_t addr = fs_input_intrin_addr(intrin);
370       unsigned vertex_id = nir_src_as_uint(intrin->src[0]);
371       assert(vertex_id < 3);
372 
373       nir_def *comps[NIR_MAX_VEC_COMPONENTS];
374       for (unsigned c = 0; c < intrin->def.num_components; c++) {
375          nir_def *data = nir_ldtram_nv(b, .base = addr + c * 4,
376                                        .flags = vertex_id == 2);
377          comps[c] = nir_channel(b, data, vertex_id & 1);
378       }
379       res = nir_vec(b, comps, intrin->num_components);
380       break;
381    }
382 
383    default:
384       return false;
385    }
386 
387    nir_def_replace(&intrin->def, res);
388 
389    return true;
390 }
391 
392 bool
nak_nir_lower_fs_inputs(nir_shader * nir,const struct nak_compiler * nak,const struct nak_fs_key * fs_key)393 nak_nir_lower_fs_inputs(nir_shader *nir,
394                         const struct nak_compiler *nak,
395                         const struct nak_fs_key *fs_key)
396 {
397    const struct lower_fs_input_ctx fs_in_ctx = {
398       .nak = nak,
399       .fs_key = fs_key,
400    };
401    NIR_PASS_V(nir, nir_shader_intrinsics_pass, lower_fs_input_intrin,
402               nir_metadata_control_flow,
403               (void *)&fs_in_ctx);
404 
405    return true;
406 }
407