/*
 * Copyright © 2023 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "nak_private.h"
#include "nir_builder.h"

/** Load a flat FS input */
static nir_def *
load_fs_input(nir_builder *b, unsigned num_components, uint32_t addr,
              UNUSED const struct nak_compiler *nak)
{
   const struct nak_nir_ipa_flags flags = {
      .interp_mode = NAK_INTERP_MODE_CONSTANT,
      .interp_freq = NAK_INTERP_FREQ_CONSTANT,
      .interp_loc = NAK_INTERP_LOC_DEFAULT,
   };
   uint32_t flags_u32;
   memcpy(&flags_u32, &flags, sizeof(flags_u32));

   nir_def *comps[NIR_MAX_VEC_COMPONENTS];
   for (unsigned c = 0; c < num_components; c++) {
      comps[c] = nir_ipa_nv(b, nir_imm_float(b, 0), nir_imm_int(b, 0),
                            .base = addr + c * 4, .flags = flags_u32);
   }
   return nir_vec(b, comps, num_components);
}

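/** Load the FRAG_COORD.w attribute at the given interpolation location.
 *
 * Callers take the reciprocal of this value to apply the perspective
 * correction for perspective-interpolated inputs.
 */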
static nir_def *
load_frag_w(nir_builder *b, enum nak_interp_loc interp_loc, nir_def *offset)
{
   if (offset == NULL)
      offset = nir_imm_int(b, 0);

   const uint16_t w_addr =
      nak_sysval_attr_addr(SYSTEM_VALUE_FRAG_COORD) + 12;

   const struct nak_nir_ipa_flags flags = {
      .interp_mode = NAK_INTERP_MODE_SCREEN_LINEAR,
      .interp_freq = NAK_INTERP_FREQ_PASS,
      .interp_loc = interp_loc,
   };
   uint32_t flags_u32;
   memcpy(&flags_u32, &flags, sizeof(flags_u32));

   return nir_ipa_nv(b, nir_imm_float(b, 0), offset,
                     .base = w_addr, .flags = flags_u32);
}

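/** Interpolate an FS input starting at the given attribute address.
 *
 * For perspective-correct interpolation on SM70 and later, the IPA result
 * is multiplied by the caller-provided inv_w.  On SM50, that multiply is
 * instead folded into the IPA instruction via NAK_INTERP_FREQ_PASS_MUL_W.
 */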
static nir_def *
interp_fs_input(nir_builder *b, unsigned num_components, uint32_t addr,
                enum nak_interp_mode interp_mode,
                enum nak_interp_loc interp_loc,
                nir_def *inv_w, nir_def *offset,
                const struct nak_compiler *nak)
{
   if (offset == NULL)
      offset = nir_imm_int(b, 0);

   if (nak->sm >= 70) {
      const struct nak_nir_ipa_flags flags = {
         .interp_mode = interp_mode,
         .interp_freq = NAK_INTERP_FREQ_PASS,
         .interp_loc = interp_loc,
      };
      uint32_t flags_u32;
      memcpy(&flags_u32, &flags, sizeof(flags_u32));

      nir_def *comps[NIR_MAX_VEC_COMPONENTS];
      for (unsigned c = 0; c < num_components; c++) {
         comps[c] = nir_ipa_nv(b, nir_imm_float(b, 0), offset,
                               .base = addr + c * 4,
                               .flags = flags_u32);
         if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
            comps[c] = nir_fmul(b, comps[c], inv_w);
      }
      return nir_vec(b, comps, num_components);
   } else if (nak->sm >= 50) {
      struct nak_nir_ipa_flags flags = {
         .interp_mode = interp_mode,
         .interp_freq = NAK_INTERP_FREQ_PASS,
         .interp_loc = interp_loc,
      };

      if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
         flags.interp_freq = NAK_INTERP_FREQ_PASS_MUL_W;
      else
         inv_w = nir_imm_float(b, 0);

      uint32_t flags_u32;
      memcpy(&flags_u32, &flags, sizeof(flags_u32));

      nir_def *comps[NIR_MAX_VEC_COMPONENTS];
      for (unsigned c = 0; c < num_components; c++) {
         comps[c] = nir_ipa_nv(b, inv_w, offset,
                               .base = addr + c * 4,
                               .flags = flags_u32);
      }
      return nir_vec(b, comps, num_components);
   } else {
      unreachable("Figure out input interpolation on Kepler");
   }
}

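/** Load the sample position for sample_id as 4-bit fixed-point coordinates.
 *
 * The driver provides one byte per sample in the sample info constant
 * buffer, with the X position in the low nibble and the Y position in the
 * high nibble, both in units of 1/16 of a pixel.
 */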
static nir_def *
load_sample_pos_u4_at(nir_builder *b, nir_def *sample_id,
                      const struct nak_fs_key *fs_key)
{
   nir_def *loc = nir_ldc_nv(b, 1, 8,
                             nir_imm_int(b, fs_key->sample_info_cb),
                             nir_iadd_imm(b, sample_id,
                                          fs_key->sample_locations_offset),
                             .align_mul = 1, .align_offset = 0);

   /* The rest of these calculations are in 32-bit */
   loc = nir_u2u32(b, loc);
   nir_def *loc_x_u4 = nir_iand_imm(b, loc, 0xf);
   nir_def *loc_y_u4 = nir_iand_imm(b, nir_ushr_imm(b, loc, 4), 0xf);
   return nir_vec2(b, loc_x_u4, loc_y_u4);
}

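/** Load the per-pass sample mask for sample_id from the sample info CB */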
static nir_def *
load_pass_sample_mask_at(nir_builder *b, nir_def *sample_id,
                         const struct nak_fs_key *fs_key)
{
   nir_def *offset =
      nir_imul_imm(b, sample_id, sizeof(struct nak_sample_mask));
   offset = nir_iadd_imm(b, offset, fs_key->sample_masks_offset);

   return nir_ldc_nv(b, 1, 8 * sizeof(struct nak_sample_mask),
                     nir_imm_int(b, fs_key->sample_info_cb), offset,
                     .align_mul = sizeof(struct nak_sample_mask),
                     .align_offset = 0);
}

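/** Load the sample position for sample_id as floats in [0, 1) */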
static nir_def *
load_sample_pos_at(nir_builder *b, nir_def *sample_id,
                   const struct nak_fs_key *fs_key)
{
   nir_def *loc_u4 = load_sample_pos_u4_at(b, sample_id, fs_key);
   nir_def *result = nir_fmul_imm(b, nir_i2f32(b, loc_u4), 1.0 / 16.0);

   return result;
}

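/** Compute the packed offset expected by ipa.offset for a barycentric
 * at_sample or at_offset intrinsic.
 *
 * The X and Y offsets are converted to s12 fixed point and packed into the
 * low and high 16 bits of a single 32-bit value.
 */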
static nir_def *
load_barycentric_offset(nir_builder *b, nir_intrinsic_instr *bary,
                        const struct nak_fs_key *fs_key)
{
   nir_def *offset_s12;

   if (bary->intrinsic == nir_intrinsic_load_barycentric_coord_at_sample ||
       bary->intrinsic == nir_intrinsic_load_barycentric_at_sample) {
      nir_def *sample_id = bary->src[0].ssa;
      nir_def *offset_u4 = load_sample_pos_u4_at(b, sample_id, fs_key);
      /* The sample position we loaded is a u4 from the upper-left and the
       * sample position wanted by ipa.offset is s12
       */
      offset_s12 = nir_iadd_imm(b, nir_ishl_imm(b, offset_u4, 8), -2048);
   } else {
      nir_def *offset_f = bary->src[0].ssa;

      offset_f = nir_fclamp(b, offset_f, nir_imm_float(b, -0.5),
                            nir_imm_float(b, 0.437500));
      offset_s12 = nir_f2i32(b, nir_fmul_imm(b, offset_f, 4096.0));
   }

   return nir_prmt_nv(b, nir_imm_int(b, 0x5410),
                      nir_channel(b, offset_s12, 0),
                      nir_channel(b, offset_s12, 1));
}

struct lower_fs_input_ctx {
   const struct nak_compiler *nak;
   const struct nak_fs_key *fs_key;
};

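/** Compute the attribute address in bytes for an FS input load intrinsic */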
static uint16_t
fs_input_intrin_addr(nir_intrinsic_instr *intrin)
{
   const nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
   return nak_varying_attr_addr(sem.location) +
          nir_src_as_uint(*nir_get_io_offset_src(intrin)) * 16 +
          nir_intrinsic_component(intrin) * 4;
}

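/** Lower a single FS input or input-related system-value intrinsic to
 * NAK-specific IPA, LDTRAM, and LDC operations.
 */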
static bool
lower_fs_input_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
{
   const struct lower_fs_input_ctx *ctx = data;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_def *res;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_barycentric_pixel: {
      if (!(ctx->fs_key && ctx->fs_key->force_sample_shading))
         return false;

      intrin->intrinsic = nir_intrinsic_load_barycentric_sample;
      return true;
   }

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord: {
      const enum nak_interp_loc interp_loc =
         b->shader->info.fs.uses_sample_shading ? NAK_INTERP_LOC_CENTROID
                                                : NAK_INTERP_LOC_DEFAULT;
      const uint32_t addr =
         intrin->intrinsic == nir_intrinsic_load_point_coord ?
         nak_sysval_attr_addr(SYSTEM_VALUE_POINT_COORD) :
         nak_sysval_attr_addr(SYSTEM_VALUE_FRAG_COORD);

      res = interp_fs_input(b, intrin->def.num_components, addr,
                            NAK_INTERP_MODE_SCREEN_LINEAR,
                            interp_loc, NULL, NULL,
                            ctx->nak);
      break;
   }

   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_layer_id: {
      assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
      const gl_system_value sysval =
         nir_system_value_from_intrinsic(intrin->intrinsic);
      const uint32_t addr = nak_sysval_attr_addr(sysval);

      res = load_fs_input(b, intrin->def.num_components, addr, ctx->nak);
      if (intrin->def.bit_size == 1)
         res = nir_i2b(b, res);
      break;
   }

   case nir_intrinsic_load_input: {
      const uint16_t addr = fs_input_intrin_addr(intrin);
      res = load_fs_input(b, intrin->def.num_components, addr, ctx->nak);
      break;
   }

   case nir_intrinsic_load_barycentric_coord_pixel:
   case nir_intrinsic_load_barycentric_coord_centroid:
   case nir_intrinsic_load_barycentric_coord_sample:
   case nir_intrinsic_load_barycentric_coord_at_sample:
   case nir_intrinsic_load_barycentric_coord_at_offset: {
      uint32_t addr;
      enum nak_interp_mode interp_mode;
      if (nir_intrinsic_interp_mode(intrin) == INTERP_MODE_NOPERSPECTIVE) {
         addr = NAK_ATTR_BARY_COORD_NO_PERSP;
         interp_mode = NAK_INTERP_MODE_SCREEN_LINEAR;
      } else {
         addr = NAK_ATTR_BARY_COORD;
         interp_mode = NAK_INTERP_MODE_PERSPECTIVE;
      }

      nir_def *offset = NULL;
      enum nak_interp_loc interp_loc;
      switch (intrin->intrinsic) {
      case nir_intrinsic_load_barycentric_coord_at_sample:
      case nir_intrinsic_load_barycentric_coord_at_offset:
         interp_loc = NAK_INTERP_LOC_OFFSET;
         offset = load_barycentric_offset(b, intrin, ctx->fs_key);
         break;
      case nir_intrinsic_load_barycentric_coord_centroid:
      case nir_intrinsic_load_barycentric_coord_sample:
         interp_loc = NAK_INTERP_LOC_CENTROID;
         break;
      case nir_intrinsic_load_barycentric_coord_pixel:
         interp_loc = NAK_INTERP_LOC_DEFAULT;
         break;
      default:
         unreachable("Unknown intrinsic");
      }

      nir_def *inv_w = NULL;
      if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
         inv_w = nir_frcp(b, load_frag_w(b, interp_loc, offset));

      res = interp_fs_input(b, intrin->def.num_components,
                            addr, interp_mode, interp_loc,
                            inv_w, offset, ctx->nak);
      break;
   }

   case nir_intrinsic_load_interpolated_input: {
      const uint16_t addr = fs_input_intrin_addr(intrin);
      nir_intrinsic_instr *bary = nir_src_as_intrinsic(intrin->src[0]);

      enum nak_interp_mode interp_mode;
      if (nir_intrinsic_interp_mode(bary) == INTERP_MODE_SMOOTH ||
          nir_intrinsic_interp_mode(bary) == INTERP_MODE_NONE)
         interp_mode = NAK_INTERP_MODE_PERSPECTIVE;
      else
         interp_mode = NAK_INTERP_MODE_SCREEN_LINEAR;

      nir_def *offset = NULL;
      enum nak_interp_loc interp_loc;
      switch (bary->intrinsic) {
      case nir_intrinsic_load_barycentric_at_offset:
      case nir_intrinsic_load_barycentric_at_sample: {
         interp_loc = NAK_INTERP_LOC_OFFSET;
         offset = load_barycentric_offset(b, bary, ctx->fs_key);
         break;
      }

      case nir_intrinsic_load_barycentric_centroid:
      case nir_intrinsic_load_barycentric_sample:
         interp_loc = NAK_INTERP_LOC_CENTROID;
         break;

      case nir_intrinsic_load_barycentric_pixel:
         interp_loc = NAK_INTERP_LOC_DEFAULT;
         break;

      default:
         unreachable("Unsupported barycentric");
      }

      nir_def *inv_w = NULL;
      if (interp_mode == NAK_INTERP_MODE_PERSPECTIVE)
         inv_w = nir_frcp(b, load_frag_w(b, interp_loc, offset));

      res = interp_fs_input(b, intrin->def.num_components,
                            addr, interp_mode, interp_loc,
                            inv_w, offset, ctx->nak);
      break;
   }

   case nir_intrinsic_load_sample_mask_in:
      b->cursor = nir_after_instr(&intrin->instr);

      /* pixld.covmask returns the coverage mask for the entire pixel being
       * shaded, not the set of samples covered by the current FS invocation.
       * We need to mask off excess samples in order to get the GL/Vulkan
       * behavior.
       */
      if (b->shader->info.fs.uses_sample_shading) {
         /* Mask off just the current sample */
         nir_def *sample = nir_load_sample_id(b);
         nir_def *mask = nir_ishl(b, nir_imm_int(b, 1), sample);
         mask = nir_iand(b, &intrin->def, mask);
         nir_def_rewrite_uses_after(&intrin->def, mask, mask->parent_instr);

         return true;
      } else if (ctx->fs_key && ctx->fs_key->force_sample_shading) {
         /* In this case we don't know up-front how many passes will be run
          * so we need to take the per-pass sample mask from the driver and
          * AND that with the coverage mask.
          */
         nir_def *sample = nir_load_sample_id(b);
         nir_def *mask = load_pass_sample_mask_at(b, sample, ctx->fs_key);
         mask = nir_iand(b, &intrin->def, nir_u2u32(b, mask));
         nir_def_rewrite_uses_after(&intrin->def, mask, mask->parent_instr);

         return true;
      } else {
         /* We're always executing single-pass so just use the sample mask as
          * given by the hardware.
          */
         return false;
      }
      break;

   case nir_intrinsic_load_sample_pos:
      res = load_sample_pos_at(b, nir_load_sample_id(b), ctx->fs_key);
      break;

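   /* Per-vertex inputs are read straight from the triangle RAM with LDTRAM.
    * Each LDTRAM returns attribute data for two vertices: the flag selects
    * the pair containing vertex 2 and the low bit of vertex_id selects the
    * vertex within the pair.
    */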
   case nir_intrinsic_load_input_vertex: {
      const uint16_t addr = fs_input_intrin_addr(intrin);
      unsigned vertex_id = nir_src_as_uint(intrin->src[0]);
      assert(vertex_id < 3);

      nir_def *comps[NIR_MAX_VEC_COMPONENTS];
      for (unsigned c = 0; c < intrin->def.num_components; c++) {
         nir_def *data = nir_ldtram_nv(b, .base = addr + c * 4,
                                       .flags = vertex_id == 2);
         comps[c] = nir_channel(b, data, vertex_id & 1);
      }
      res = nir_vec(b, comps, intrin->num_components);
      break;
   }

   default:
      return false;
   }

   nir_def_replace(&intrin->def, res);

   return true;
}

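/** Lower FS input intrinsics to NAK hardware operations */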
bool
nak_nir_lower_fs_inputs(nir_shader *nir,
                        const struct nak_compiler *nak,
                        const struct nak_fs_key *fs_key)
{
   const struct lower_fs_input_ctx fs_in_ctx = {
      .nak = nak,
      .fs_key = fs_key,
   };
   NIR_PASS_V(nir, nir_shader_intrinsics_pass, lower_fs_input_intrin,
              nir_metadata_control_flow,
              (void *)&fs_in_ctx);

   return true;
}