/*
 * Copyright © 2023 Valve Corporation
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * This lowering pass supports (as configured via nir_lower_tex_options)
 * various texture-related conversions:
 *   + texture projector lowering: converts the coordinate division for
 *     texture projection to be done in ALU instructions instead of
 *     asking the texture operation to do so.
 *   + lowering RECT: converts the un-normalized RECT texture coordinates
 *     to normalized coordinates with txs plus ALU instructions.
 *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
 *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
 *     Note that this automatically triggers texture projector lowering if
 *     needed, since clamping must happen after projector lowering.
 *   + YUV-to-RGB conversion: to allow sampling YUV values as RGB values
 *     according to a specific YUV color space and range.
 */
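
/*
 * Typical usage is to fill out a nir_lower_tex_options struct and run the
 * pass over the whole shader. Which fields a driver sets depends entirely
 * on its hardware, so the options below are only an illustrative sketch:
 *
 *    nir_lower_tex_options opts = {
 *       .lower_txp = ~0u,      // lower projectors for every sampler dim
 *       .lower_rect = true,    // normalize RECT coords via txs + ALU
 *    };
 *    progress |= nir_lower_tex(shader, &opts);
 */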

#include "nir.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"
#include "nir_format_convert.h"

typedef struct nir_const_value_3_4 {
   nir_const_value v[3][4];
} nir_const_value_3_4;

static const nir_const_value_3_4 bt601_limited_range_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f } },
   { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt601_full_range_csc_coeffs = { {
   { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } },
   { { .f32 = 0.0f }, { .f32 = -0.34413629f }, { .f32 = 1.772f } },
   { { .f32 = 1.402f }, { .f32 = -0.71413629f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt709_limited_range_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f }, { .f32 = -0.21324861f }, { .f32 = 2.11240179f } },
   { { .f32 = 1.79274107f }, { .f32 = -0.53290933f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt709_full_range_csc_coeffs = { {
   { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } },
   { { .f32 = 0.0f }, { .f32 = -0.18732427f }, { .f32 = 1.8556f } },
   { { .f32 = 1.5748f }, { .f32 = -0.46812427f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt2020_limited_range_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f }, { .f32 = -0.18732610f }, { .f32 = 2.14177232f } },
   { { .f32 = 1.67878795f }, { .f32 = -0.65046843f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt2020_full_range_csc_coeffs = { {
   { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } },
   { { .f32 = 0.0f }, { .f32 = -0.16455313f }, { .f32 = 1.88140000f } },
   { { .f32 = 1.4747f }, { .f32 = -0.57139187f }, { .f32 = 0.0f } },
} };

static const float bt601_limited_range_csc_offsets[3] = {
   -0.874202218f, 0.531667823f, -1.085630789f
};
static const float bt601_full_range_csc_offsets[3] = {
   -0.701000000f, 0.529136286f, -0.886000000f
};
static const float bt709_limited_range_csc_offsets[3] = {
   -0.972945075f, 0.301482665f, -1.133402218f
};
static const float bt709_full_range_csc_offsets[3] = {
   -0.787400000f, 0.327724273f, -0.927800000f
};
static const float bt2020_limited_range_csc_offsets[3] = {
   -0.915745075f, 0.347480639f, -1.148145075f
};
static const float bt2020_full_range_csc_offsets[3] = {
   -0.737350000f, 0.367972500f, -0.940700000f
};

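/*
 * The offset vectors fold the constant terms of the YUV-to-RGB equations
 * into a single per-channel add applied after the matrix multiply. As a
 * worked example, for BT.601 limited range the red channel is
 *
 *    R = 1.16438356 * (Y - 16/255) + 1.59602678 * (V - 128/255)
 *
 * so the red offset is -(1.16438356 * 16/255 + 1.59602678 * 128/255)
 * ~= -0.874202218, the first bt601_limited_range_csc_offsets entry above.
 */

/*
 * Lowers the texture projector: the coordinates (and the shadow comparator,
 * if present) are divided by the projector in ALU instead. In GLSL terms,
 * this rewrites, e.g.,
 *
 *    textureProj(s, vec3(x, y, q))  ->  texture(s, vec2(x, y) / q)
 */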
static bool
project_src(nir_builder *b, nir_tex_instr *tex)
{
   nir_def *proj = nir_steal_tex_src(tex, nir_tex_src_projector);
   if (!proj)
      return false;

   b->cursor = nir_before_instr(&tex->instr);
   nir_def *inv_proj = nir_frcp(b, proj);

   /* Walk through the sources projecting the arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
      case nir_tex_src_comparator:
         break;
      default:
         continue;
      }
      nir_def *unprojected = tex->src[i].src.ssa;
      nir_def *projected = nir_fmul(b, unprojected, inv_proj);

      /* Array indices don't get projected, so make a new vector with the
       * coordinate's array index untouched.
       */
      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
         switch (tex->coord_components) {
         case 4:
            projected = nir_vec4(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, projected, 2),
                                 nir_channel(b, unprojected, 3));
            break;
         case 3:
            projected = nir_vec3(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, unprojected, 2));
            break;
         case 2:
            projected = nir_vec2(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, unprojected, 1));
            break;
         default:
            unreachable("bad texture coord count for array");
            break;
         }
      }

      nir_src_rewrite(&tex->src[i].src, projected);
   }

   return true;
}

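/*
 * Folds a texel-space offset into the coordinate itself. For normalized
 * float coordinates this computes coord + offset * (1 / textureSize), using
 * either a txs query plus frcp or, when the backend supports it, the
 * load_texture_scale intrinsic, which provides the reciprocal size directly.
 */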
static bool
lower_offset(nir_builder *b, nir_tex_instr *tex)
{
   nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);
   if (!offset)
      return false;

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   nir_def *coord = tex->src[coord_index].src.ssa;

   b->cursor = nir_before_instr(&tex->instr);

   nir_def *offset_coord;
   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
      } else {
         nir_def *scale = NULL;

         if (b->shader->options->has_texture_scaling) {
            nir_def *idx = nir_imm_int(b, tex->texture_index);
            scale = nir_load_texture_scale(b, 32, idx);
         } else {
            nir_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
            scale = nir_frcp(b, txs);
         }

         offset_coord = nir_fadd(b, coord,
                                 nir_fmul(b,
                                          nir_i2f32(b, offset),
                                          scale));
      }
   } else {
      offset_coord = nir_iadd(b, coord, offset);
   }

   if (tex->is_array) {
      /* The offset is not applied to the array index */
      if (tex->coord_components == 2) {
         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, coord, 1));
      } else if (tex->coord_components == 3) {
         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, offset_coord, 1),
                                 nir_channel(b, coord, 2));
      } else {
         unreachable("Invalid number of components");
      }
   }

   nir_src_rewrite(&tex->src[coord_index].src, offset_coord);

   return true;
}

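/*
 * Lowers a RECT texture to a 2D one. RECT coordinates are unnormalized
 * texel positions, so dividing by the LOD-0 texture size gives the
 * equivalent normalized coordinate; e.g. vec2(37.5, 12.0) on a 64x32
 * texture becomes vec2(0.5859375, 0.375).
 */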
static void
lower_rect(nir_builder *b, nir_tex_instr *tex)
{
   /* Set the sampler_dim to 2D here so that get_texture_size picks up the
    * right dimensionality.
    */
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

   nir_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
   nir_def *scale = nir_frcp(b, txs);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);

   if (coord_index != -1) {
      nir_def *coords = tex->src[coord_index].src.ssa;
      nir_src_rewrite(&tex->src[coord_index].src, nir_fmul(b, coords, scale));
   }
}

static void
lower_rect_tex_scale(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_def *idx = nir_imm_int(b, tex->texture_index);
   nir_def *scale = nir_load_texture_scale(b, 32, idx);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);

   if (coord_index != -1) {
      nir_def *coords = tex->src[coord_index].src.ssa;
      nir_src_rewrite(&tex->src[coord_index].src, nir_fmul(b, coords, scale));
   }
}

static void
lower_1d(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_def *coords = nir_steal_tex_src(tex, nir_tex_src_coord);
   nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);
   nir_def *ddx = nir_steal_tex_src(tex, nir_tex_src_ddx);
   nir_def *ddy = nir_steal_tex_src(tex, nir_tex_src_ddy);

   /* Add in 2D sources to become a 2D operation */
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

   if (coords) {
      /* We want to fetch texel 0 along the Y-axis. For txf we can address
       * it directly as 0; for sampling ops we sample at 0.5 so that texel 0
       * is hit regardless of the wrap mode.
       */
      nir_def *y = nir_imm_floatN_t(b, tex->op == nir_texop_txf ? 0.0 : 0.5,
                                    coords->bit_size);

      tex->coord_components++;

      if (tex->is_array && tex->op != nir_texop_lod) {
         assert(tex->coord_components == 3);

         nir_def *x = nir_channel(b, coords, 0);
         nir_def *idx = nir_channel(b, coords, 1);
         coords = nir_vec3(b, x, y, idx);
      } else {
         assert(tex->coord_components == 2);
         coords = nir_vec2(b, coords, y);
      }

      nir_tex_instr_add_src(tex, nir_tex_src_coord, coords);
   }

   if (offset) {
      nir_tex_instr_add_src(tex, nir_tex_src_offset,
                            nir_pad_vector_imm_int(b, offset, 0, 2));
   }

   if (ddx || ddy) {
      nir_tex_instr_add_src(tex, nir_tex_src_ddx,
                            nir_pad_vector_imm_int(b, ddx, 0, 2));

      nir_tex_instr_add_src(tex, nir_tex_src_ddy,
                            nir_pad_vector_imm_int(b, ddy, 0, 2));
   }

   /* Handle destination component mismatch for txs. */
   if (tex->op == nir_texop_txs) {
      b->cursor = nir_after_instr(&tex->instr);

      nir_def *dst;
      if (tex->is_array) {
         assert(tex->def.num_components == 2);
         tex->def.num_components = 3;

         /* For arrays, we take .xz to skip the newly added height */
         dst = nir_channels(b, &tex->def, (1 << 0) | (1 << 2));
      } else {
         assert(tex->def.num_components == 1);
         tex->def.num_components = 2;

         dst = nir_channel(b, &tex->def, 0);
      }

      nir_def_rewrite_uses_after(&tex->def, dst, dst->parent_instr);
   }
}

static void
lower_lod(nir_builder *b, nir_tex_instr *tex, nir_def *lod)
{
   assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);

   /* If we have a bias, add it in */
   nir_def *bias = nir_steal_tex_src(tex, nir_tex_src_bias);
   if (bias)
      lod = nir_fadd(b, lod, bias);

   /* If we have a minimum LOD, clamp LOD accordingly */
   nir_def *min_lod = nir_steal_tex_src(tex, nir_tex_src_min_lod);
   if (min_lod)
      lod = nir_fmax(b, lod, min_lod);

   nir_tex_instr_add_src(tex, nir_tex_src_lod, lod);
   tex->op = nir_texop_txl;
}

static void
lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);
   lower_lod(b, tex, nir_get_texture_lod(b, tex));
}

static void
lower_zero_lod(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   if (tex->op == nir_texop_lod) {
      nir_def_replace(&tex->def, nir_imm_int(b, 0));
      return;
   }

   lower_lod(b, tex, nir_imm_int(b, 0));
}

static nir_def *
sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
             const nir_lower_tex_options *options)
{
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
   assert(tex->op == nir_texop_tex);
   assert(tex->coord_components == 2);

   nir_tex_instr *plane_tex =
      nir_tex_instr_create(b->shader, tex->num_srcs + 1);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      plane_tex->src[i].src = nir_src_for_ssa(tex->src[i].src.ssa);
      plane_tex->src[i].src_type = tex->src[i].src_type;
   }
   plane_tex->src[tex->num_srcs] = nir_tex_src_for_ssa(nir_tex_src_plane,
                                                       nir_imm_int(b, plane));
   plane_tex->op = nir_texop_tex;
   plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   plane_tex->dest_type = nir_type_float | tex->def.bit_size;
   plane_tex->coord_components = 2;

   plane_tex->texture_index = tex->texture_index;
   plane_tex->sampler_index = tex->sampler_index;

   nir_def_init(&plane_tex->instr, &plane_tex->def, 4,
                tex->def.bit_size);

   nir_builder_instr_insert(b, &plane_tex->instr);

   /* If a scale factor is configured for this texture, return a scaled value. */
   if (options->scale_factors[tex->texture_index])
      return nir_fmul_imm(b, &plane_tex->def,
                          options->scale_factors[tex->texture_index]);

   return &plane_tex->def;
}

static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
                   nir_def *y, nir_def *u, nir_def *v,
                   nir_def *a,
                   const nir_lower_tex_options *options,
                   unsigned texture_index)
{
   const float *offset_vals;
   const nir_const_value_3_4 *m;
   assert((options->bt709_external & options->bt2020_external) == 0);
   if (options->yuv_full_range_external & (1u << texture_index)) {
      if (options->bt709_external & (1u << texture_index)) {
         m = &bt709_full_range_csc_coeffs;
         offset_vals = bt709_full_range_csc_offsets;
      } else if (options->bt2020_external & (1u << texture_index)) {
         m = &bt2020_full_range_csc_coeffs;
         offset_vals = bt2020_full_range_csc_offsets;
      } else {
         m = &bt601_full_range_csc_coeffs;
         offset_vals = bt601_full_range_csc_offsets;
      }
   } else {
      if (options->bt709_external & (1u << texture_index)) {
         m = &bt709_limited_range_csc_coeffs;
         offset_vals = bt709_limited_range_csc_offsets;
      } else if (options->bt2020_external & (1u << texture_index)) {
         m = &bt2020_limited_range_csc_coeffs;
         offset_vals = bt2020_limited_range_csc_offsets;
      } else {
         m = &bt601_limited_range_csc_coeffs;
         offset_vals = bt601_limited_range_csc_offsets;
      }
   }

   unsigned bit_size = tex->def.bit_size;

   nir_def *offset =
      nir_vec4(b,
               nir_imm_floatN_t(b, offset_vals[0], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[1], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[2], a->bit_size),
               a);

   offset = nir_f2fN(b, offset, bit_size);

   nir_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[0]), bit_size);
   nir_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[1]), bit_size);
   nir_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[2]), bit_size);

   nir_def *result =
      nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));

   nir_def_rewrite_uses(&tex->def, result);
}

static void
lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y = sample_plane(b, tex, 0, options);
   nir_def *uv = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, uv, 0),
                      nir_channel(b, uv, 1),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_y_vu_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y = sample_plane(b, tex, 0, options);
   nir_def *vu = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, vu, 1),
                      nir_channel(b, vu, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
                     const nir_lower_tex_options *options,
                     unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y = sample_plane(b, tex, 0, options);
   nir_def *u = sample_plane(b, tex, 1, options);
   nir_def *v = sample_plane(b, tex, 2, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, u, 0),
                      nir_channel(b, v, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
                       const nir_lower_tex_options *options,
                       unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y = sample_plane(b, tex, 0, options);
   nir_def *xuxv = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, xuxv, 1),
                      nir_channel(b, xuxv, 3),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yx_xvxu_external(nir_builder *b, nir_tex_instr *tex,
                       const nir_lower_tex_options *options,
                       unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y = sample_plane(b, tex, 0, options);
   nir_def *xvxu = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, xvxu, 3),
                      nir_channel(b, xvxu, 1),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
                       const nir_lower_tex_options *options,
                       unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y = sample_plane(b, tex, 0, options);
   nir_def *uxvx = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 1),
                      nir_channel(b, uxvx, 0),
                      nir_channel(b, uxvx, 2),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_xy_vxux_external(nir_builder *b, nir_tex_instr *tex,
                       const nir_lower_tex_options *options,
                       unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y = sample_plane(b, tex, 0, options);
   nir_def *vxux = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 1),
                      nir_channel(b, vxux, 2),
                      nir_channel(b, vxux, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *ayuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, ayuv, 2),
                      nir_channel(b, ayuv, 1),
                      nir_channel(b, ayuv, 0),
                      nir_channel(b, ayuv, 3),
                      options,
                      texture_index);
}

static void
lower_y41x_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *y41x = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y41x, 1),
                      nir_channel(b, y41x, 0),
                      nir_channel(b, y41x, 2),
                      nir_channel(b, y41x, 3),
                      options,
                      texture_index);
}

static void
lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *xyuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, xyuv, 2),
                      nir_channel(b, xyuv, 1),
                      nir_channel(b, xyuv, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yuv_external(nir_builder *b, nir_tex_instr *tex,
                   const nir_lower_tex_options *options,
                   unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *yuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, yuv, 0),
                      nir_channel(b, yuv, 1),
                      nir_channel(b, yuv, 2),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yu_yv_external(nir_builder *b, nir_tex_instr *tex,
                     const nir_lower_tex_options *options,
                     unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *yuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, yuv, 1),
                      nir_channel(b, yuv, 2),
                      nir_channel(b, yuv, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yv_yu_external(nir_builder *b, nir_tex_instr *tex,
                     const nir_lower_tex_options *options,
                     unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *yuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, yuv, 2),
                      nir_channel(b, yuv, 1),
                      nir_channel(b, yuv, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

/*
 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
 * computed from the gradients.
 */
static void
replace_gradient_with_lod(nir_builder *b, nir_def *lod, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txd);

   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));

   /* If we have a minimum LOD, clamp LOD accordingly */
   nir_def *min_lod = nir_steal_tex_src(tex, nir_tex_src_min_lod);
   if (min_lod)
      lod = nir_fmax(b, lod, min_lod);

   nir_tex_instr_add_src(tex, nir_tex_src_lod, lod);
   tex->op = nir_texop_txl;
}

static void
lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);

   /* Use textureSize() to get the width and height of LOD 0 */
   nir_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));

   /* Cubemap texture lookups first generate a texture coordinate normalized
    * to [-1, 1] on the appropriate face. The appropriate face is determined
    * by which component has the largest magnitude and its sign. The texture
    * coordinate is the quotient of the remaining texture coordinates and
    * the absolute value of the component of largest magnitude. Because of
    * this division, computing the derivative of the texel coordinate
    * requires the quotient rule. The high-level GLSL code is as follows:
    *
    * Step 1: selection
    *
    * vec3 abs_p, Q, dQdx, dQdy;
    * abs_p = abs(ir->coordinate);
    * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
    *    Q = ir->coordinate.yzx;
    *    dQdx = ir->lod_info.grad.dPdx.yzx;
    *    dQdy = ir->lod_info.grad.dPdy.yzx;
    * }
    * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
    *    Q = ir->coordinate.xzy;
    *    dQdx = ir->lod_info.grad.dPdx.xzy;
    *    dQdy = ir->lod_info.grad.dPdy.xzy;
    * }
    * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
    *    Q = ir->coordinate;
    *    dQdx = ir->lod_info.grad.dPdx;
    *    dQdy = ir->lod_info.grad.dPdy;
    * }
    *
    * Step 2: use the quotient rule to compute the derivative. The
    * coordinate normalized to [-1, 1] is given by Q.xy / (sign(Q.z) * Q.z).
    * We are only concerned with the magnitudes of the derivatives, which
    * are not affected by the sign, so we drop the sign from the computation.
    *
    * vec2 dx, dy;
    * float recip;
    *
    * recip = 1.0 / Q.z;
    * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
    * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
    *
    * Step 3: compute LOD. At this point we have the derivatives of the
    * texture coordinates normalized to [-1, 1]. We take the LOD to be
    *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
    *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
    *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy, dy))) * L)
    *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy, dy))))
    *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy, dy)))
    * where L is the dimension of the cubemap. The code is:
    *
    * float M, result;
    * M = max(dot(dx, dx), dot(dy, dy));
    * L = textureSize(sampler, 0).x;
    * result = -1.0 + 0.5 * log2(L * L * M);
    */

   /* coordinate */
   nir_def *p =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;

   /* unmodified dPdx, dPdy values */
   nir_def *dPdx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_def *dPdy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_def *abs_p = nir_fabs(b, p);
   nir_def *abs_p_x = nir_channel(b, abs_p, 0);
   nir_def *abs_p_y = nir_channel(b, abs_p, 1);
   nir_def *abs_p_z = nir_channel(b, abs_p, 2);

   /* 1. compute selector */
   nir_def *Q, *dQdx, *dQdy;

   nir_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
   nir_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));

   unsigned yzx[3] = { 1, 2, 0 };
   unsigned xzy[3] = { 0, 2, 1 };

   Q = nir_bcsel(b, cond_z,
                 p,
                 nir_bcsel(b, cond_y,
                           nir_swizzle(b, p, xzy, 3),
                           nir_swizzle(b, p, yzx, 3)));

   dQdx = nir_bcsel(b, cond_z,
                    dPdx,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdx, xzy, 3),
                              nir_swizzle(b, dPdx, yzx, 3)));

   dQdy = nir_bcsel(b, cond_z,
                    dPdy,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdy, xzy, 3),
                              nir_swizzle(b, dPdy, yzx, 3)));

   /* 2. quotient rule */

   /* tmp = Q.xy * recip;
    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
    */
   nir_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));

   nir_def *Q_xy = nir_trim_vector(b, Q, 2);
   nir_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);

   nir_def *dQdx_xy = nir_trim_vector(b, dQdx, 2);
   nir_def *dQdx_z = nir_channel(b, dQdx, 2);
   nir_def *dx =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));

   nir_def *dQdy_xy = nir_trim_vector(b, dQdy, 2);
   nir_def *dQdy_z = nir_channel(b, dQdy, 2);
   nir_def *dy =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));

   /* M = max(dot(dx, dx), dot(dy, dy)); */
   nir_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));

   /* size has textureSize() of LOD 0 */
   nir_def *L = nir_channel(b, size, 0);

   /* lod = -1.0 + 0.5 * log2(L * L * M); */
   nir_def *lod =
      nir_fadd(b,
               nir_imm_float(b, -1.0f),
               nir_fmul(b,
                        nir_imm_float(b, 0.5f),
                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));

   /* 3. Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}

static void
lower_gradient(nir_builder *b, nir_tex_instr *tex)
{
   /* Cubes are more complicated and have their own function */
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      lower_gradient_cube_map(b, tex);
      return;
   }

   assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);

   /* Use textureSize() to get the width and height of LOD 0 */
   unsigned component_mask;
   switch (tex->sampler_dim) {
   case GLSL_SAMPLER_DIM_3D:
      component_mask = 7;
      break;
   case GLSL_SAMPLER_DIM_1D:
      component_mask = 1;
      break;
   default:
      component_mask = 3;
      break;
   }

   nir_def *size =
      nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)),
                   component_mask);

   /* Scale the gradients by width and height. Effectively, the incoming
    * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
    * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
    */
   nir_def *ddx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_def *ddy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_def *dPdx = nir_fmul(b, ddx, size);
   nir_def *dPdy = nir_fmul(b, ddy, size);

   nir_def *rho;
   if (dPdx->num_components == 1) {
      rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
   } else {
      rho = nir_fmax(b,
                     nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
                     nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
   }

   /* lod = log2(rho). We're ignoring GL state biases for now. */
   nir_def *lod = nir_flog2(b, rho);

   /* Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}

/* tex(s, coord) = txd(s, coord, dfdx(coord), dfdy(coord)) */
static nir_tex_instr *
lower_tex_to_txd(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);
   nir_tex_instr *txd = nir_tex_instr_create(b->shader, tex->num_srcs + 2);

   txd->op = nir_texop_txd;
   txd->sampler_dim = tex->sampler_dim;
   txd->dest_type = tex->dest_type;
   txd->coord_components = tex->coord_components;
   txd->texture_index = tex->texture_index;
   txd->sampler_index = tex->sampler_index;
   txd->is_array = tex->is_array;
   txd->is_shadow = tex->is_shadow;
   txd->is_new_style_shadow = tex->is_new_style_shadow;

   /* reuse existing srcs */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      txd->src[i].src = nir_src_for_ssa(tex->src[i].src.ssa);
      txd->src[i].src_type = tex->src[i].src_type;
   }
   int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_idx >= 0);
   nir_def *coord = tex->src[coord_idx].src.ssa;
   /* don't take the derivative of the array index */
   if (tex->is_array)
      coord = nir_channels(b, coord, nir_component_mask(coord->num_components - 1));
   nir_def *dfdx = nir_ddx(b, coord);
   nir_def *dfdy = nir_ddy(b, coord);
   txd->src[tex->num_srcs] = nir_tex_src_for_ssa(nir_tex_src_ddx, dfdx);
   txd->src[tex->num_srcs + 1] = nir_tex_src_for_ssa(nir_tex_src_ddy, dfdy);

   nir_def_init(&txd->instr, &txd->def,
                tex->def.num_components,
                tex->def.bit_size);
   nir_builder_instr_insert(b, &txd->instr);
   nir_def_replace(&tex->def, &txd->def);
   return txd;
}

/* txb(s, coord, bias) = txl(s, coord, lod(s, coord).y + bias) */
static nir_tex_instr *
lower_txb_to_txl(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);
   nir_tex_instr *txl = nir_tex_instr_create(b->shader, tex->num_srcs);

   txl->op = nir_texop_txl;
   txl->sampler_dim = tex->sampler_dim;
   txl->dest_type = tex->dest_type;
   txl->coord_components = tex->coord_components;
   txl->texture_index = tex->texture_index;
   txl->sampler_index = tex->sampler_index;
   txl->is_array = tex->is_array;
   txl->is_shadow = tex->is_shadow;
   txl->is_new_style_shadow = tex->is_new_style_shadow;

   /* reuse all but bias src */
   for (int i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type != nir_tex_src_bias) {
         txl->src[i].src = nir_src_for_ssa(tex->src[i].src.ssa);
         txl->src[i].src_type = tex->src[i].src_type;
      }
   }
   nir_def *lod = nir_get_texture_lod(b, tex);

   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   assert(bias_idx >= 0);
   lod = nir_fadd(b, lod, tex->src[bias_idx].src.ssa);
   txl->src[tex->num_srcs - 1] = nir_tex_src_for_ssa(nir_tex_src_lod, lod);

   nir_def_init(&txl->instr, &txl->def,
                tex->def.num_components,
                tex->def.bit_size);
   nir_builder_instr_insert(b, &txl->instr);
   nir_def_replace(&tex->def, &txl->def);
   return txl;
}

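/*
 * tex and txb are first rewritten to their explicit-gradient/explicit-LOD
 * forms, so the gradients and LOD are still computed from the unclamped
 * coordinates; clamping the coordinate of an implicit-derivative op in
 * place would perturb the derivatives near the clamp boundary.
 */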
static nir_tex_instr *
saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
{
   if (tex->op == nir_texop_tex)
      tex = lower_tex_to_txd(b, tex);
   else if (tex->op == nir_texop_txb)
      tex = lower_txb_to_txl(b, tex);

   b->cursor = nir_before_instr(&tex->instr);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);

   if (coord_index != -1) {
      nir_def *src = tex->src[coord_index].src.ssa;

      /* split src into components: */
      nir_def *comp[4];

      assume(tex->coord_components >= 1);

      for (unsigned j = 0; j < tex->coord_components; j++)
         comp[j] = nir_channel(b, src, j);

      /* clamp requested components, array index does not get clamped: */
      unsigned ncomp = tex->coord_components;
      if (tex->is_array)
         ncomp--;

      for (unsigned j = 0; j < ncomp; j++) {
         if ((1 << j) & sat_mask) {
            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
               /* non-normalized texture coords, so clamp to texture
                * size rather than [0.0, 1.0]
                */
               nir_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
            } else {
               comp[j] = nir_fsat(b, comp[j]);
            }
         }
      }

      /* and move the result back into a single vecN: */
      src = nir_vec(b, comp, tex->coord_components);

      nir_src_rewrite(&tex->src[coord_index].src, src);
   }
   return tex;
}

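/*
 * Swizzle values 0-3 select a source channel; the remaining encodings, 4
 * and 5, stand for the constants 0 and 1 (1.0f for float destinations),
 * which this helper materializes as an immediate.
 */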
static nir_def *
get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
{
   nir_const_value v[4];

   memset(&v, 0, sizeof(v));

   if (swizzle_val == 4) {
      v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
   } else {
      assert(swizzle_val == 5);
      if (type == nir_type_float32)
         v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
      else
         v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
   }

   return nir_build_imm(b, 4, 32, v);
}

static void
swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);

   assert(nir_tex_instr_dest_size(tex) == 4);
   unsigned swiz[4] = { 2, 3, 1, 0 };
   nir_def *swizzled = nir_swizzle(b, &tex->def, swiz, 4);

   nir_def_rewrite_uses_after(&tex->def, swizzled,
                              swizzled->parent_instr);
}

static void
swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_def *swizzled;
   if (tex->op == nir_texop_tg4) {
      if (swizzle[tex->component] < 4) {
         /* This one's easy */
         tex->component = swizzle[tex->component];
         return;
      } else {
         swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
      }
   } else {
      assert(nir_tex_instr_dest_size(tex) == 4);
      if (swizzle[0] < 4 && swizzle[1] < 4 &&
          swizzle[2] < 4 && swizzle[3] < 4) {
         unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
         /* We have no 0s or 1s, just emit a swizzling MOV */
         swizzled = nir_swizzle(b, &tex->def, swiz, 4);
      } else {
         nir_scalar srcs[4];
         for (unsigned i = 0; i < 4; i++) {
            if (swizzle[i] < 4) {
               srcs[i] = nir_get_scalar(&tex->def, swizzle[i]);
            } else {
               srcs[i] = nir_get_scalar(get_zero_or_one(b, tex->dest_type, swizzle[i]), 0);
            }
         }
         swizzled = nir_vec_scalars(b, srcs, 4);
      }
   }

   nir_def_rewrite_uses_after(&tex->def, swizzled,
                              swizzled->parent_instr);
}

static void
linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
{
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);

   b->cursor = nir_after_instr(&tex->instr);

   nir_def *rgb =
      nir_format_srgb_to_linear(b, nir_trim_vector(b, &tex->def, 3));

   /* alpha is untouched: */
   nir_def *result = nir_vec4(b,
                              nir_channel(b, rgb, 0),
                              nir_channel(b, rgb, 1),
                              nir_channel(b, rgb, 2),
                              nir_channel(b, &tex->def, 3));

   nir_def_rewrite_uses_after(&tex->def, result,
                              result->parent_instr);
}

/**
 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
 * i16, or u16, or a single unorm4x8 value.
 *
 * Note that we don't change the destination num_components, because
 * nir_tex_instr_dest_size() will still return 4. The driver is just expected
 * to not store the other channels, given that nothing at the NIR level will
 * read them.
 */
static bool
lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
                  const nir_lower_tex_options *options)
{
   nir_def *color = &tex->def;

   b->cursor = nir_after_instr(&tex->instr);

   assert(options->lower_tex_packing_cb);
   enum nir_lower_tex_packing packing =
      options->lower_tex_packing_cb(tex, options->lower_tex_packing_data);

   switch (packing) {
   case nir_lower_tex_packing_none:
      return false;

   case nir_lower_tex_packing_16: {
      static const unsigned bits[4] = { 16, 16, 16, 16 };

      switch (nir_alu_type_get_base_type(tex->dest_type)) {
      case nir_type_float:
         switch (nir_tex_instr_dest_size(tex)) {
         case 1:
            assert(tex->is_shadow && tex->is_new_style_shadow);
            color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
            break;
         case 2: {
            nir_def *rg = nir_channel(b, color, 0);
            color = nir_vec2(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg));
            break;
         }
         case 4: {
            nir_def *rg = nir_channel(b, color, 0);
            nir_def *ba = nir_channel(b, color, 1);
            color = nir_vec4(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg),
                             nir_unpack_half_2x16_split_x(b, ba),
                             nir_unpack_half_2x16_split_y(b, ba));
            break;
         }
         default:
            unreachable("wrong dest_size");
         }
         break;

      case nir_type_int:
         color = nir_format_unpack_sint(b, color, bits, 4);
         break;

      case nir_type_uint:
         color = nir_format_unpack_uint(b, color, bits, 4);
         break;

      default:
         unreachable("unknown base type");
      }
      break;
   }

   case nir_lower_tex_packing_8:
      assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
      color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
      break;
   }

   nir_def_rewrite_uses_after(&tex->def, color,
                              color->parent_instr);
   return true;
}

static bool
sampler_index_lt(nir_tex_instr *tex, unsigned max)
{
   assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);

   unsigned sampler_index = tex->sampler_index;

   int sampler_offset_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
   if (sampler_offset_idx >= 0) {
      if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
         return false;

      sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
   }

   return sampler_index < max;
}

static bool
lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_tg4);
   assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
   assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);

   b->cursor = nir_after_instr(&tex->instr);

   nir_scalar dest[5] = { 0 };
   nir_def *residency = NULL;
   for (unsigned i = 0; i < 4; ++i) {
      nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
      tex_copy->op = tex->op;
      tex_copy->coord_components = tex->coord_components;
      tex_copy->sampler_dim = tex->sampler_dim;
      tex_copy->is_array = tex->is_array;
      tex_copy->is_shadow = tex->is_shadow;
      tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
      tex_copy->is_sparse = tex->is_sparse;
      tex_copy->is_gather_implicit_lod = tex->is_gather_implicit_lod;
      tex_copy->component = tex->component;
      tex_copy->dest_type = tex->dest_type;
      tex_copy->texture_index = tex->texture_index;
      tex_copy->sampler_index = tex->sampler_index;
      tex_copy->backend_flags = tex->backend_flags;

      for (unsigned j = 0; j < tex->num_srcs; ++j) {
         tex_copy->src[j].src = nir_src_for_ssa(tex->src[j].src.ssa);
         tex_copy->src[j].src_type = tex->src[j].src_type;
      }

      nir_def *offset = nir_imm_ivec2(b, tex->tg4_offsets[i][0],
                                      tex->tg4_offsets[i][1]);
      nir_tex_src src = nir_tex_src_for_ssa(nir_tex_src_offset, offset);
      tex_copy->src[tex_copy->num_srcs - 1] = src;

      nir_def_init(&tex_copy->instr, &tex_copy->def,
                   nir_tex_instr_dest_size(tex), 32);

      nir_builder_instr_insert(b, &tex_copy->instr);

      dest[i] = nir_get_scalar(&tex_copy->def, 3);
      if (tex->is_sparse) {
         nir_def *code = nir_channel(b, &tex_copy->def, 4);
         if (residency)
            residency = nir_sparse_residency_code_and(b, residency, code);
         else
            residency = code;
      }
   }
   dest[4] = nir_get_scalar(residency, 0);

   nir_def *res = nir_vec_scalars(b, dest, tex->def.num_components);
   nir_def_replace(&tex->def, res);

   return true;
}

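/*
 * Lowers a txs with a non-zero LOD to txs at LOD 0 followed by ALU
 * minification: TXS(lod) = max(TXS(0) >> lod, 1). For example, a 100x60
 * texture queried at LOD 2 yields (max(100 >> 2, 1), max(60 >> 2, 1)) =
 * (25, 15).
 */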
static bool
nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
{
   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (lod_idx < 0 ||
       (nir_src_is_const(tex->src[lod_idx].src) &&
        nir_src_as_int(tex->src[lod_idx].src) == 0))
      return false;

   unsigned dest_size = nir_tex_instr_dest_size(tex);

   b->cursor = nir_before_instr(&tex->instr);
   nir_def *lod = tex->src[lod_idx].src.ssa;

   /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
   nir_src_rewrite(&tex->src[lod_idx].src, nir_imm_int(b, 0));

   /* TXS(LOD) = max(TXS(0) >> LOD, 1)
    * But we do min(TXS(0), TXS(LOD)) to catch the case of a null surface,
    * which should return 0, not 1.
    */
   b->cursor = nir_after_instr(&tex->instr);
   nir_def *minified = nir_imin(b, &tex->def,
                                nir_imax(b, nir_ushr(b, &tex->def, lod),
                                         nir_imm_int(b, 1)));

   /* Make sure the component encoding the array size (if any) is not
    * minified.
    */
   if (tex->is_array) {
      nir_def *comp[3];

      assert(dest_size <= ARRAY_SIZE(comp));
      for (unsigned i = 0; i < dest_size - 1; i++)
         comp[i] = nir_channel(b, minified, i);

      comp[dest_size - 1] = nir_channel(b, &tex->def, dest_size - 1);
      minified = nir_vec(b, comp, dest_size);
   }

   nir_def_rewrite_uses_after(&tex->def, minified,
                              minified->parent_instr);
   return true;
}

static void
nir_lower_txs_cube_array(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array);
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

   b->cursor = nir_after_instr(&tex->instr);

   assert(tex->def.num_components == 3);
   nir_def *size = &tex->def;
   size = nir_vec3(b, nir_channel(b, size, 1),
                   nir_channel(b, size, 1),
                   nir_idiv(b, nir_channel(b, size, 2),
                            nir_imm_int(b, 6)));

   nir_def_rewrite_uses_after(&tex->def, size, size->parent_instr);
}

/* Adjust the sample index according to AMD FMASK (fragment mask).
 *
 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
 * which is the identity mapping. Each nibble says which physical sample
 * should be fetched to get that sample.
 *
 * For example, 0x11111100 means there are only 2 samples stored and
 * the second sample covers 3/4 of the pixel. When reading samples 0
 * and 1, return physical sample 0 (determined by the first two 0s
 * in FMASK), otherwise return physical sample 1.
 *
 * The sample index should be adjusted as follows:
 *   sample_index = ubfe(fmask, sample_index * 4, 3);
 *
 * Only extract 3 bits because EQAA can generate number 8 in FMASK, which
 * means the physical sample index is unknown. We can map 8 to any valid
 * sample index, and extracting only 3 bits will map it to 0, which works
 * with all MSAA modes.
 */
static void
nir_lower_ms_txf_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   lower_offset(b, tex);

   b->cursor = nir_before_instr(&tex->instr);

   /* Create FMASK fetch. */
   assert(tex->texture_index == 0);
   nir_tex_instr *fmask_fetch = nir_tex_instr_create(b->shader, tex->num_srcs - 1);
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->coord_components = tex->coord_components;
   fmask_fetch->sampler_dim = tex->sampler_dim;
   fmask_fetch->is_array = tex->is_array;
   fmask_fetch->texture_non_uniform = tex->texture_non_uniform;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_def_init(&fmask_fetch->instr, &fmask_fetch->def, 1, 32);

   fmask_fetch->num_srcs = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_ms_index)
         continue;
      nir_tex_src *src = &fmask_fetch->src[fmask_fetch->num_srcs++];
      src->src = nir_src_for_ssa(tex->src[i].src.ssa);
      src->src_type = tex->src[i].src_type;
   }

   nir_builder_instr_insert(b, &fmask_fetch->instr);

   /* Obtain new sample index. */
   int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
   assert(ms_index >= 0);
   nir_def *sample = tex->src[ms_index].src.ssa;
   nir_def *new_sample = nir_ubfe(b, &fmask_fetch->def,
                                  nir_u2u32(b, nir_ishl_imm(b, sample, 2)),
                                  nir_imm_int(b, 3));

   /* Update instruction. */
   tex->op = nir_texop_fragment_fetch_amd;
   nir_src_rewrite(&tex->src[ms_index].src,
                   nir_u2uN(b, new_sample, sample->bit_size));
}

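/*
 * samples_identical can be answered from the fragment mask alone: an FMASK
 * of 0 means every sample of the pixel maps to physical sample 0, i.e. all
 * samples hold the same value.
 */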
static void
nir_lower_samples_identical_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_tex_instr *fmask_fetch = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_def_init(&fmask_fetch->instr, &fmask_fetch->def, 1, 32);
   nir_builder_instr_insert(b, &fmask_fetch->instr);

   nir_def_rewrite_uses(&tex->def, nir_ieq_imm(b, &fmask_fetch->def, 0));
   nir_instr_remove_v(&tex->instr);
}

static void
nir_lower_lod_zero_width(nir_builder *b, nir_tex_instr *tex)
{
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   b->cursor = nir_after_instr(&tex->instr);

   nir_def *is_zero = nir_imm_true(b);
   for (unsigned i = 0; i < tex->coord_components; i++) {
      nir_def *coord = nir_channel(b, tex->src[coord_index].src.ssa, i);

      /* Compute the sum of the absolute values of derivatives. */
      nir_def *dfdx = nir_ddx(b, coord);
      nir_def *dfdy = nir_ddy(b, coord);
      nir_def *fwidth = nir_fadd(b, nir_fabs(b, dfdx), nir_fabs(b, dfdy));

      /* Check if the sum is 0. */
      is_zero = nir_iand(b, is_zero, nir_feq_imm(b, fwidth, 0.0));
   }

   /* Replace the raw LOD by -FLT_MAX if the sum is 0 for all coordinates. */
   nir_def *adjusted_lod =
      nir_bcsel(b, is_zero, nir_imm_float(b, -FLT_MAX),
                nir_channel(b, &tex->def, 1));

   nir_def *def =
      nir_vec2(b, nir_channel(b, &tex->def, 0), adjusted_lod);

   nir_def_rewrite_uses_after(&tex->def, def, def->parent_instr);
}

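/*
 * Folds a constant texture/sampler base index into the corresponding
 * dynamic offset source, so backends only ever see a base index of zero
 * alongside a dynamic offset.
 */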
static bool
lower_index_to_offset(nir_builder *b, nir_tex_instr *tex)
{
   bool progress = false;
   b->cursor = nir_before_instr(&tex->instr);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      unsigned *index;
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_offset:
         index = &tex->texture_index;
         break;
      case nir_tex_src_sampler_offset:
         index = &tex->sampler_index;
         break;
      default:
         continue;
      }

      /* If there's no base index, there's nothing to lower */
      if ((*index) == 0)
         continue;

      nir_def *sum = nir_iadd_imm(b, tex->src[i].src.ssa, *index);
      nir_src_rewrite(&tex->src[i].src, sum);
      *index = 0;
      progress = true;
   }

   return progress;
}

static bool
nir_lower_tex_block(nir_block *block, nir_builder *b,
                    const nir_lower_tex_options *options,
                    const struct nir_shader_compiler_options *compiler_options)
{
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_tex)
         continue;

      nir_tex_instr *tex = nir_instr_as_tex(instr);
      bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));

      /* mask of src coords to saturate (clamp): */
      unsigned sat_mask = 0;
      /* ignore saturate for txf ops: they don't use samplers and can't use GL_CLAMP */
      if (nir_tex_instr_need_sampler(tex)) {
         if ((1 << tex->sampler_index) & options->saturate_r)
            sat_mask |= (1 << 2); /* .z */
         if ((1 << tex->sampler_index) & options->saturate_t)
            sat_mask |= (1 << 1); /* .y */
         if ((1 << tex->sampler_index) & options->saturate_s)
            sat_mask |= (1 << 0); /* .x */
      }

      if (options->lower_index_to_offset)
         progress |= lower_index_to_offset(b, tex);

      /* If we are clamping any coords, we must lower projector first
       * as clamping happens *after* projection:
       */
      if (lower_txp || sat_mask ||
          (options->lower_txp_array && tex->is_array)) {
         progress |= project_src(b, tex);
      }

      if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
          (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
          (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
           options->lower_rect_offset) ||
          (options->lower_offset_filter &&
           options->lower_offset_filter(instr, options->callback_data))) {
         progress = lower_offset(b, tex) || progress;
      }

      if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect &&
          tex->op != nir_texop_txf) {
         if (nir_tex_instr_is_query(tex))
            tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
         else if (compiler_options->has_texture_scaling)
            lower_rect_tex_scale(b, tex);
         else
            lower_rect(b, tex);

         progress = true;
      }

      if (tex->sampler_dim == GLSL_SAMPLER_DIM_1D &&
          (options->lower_1d || (tex->is_shadow && options->lower_1d_shadow))) {
         lower_1d(b, tex);
         progress = true;
      }

      unsigned texture_index = tex->texture_index;
      uint32_t texture_mask = 1u << texture_index;
      int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
      if (tex_index >= 0) {
         nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_index].src);
         nir_variable *var = nir_deref_instr_get_variable(deref);
         texture_index = var ? var->data.binding : 0;
         texture_mask = var && texture_index < 32 ? (1u << texture_index) : 0u;
      }

      if (texture_mask & options->lower_y_uv_external) {
         lower_y_uv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_y_vu_external) {
         lower_y_vu_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_y_u_v_external) {
         lower_y_u_v_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_yx_xuxv_external) {
         lower_yx_xuxv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_yx_xvxu_external) {
         lower_yx_xvxu_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_xy_uxvx_external) {
         lower_xy_uxvx_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_xy_vxux_external) {
         lower_xy_vxux_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_ayuv_external) {
         lower_ayuv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_xyuv_external) {
         lower_xyuv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_yuv_external) {
         lower_yuv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_yu_yv_external) {
         lower_yu_yv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_yv_yu_external) {
         lower_yv_yu_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_y41x_external) {
         lower_y41x_external(b, tex, options, texture_index);
         progress = true;
      }

      if (sat_mask) {
         tex = saturate_src(b, tex, sat_mask);
         progress = true;
      }

      if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
         swizzle_tg4_broadcom(b, tex);
         progress = true;
      }

      if ((texture_mask & options->swizzle_result) &&
          !nir_tex_instr_is_query(tex) &&
          !(tex->is_shadow && tex->is_new_style_shadow)) {
         swizzle_result(b, tex, options->swizzles[tex->texture_index]);
         progress = true;
      }

      /* should be after swizzle so we know which channels are rgb: */
      if ((texture_mask & options->lower_srgb) &&
          !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
         linearize_srgb_result(b, tex);
         progress = true;
      }

      const bool has_min_lod =
         nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
      const bool has_offset =
         nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;

      if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
          options->lower_txb_shadow_clamp) {
         lower_implicit_lod(b, tex);
         progress = true;
      }

      if (options->lower_tex_packing_cb &&
          tex->op != nir_texop_txs &&
          tex->op != nir_texop_query_levels &&
          tex->op != nir_texop_texture_samples) {
         progress |= lower_tex_packing(b, tex, options);
      }

      if (tex->op == nir_texop_txd &&
          (options->lower_txd ||
           (options->lower_txd_clamp && has_min_lod) ||
           (options->lower_txd_shadow && tex->is_shadow) ||
           (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
           (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
           (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
            nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
           (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
            has_min_lod && !sampler_index_lt(tex, 16)) ||
           (options->lower_txd_cube_map &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
           (options->lower_txd_3d &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_3D) ||
           (options->lower_txd_array && tex->is_array))) {
         lower_gradient(b, tex);
         progress = true;
         continue;
      }

      /* TXF, TXS and TXL require a LOD but not everything we implement using
       * those three opcodes provides one. Provide a default LOD of 0.
       */
      if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
          (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
           tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) {
         b->cursor = nir_before_instr(&tex->instr);
         nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_imm_int(b, 0));
         progress = true;
         continue;
      }

      /* Only fragment and compute (in some cases) support implicit
       * derivatives. Lower those opcodes which use implicit derivatives to
       * use an explicit LOD of 0.
       * But don't touch RECT samplers because they don't have mips.
       */
      if (options->lower_invalid_implicit_lod &&
          nir_tex_instr_has_implicit_derivative(tex) &&
          tex->sampler_dim != GLSL_SAMPLER_DIM_RECT &&
          !nir_shader_supports_implicit_lod(b->shader)) {
         lower_zero_lod(b, tex);
         progress = true;
      }

      if (options->lower_txs_lod && tex->op == nir_texop_txs) {
         progress |= nir_lower_txs_lod(b, tex);
         continue;
      }

      if (options->lower_txs_cube_array && tex->op == nir_texop_txs &&
          tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array) {
         nir_lower_txs_cube_array(b, tex);
         progress = true;
         continue;
      }

      /* has to happen after all the other lowerings as the original tg4 gets
       * replaced by 4 tg4 instructions.
       */
      if (tex->op == nir_texop_tg4 &&
          nir_tex_instr_has_explicit_tg4_offsets(tex) &&
          options->lower_tg4_offsets) {
         progress |= lower_tg4_offsets(b, tex);
         continue;
      }

      if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_txf_ms) {
         nir_lower_ms_txf_to_fragment_fetch(b, tex);
         progress = true;
         continue;
      }

      if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_samples_identical) {
         nir_lower_samples_identical_to_fragment_fetch(b, tex);
         progress = true;
         continue;
      }

      if (options->lower_lod_zero_width && tex->op == nir_texop_lod) {
         nir_lower_lod_zero_width(b, tex);
         progress = true;
         continue;
      }
   }

   return progress;
}

static bool
nir_lower_tex_impl(nir_function_impl *impl,
                   const nir_lower_tex_options *options,
                   const struct nir_shader_compiler_options *compiler_options)
{
   bool progress = false;
   nir_builder builder = nir_builder_create(impl);

   nir_foreach_block(block, impl) {
      progress |= nir_lower_tex_block(block, &builder, options, compiler_options);
   }

   nir_metadata_preserve(impl, nir_metadata_control_flow);
   return progress;
}

bool
nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
{
   bool progress = false;

   /* lower_tg4_offsets injects new tg4 instructions that won't be lowered
    * if lower_tg4_broadcom_swizzle is also requested, so when both are set
    * we want to run lower_tg4_offsets in a separate pass first.
    */
   if (options->lower_tg4_offsets && options->lower_tg4_broadcom_swizzle) {
      nir_lower_tex_options _options = {
         .lower_tg4_offsets = true,
      };
      progress = nir_lower_tex(shader, &_options);
   }

   nir_foreach_function_impl(impl, shader) {
      progress |= nir_lower_tex_impl(impl, options, shader->options);
   }

   return progress;
}