xref: /aosp_15_r20/external/mesa3d/src/compiler/nir/nir_lower_tex.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2023 Valve Corporation
3  * Copyright © 2015 Broadcom
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 /*
26  * This lowering pass supports (as configured via nir_lower_tex_options)
27  * various texture related conversions:
28  *   + texture projector lowering: converts the coordinate division for
29  *     texture projection to be done in ALU instructions instead of
30  *     asking the texture operation to do so.
31  *   + lowering RECT: converts the un-normalized RECT texture coordinates
32  *     to normalized coordinates with txs plus ALU instructions
33  *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
34  *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
35  *     Note that this automatically triggers texture projector lowering if
36  *     needed, since clamping must happen after projector lowering.
37  *   + YUV-to-RGB conversion: to allow sampling YUV values as RGB values
38  *     according to a specific YUV color space and range.
39  */
40 
41 #include "nir.h"
42 #include "nir_builder.h"
43 #include "nir_builtin_builder.h"
44 #include "nir_format_convert.h"
45 
/* A 3x4 matrix of immediates, wrapped in a struct so the CSC coefficient
 * tables below can be stored as single static objects.  Each row is sized
 * for a vec4 immediate (see nir_build_imm in convert_yuv_to_rgb); only the
 * first three columns are ever non-zero.
 */
typedef struct nir_const_value_3_4 {
   nir_const_value v[3][4];
} nir_const_value_3_4;
49 
/* YUV -> RGB color-space-conversion matrices.  Row v[0] holds the
 * coefficients multiplied by Y, v[1] those multiplied by U (Cb) and v[2]
 * those multiplied by V (Cr); each row's three values are the contributions
 * to R, G and B respectively.  The fourth column is zero-initialized so the
 * rows can be loaded as vec4 immediates (see convert_yuv_to_rgb).
 */
static const nir_const_value_3_4 bt601_limited_range_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f } },
   { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt601_full_range_csc_coeffs = { {
   { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } },
   { { .f32 = 0.0f }, { .f32 = -0.34413629f }, { .f32 = 1.772f } },
   { { .f32 = 1.402f }, { .f32 = -0.71413629f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt709_limited_range_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f }, { .f32 = -0.21324861f }, { .f32 = 2.11240179f } },
   { { .f32 = 1.79274107f }, { .f32 = -0.53290933f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt709_full_range_csc_coeffs = { {
   { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } },
   { { .f32 = 0.0f }, { .f32 = -0.18732427f }, { .f32 = 1.8556f } },
   { { .f32 = 1.5748f }, { .f32 = -0.46812427f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt2020_limited_range_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f }, { .f32 = -0.18732610f }, { .f32 = 2.14177232f } },
   { { .f32 = 1.67878795f }, { .f32 = -0.65046843f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt2020_full_range_csc_coeffs = { {
   { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } },
   { { .f32 = 0.0f }, { .f32 = -0.16455313f }, { .f32 = 1.88140000f } },
   { { .f32 = 1.4747f }, { .f32 = -0.57139187f }, { .f32 = 0.0f } },
} };
80 
/* Constant (R, G, B) offsets added after the matrix multiply, one table per
 * matrix above.  They fold the Y/U/V bias terms of each standard into a
 * single vector so the conversion is three ffmas plus this offset (see
 * convert_yuv_to_rgb).
 */
static const float bt601_limited_range_csc_offsets[3] = {
   -0.874202218f, 0.531667823f, -1.085630789f
};
static const float bt601_full_range_csc_offsets[3] = {
   -0.701000000f, 0.529136286f, -0.886000000f
};
static const float bt709_limited_range_csc_offsets[3] = {
   -0.972945075f, 0.301482665f, -1.133402218f
};
static const float bt709_full_range_csc_offsets[3] = {
   -0.787400000f, 0.327724273f, -0.927800000f
};
static const float bt2020_limited_range_csc_offsets[3] = {
   -0.915745075f, 0.347480639f, -1.148145075f
};
static const float bt2020_full_range_csc_offsets[3] = {
   -0.737350000f, 0.367972500f, -0.940700000f
};
99 
/* Lowers nir_tex_src_projector: removes the projector source and instead
 * divides the coordinate (and any shadow comparator) by it in ALU
 * instructions — one frcp plus an fmul per affected source.  Returns true
 * if the instruction was modified.
 */
static bool
project_src(nir_builder *b, nir_tex_instr *tex)
{
   nir_def *proj = nir_steal_tex_src(tex, nir_tex_src_projector);
   if (!proj)
      return false;

   b->cursor = nir_before_instr(&tex->instr);
   /* Multiply by the reciprocal rather than emitting a divide per source. */
   nir_def *inv_proj = nir_frcp(b, proj);

   /* Walk through the sources projecting the arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
      case nir_tex_src_comparator:
         break;
      default:
         /* All other sources (lod, bias, offset, ...) are left untouched. */
         continue;
      }
      nir_def *unprojected =
         tex->src[i].src.ssa;
      nir_def *projected = nir_fmul(b, unprojected, inv_proj);

      /* Array indices don't get projected, so make a new vector with the
       * coordinate's array index untouched.
       */
      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
         switch (tex->coord_components) {
         case 4:
            projected = nir_vec4(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, projected, 2),
                                 nir_channel(b, unprojected, 3));
            break;
         case 3:
            projected = nir_vec3(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, unprojected, 2));
            break;
         case 2:
            projected = nir_vec2(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, unprojected, 1));
            break;
         default:
            unreachable("bad texture coord count for array");
            break;
         }
      }

      nir_src_rewrite(&tex->src[i].src, projected);
   }

   return true;
}
157 
/* Lowers nir_tex_src_offset: folds the texel offset into the coordinate
 * source.  Float coordinates get the offset scaled into normalized space
 * (divided by the texture size), except for RECT samplers whose coordinates
 * are already un-normalized; integer coordinates get a plain iadd.  The
 * array index component is never offset.  Returns true if modified.
 */
static bool
lower_offset(nir_builder *b, nir_tex_instr *tex)
{
   nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);
   if (!offset)
      return false;

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   nir_def *coord = tex->src[coord_index].src.ssa;

   b->cursor = nir_before_instr(&tex->instr);

   nir_def *offset_coord;
   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
         /* RECT coordinates are un-normalized; add the offset directly. */
         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
      } else {
         nir_def *scale = NULL;

         if (b->shader->options->has_texture_scaling) {
            /* The driver provides 1/size directly; avoids a txs + frcp. */
            nir_def *idx = nir_imm_int(b, tex->texture_index);
            scale = nir_load_texture_scale(b, 32, idx);
         } else {
            nir_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
            scale = nir_frcp(b, txs);
         }

         offset_coord = nir_fadd(b, coord,
                                 nir_fmul(b,
                                          nir_i2f32(b, offset),
                                          scale));
      }
   } else {
      /* Integer (txf-style) coordinates: plain integer add. */
      offset_coord = nir_iadd(b, coord, offset);
   }

   if (tex->is_array) {
      /* The offset is not applied to the array index */
      if (tex->coord_components == 2) {
         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, coord, 1));
      } else if (tex->coord_components == 3) {
         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, offset_coord, 1),
                                 nir_channel(b, coord, 2));
      } else {
         unreachable("Invalid number of components");
      }
   }

   nir_src_rewrite(&tex->src[coord_index].src, offset_coord);

   return true;
}
214 
215 static void
lower_rect(nir_builder * b,nir_tex_instr * tex)216 lower_rect(nir_builder *b, nir_tex_instr *tex)
217 {
218    /* Set the sampler_dim to 2D here so that get_texture_size picks up the
219     * right dimensionality.
220     */
221    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
222 
223    nir_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
224    nir_def *scale = nir_frcp(b, txs);
225    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
226 
227    if (coord_index != -1) {
228       nir_def *coords =
229          tex->src[coord_index].src.ssa;
230       nir_src_rewrite(&tex->src[coord_index].src, nir_fmul(b, coords, scale));
231    }
232 }
233 
234 static void
lower_rect_tex_scale(nir_builder * b,nir_tex_instr * tex)235 lower_rect_tex_scale(nir_builder *b, nir_tex_instr *tex)
236 {
237    b->cursor = nir_before_instr(&tex->instr);
238 
239    nir_def *idx = nir_imm_int(b, tex->texture_index);
240    nir_def *scale = nir_load_texture_scale(b, 32, idx);
241    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
242 
243    if (coord_index != -1) {
244       nir_def *coords =
245          tex->src[coord_index].src.ssa;
246       nir_src_rewrite(&tex->src[coord_index].src, nir_fmul(b, coords, scale));
247    }
248 }
249 
/* Lowers 1D texturing to 2D: adds a second coordinate component (plus
 * matching offset/gradient components) and switches sampler_dim to 2D.
 * For txs, the height component added by the 2D query is dropped again so
 * consumers still see the original destination size.
 */
static void
lower_1d(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_def *coords = nir_steal_tex_src(tex, nir_tex_src_coord);
   nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);
   nir_def *ddx = nir_steal_tex_src(tex, nir_tex_src_ddx);
   nir_def *ddy = nir_steal_tex_src(tex, nir_tex_src_ddy);

   /* Add in 2D sources to become a 2D operation */
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

   if (coords) {
      /* We want to fetch texel 0 along the Y-axis. To do so, we sample at 0.5
       * to get texel 0 with correct handling of wrap modes.
       */
      nir_def *y = nir_imm_floatN_t(b, tex->op == nir_texop_txf ? 0.0 : 0.5,
                                    coords->bit_size);

      tex->coord_components++;

      if (tex->is_array && tex->op != nir_texop_lod) {
         assert(tex->coord_components == 3);

         /* Keep the array layer last: (x, y, layer). */
         nir_def *x = nir_channel(b, coords, 0);
         nir_def *idx = nir_channel(b, coords, 1);
         coords = nir_vec3(b, x, y, idx);
      } else {
         assert(tex->coord_components == 2);
         coords = nir_vec2(b, coords, y);
      }

      nir_tex_instr_add_src(tex, nir_tex_src_coord, coords);
   }

   if (offset) {
      /* Pad the offset with a zero Y component. */
      nir_tex_instr_add_src(tex, nir_tex_src_offset,
                            nir_pad_vector_imm_int(b, offset, 0, 2));
   }

   if (ddx || ddy) {
      /* NOTE(review): assumes ddx and ddy are always present as a pair —
       * nir_pad_vector_imm_int would dereference a NULL def otherwise.
       */
      nir_tex_instr_add_src(tex, nir_tex_src_ddx,
                            nir_pad_vector_imm_int(b, ddx, 0, 2));

      nir_tex_instr_add_src(tex, nir_tex_src_ddy,
                            nir_pad_vector_imm_int(b, ddy, 0, 2));
   }

   /* Handle destination component mismatch for txs. */
   if (tex->op == nir_texop_txs) {
      b->cursor = nir_after_instr(&tex->instr);

      nir_def *dst;
      if (tex->is_array) {
         assert(tex->def.num_components == 2);
         tex->def.num_components = 3;

         /* For array, we take .xz to skip the newly added height */
         dst = nir_channels(b, &tex->def, (1 << 0) | (1 << 2));
      } else {
         assert(tex->def.num_components == 1);
         tex->def.num_components = 2;

         dst = nir_channel(b, &tex->def, 0);
      }

      nir_def_rewrite_uses_after(&tex->def, dst, dst->parent_instr);
   }
}
320 
321 static void
lower_lod(nir_builder * b,nir_tex_instr * tex,nir_def * lod)322 lower_lod(nir_builder *b, nir_tex_instr *tex, nir_def *lod)
323 {
324    assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
325    assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
326    assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
327    assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);
328 
329    /* If we have a bias, add it in */
330    nir_def *bias = nir_steal_tex_src(tex, nir_tex_src_bias);
331    if (bias)
332       lod = nir_fadd(b, lod, bias);
333 
334    /* If we have a minimum LOD, clamp LOD accordingly */
335    nir_def *min_lod = nir_steal_tex_src(tex, nir_tex_src_min_lod);
336    if (min_lod)
337       lod = nir_fmax(b, lod, min_lod);
338 
339    nir_tex_instr_add_src(tex, nir_tex_src_lod, lod);
340    tex->op = nir_texop_txl;
341 }
342 
343 static void
lower_implicit_lod(nir_builder * b,nir_tex_instr * tex)344 lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
345 {
346    b->cursor = nir_before_instr(&tex->instr);
347    lower_lod(b, tex, nir_get_texture_lod(b, tex));
348 }
349 
350 static void
lower_zero_lod(nir_builder * b,nir_tex_instr * tex)351 lower_zero_lod(nir_builder *b, nir_tex_instr *tex)
352 {
353    b->cursor = nir_before_instr(&tex->instr);
354 
355    if (tex->op == nir_texop_lod) {
356       nir_def_replace(&tex->def, nir_imm_int(b, 0));
357       return;
358    }
359 
360    lower_lod(b, tex, nir_imm_int(b, 0));
361 }
362 
/* Emits a 2D nir_texop_tex sampling a single plane of a multi-planar YUV
 * texture: all sources of the original instruction are reused and a
 * nir_tex_src_plane source selecting `plane` is appended.  Returns the
 * 4-component sample, scaled by the per-texture scale factor when one is
 * configured in the options.
 */
static nir_def *
sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
             const nir_lower_tex_options *options)
{
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
   assert(tex->op == nir_texop_tex);
   assert(tex->coord_components == 2);

   /* One extra source slot for the plane selector. */
   nir_tex_instr *plane_tex =
      nir_tex_instr_create(b->shader, tex->num_srcs + 1);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      plane_tex->src[i].src = nir_src_for_ssa(tex->src[i].src.ssa);
      plane_tex->src[i].src_type = tex->src[i].src_type;
   }
   plane_tex->src[tex->num_srcs] = nir_tex_src_for_ssa(nir_tex_src_plane,
                                                       nir_imm_int(b, plane));
   plane_tex->op = nir_texop_tex;
   plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   plane_tex->dest_type = nir_type_float | tex->def.bit_size;
   plane_tex->coord_components = 2;

   plane_tex->texture_index = tex->texture_index;
   plane_tex->sampler_index = tex->sampler_index;

   nir_def_init(&plane_tex->instr, &plane_tex->def, 4,
                tex->def.bit_size);

   nir_builder_instr_insert(b, &plane_tex->instr);

   /* If scaling_factor is set, return a scaled value. */
   if (options->scale_factors[tex->texture_index])
      return nir_fmul_imm(b, &plane_tex->def,
                          options->scale_factors[tex->texture_index]);

   return &plane_tex->def;
}
400 
/* Converts already-sampled Y/U/V(/A) channel values to RGBA and rewrites
 * all uses of the original texture result with the converted color.  The
 * CSC matrix and offset vector are selected from the per-texture
 * bt709/bt2020/full-range bitmasks in the options; BT.601 limited range is
 * the default.
 */
static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
                   nir_def *y, nir_def *u, nir_def *v,
                   nir_def *a,
                   const nir_lower_tex_options *options,
                   unsigned texture_index)
{
   const float *offset_vals;
   const nir_const_value_3_4 *m;
   /* A texture can't be flagged as both BT.709 and BT.2020. */
   assert((options->bt709_external & options->bt2020_external) == 0);
   if (options->yuv_full_range_external & (1u << texture_index)) {
      if (options->bt709_external & (1u << texture_index)) {
         m = &bt709_full_range_csc_coeffs;
         offset_vals = bt709_full_range_csc_offsets;
      } else if (options->bt2020_external & (1u << texture_index)) {
         m = &bt2020_full_range_csc_coeffs;
         offset_vals = bt2020_full_range_csc_offsets;
      } else {
         m = &bt601_full_range_csc_coeffs;
         offset_vals = bt601_full_range_csc_offsets;
      }
   } else {
      if (options->bt709_external & (1u << texture_index)) {
         m = &bt709_limited_range_csc_coeffs;
         offset_vals = bt709_limited_range_csc_offsets;
      } else if (options->bt2020_external & (1u << texture_index)) {
         m = &bt2020_limited_range_csc_coeffs;
         offset_vals = bt2020_limited_range_csc_offsets;
      } else {
         m = &bt601_limited_range_csc_coeffs;
         offset_vals = bt601_limited_range_csc_offsets;
      }
   }

   /* Tables are stored at 32 bits; do the math at the dest bit size. */
   unsigned bit_size = tex->def.bit_size;

   /* The offset's .w carries the alpha value straight through the ffma
    * chain below.
    */
   nir_def *offset =
      nir_vec4(b,
               nir_imm_floatN_t(b, offset_vals[0], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[1], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[2], a->bit_size),
               a);

   offset = nir_f2fN(b, offset, bit_size);

   nir_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[0]), bit_size);
   nir_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[1]), bit_size);
   nir_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[2]), bit_size);

   /* rgba = y*m0 + u*m1 + v*m2 + offset */
   nir_def *result =
      nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));

   nir_def_rewrite_uses(&tex->def, result);
}
456 
457 static void
lower_y_uv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)458 lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
459                     const nir_lower_tex_options *options,
460                     unsigned texture_index)
461 {
462    b->cursor = nir_after_instr(&tex->instr);
463 
464    nir_def *y = sample_plane(b, tex, 0, options);
465    nir_def *uv = sample_plane(b, tex, 1, options);
466 
467    convert_yuv_to_rgb(b, tex,
468                       nir_channel(b, y, 0),
469                       nir_channel(b, uv, 0),
470                       nir_channel(b, uv, 1),
471                       nir_imm_float(b, 1.0f),
472                       options,
473                       texture_index);
474 }
475 
476 static void
lower_y_vu_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)477 lower_y_vu_external(nir_builder *b, nir_tex_instr *tex,
478                     const nir_lower_tex_options *options,
479                     unsigned texture_index)
480 {
481    b->cursor = nir_after_instr(&tex->instr);
482 
483    nir_def *y = sample_plane(b, tex, 0, options);
484    nir_def *vu = sample_plane(b, tex, 1, options);
485 
486    convert_yuv_to_rgb(b, tex,
487                       nir_channel(b, y, 0),
488                       nir_channel(b, vu, 1),
489                       nir_channel(b, vu, 0),
490                       nir_imm_float(b, 1.0f),
491                       options,
492                       texture_index);
493 }
494 
495 static void
lower_y_u_v_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)496 lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
497                      const nir_lower_tex_options *options,
498                      unsigned texture_index)
499 {
500    b->cursor = nir_after_instr(&tex->instr);
501 
502    nir_def *y = sample_plane(b, tex, 0, options);
503    nir_def *u = sample_plane(b, tex, 1, options);
504    nir_def *v = sample_plane(b, tex, 2, options);
505 
506    convert_yuv_to_rgb(b, tex,
507                       nir_channel(b, y, 0),
508                       nir_channel(b, u, 0),
509                       nir_channel(b, v, 0),
510                       nir_imm_float(b, 1.0f),
511                       options,
512                       texture_index);
513 }
514 
515 static void
lower_yx_xuxv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)516 lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
517                        const nir_lower_tex_options *options,
518                        unsigned texture_index)
519 {
520    b->cursor = nir_after_instr(&tex->instr);
521 
522    nir_def *y = sample_plane(b, tex, 0, options);
523    nir_def *xuxv = sample_plane(b, tex, 1, options);
524 
525    convert_yuv_to_rgb(b, tex,
526                       nir_channel(b, y, 0),
527                       nir_channel(b, xuxv, 1),
528                       nir_channel(b, xuxv, 3),
529                       nir_imm_float(b, 1.0f),
530                       options,
531                       texture_index);
532 }
533 
534 static void
lower_yx_xvxu_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)535 lower_yx_xvxu_external(nir_builder *b, nir_tex_instr *tex,
536                        const nir_lower_tex_options *options,
537                        unsigned texture_index)
538 {
539    b->cursor = nir_after_instr(&tex->instr);
540 
541    nir_def *y = sample_plane(b, tex, 0, options);
542    nir_def *xvxu = sample_plane(b, tex, 1, options);
543 
544    convert_yuv_to_rgb(b, tex,
545                       nir_channel(b, y, 0),
546                       nir_channel(b, xvxu, 3),
547                       nir_channel(b, xvxu, 1),
548                       nir_imm_float(b, 1.0f),
549                       options,
550                       texture_index);
551 }
552 
553 static void
lower_xy_uxvx_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)554 lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
555                        const nir_lower_tex_options *options,
556                        unsigned texture_index)
557 {
558    b->cursor = nir_after_instr(&tex->instr);
559 
560    nir_def *y = sample_plane(b, tex, 0, options);
561    nir_def *uxvx = sample_plane(b, tex, 1, options);
562 
563    convert_yuv_to_rgb(b, tex,
564                       nir_channel(b, y, 1),
565                       nir_channel(b, uxvx, 0),
566                       nir_channel(b, uxvx, 2),
567                       nir_imm_float(b, 1.0f),
568                       options,
569                       texture_index);
570 }
571 
572 static void
lower_xy_vxux_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)573 lower_xy_vxux_external(nir_builder *b, nir_tex_instr *tex,
574                        const nir_lower_tex_options *options,
575                        unsigned texture_index)
576 {
577    b->cursor = nir_after_instr(&tex->instr);
578 
579    nir_def *y = sample_plane(b, tex, 0, options);
580    nir_def *vxux = sample_plane(b, tex, 1, options);
581 
582    convert_yuv_to_rgb(b, tex,
583                       nir_channel(b, y, 1),
584                       nir_channel(b, vxux, 2),
585                       nir_channel(b, vxux, 0),
586                       nir_imm_float(b, 1.0f),
587                       options,
588                       texture_index);
589 }
590 
591 static void
lower_ayuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)592 lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
593                     const nir_lower_tex_options *options,
594                     unsigned texture_index)
595 {
596    b->cursor = nir_after_instr(&tex->instr);
597 
598    nir_def *ayuv = sample_plane(b, tex, 0, options);
599 
600    convert_yuv_to_rgb(b, tex,
601                       nir_channel(b, ayuv, 2),
602                       nir_channel(b, ayuv, 1),
603                       nir_channel(b, ayuv, 0),
604                       nir_channel(b, ayuv, 3),
605                       options,
606                       texture_index);
607 }
608 
609 static void
lower_y41x_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)610 lower_y41x_external(nir_builder *b, nir_tex_instr *tex,
611                     const nir_lower_tex_options *options,
612                     unsigned texture_index)
613 {
614    b->cursor = nir_after_instr(&tex->instr);
615 
616    nir_def *y41x = sample_plane(b, tex, 0, options);
617 
618    convert_yuv_to_rgb(b, tex,
619                       nir_channel(b, y41x, 1),
620                       nir_channel(b, y41x, 0),
621                       nir_channel(b, y41x, 2),
622                       nir_channel(b, y41x, 3),
623                       options,
624                       texture_index);
625 }
626 
627 static void
lower_xyuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)628 lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
629                     const nir_lower_tex_options *options,
630                     unsigned texture_index)
631 {
632    b->cursor = nir_after_instr(&tex->instr);
633 
634    nir_def *xyuv = sample_plane(b, tex, 0, options);
635 
636    convert_yuv_to_rgb(b, tex,
637                       nir_channel(b, xyuv, 2),
638                       nir_channel(b, xyuv, 1),
639                       nir_channel(b, xyuv, 0),
640                       nir_imm_float(b, 1.0f),
641                       options,
642                       texture_index);
643 }
644 
645 static void
lower_yuv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)646 lower_yuv_external(nir_builder *b, nir_tex_instr *tex,
647                    const nir_lower_tex_options *options,
648                    unsigned texture_index)
649 {
650    b->cursor = nir_after_instr(&tex->instr);
651 
652    nir_def *yuv = sample_plane(b, tex, 0, options);
653 
654    convert_yuv_to_rgb(b, tex,
655                       nir_channel(b, yuv, 0),
656                       nir_channel(b, yuv, 1),
657                       nir_channel(b, yuv, 2),
658                       nir_imm_float(b, 1.0f),
659                       options,
660                       texture_index);
661 }
662 
663 static void
lower_yu_yv_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)664 lower_yu_yv_external(nir_builder *b, nir_tex_instr *tex,
665                      const nir_lower_tex_options *options,
666                      unsigned texture_index)
667 {
668    b->cursor = nir_after_instr(&tex->instr);
669 
670    nir_def *yuv = sample_plane(b, tex, 0, options);
671 
672    convert_yuv_to_rgb(b, tex,
673                       nir_channel(b, yuv, 1),
674                       nir_channel(b, yuv, 2),
675                       nir_channel(b, yuv, 0),
676                       nir_imm_float(b, 1.0f),
677                       options,
678                       texture_index);
679 }
680 
681 static void
lower_yv_yu_external(nir_builder * b,nir_tex_instr * tex,const nir_lower_tex_options * options,unsigned texture_index)682 lower_yv_yu_external(nir_builder *b, nir_tex_instr *tex,
683                      const nir_lower_tex_options *options,
684                      unsigned texture_index)
685 {
686    b->cursor = nir_after_instr(&tex->instr);
687 
688    nir_def *yuv = sample_plane(b, tex, 0, options);
689 
690    convert_yuv_to_rgb(b, tex,
691                       nir_channel(b, yuv, 2),
692                       nir_channel(b, yuv, 1),
693                       nir_channel(b, yuv, 0),
694                       nir_imm_float(b, 1.0f),
695                       options,
696                       texture_index);
697 }
698 
699 /*
700  * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
701  * computed from the gradients.
702  */
703 static void
replace_gradient_with_lod(nir_builder * b,nir_def * lod,nir_tex_instr * tex)704 replace_gradient_with_lod(nir_builder *b, nir_def *lod, nir_tex_instr *tex)
705 {
706    assert(tex->op == nir_texop_txd);
707 
708    nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
709    nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));
710 
711    /* If we have a minimum LOD, clamp LOD accordingly */
712    nir_def *min_lod = nir_steal_tex_src(tex, nir_tex_src_min_lod);
713    if (min_lod)
714       lod = nir_fmax(b, lod, min_lod);
715 
716    nir_tex_instr_add_src(tex, nir_tex_src_lod, lod);
717    tex->op = nir_texop_txl;
718 }
719 
720 static void
lower_gradient_cube_map(nir_builder * b,nir_tex_instr * tex)721 lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
722 {
723    assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
724    assert(tex->op == nir_texop_txd);
725 
726    /* Use textureSize() to get the width and height of LOD 0 */
727    nir_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));
728 
729    /* Cubemap texture lookups first generate a texture coordinate normalized
730     * to [-1, 1] on the appropiate face. The appropiate face is determined
731     * by which component has largest magnitude and its sign. The texture
732     * coordinate is the quotient of the remaining texture coordinates against
733     * that absolute value of the component of largest magnitude. This
734     * division requires that the computing of the derivative of the texel
735     * coordinate must use the quotient rule. The high level GLSL code is as
736     * follows:
737     *
738     * Step 1: selection
739     *
740     * vec3 abs_p, Q, dQdx, dQdy;
741     * abs_p = abs(ir->coordinate);
742     * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
743     *    Q = ir->coordinate.yzx;
744     *    dQdx = ir->lod_info.grad.dPdx.yzx;
745     *    dQdy = ir->lod_info.grad.dPdy.yzx;
746     * }
747     * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
748     *    Q = ir->coordinate.xzy;
749     *    dQdx = ir->lod_info.grad.dPdx.xzy;
750     *    dQdy = ir->lod_info.grad.dPdy.xzy;
751     * }
752     * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
753     *    Q = ir->coordinate;
754     *    dQdx = ir->lod_info.grad.dPdx;
755     *    dQdy = ir->lod_info.grad.dPdy;
756     * }
757     *
758     * Step 2: use quotient rule to compute derivative. The normalized to
759     * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
760     * only concerned with the magnitudes of the derivatives whose values are
761     * not affected by the sign. We drop the sign from the computation.
762     *
763     * vec2 dx, dy;
764     * float recip;
765     *
766     * recip = 1.0 / Q.z;
767     * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
768     * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
769     *
770     * Step 3: compute LOD. At this point we have the derivatives of the
771     * texture coordinates normalized to [-1,1]. We take the LOD to be
772     *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L)
773     *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L)
774     *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
775     *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
776     *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
777     * where L is the dimension of the cubemap. The code is:
778     *
779     * float M, result;
780     * M = max(dot(dx, dx), dot(dy, dy));
781     * L = textureSize(sampler, 0).x;
782     * result = -1.0 + 0.5 * log2(L * L * M);
783     */
784 
785    /* coordinate */
786    nir_def *p =
787       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;
788 
789    /* unmodified dPdx, dPdy values */
790    nir_def *dPdx =
791       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
792    nir_def *dPdy =
793       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
794 
795    nir_def *abs_p = nir_fabs(b, p);
796    nir_def *abs_p_x = nir_channel(b, abs_p, 0);
797    nir_def *abs_p_y = nir_channel(b, abs_p, 1);
798    nir_def *abs_p_z = nir_channel(b, abs_p, 2);
799 
800    /* 1. compute selector */
801    nir_def *Q, *dQdx, *dQdy;
802 
803    nir_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
804    nir_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));
805 
806    unsigned yzx[3] = { 1, 2, 0 };
807    unsigned xzy[3] = { 0, 2, 1 };
808 
809    Q = nir_bcsel(b, cond_z,
810                  p,
811                  nir_bcsel(b, cond_y,
812                            nir_swizzle(b, p, xzy, 3),
813                            nir_swizzle(b, p, yzx, 3)));
814 
815    dQdx = nir_bcsel(b, cond_z,
816                     dPdx,
817                     nir_bcsel(b, cond_y,
818                               nir_swizzle(b, dPdx, xzy, 3),
819                               nir_swizzle(b, dPdx, yzx, 3)));
820 
821    dQdy = nir_bcsel(b, cond_z,
822                     dPdy,
823                     nir_bcsel(b, cond_y,
824                               nir_swizzle(b, dPdy, xzy, 3),
825                               nir_swizzle(b, dPdy, yzx, 3)));
826 
827    /* 2. quotient rule */
828 
829    /* tmp = Q.xy * recip;
830     * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
831     * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
832     */
833    nir_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));
834 
835    nir_def *Q_xy = nir_trim_vector(b, Q, 2);
836    nir_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);
837 
838    nir_def *dQdx_xy = nir_trim_vector(b, dQdx, 2);
839    nir_def *dQdx_z = nir_channel(b, dQdx, 2);
840    nir_def *dx =
841       nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));
842 
843    nir_def *dQdy_xy = nir_trim_vector(b, dQdy, 2);
844    nir_def *dQdy_z = nir_channel(b, dQdy, 2);
845    nir_def *dy =
846       nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));
847 
848    /* M = max(dot(dx, dx), dot(dy, dy)); */
849    nir_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));
850 
851    /* size has textureSize() of LOD 0 */
852    nir_def *L = nir_channel(b, size, 0);
853 
854    /* lod = -1.0 + 0.5 * log2(L * L * M); */
855    nir_def *lod =
856       nir_fadd(b,
857                nir_imm_float(b, -1.0f),
858                nir_fmul(b,
859                         nir_imm_float(b, 0.5f),
860                         nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));
861 
862    /* 3. Replace the gradient instruction with an equivalent lod instruction */
863    replace_gradient_with_lod(b, lod, tex);
864 }
865 
866 static void
lower_gradient(nir_builder * b,nir_tex_instr * tex)867 lower_gradient(nir_builder *b, nir_tex_instr *tex)
868 {
869    /* Cubes are more complicated and have their own function */
870    if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
871       lower_gradient_cube_map(b, tex);
872       return;
873    }
874 
875    assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
876    assert(tex->op == nir_texop_txd);
877 
878    /* Use textureSize() to get the width and height of LOD 0 */
879    unsigned component_mask;
880    switch (tex->sampler_dim) {
881    case GLSL_SAMPLER_DIM_3D:
882       component_mask = 7;
883       break;
884    case GLSL_SAMPLER_DIM_1D:
885       component_mask = 1;
886       break;
887    default:
888       component_mask = 3;
889       break;
890    }
891 
892    nir_def *size =
893       nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)),
894                    component_mask);
895 
896    /* Scale the gradients by width and height.  Effectively, the incoming
897     * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
898     * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
899     */
900    nir_def *ddx =
901       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
902    nir_def *ddy =
903       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
904 
905    nir_def *dPdx = nir_fmul(b, ddx, size);
906    nir_def *dPdy = nir_fmul(b, ddy, size);
907 
908    nir_def *rho;
909    if (dPdx->num_components == 1) {
910       rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
911    } else {
912       rho = nir_fmax(b,
913                      nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
914                      nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
915    }
916 
917    /* lod = log2(rho).  We're ignoring GL state biases for now. */
918    nir_def *lod = nir_flog2(b, rho);
919 
920    /* Replace the gradient instruction with an equivalent lod instruction */
921    replace_gradient_with_lod(b, lod, tex);
922 }
923 
924 /* tex(s, coord) = txd(s, coord, dfdx(coord), dfdy(coord)) */
925 static nir_tex_instr *
lower_tex_to_txd(nir_builder * b,nir_tex_instr * tex)926 lower_tex_to_txd(nir_builder *b, nir_tex_instr *tex)
927 {
928    b->cursor = nir_after_instr(&tex->instr);
929    nir_tex_instr *txd = nir_tex_instr_create(b->shader, tex->num_srcs + 2);
930 
931    txd->op = nir_texop_txd;
932    txd->sampler_dim = tex->sampler_dim;
933    txd->dest_type = tex->dest_type;
934    txd->coord_components = tex->coord_components;
935    txd->texture_index = tex->texture_index;
936    txd->sampler_index = tex->sampler_index;
937    txd->is_array = tex->is_array;
938    txd->is_shadow = tex->is_shadow;
939    txd->is_new_style_shadow = tex->is_new_style_shadow;
940 
941    /* reuse existing srcs */
942    for (unsigned i = 0; i < tex->num_srcs; i++) {
943       txd->src[i].src = nir_src_for_ssa(tex->src[i].src.ssa);
944       txd->src[i].src_type = tex->src[i].src_type;
945    }
946    int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
947    assert(coord_idx >= 0);
948    nir_def *coord = tex->src[coord_idx].src.ssa;
949    /* don't take the derivative of the array index */
950    if (tex->is_array)
951       coord = nir_channels(b, coord, nir_component_mask(coord->num_components - 1));
952    nir_def *dfdx = nir_ddx(b, coord);
953    nir_def *dfdy = nir_ddy(b, coord);
954    txd->src[tex->num_srcs] = nir_tex_src_for_ssa(nir_tex_src_ddx, dfdx);
955    txd->src[tex->num_srcs + 1] = nir_tex_src_for_ssa(nir_tex_src_ddy, dfdy);
956 
957    nir_def_init(&txd->instr, &txd->def,
958                 tex->def.num_components,
959                 tex->def.bit_size);
960    nir_builder_instr_insert(b, &txd->instr);
961    nir_def_replace(&tex->def, &txd->def);
962    return txd;
963 }
964 
965 /* txb(s, coord, bias) = txl(s, coord, lod(s, coord).y + bias) */
966 static nir_tex_instr *
lower_txb_to_txl(nir_builder * b,nir_tex_instr * tex)967 lower_txb_to_txl(nir_builder *b, nir_tex_instr *tex)
968 {
969    b->cursor = nir_after_instr(&tex->instr);
970    nir_tex_instr *txl = nir_tex_instr_create(b->shader, tex->num_srcs);
971 
972    txl->op = nir_texop_txl;
973    txl->sampler_dim = tex->sampler_dim;
974    txl->dest_type = tex->dest_type;
975    txl->coord_components = tex->coord_components;
976    txl->texture_index = tex->texture_index;
977    txl->sampler_index = tex->sampler_index;
978    txl->is_array = tex->is_array;
979    txl->is_shadow = tex->is_shadow;
980    txl->is_new_style_shadow = tex->is_new_style_shadow;
981 
982    /* reuse all but bias src */
983    for (int i = 0; i < tex->num_srcs; i++) {
984       if (tex->src[i].src_type != nir_tex_src_bias) {
985          txl->src[i].src = nir_src_for_ssa(tex->src[i].src.ssa);
986          txl->src[i].src_type = tex->src[i].src_type;
987       }
988    }
989    nir_def *lod = nir_get_texture_lod(b, tex);
990 
991    int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
992    assert(bias_idx >= 0);
993    lod = nir_fadd(b, lod, tex->src[bias_idx].src.ssa);
994    txl->src[tex->num_srcs - 1] = nir_tex_src_for_ssa(nir_tex_src_lod, lod);
995 
996    nir_def_init(&txl->instr, &txl->def,
997                 tex->def.num_components,
998                 tex->def.bit_size);
999    nir_builder_instr_insert(b, &txl->instr);
1000    nir_def_replace(&tex->def, &txl->def);
1001    return txl;
1002 }
1003 
1004 static nir_tex_instr *
saturate_src(nir_builder * b,nir_tex_instr * tex,unsigned sat_mask)1005 saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
1006 {
1007    if (tex->op == nir_texop_tex)
1008       tex = lower_tex_to_txd(b, tex);
1009    else if (tex->op == nir_texop_txb)
1010       tex = lower_txb_to_txl(b, tex);
1011 
1012    b->cursor = nir_before_instr(&tex->instr);
1013    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
1014 
1015    if (coord_index != -1) {
1016       nir_def *src =
1017          tex->src[coord_index].src.ssa;
1018 
1019       /* split src into components: */
1020       nir_def *comp[4];
1021 
1022       assume(tex->coord_components >= 1);
1023 
1024       for (unsigned j = 0; j < tex->coord_components; j++)
1025          comp[j] = nir_channel(b, src, j);
1026 
1027       /* clamp requested components, array index does not get clamped: */
1028       unsigned ncomp = tex->coord_components;
1029       if (tex->is_array)
1030          ncomp--;
1031 
1032       for (unsigned j = 0; j < ncomp; j++) {
1033          if ((1 << j) & sat_mask) {
1034             if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
1035                /* non-normalized texture coords, so clamp to texture
1036                 * size rather than [0.0, 1.0]
1037                 */
1038                nir_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
1039                comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
1040                comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
1041             } else {
1042                comp[j] = nir_fsat(b, comp[j]);
1043             }
1044          }
1045       }
1046 
1047       /* and move the result back into a single vecN: */
1048       src = nir_vec(b, comp, tex->coord_components);
1049 
1050       nir_src_rewrite(&tex->src[coord_index].src, src);
1051    }
1052    return tex;
1053 }
1054 
1055 static nir_def *
get_zero_or_one(nir_builder * b,nir_alu_type type,uint8_t swizzle_val)1056 get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
1057 {
1058    nir_const_value v[4];
1059 
1060    memset(&v, 0, sizeof(v));
1061 
1062    if (swizzle_val == 4) {
1063       v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
1064    } else {
1065       assert(swizzle_val == 5);
1066       if (type == nir_type_float32)
1067          v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
1068       else
1069          v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
1070    }
1071 
1072    return nir_build_imm(b, 4, 32, v);
1073 }
1074 
1075 static void
swizzle_tg4_broadcom(nir_builder * b,nir_tex_instr * tex)1076 swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
1077 {
1078    b->cursor = nir_after_instr(&tex->instr);
1079 
1080    assert(nir_tex_instr_dest_size(tex) == 4);
1081    unsigned swiz[4] = { 2, 3, 1, 0 };
1082    nir_def *swizzled = nir_swizzle(b, &tex->def, swiz, 4);
1083 
1084    nir_def_rewrite_uses_after(&tex->def, swizzled,
1085                               swizzled->parent_instr);
1086 }
1087 
1088 static void
swizzle_result(nir_builder * b,nir_tex_instr * tex,const uint8_t swizzle[4])1089 swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
1090 {
1091    b->cursor = nir_after_instr(&tex->instr);
1092 
1093    nir_def *swizzled;
1094    if (tex->op == nir_texop_tg4) {
1095       if (swizzle[tex->component] < 4) {
1096          /* This one's easy */
1097          tex->component = swizzle[tex->component];
1098          return;
1099       } else {
1100          swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
1101       }
1102    } else {
1103       assert(nir_tex_instr_dest_size(tex) == 4);
1104       if (swizzle[0] < 4 && swizzle[1] < 4 &&
1105           swizzle[2] < 4 && swizzle[3] < 4) {
1106          unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
1107          /* We have no 0s or 1s, just emit a swizzling MOV */
1108          swizzled = nir_swizzle(b, &tex->def, swiz, 4);
1109       } else {
1110          nir_scalar srcs[4];
1111          for (unsigned i = 0; i < 4; i++) {
1112             if (swizzle[i] < 4) {
1113                srcs[i] = nir_get_scalar(&tex->def, swizzle[i]);
1114             } else {
1115                srcs[i] = nir_get_scalar(get_zero_or_one(b, tex->dest_type, swizzle[i]), 0);
1116             }
1117          }
1118          swizzled = nir_vec_scalars(b, srcs, 4);
1119       }
1120    }
1121 
1122    nir_def_rewrite_uses_after(&tex->def, swizzled,
1123                               swizzled->parent_instr);
1124 }
1125 
1126 static void
linearize_srgb_result(nir_builder * b,nir_tex_instr * tex)1127 linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
1128 {
1129    assert(nir_tex_instr_dest_size(tex) == 4);
1130    assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
1131 
1132    b->cursor = nir_after_instr(&tex->instr);
1133 
1134    nir_def *rgb =
1135       nir_format_srgb_to_linear(b, nir_trim_vector(b, &tex->def, 3));
1136 
1137    /* alpha is untouched: */
1138    nir_def *result = nir_vec4(b,
1139                               nir_channel(b, rgb, 0),
1140                               nir_channel(b, rgb, 1),
1141                               nir_channel(b, rgb, 2),
1142                               nir_channel(b, &tex->def, 3));
1143 
1144    nir_def_rewrite_uses_after(&tex->def, result,
1145                               result->parent_instr);
1146 }
1147 
/**
 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
 * i16, or u16, or a single unorm4x8 value.
 *
 * Note that we don't change the destination num_components, because
 * nir_tex_instr_dest_size() will still return 4.  The driver is just expected
 * to not store the other channels, given that nothing at the NIR level will
 * read them.
 *
 * Returns true if any unpacking code was emitted.
 */
static bool
lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
                  const nir_lower_tex_options *options)
{
   nir_def *color = &tex->def;

   b->cursor = nir_after_instr(&tex->instr);

   /* Ask the driver how this particular instruction's result is packed. */
   assert(options->lower_tex_packing_cb);
   enum nir_lower_tex_packing packing =
      options->lower_tex_packing_cb(tex, options->lower_tex_packing_data);

   switch (packing) {
   case nir_lower_tex_packing_none:
      /* Result is not packed; nothing to do. */
      return false;

   case nir_lower_tex_packing_16: {
      static const unsigned bits[4] = { 16, 16, 16, 16 };

      switch (nir_alu_type_get_base_type(tex->dest_type)) {
      case nir_type_float:
         switch (nir_tex_instr_dest_size(tex)) {
         case 1:
            /* Scalar (new-style shadow) result: one f16 in the low half
             * of .x.
             */
            assert(tex->is_shadow && tex->is_new_style_shadow);
            color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
            break;
         case 2: {
            /* Two f16 values packed into the 32-bit .x channel. */
            nir_def *rg = nir_channel(b, color, 0);
            color = nir_vec2(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg));
            break;
         }
         case 4: {
            /* Four f16 values packed pairwise: rg in .x, ba in .y. */
            nir_def *rg = nir_channel(b, color, 0);
            nir_def *ba = nir_channel(b, color, 1);
            color = nir_vec4(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg),
                             nir_unpack_half_2x16_split_x(b, ba),
                             nir_unpack_half_2x16_split_y(b, ba));
            break;
         }
         default:
            unreachable("wrong dest_size");
         }
         break;

      case nir_type_int:
         /* Sign-extend four packed 16-bit signed integers. */
         color = nir_format_unpack_sint(b, color, bits, 4);
         break;

      case nir_type_uint:
         /* Zero-extend four packed 16-bit unsigned integers. */
         color = nir_format_unpack_uint(b, color, bits, 4);
         break;

      default:
         unreachable("unknown base type");
      }
      break;
   }

   case nir_lower_tex_packing_8:
      /* Four unorm8 channels packed into .x; float results only. */
      assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
      color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
      break;
   }

   /* Route all readers of the texture result to the unpacked value. */
   nir_def_rewrite_uses_after(&tex->def, color,
                              color->parent_instr);
   return true;
}
1229 
1230 static bool
sampler_index_lt(nir_tex_instr * tex,unsigned max)1231 sampler_index_lt(nir_tex_instr *tex, unsigned max)
1232 {
1233    assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);
1234 
1235    unsigned sampler_index = tex->sampler_index;
1236 
1237    int sampler_offset_idx =
1238       nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
1239    if (sampler_offset_idx >= 0) {
1240       if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
1241          return false;
1242 
1243       sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
1244    }
1245 
1246    return sampler_index < max;
1247 }
1248 
/* Lowers a tg4 that uses the explicit per-texel tg4_offsets[] array into
 * four separate tg4 instructions, each with a single constant offset
 * source.  Channel 3 of each copy's result provides one component of the
 * final vec4; for sparse gathers, the residency codes of all four copies
 * are AND-combined into an extra fifth component.
 */
static bool
lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_tg4);
   assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
   assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);

   b->cursor = nir_after_instr(&tex->instr);

   /* dest[0..3] are the four gathered values; dest[4] is only consumed
    * when tex->def.num_components == 5 (sparse).
    */
   nir_scalar dest[5] = { 0 };
   nir_def *residency = NULL;
   for (unsigned i = 0; i < 4; ++i) {
      /* One extra source slot for the constant offset added below. */
      nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
      tex_copy->op = tex->op;
      tex_copy->coord_components = tex->coord_components;
      tex_copy->sampler_dim = tex->sampler_dim;
      tex_copy->is_array = tex->is_array;
      tex_copy->is_shadow = tex->is_shadow;
      tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
      tex_copy->is_sparse = tex->is_sparse;
      tex_copy->is_gather_implicit_lod = tex->is_gather_implicit_lod;
      tex_copy->component = tex->component;
      tex_copy->dest_type = tex->dest_type;
      tex_copy->texture_index = tex->texture_index;
      tex_copy->sampler_index = tex->sampler_index;
      tex_copy->backend_flags = tex->backend_flags;

      /* Reuse all of the original sources... */
      for (unsigned j = 0; j < tex->num_srcs; ++j) {
         tex_copy->src[j].src = nir_src_for_ssa(tex->src[j].src.ssa);
         tex_copy->src[j].src_type = tex->src[j].src_type;
      }

      /* ...and append this texel's offset as a regular offset source. */
      nir_def *offset = nir_imm_ivec2(b, tex->tg4_offsets[i][0],
                                      tex->tg4_offsets[i][1]);
      nir_tex_src src = nir_tex_src_for_ssa(nir_tex_src_offset, offset);
      tex_copy->src[tex_copy->num_srcs - 1] = src;

      nir_def_init(&tex_copy->instr, &tex_copy->def,
                   nir_tex_instr_dest_size(tex), 32);

      nir_builder_instr_insert(b, &tex_copy->instr);

      dest[i] = nir_get_scalar(&tex_copy->def, 3);
      if (tex->is_sparse) {
         /* Combine residency: resident only if every gather is resident. */
         nir_def *code = nir_channel(b, &tex_copy->def, 4);
         if (residency)
            residency = nir_sparse_residency_code_and(b, residency, code);
         else
            residency = code;
      }
   }
   /* NOTE(review): residency is NULL when !tex->is_sparse; this relies on
    * nir_get_scalar not dereferencing its def and on dest[4] being read
    * only for 5-component (sparse) destinations — confirm.
    */
   dest[4] = nir_get_scalar(residency, 0);

   nir_def *res = nir_vec_scalars(b, dest, tex->def.num_components);
   nir_def_replace(&tex->def, res);

   return true;
}
1307 
/* Lowers txs with a non-zero LOD to a txs of LOD 0 followed by ALU
 * minification of the result.  Returns true if the instruction changed.
 */
static bool
nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
{
   /* Nothing to do when there is no LOD source or it is constant zero. */
   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (lod_idx < 0 ||
       (nir_src_is_const(tex->src[lod_idx].src) &&
        nir_src_as_int(tex->src[lod_idx].src) == 0))
      return false;

   unsigned dest_size = nir_tex_instr_dest_size(tex);

   b->cursor = nir_before_instr(&tex->instr);
   /* Keep the original LOD value for the shift below. */
   nir_def *lod = tex->src[lod_idx].src.ssa;

   /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
   nir_src_rewrite(&tex->src[lod_idx].src, nir_imm_int(b, 0));

   /* TXS(LOD) = max(TXS(0) >> LOD, 1)
    * But we do min(TXS(0), TXS(LOD)) to catch the case of a null surface,
    * which should return 0, not 1.
    */
   b->cursor = nir_after_instr(&tex->instr);
   nir_def *minified = nir_imin(b, &tex->def,
                                nir_imax(b, nir_ushr(b, &tex->def, lod),
                                         nir_imm_int(b, 1)));

   /* Make sure the component encoding the array size (if any) is not
    * minified.
    */
   if (tex->is_array) {
      nir_def *comp[3];

      assert(dest_size <= ARRAY_SIZE(comp));
      for (unsigned i = 0; i < dest_size - 1; i++)
         comp[i] = nir_channel(b, minified, i);

      /* Last component: take the un-minified array size from the txs. */
      comp[dest_size - 1] = nir_channel(b, &tex->def, dest_size - 1);
      minified = nir_vec(b, comp, dest_size);
   }

   /* Route all other readers of the txs to the minified size. */
   nir_def_rewrite_uses_after(&tex->def, minified,
                              minified->parent_instr);
   return true;
}
1352 
1353 static void
nir_lower_txs_cube_array(nir_builder * b,nir_tex_instr * tex)1354 nir_lower_txs_cube_array(nir_builder *b, nir_tex_instr *tex)
1355 {
1356    assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array);
1357    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
1358 
1359    b->cursor = nir_after_instr(&tex->instr);
1360 
1361    assert(tex->def.num_components == 3);
1362    nir_def *size = &tex->def;
1363    size = nir_vec3(b, nir_channel(b, size, 1),
1364                    nir_channel(b, size, 1),
1365                    nir_idiv(b, nir_channel(b, size, 2),
1366                             nir_imm_int(b, 6)));
1367 
1368    nir_def_rewrite_uses_after(&tex->def, size, size->parent_instr);
1369 }
1370 
/* Adjust the sample index according to AMD FMASK (fragment mask).
 *
 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
 * which is the identity mapping. Each nibble says which physical sample
 * should be fetched to get that sample.
 *
 * For example, 0x11111100 means there are only 2 samples stored and
 * the second sample covers 3/4 of the pixel. When reading samples 0
 * and 1, return physical sample 0 (determined by the first two 0s
 * in FMASK), otherwise return physical sample 1.
 *
 * The sample index should be adjusted as follows:
 *   sample_index = ubfe(fmask, sample_index * 4, 3);
 *
 * Only extract 3 bits because EQAA can generate number 8 in FMASK, which
 * means the physical sample index is unknown. We can map 8 to any valid
 * sample index, and extracting only 3 bits will map it to 0, which works
 * with all MSAA modes.
 */
static void
nir_lower_ms_txf_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   /* Fold any constant offset into the coordinate first. */
   lower_offset(b, tex);

   b->cursor = nir_before_instr(&tex->instr);

   /* Create FMASK fetch. */
   assert(tex->texture_index == 0);
   /* One fewer source: the sample index is dropped below. */
   nir_tex_instr *fmask_fetch = nir_tex_instr_create(b->shader, tex->num_srcs - 1);
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->coord_components = tex->coord_components;
   fmask_fetch->sampler_dim = tex->sampler_dim;
   fmask_fetch->is_array = tex->is_array;
   fmask_fetch->texture_non_uniform = tex->texture_non_uniform;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_def_init(&fmask_fetch->instr, &fmask_fetch->def, 1, 32);

   /* Copy every source except the sample index, compacting to the front. */
   fmask_fetch->num_srcs = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_ms_index)
         continue;
      nir_tex_src *src = &fmask_fetch->src[fmask_fetch->num_srcs++];
      src->src = nir_src_for_ssa(tex->src[i].src.ssa);
      src->src_type = tex->src[i].src_type;
   }

   nir_builder_instr_insert(b, &fmask_fetch->instr);

   /* Obtain new sample index: ubfe(fmask, sample_index * 4, 3), per the
    * comment above.
    */
   int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
   assert(ms_index >= 0);
   nir_def *sample = tex->src[ms_index].src.ssa;
   nir_def *new_sample = nir_ubfe(b, &fmask_fetch->def,
                                  nir_u2u32(b, nir_ishl_imm(b, sample, 2)),
                                  nir_imm_int(b, 3));

   /* Update instruction: fetch the remapped physical sample in place. */
   tex->op = nir_texop_fragment_fetch_amd;
   nir_src_rewrite(&tex->src[ms_index].src,
                   nir_u2uN(b, new_sample, sample->bit_size));
}
1432 
1433 static void
nir_lower_samples_identical_to_fragment_fetch(nir_builder * b,nir_tex_instr * tex)1434 nir_lower_samples_identical_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
1435 {
1436    b->cursor = nir_after_instr(&tex->instr);
1437 
1438    nir_tex_instr *fmask_fetch = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
1439    fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
1440    fmask_fetch->dest_type = nir_type_uint32;
1441    nir_def_init(&fmask_fetch->instr, &fmask_fetch->def, 1, 32);
1442    nir_builder_instr_insert(b, &fmask_fetch->instr);
1443 
1444    nir_def_rewrite_uses(&tex->def, nir_ieq_imm(b, &fmask_fetch->def, 0));
1445    nir_instr_remove_v(&tex->instr);
1446 }
1447 
1448 static void
nir_lower_lod_zero_width(nir_builder * b,nir_tex_instr * tex)1449 nir_lower_lod_zero_width(nir_builder *b, nir_tex_instr *tex)
1450 {
1451    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
1452    assert(coord_index >= 0);
1453 
1454    b->cursor = nir_after_instr(&tex->instr);
1455 
1456    nir_def *is_zero = nir_imm_true(b);
1457    for (unsigned i = 0; i < tex->coord_components; i++) {
1458       nir_def *coord = nir_channel(b, tex->src[coord_index].src.ssa, i);
1459 
1460       /* Compute the sum of the absolute values of derivatives. */
1461       nir_def *dfdx = nir_ddx(b, coord);
1462       nir_def *dfdy = nir_ddy(b, coord);
1463       nir_def *fwidth = nir_fadd(b, nir_fabs(b, dfdx), nir_fabs(b, dfdy));
1464 
1465       /* Check if the sum is 0. */
1466       is_zero = nir_iand(b, is_zero, nir_feq_imm(b, fwidth, 0.0));
1467    }
1468 
1469    /* Replace the raw LOD by -FLT_MAX if the sum is 0 for all coordinates. */
1470    nir_def *adjusted_lod =
1471       nir_bcsel(b, is_zero, nir_imm_float(b, -FLT_MAX),
1472                 nir_channel(b, &tex->def, 1));
1473 
1474    nir_def *def =
1475       nir_vec2(b, nir_channel(b, &tex->def, 0), adjusted_lod);
1476 
1477    nir_def_rewrite_uses_after(&tex->def, def, def->parent_instr);
1478 }
1479 
1480 static bool
lower_index_to_offset(nir_builder * b,nir_tex_instr * tex)1481 lower_index_to_offset(nir_builder *b, nir_tex_instr *tex)
1482 {
1483    bool progress = false;
1484    b->cursor = nir_before_instr(&tex->instr);
1485 
1486    for (unsigned i = 0; i < tex->num_srcs; i++) {
1487       unsigned *index;
1488       switch (tex->src[i].src_type) {
1489       case nir_tex_src_texture_offset:
1490          index = &tex->texture_index;
1491          break;
1492       case nir_tex_src_sampler_offset:
1493          index = &tex->sampler_index;
1494          break;
1495       default:
1496          continue;
1497       }
1498 
1499       /* If there's no base index, there's nothing to lower */
1500       if ((*index) == 0)
1501          continue;
1502 
1503       nir_def *sum = nir_iadd_imm(b, tex->src[i].src.ssa, *index);
1504       nir_src_rewrite(&tex->src[i].src, sum);
1505       *index = 0;
1506       progress = true;
1507    }
1508 
1509    return progress;
1510 }
1511 
1512 static bool
nir_lower_tex_block(nir_block * block,nir_builder * b,const nir_lower_tex_options * options,const struct nir_shader_compiler_options * compiler_options)1513 nir_lower_tex_block(nir_block *block, nir_builder *b,
1514                     const nir_lower_tex_options *options,
1515                     const struct nir_shader_compiler_options *compiler_options)
1516 {
1517    bool progress = false;
1518 
1519    nir_foreach_instr_safe(instr, block) {
1520       if (instr->type != nir_instr_type_tex)
1521          continue;
1522 
1523       nir_tex_instr *tex = nir_instr_as_tex(instr);
1524       bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
1525 
1526       /* mask of src coords to saturate (clamp): */
1527       unsigned sat_mask = 0;
1528       /* ignore saturate for txf ops: these don't use samplers and can't GL_CLAMP */
1529       if (nir_tex_instr_need_sampler(tex)) {
1530          if ((1 << tex->sampler_index) & options->saturate_r)
1531             sat_mask |= (1 << 2); /* .z */
1532          if ((1 << tex->sampler_index) & options->saturate_t)
1533             sat_mask |= (1 << 1); /* .y */
1534          if ((1 << tex->sampler_index) & options->saturate_s)
1535             sat_mask |= (1 << 0); /* .x */
1536       }
1537 
1538       if (options->lower_index_to_offset)
1539          progress |= lower_index_to_offset(b, tex);
1540 
1541       /* If we are clamping any coords, we must lower projector first
1542        * as clamping happens *after* projection:
1543        */
1544       if (lower_txp || sat_mask ||
1545           (options->lower_txp_array && tex->is_array)) {
1546          progress |= project_src(b, tex);
1547       }
1548 
1549       if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
1550           (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
1551           (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
1552            options->lower_rect_offset) ||
1553           (options->lower_offset_filter &&
1554            options->lower_offset_filter(instr, options->callback_data))) {
1555          progress = lower_offset(b, tex) || progress;
1556       }
1557 
1558       if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect &&
1559           tex->op != nir_texop_txf) {
1560          if (nir_tex_instr_is_query(tex))
1561             tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
1562          else if (compiler_options->has_texture_scaling)
1563             lower_rect_tex_scale(b, tex);
1564          else
1565             lower_rect(b, tex);
1566 
1567          progress = true;
1568       }
1569 
1570       if (tex->sampler_dim == GLSL_SAMPLER_DIM_1D &&
1571           (options->lower_1d || (tex->is_shadow && options->lower_1d_shadow))) {
1572          lower_1d(b, tex);
1573          progress = true;
1574       }
1575 
1576       unsigned texture_index = tex->texture_index;
1577       uint32_t texture_mask = 1u << texture_index;
1578       int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1579       if (tex_index >= 0) {
1580          nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_index].src);
1581          nir_variable *var = nir_deref_instr_get_variable(deref);
1582          texture_index = var ? var->data.binding : 0;
1583          texture_mask = var && texture_index < 32 ? (1u << texture_index) : 0u;
1584       }
1585 
1586       if (texture_mask & options->lower_y_uv_external) {
1587          lower_y_uv_external(b, tex, options, texture_index);
1588          progress = true;
1589       }
1590 
1591       if (texture_mask & options->lower_y_vu_external) {
1592          lower_y_vu_external(b, tex, options, texture_index);
1593          progress = true;
1594       }
1595 
1596       if (texture_mask & options->lower_y_u_v_external) {
1597          lower_y_u_v_external(b, tex, options, texture_index);
1598          progress = true;
1599       }
1600 
1601       if (texture_mask & options->lower_yx_xuxv_external) {
1602          lower_yx_xuxv_external(b, tex, options, texture_index);
1603          progress = true;
1604       }
1605 
1606       if (texture_mask & options->lower_yx_xvxu_external) {
1607          lower_yx_xvxu_external(b, tex, options, texture_index);
1608          progress = true;
1609       }
1610 
1611       if (texture_mask & options->lower_xy_uxvx_external) {
1612          lower_xy_uxvx_external(b, tex, options, texture_index);
1613          progress = true;
1614       }
1615 
1616       if (texture_mask & options->lower_xy_vxux_external) {
1617          lower_xy_vxux_external(b, tex, options, texture_index);
1618          progress = true;
1619       }
1620 
1621       if (texture_mask & options->lower_ayuv_external) {
1622          lower_ayuv_external(b, tex, options, texture_index);
1623          progress = true;
1624       }
1625 
1626       if (texture_mask & options->lower_xyuv_external) {
1627          lower_xyuv_external(b, tex, options, texture_index);
1628          progress = true;
1629       }
1630 
1631       if (texture_mask & options->lower_yuv_external) {
1632          lower_yuv_external(b, tex, options, texture_index);
1633          progress = true;
1634       }
1635 
1636       if ((1 << tex->texture_index) & options->lower_yu_yv_external) {
1637          lower_yu_yv_external(b, tex, options, texture_index);
1638          progress = true;
1639       }
1640 
1641       if ((1 << tex->texture_index) & options->lower_yv_yu_external) {
1642          lower_yv_yu_external(b, tex, options, texture_index);
1643          progress = true;
1644       }
1645 
1646       if ((1 << tex->texture_index) & options->lower_y41x_external) {
1647          lower_y41x_external(b, tex, options, texture_index);
1648          progress = true;
1649       }
1650 
1651       if (sat_mask) {
1652          tex = saturate_src(b, tex, sat_mask);
1653          progress = true;
1654       }
1655 
1656       if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
1657          swizzle_tg4_broadcom(b, tex);
1658          progress = true;
1659       }
1660 
1661       if ((texture_mask & options->swizzle_result) &&
1662           !nir_tex_instr_is_query(tex) &&
1663           !(tex->is_shadow && tex->is_new_style_shadow)) {
1664          swizzle_result(b, tex, options->swizzles[tex->texture_index]);
1665          progress = true;
1666       }
1667 
1668       /* should be after swizzle so we know which channels are rgb: */
1669       if ((texture_mask & options->lower_srgb) &&
1670           !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
1671          linearize_srgb_result(b, tex);
1672          progress = true;
1673       }
1674 
1675       const bool has_min_lod =
1676          nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
1677       const bool has_offset =
1678          nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;
1679 
1680       if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
1681           options->lower_txb_shadow_clamp) {
1682          lower_implicit_lod(b, tex);
1683          progress = true;
1684       }
1685 
1686       if (options->lower_tex_packing_cb &&
1687           tex->op != nir_texop_txs &&
1688           tex->op != nir_texop_query_levels &&
1689           tex->op != nir_texop_texture_samples) {
1690          progress |= lower_tex_packing(b, tex, options);
1691       }
1692 
1693       if (tex->op == nir_texop_txd &&
1694           (options->lower_txd ||
1695            (options->lower_txd_clamp && has_min_lod) ||
1696            (options->lower_txd_shadow && tex->is_shadow) ||
1697            (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
1698            (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
1699            (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
1700             nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
1701            (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
1702             has_min_lod && !sampler_index_lt(tex, 16)) ||
1703            (options->lower_txd_cube_map &&
1704             tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
1705            (options->lower_txd_3d &&
1706             tex->sampler_dim == GLSL_SAMPLER_DIM_3D) ||
1707            (options->lower_txd_array && tex->is_array))) {
1708          lower_gradient(b, tex);
1709          progress = true;
1710          continue;
1711       }
1712 
1713       /* TXF, TXS and TXL require a LOD but not everything we implement using those
1714        * three opcodes provides one.  Provide a default LOD of 0.
1715        */
1716       if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
1717           (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
1718            tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) {
1719          b->cursor = nir_before_instr(&tex->instr);
1720          nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_imm_int(b, 0));
1721          progress = true;
1722          continue;
1723       }
1724 
1725       /* Only fragment and compute (in some cases) support implicit
1726        * derivatives.  Lower those opcodes which use implicit derivatives to
1727        * use an explicit LOD of 0.
1728        * But don't touch RECT samplers because they don't have mips.
1729        */
1730       if (options->lower_invalid_implicit_lod &&
1731           nir_tex_instr_has_implicit_derivative(tex) &&
1732           tex->sampler_dim != GLSL_SAMPLER_DIM_RECT &&
1733           !nir_shader_supports_implicit_lod(b->shader)) {
1734          lower_zero_lod(b, tex);
1735          progress = true;
1736       }
1737 
1738       if (options->lower_txs_lod && tex->op == nir_texop_txs) {
1739          progress |= nir_lower_txs_lod(b, tex);
1740          continue;
1741       }
1742 
1743       if (options->lower_txs_cube_array && tex->op == nir_texop_txs &&
1744           tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array) {
1745          nir_lower_txs_cube_array(b, tex);
1746          progress = true;
1747          continue;
1748       }
1749 
1750       /* has to happen after all the other lowerings as the original tg4 gets
1751        * replaced by 4 tg4 instructions.
1752        */
1753       if (tex->op == nir_texop_tg4 &&
1754           nir_tex_instr_has_explicit_tg4_offsets(tex) &&
1755           options->lower_tg4_offsets) {
1756          progress |= lower_tg4_offsets(b, tex);
1757          continue;
1758       }
1759 
1760       if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_txf_ms) {
1761          nir_lower_ms_txf_to_fragment_fetch(b, tex);
1762          progress = true;
1763          continue;
1764       }
1765 
1766       if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_samples_identical) {
1767          nir_lower_samples_identical_to_fragment_fetch(b, tex);
1768          progress = true;
1769          continue;
1770       }
1771 
1772       if (options->lower_lod_zero_width && tex->op == nir_texop_lod) {
1773          nir_lower_lod_zero_width(b, tex);
1774          progress = true;
1775          continue;
1776       }
1777    }
1778 
1779    return progress;
1780 }
1781 
1782 static bool
nir_lower_tex_impl(nir_function_impl * impl,const nir_lower_tex_options * options,const struct nir_shader_compiler_options * compiler_options)1783 nir_lower_tex_impl(nir_function_impl *impl,
1784                    const nir_lower_tex_options *options,
1785                    const struct nir_shader_compiler_options *compiler_options)
1786 {
1787    bool progress = false;
1788    nir_builder builder = nir_builder_create(impl);
1789 
1790    nir_foreach_block(block, impl) {
1791       progress |= nir_lower_tex_block(block, &builder, options, compiler_options);
1792    }
1793 
1794    nir_metadata_preserve(impl, nir_metadata_control_flow);
1795    return progress;
1796 }
1797 
1798 bool
nir_lower_tex(nir_shader * shader,const nir_lower_tex_options * options)1799 nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
1800 {
1801    bool progress = false;
1802 
1803    /* lower_tg4_offsets injects new tg4 instructions that won't be lowered
1804     * if lower_tg4_broadcom_swizzle is also requested so when both are set
1805     * we want to run lower_tg4_offsets in a separate pass first.
1806     */
1807    if (options->lower_tg4_offsets && options->lower_tg4_broadcom_swizzle) {
1808       nir_lower_tex_options _options = {
1809          .lower_tg4_offsets = true,
1810       };
1811       progress = nir_lower_tex(shader, &_options);
1812    }
1813 
1814    nir_foreach_function_impl(impl, shader) {
1815       progress |= nir_lower_tex_impl(impl, options, shader->options);
1816    }
1817 
1818    return progress;
1819 }
1820