/*
 * Copyright © 2021 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * This lowering pass performs image-related conversions, as configured via
 * nir_lower_image_options:
 *   + cube array size lowering: the size query is rewritten to operate on
 *     the image as a 2D array, with the z component divided by 6.
 *   + lowering of MSAA image loads and samples_identical queries to AMD
 *     fragment mask (FMASK) loads.
 *   + lowering of image sample-count queries to a constant 1.
 */
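
/* For example: a cube array image with 2 cube layers is backed by a 2D array
 * with 2 * 6 = 12 layers.  The lowered 2D-array size query returns
 * (width, height, 12); dividing the z component by 6 recovers the 2 cube
 * layers that the original cube size query must report.
 */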

#include "nir.h"
#include "nir_builder.h"

static void
lower_cube_size(nir_builder *b, nir_intrinsic_instr *intrin)
{
   assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE);

   b->cursor = nir_before_instr(&intrin->instr);

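   /* Clone the original size intrinsic and retype the clone as a size query
    * on a 2D-array image; only the image dim/array indices change, so all
    * other sources remain valid.
    */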
   nir_intrinsic_instr *_2darray_size =
      nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intrin->instr));
   nir_intrinsic_set_image_dim(_2darray_size, GLSL_SAMPLER_DIM_2D);
   nir_intrinsic_set_image_array(_2darray_size, true);
   nir_builder_instr_insert(b, &_2darray_size->instr);

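   /* Pass width and height through unchanged; component 2 holds the 2D-array
    * layer count, which is divided by 6 to get the number of cube layers.
    */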
   nir_def *size = nir_instr_def(&_2darray_size->instr);
   nir_scalar comps[NIR_MAX_VEC_COMPONENTS] = { 0 };
   unsigned coord_comps = intrin->def.num_components;
   for (unsigned c = 0; c < coord_comps; c++) {
      if (c == 2) {
         comps[2] = nir_get_scalar(nir_idiv(b, nir_channel(b, size, 2), nir_imm_int(b, 6)), 0);
      } else {
         comps[c] = nir_get_scalar(size, c);
      }
   }

   nir_def *vec = nir_vec_scalars(b, comps, intrin->def.num_components);
   nir_def_replace(&intrin->def, vec);
   nir_instr_free(&intrin->instr);
}

/* Adjust the sample index according to AMD FMASK (fragment mask).
 *
 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
 * which is the identity mapping. Each nibble says which physical sample
 * should be fetched to get that sample.
 *
 * For example, 0x11111100 means there are only 2 samples stored and
 * the second sample covers 3/4 of the pixel. When reading samples 0
 * and 1, return physical sample 0 (determined by the first two 0s
 * in FMASK), otherwise return physical sample 1.
 *
 * The sample index should be adjusted as follows:
 *   sample_index = ubfe(fmask, sample_index * 4, 3);
 *
 * Only extract 3 bits because EQAA can generate the value 8 in FMASK, which
 * means the physical sample index is unknown. We can map 8 to any valid
 * sample index, and extracting only 3 bits will map it to 0, which works
 * with all MSAA modes.
 */
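/* Worked example for the FMASK above (0x11111100): for sample_index = 1,
 * ubfe(0x11111100, 1 * 4, 3) extracts bits [4..6], which are 0, so physical
 * sample 0 is fetched; for sample_index = 2, bits [8..10] contain 1, so
 * physical sample 1 is fetched.
 */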
static void
lower_image_to_fragment_mask_load(nir_builder *b, nir_intrinsic_instr *intrin)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_intrinsic_op fmask_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_image_load:
      fmask_op = nir_intrinsic_image_fragment_mask_load_amd;
      break;
   case nir_intrinsic_image_deref_load:
      fmask_op = nir_intrinsic_image_deref_fragment_mask_load_amd;
      break;
   case nir_intrinsic_bindless_image_load:
      fmask_op = nir_intrinsic_bindless_image_fragment_mask_load_amd;
      break;
   default:
      unreachable("bad intrinsic");
      break;
   }

   nir_def *fmask =
      nir_image_fragment_mask_load_amd(b, intrin->src[0].ssa, intrin->src[1].ssa,
                                       .image_dim = nir_intrinsic_image_dim(intrin),
                                       .image_array = nir_intrinsic_image_array(intrin),
                                       .format = nir_intrinsic_format(intrin),
                                       .access = nir_intrinsic_access(intrin));

   /* The builder emitted the non-deref, non-bindless op; patch in the
    * fmask_op variant matching the original intrinsic.
    */
   nir_intrinsic_instr *fmask_load = nir_instr_as_intrinsic(fmask->parent_instr);
   fmask_load->intrinsic = fmask_op;

   /* Extract the physical sample index for this sample from the fmask:
    * sample_index * 4 selects the nibble, and only 3 bits are read (see the
    * comment above).
    */
   nir_def *sample_index_old = intrin->src[2].ssa;
   nir_def *fmask_offset = nir_u2u32(b, nir_ishl_imm(b, sample_index_old, 2));
   nir_def *fmask_width = nir_imm_int(b, 3);
   nir_def *sample_index_new = nir_ubfe(b, fmask, fmask_offset, fmask_width);

   /* Rewrite the sample index source of the color buffer load. */
   nir_src_rewrite(&intrin->src[2],
                   nir_u2uN(b, sample_index_new, sample_index_old->bit_size));

   /* Mark the intrinsic as FMASK-lowered so it doesn't get lowered again. */
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);
   nir_intrinsic_set_access(intrin, access | ACCESS_FMASK_LOWERED_AMD);
}

static void
lower_image_samples_identical_to_fragment_mask_load(nir_builder *b, nir_intrinsic_instr *intrin)
{
   b->cursor = nir_before_instr(&intrin->instr);

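   /* Clone the samples_identical intrinsic and turn the clone into the
    * matching fragment mask load; both take the same image and coordinate
    * sources.
    */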
   nir_intrinsic_instr *fmask_load =
      nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intrin->instr));

   switch (intrin->intrinsic) {
   case nir_intrinsic_image_samples_identical:
      fmask_load->intrinsic = nir_intrinsic_image_fragment_mask_load_amd;
      break;
   case nir_intrinsic_image_deref_samples_identical:
      fmask_load->intrinsic = nir_intrinsic_image_deref_fragment_mask_load_amd;
      break;
   case nir_intrinsic_bindless_image_samples_identical:
      fmask_load->intrinsic = nir_intrinsic_bindless_image_fragment_mask_load_amd;
      break;
   default:
      unreachable("bad intrinsic");
      break;
   }

   nir_def_init(&fmask_load->instr, &fmask_load->def, 1, 32);
   nir_builder_instr_insert(b, &fmask_load->instr);

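   /* An fmask of 0 maps every sample to physical sample 0, which means all
    * samples of the pixel are identical.
    */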
   nir_def *samples_identical = nir_ieq_imm(b, &fmask_load->def, 0);
   nir_def_replace(&intrin->def, samples_identical);
   nir_instr_free(&intrin->instr);
}

static bool
lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
{
   const nir_lower_image_options *options = state;

   switch (intrin->intrinsic) {
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
      if (options->lower_cube_size &&
          nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE) {
         lower_cube_size(b, intrin);
         return true;
      }
      return false;

   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
      if (options->lower_to_fragment_mask_load_amd &&
          nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_MS &&
          /* Don't lower again. */
          !(nir_intrinsic_access(intrin) & ACCESS_FMASK_LOWERED_AMD)) {
         lower_image_to_fragment_mask_load(b, intrin);
         return true;
      }
      return false;

   case nir_intrinsic_image_samples_identical:
   case nir_intrinsic_image_deref_samples_identical:
   case nir_intrinsic_bindless_image_samples_identical:
      if (options->lower_to_fragment_mask_load_amd &&
          nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_MS) {
         lower_image_samples_identical_to_fragment_mask_load(b, intrin);
         return true;
      }
      return false;

   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples: {
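      /* Replace the sample-count query with a constant 1; the now-unused
       * intrinsic is left for DCE to remove.
       */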
      if (options->lower_image_samples_to_one) {
         b->cursor = nir_after_instr(&intrin->instr);
         nir_def *samples = nir_imm_intN_t(b, 1, intrin->def.bit_size);
         nir_def_rewrite_uses(&intrin->def, samples);
         return true;
      }
      return false;
   }
   default:
      return false;
   }
}

bool
nir_lower_image(nir_shader *nir, const nir_lower_image_options *options)
{
   return nir_shader_intrinsics_pass(nir, lower_image_intrin,
                                     nir_metadata_control_flow,
                                     (void *)options);
}