/*
 * Copyright © 2015-2018 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#ifndef IR3_CONTEXT_H_
#define IR3_CONTEXT_H_

#include "ir3.h"
#include "ir3_compiler.h"
#include "ir3_nir.h"

/* for conditionally setting boolean flag(s): */
#define COND(bool, val) ((bool) ? (val) : 0)
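
/* Usage sketch (flag and condition names are hypothetical):
 *
 *    flags |= COND(use_half, FLAG_HALF);
 *
 * i.e. OR in FLAG_HALF only when use_half is true, else OR in 0.
 */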

#define DBG(fmt, ...)                                                          \
   do {                                                                        \
      mesa_logd("%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__);             \
   } while (0)
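
/* The format string and args are forwarded to mesa_logd(), prefixed with
 * the calling function and line.  E.g. (message/variable illustrative):
 *
 *    DBG("unhandled intrinsic: %s", name);
 */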

/**
 * The context for compilation of a single shader.
 */
struct ir3_context {
   struct ir3_compiler *compiler;
   const struct ir3_context_funcs *funcs;

   struct nir_shader *s;

   struct nir_instr *cur_instr; /* current instruction, just for debug */

   struct ir3 *ir;
   struct ir3_shader_variant *so;

   /* Tables of scalar inputs/outputs.  Because of the way varying packing
    * works, we can have inputs with fractional locations, which are awkward
    * to deal with unless we keep track of the split scalar in/out
    * components.
    *
    * These *only* contain inputs/outputs that are touched by load_*input
    * and store_output.
    */
   unsigned ninputs, noutputs;
   struct ir3_instruction **inputs;
   struct ir3_instruction **outputs;

   struct ir3_block *block;    /* the current block */
   struct ir3_block *in_block; /* block created for shader inputs */

   nir_function_impl *impl;

   /* For fragment shaders, varyings are not actual shader inputs;
    * instead the hw passes an ij coord which is used with
    * bary.f.
    *
    * But NIR doesn't know that; it still declares varyings as
    * inputs.  So we do all the input tracking normally and fix
    * things up after compile_instructions().
    */
   struct ir3_instruction *ij[IJ_COUNT];

   /* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */
   struct ir3_instruction *frag_face, *frag_coord;

   /* For vertex shaders, keep track of the system value sources */
   struct ir3_instruction *vertex_id, *basevertex, *instance_id, *base_instance,
      *draw_id, *view_index, *is_indexed_draw;

   /* For fragment shaders: */
   struct ir3_instruction *samp_id, *samp_mask_in;

   /* For geometry shaders: */
   struct ir3_instruction *primitive_id;
   struct ir3_instruction *gs_header;

   /* For tessellation shaders: */
   struct ir3_instruction *tcs_header;
   struct ir3_instruction *tess_coord;
   struct ir3_instruction *rel_patch_id;

   /* Compute shader inputs: */
   struct ir3_instruction *local_invocation_id, *work_group_id;

   /* mapping from nir_register to defining instruction: */
   struct hash_table *def_ht;

   unsigned num_arrays;

   unsigned loop_depth;

   /* A common pattern for indirect addressing is to request the
    * same address register multiple times.  To avoid generating
    * duplicate instruction sequences (which our backend does not
    * try to clean up, since that should be done at the NIR stage)
    * we cache the address value generated for a given src value.
    *
    * Note that we have to cache these per alignment, since the same
    * src used for an array of vec1 cannot also be used for an
    * array of vec4.
    */
   struct hash_table *addr0_ht[4];

   /* The same for a1.x.  We only support immediate values for a1.x, as this
    * is the only use so far.
    */
   struct hash_table_u64 *addr1_ht;

   struct hash_table *sel_cond_conversions;
   struct hash_table *predicate_conversions;

   /* last dst array; for indirect writes we need to insert a var-store.
    */
   struct ir3_instruction **last_dst;
   unsigned last_dst_n;

   /* maps nir_block to ir3_block, mostly for the purposes of
    * figuring out the block's successors
    */
   struct hash_table *block_ht;

   /* maps nir_block at the top of a loop to ir3_block collecting continue
    * edges.
    */
   struct hash_table *continue_block_ht;

   /* on a4xx, bitmask of samplers which need astc+srgb workaround: */
   unsigned astc_srgb;

   /* on a4xx, per-sampler per-component swizzles, for tg4: */
   uint16_t sampler_swizzles[16];

   unsigned samples; /* bitmask of x,y sample shifts */

   unsigned max_texture_index;

   unsigned prefetch_limit;

   /* set if we encounter something we can't handle yet, so we
    * can bail cleanly and fall back to the TGSI compiler f/e
    */
   bool error;
};

struct ir3_context_funcs {
   void (*emit_intrinsic_load_ssbo)(struct ir3_context *ctx,
                                    nir_intrinsic_instr *intr,
                                    struct ir3_instruction **dst);
   void (*emit_intrinsic_store_ssbo)(struct ir3_context *ctx,
                                     nir_intrinsic_instr *intr);
   struct ir3_instruction *(*emit_intrinsic_atomic_ssbo)(
      struct ir3_context *ctx, nir_intrinsic_instr *intr);
   void (*emit_intrinsic_load_image)(struct ir3_context *ctx,
                                     nir_intrinsic_instr *intr,
                                     struct ir3_instruction **dst);
   void (*emit_intrinsic_store_image)(struct ir3_context *ctx,
                                      nir_intrinsic_instr *intr);
   struct ir3_instruction *(*emit_intrinsic_atomic_image)(
      struct ir3_context *ctx, nir_intrinsic_instr *intr);
   void (*emit_intrinsic_image_size)(struct ir3_context *ctx,
                                     nir_intrinsic_instr *intr,
                                     struct ir3_instruction **dst);
   void (*emit_intrinsic_load_global_ir3)(struct ir3_context *ctx,
                                          nir_intrinsic_instr *intr,
                                          struct ir3_instruction **dst);
   void (*emit_intrinsic_store_global_ir3)(struct ir3_context *ctx,
                                           nir_intrinsic_instr *intr);
   struct ir3_instruction *(*emit_intrinsic_atomic_global)(
      struct ir3_context *ctx, nir_intrinsic_instr *intr);
};

extern const struct ir3_context_funcs ir3_a4xx_funcs;
extern const struct ir3_context_funcs ir3_a6xx_funcs;
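
/* A minimal sketch of how the per-generation table is consumed: the core
 * NIR->ir3 translation calls through ctx->funcs (set up by
 * ir3_context_init()) so a4xx and a6xx can supply their own SSBO/image
 * lowering, e.g.:
 *
 *    ctx->funcs->emit_intrinsic_load_ssbo(ctx, intr, dst);
 */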

struct ir3_context *ir3_context_init(struct ir3_compiler *compiler,
                                     struct ir3_shader *shader,
                                     struct ir3_shader_variant *so);
void ir3_context_free(struct ir3_context *ctx);

struct ir3_instruction **ir3_get_dst_ssa(struct ir3_context *ctx,
                                         nir_def *dst, unsigned n);
struct ir3_instruction **ir3_get_def(struct ir3_context *ctx, nir_def *def,
                                     unsigned n);
struct ir3_instruction *const *ir3_get_src_maybe_shared(struct ir3_context *ctx,
                                                        nir_src *src);
struct ir3_instruction *const *ir3_get_src_shared(struct ir3_context *ctx,
                                                  nir_src *src, bool shared);

static inline struct ir3_instruction *const *
ir3_get_src(struct ir3_context *ctx, nir_src *src)
{
   return ir3_get_src_shared(ctx, src, false);
}

void ir3_put_def(struct ir3_context *ctx, nir_def *def);
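
/* Typical def-handling pattern when emitting an instruction per component
 * (sketch; 'intr' and 'ncomp' are illustrative):
 *
 *    struct ir3_instruction **dst = ir3_get_def(ctx, &intr->def, ncomp);
 *    ... write one ir3_instruction per component into dst[] ...
 *    ir3_put_def(ctx, &intr->def);
 */
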
struct ir3_instruction *ir3_create_collect(struct ir3_block *block,
                                           struct ir3_instruction *const *arr,
                                           unsigned arrsz);
void ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
                    struct ir3_instruction *src, unsigned base, unsigned n);
void ir3_handle_bindless_cat6(struct ir3_instruction *instr, nir_src rsrc);
void ir3_handle_nonuniform(struct ir3_instruction *instr,
                           nir_intrinsic_instr *intrin);
void emit_intrinsic_image_size_tex(struct ir3_context *ctx,
                                   nir_intrinsic_instr *intr,
                                   struct ir3_instruction **dst);

#define ir3_collect(block, ...)                                                \
   ({                                                                          \
      struct ir3_instruction *__arr[] = {__VA_ARGS__};                         \
      ir3_create_collect(block, __arr, ARRAY_SIZE(__arr));                     \
   })
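
/* e.g. to gather two components into a single collect (operands
 * illustrative):
 *
 *    struct ir3_instruction *xy = ir3_collect(b, coord[0], coord[1]);
 */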

NORETURN void ir3_context_error(struct ir3_context *ctx, const char *format,
                                ...);

#define compile_assert(ctx, cond)                                              \
   do {                                                                        \
      if (!(cond))                                                             \
         ir3_context_error((ctx), "failed assert: " #cond "\n");               \
   } while (0)
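
/* On failure this calls ir3_context_error(), which does not return, so
 * code after a compile_assert() can rely on the condition.  E.g.
 * (condition illustrative):
 *
 *    compile_assert(ctx, intr->def.bit_size == 32);
 */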

struct ir3_instruction *ir3_get_addr0(struct ir3_context *ctx,
                                      struct ir3_instruction *src, int align);
struct ir3_instruction *ir3_get_addr1(struct ir3_context *ctx,
                                      unsigned const_val);
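
/* Sketch of indirect addressing via the addr0_ht cache described above
 * ('idx' illustrative; align=4 for indexing an array of vec4):
 *
 *    struct ir3_instruction *addr = ir3_get_addr0(ctx, idx, 4);
 *
 * Repeated calls with the same src/align pair return the cached a0.x
 * value instead of emitting the address sequence again.
 */
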
struct ir3_instruction *ir3_get_predicate(struct ir3_context *ctx,
                                          struct ir3_instruction *src);

void ir3_declare_array(struct ir3_context *ctx, nir_intrinsic_instr *decl);
struct ir3_array *ir3_get_array(struct ir3_context *ctx, nir_def *reg);
struct ir3_instruction *ir3_create_array_load(struct ir3_context *ctx,
                                              struct ir3_array *arr, int n,
                                              struct ir3_instruction *address);
void ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr,
                            int n, struct ir3_instruction *src,
                            struct ir3_instruction *address);
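
/* Sketch of a relative (indirectly addressed) array load tying the above
 * together (names illustrative; assuming a NULL address denotes a direct,
 * constant-index access):
 *
 *    struct ir3_array *arr = ir3_get_array(ctx, reg);
 *    struct ir3_instruction *addr = ir3_get_addr0(ctx, offset, 1);
 *    struct ir3_instruction *val = ir3_create_array_load(ctx, arr, 0, addr);
 */
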
void ir3_lower_imm_offset(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                          nir_src *offset_src, unsigned imm_offset_bits,
                          struct ir3_instruction **offset,
                          unsigned *imm_offset);

static inline type_t
utype_for_size(unsigned bit_size)
{
   switch (bit_size) {
   case 32:
      return TYPE_U32;
   case 16:
      return TYPE_U16;
   case 8:
      return TYPE_U8;
   default:
      unreachable("bad bitsize");
      return ~0;
   }
}

static inline type_t
utype_src(nir_src src)
{
   return utype_for_size(nir_src_bit_size(src));
}

static inline type_t
utype_def(nir_def *def)
{
   return utype_for_size(def->bit_size);
}
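
/* Example: utype_src(src) is just utype_for_size(nir_src_bit_size(src)),
 * so a 16b source yields TYPE_U16, 32b yields TYPE_U32, 8b yields TYPE_U8.
 */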

/**
 * Convert nir bitsize to ir3 bitsize, handling the special case of 1b bools
 * which can be 16b or 32b depending on gen.
 */
static inline unsigned
ir3_bitsize(struct ir3_context *ctx, unsigned nir_bitsize)
{
   if (nir_bitsize == 1)
      return type_size(ctx->compiler->bool_type);
   return nir_bitsize;
}
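
/* Example: ir3_bitsize(ctx, 1) returns 16 or 32 depending on the
 * generation's bool_type, while other sizes pass through unchanged,
 * e.g. ir3_bitsize(ctx, 32) == 32.
 */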

#endif /* IR3_CONTEXT_H_ */