/*
 * Copyright © 2015-2018 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#ifndef IR3_CONTEXT_H_
#define IR3_CONTEXT_H_

#include "ir3.h"
#include "ir3_compiler.h"
#include "ir3_nir.h"

/* for conditionally setting boolean flag(s): */
#define COND(bool, val) ((bool) ? (val) : 0)
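
/* Usage sketch (use_half is illustrative; IR3_REG_HALF is a register flag
 * from ir3.h): OR in a flag only when the condition holds:
 *
 *    flags |= COND(use_half, IR3_REG_HALF);
 */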

#define DBG(fmt, ...)                                                          \
   do {                                                                        \
      mesa_logd("%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__);             \
   } while (0)
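
/* Usage sketch (the message and argument are illustrative):
 *
 *    DBG("unhandled bit_size: %u", bit_size);
 *
 * logs via mesa_logd() with the calling function and line number prepended.
 */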

/**
 * The context for compilation of a single shader.
 */
struct ir3_context {
   struct ir3_compiler *compiler;
   const struct ir3_context_funcs *funcs;

   struct nir_shader *s;

   struct nir_instr *cur_instr; /* current instruction, just for debug */

   struct ir3 *ir;
   struct ir3_shader_variant *so;

   /* Tables of scalar inputs/outputs. Because of the way varying packing
    * works, we could have inputs w/ fractional location, which is a bit
    * awkward to deal with unless we keep track of the split scalar in/
    * out components.
    *
    * These *only* contain inputs/outputs that are touched by load_*input
    * and store_output.
    */
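   /* (Illustrative layout note, assuming the usual per-component packing
    * in the front-end: component c of the n'th input lands at
    * inputs[n * 4 + c].)
    */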
   unsigned ninputs, noutputs;
   struct ir3_instruction **inputs;
   struct ir3_instruction **outputs;

   struct ir3_block *block;    /* the current block */
   struct ir3_block *in_block; /* block created for shader inputs */

   nir_function_impl *impl;

   /* For fragment shaders, varyings are not actual shader inputs;
    * instead the hw passes an ij coord which is used with bary.f.
    *
    * But NIR doesn't know that; it still declares varyings as
    * inputs. So we do all the input tracking normally and fix
    * things up after compile_instructions().
    */
   struct ir3_instruction *ij[IJ_COUNT];

   /* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */
   struct ir3_instruction *frag_face, *frag_coord;

   /* For vertex shaders, keep track of the system value sources: */
   struct ir3_instruction *vertex_id, *basevertex, *instance_id, *base_instance,
      *draw_id, *view_index, *is_indexed_draw;

   /* For fragment shaders: */
   struct ir3_instruction *samp_id, *samp_mask_in;

   /* For geometry shaders: */
   struct ir3_instruction *primitive_id;
   struct ir3_instruction *gs_header;

   /* For tessellation shaders: */
   struct ir3_instruction *tcs_header;
   struct ir3_instruction *tess_coord;
   struct ir3_instruction *rel_patch_id;

   /* Compute shader inputs: */
   struct ir3_instruction *local_invocation_id, *work_group_id;

   /* mapping from nir_register to defining instruction: */
   struct hash_table *def_ht;

   unsigned num_arrays;

   unsigned loop_depth;

   /* A common pattern for indirect addressing is to request the
    * same address register multiple times. To avoid generating
    * duplicate instruction sequences (which our backend does not
    * try to clean up, since that should be done at the NIR stage)
    * we cache the address value generated for a given src value.
    *
    * Note that we have to cache these per alignment, since the same
    * src used for an array of vec1 cannot also be used for an
    * array of vec4.
    */
   struct hash_table *addr0_ht[4];

   /* The same for a1.x. We only support immediate values for a1.x, as this
    * is the only use so far.
    */
   struct hash_table_u64 *addr1_ht;

   struct hash_table *sel_cond_conversions;
   struct hash_table *predicate_conversions;

   /* Last dst array; for indirect writes we need to insert a var-store. */
   struct ir3_instruction **last_dst;
   unsigned last_dst_n;

   /* maps nir_block to ir3_block, mostly for the purposes of
    * figuring out the block's successors
    */
   struct hash_table *block_ht;

   /* maps the nir_block at the top of a loop to the ir3_block collecting
    * continue edges.
    */
   struct hash_table *continue_block_ht;

   /* on a4xx, bitmask of samplers which need astc+srgb workaround: */
   unsigned astc_srgb;

   /* on a4xx, per-sampler per-component swizzles, for tg4: */
   uint16_t sampler_swizzles[16];

   unsigned samples; /* bitmask of x,y sample shifts */

   unsigned max_texture_index;

   unsigned prefetch_limit;

   /* set if we encounter something we can't handle yet, so we
    * can bail cleanly and fall back to the TGSI compiler f/e
    */
   bool error;
};

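/* Per-generation hooks for emitting SSBO, image, and global-memory
 * intrinsics; see the a4xx and a6xx instances declared below.
 */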
struct ir3_context_funcs {
   void (*emit_intrinsic_load_ssbo)(struct ir3_context *ctx,
                                    nir_intrinsic_instr *intr,
                                    struct ir3_instruction **dst);
   void (*emit_intrinsic_store_ssbo)(struct ir3_context *ctx,
                                     nir_intrinsic_instr *intr);
   struct ir3_instruction *(*emit_intrinsic_atomic_ssbo)(
      struct ir3_context *ctx, nir_intrinsic_instr *intr);
   void (*emit_intrinsic_load_image)(struct ir3_context *ctx,
                                     nir_intrinsic_instr *intr,
                                     struct ir3_instruction **dst);
   void (*emit_intrinsic_store_image)(struct ir3_context *ctx,
                                      nir_intrinsic_instr *intr);
   struct ir3_instruction *(*emit_intrinsic_atomic_image)(
      struct ir3_context *ctx, nir_intrinsic_instr *intr);
   void (*emit_intrinsic_image_size)(struct ir3_context *ctx,
                                     nir_intrinsic_instr *intr,
                                     struct ir3_instruction **dst);
   void (*emit_intrinsic_load_global_ir3)(struct ir3_context *ctx,
                                          nir_intrinsic_instr *intr,
                                          struct ir3_instruction **dst);
   void (*emit_intrinsic_store_global_ir3)(struct ir3_context *ctx,
                                           nir_intrinsic_instr *intr);
   struct ir3_instruction *(*emit_intrinsic_atomic_global)(
      struct ir3_context *ctx, nir_intrinsic_instr *intr);
};

extern const struct ir3_context_funcs ir3_a4xx_funcs;
extern const struct ir3_context_funcs ir3_a6xx_funcs;

struct ir3_context *ir3_context_init(struct ir3_compiler *compiler,
                                     struct ir3_shader *shader,
                                     struct ir3_shader_variant *so);
void ir3_context_free(struct ir3_context *ctx);

struct ir3_instruction **ir3_get_dst_ssa(struct ir3_context *ctx,
                                         nir_def *dst, unsigned n);
struct ir3_instruction **ir3_get_def(struct ir3_context *ctx, nir_def *def,
                                     unsigned n);
struct ir3_instruction *const *ir3_get_src_maybe_shared(struct ir3_context *ctx,
                                                        nir_src *src);
struct ir3_instruction *const *ir3_get_src_shared(struct ir3_context *ctx,
                                                  nir_src *src, bool shared);

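/* Convenience wrapper for the common, non-shared case: */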
static inline struct ir3_instruction *const *
ir3_get_src(struct ir3_context *ctx, nir_src *src)
{
   return ir3_get_src_shared(ctx, src, false);
}

void ir3_put_def(struct ir3_context *ctx, nir_def *def);
struct ir3_instruction *ir3_create_collect(struct ir3_block *block,
                                           struct ir3_instruction *const *arr,
                                           unsigned arrsz);
void ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
                    struct ir3_instruction *src, unsigned base, unsigned n);
void ir3_handle_bindless_cat6(struct ir3_instruction *instr, nir_src rsrc);
void ir3_handle_nonuniform(struct ir3_instruction *instr,
                           nir_intrinsic_instr *intrin);
void emit_intrinsic_image_size_tex(struct ir3_context *ctx,
                                   nir_intrinsic_instr *intr,
                                   struct ir3_instruction **dst);

#define ir3_collect(block, ...)                                                \
   ({                                                                          \
      struct ir3_instruction *__arr[] = {__VA_ARGS__};                         \
      ir3_create_collect(block, __arr, ARRAY_SIZE(__arr));                     \
   })
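
/* Usage sketch (block and sources are illustrative): gather scalar ir3
 * values into a single collect (vector) instruction:
 *
 *    struct ir3_instruction *xy = ir3_collect(b, src_x, src_y);
 *
 * expands to ir3_create_collect() over a stack array of the two sources.
 */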

NORETURN void ir3_context_error(struct ir3_context *ctx, const char *format,
                                ...);

#define compile_assert(ctx, cond)                                              \
   do {                                                                        \
      if (!(cond))                                                             \
         ir3_context_error((ctx), "failed assert: " #cond "\n");               \
   } while (0)
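
/* Usage sketch (the condition is illustrative):
 *
 *    compile_assert(ctx, bit_size <= 32);
 *
 * on failure this reports via ir3_context_error(), which does not return.
 */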

struct ir3_instruction *ir3_get_addr0(struct ir3_context *ctx,
                                      struct ir3_instruction *src, int align);
struct ir3_instruction *ir3_get_addr1(struct ir3_context *ctx,
                                      unsigned const_val);
struct ir3_instruction *ir3_get_predicate(struct ir3_context *ctx,
                                          struct ir3_instruction *src);

void ir3_declare_array(struct ir3_context *ctx, nir_intrinsic_instr *decl);
struct ir3_array *ir3_get_array(struct ir3_context *ctx, nir_def *reg);
struct ir3_instruction *ir3_create_array_load(struct ir3_context *ctx,
                                              struct ir3_array *arr, int n,
                                              struct ir3_instruction *address);
void ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr,
                            int n, struct ir3_instruction *src,
                            struct ir3_instruction *address);
void ir3_lower_imm_offset(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                          nir_src *offset_src, unsigned imm_offset_bits,
                          struct ir3_instruction **offset,
                          unsigned *imm_offset);

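/* Unsigned ir3 type for a given NIR bit size; utype_src()/utype_def() below
 * derive it from a nir_src or nir_def, e.g. a 16b value maps to TYPE_U16.
 */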
static inline type_t
utype_for_size(unsigned bit_size)
{
   switch (bit_size) {
   case 32:
      return TYPE_U32;
   case 16:
      return TYPE_U16;
   case 8:
      return TYPE_U8;
   default:
      unreachable("bad bitsize");
      return ~0;
   }
}

static inline type_t
utype_src(nir_src src)
{
   return utype_for_size(nir_src_bit_size(src));
}

static inline type_t
utype_def(nir_def *def)
{
   return utype_for_size(def->bit_size);
}

/**
 * Convert nir bitsize to ir3 bitsize, handling the special case of 1b bools
 * which can be 16b or 32b depending on gen.
 */
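/* Usage sketch: pick the mov type for a 1b NIR boolean destination
 * (TYPE_U16 vs TYPE_U32 per gen):
 *
 *    type_t t = utype_for_size(ir3_bitsize(ctx, 1));
 */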
static inline unsigned
ir3_bitsize(struct ir3_context *ctx, unsigned nir_bitsize)
{
   if (nir_bitsize == 1)
      return type_size(ctx->compiler->bool_type);
   return nir_bitsize;
}

#endif /* IR3_CONTEXT_H_ */