1 /**************************************************************************
2 *
3 * Copyright 2019 Red Hat.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **************************************************************************/
25
26 #ifndef LP_BLD_NIR_H
27 #define LP_BLD_NIR_H
28
29 #include "gallivm/lp_bld.h"
30 #include "gallivm/lp_bld_limits.h"
31 #include "gallivm/lp_bld_flow.h"
32 #include "lp_bld_type.h"
33
34 #include "gallivm/lp_bld_tgsi.h"
35 #include "nir.h"
36
37 struct nir_shader;
38
/*
 * Number of function arguments reserved on every function call:
 * the exec mask and the context.
 */
#define LP_RESV_FUNC_ARGS 2
44
45 void lp_build_nir_soa(struct gallivm_state *gallivm,
46 struct nir_shader *shader,
47 const struct lp_build_tgsi_params *params,
48 LLVMValueRef (*outputs)[4]);
49
50 void lp_build_nir_soa_func(struct gallivm_state *gallivm,
51 struct nir_shader *shader,
52 nir_function_impl *impl,
53 const struct lp_build_tgsi_params *params,
54 LLVMValueRef (*outputs)[4]);
55
56 void lp_build_nir_aos(struct gallivm_state *gallivm,
57 struct nir_shader *shader,
58 struct lp_type type,
59 const unsigned char swizzles[4],
60 LLVMValueRef consts_ptr,
61 const LLVMValueRef *inputs,
62 LLVMValueRef *outputs,
63 const struct lp_build_sampler_aos *sampler);
64
65 struct lp_build_fn {
66 LLVMTypeRef fn_type;
67 LLVMValueRef fn;
68 };
69
70 struct lp_build_nir_context
71 {
72 struct lp_build_context base;
73 struct lp_build_context uint_bld;
74 struct lp_build_context int_bld;
75 struct lp_build_context uint8_bld;
76 struct lp_build_context int8_bld;
77 struct lp_build_context uint16_bld;
78 struct lp_build_context int16_bld;
79 struct lp_build_context half_bld;
80 struct lp_build_context dbl_bld;
81 struct lp_build_context uint64_bld;
82 struct lp_build_context int64_bld;
83
84 LLVMValueRef *ssa_defs;
85 struct hash_table *regs;
86 struct hash_table *vars;
87 struct hash_table *fns;
88
89 /** Value range analysis hash table used in code generation. */
90 struct hash_table *range_ht;
91
92 LLVMValueRef aniso_filter_table;
93
94 LLVMValueRef func;
95 nir_shader *shader;
96
97 struct lp_build_if_state if_stack[LP_MAX_TGSI_NESTING];
98 uint32_t if_stack_size;
99
100 void (*load_ubo)(struct lp_build_nir_context *bld_base,
101 unsigned nc,
102 unsigned bit_size,
103 bool offset_is_uniform,
104 LLVMValueRef index, LLVMValueRef offset, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
105
106 void (*load_kernel_arg)(struct lp_build_nir_context *bld_base,
107 unsigned nc,
108 unsigned bit_size,
109 unsigned offset_bit_size,
110 bool offset_is_uniform,
111 LLVMValueRef offset, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
112
113 void (*load_global)(struct lp_build_nir_context *bld_base,
114 unsigned nc, unsigned bit_size,
115 unsigned offset_bit_size,
116 bool offset_is_global,
117 LLVMValueRef offset, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
118
119 void (*store_global)(struct lp_build_nir_context *bld_base,
120 unsigned writemask,
121 unsigned nc, unsigned bit_size,
122 unsigned addr_bit_size,
123 LLVMValueRef addr, LLVMValueRef dst);
124
125 void (*atomic_global)(struct lp_build_nir_context *bld_base,
126 nir_atomic_op nir_op,
127 unsigned addr_bit_size,
128 unsigned val_bit_size,
129 LLVMValueRef addr,
130 LLVMValueRef val, LLVMValueRef val2,
131 LLVMValueRef *result);
132
133 /* for SSBO and shared memory */
134 void (*load_mem)(struct lp_build_nir_context *bld_base,
135 unsigned nc, unsigned bit_size,
136 bool index_and_offset_are_uniform, bool payload,
137 LLVMValueRef index, LLVMValueRef offset, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
138 void (*store_mem)(struct lp_build_nir_context *bld_base,
139 unsigned writemask, unsigned nc, unsigned bit_size,
140 bool index_and_offset_are_uniform, bool payload,
141 LLVMValueRef index, LLVMValueRef offset, LLVMValueRef dst);
142
143 void (*atomic_mem)(struct lp_build_nir_context *bld_base,
144 nir_atomic_op op,
145 unsigned bit_size,
146 bool payload,
147 LLVMValueRef index, LLVMValueRef offset,
148 LLVMValueRef val, LLVMValueRef val2,
149 LLVMValueRef *result);
150
151 void (*barrier)(struct lp_build_nir_context *bld_base);
152
153 void (*image_op)(struct lp_build_nir_context *bld_base,
154 struct lp_img_params *params);
155 void (*image_size)(struct lp_build_nir_context *bld_base,
156 struct lp_sampler_size_query_params *params);
157 LLVMValueRef (*get_ssbo_size)(struct lp_build_nir_context *bld_base,
158 LLVMValueRef index);
159
160 void (*load_const)(struct lp_build_nir_context *bld_base,
161 const nir_load_const_instr *instr,
162 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
163 void (*load_var)(struct lp_build_nir_context *bld_base,
164 nir_variable_mode deref_mode,
165 unsigned num_components,
166 unsigned bit_size,
167 nir_variable *var,
168 unsigned vertex_index,
169 LLVMValueRef indir_vertex_index,
170 unsigned const_index,
171 LLVMValueRef indir_index,
172 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
173 void (*store_var)(struct lp_build_nir_context *bld_base,
174 nir_variable_mode deref_mode,
175 unsigned num_components,
176 unsigned bit_size,
177 nir_variable *var,
178 unsigned writemask,
179 LLVMValueRef indir_vertex_index,
180 unsigned const_index,
181 LLVMValueRef indir_index,
182 LLVMValueRef dst);
183
184 LLVMValueRef (*load_reg)(struct lp_build_nir_context *bld_base,
185 struct lp_build_context *reg_bld,
186 const nir_intrinsic_instr *decl,
187 unsigned base,
188 LLVMValueRef indir_src,
189 LLVMValueRef reg_storage);
190 void (*store_reg)(struct lp_build_nir_context *bld_base,
191 struct lp_build_context *reg_bld,
192 const nir_intrinsic_instr *decl,
193 unsigned writemask,
194 unsigned base,
195 LLVMValueRef indir_src,
196 LLVMValueRef reg_storage,
197 LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS]);
198
199 void (*load_scratch)(struct lp_build_nir_context *bld_base,
200 unsigned nc, unsigned bit_size,
201 LLVMValueRef offset,
202 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
203 void (*store_scratch)(struct lp_build_nir_context *bld_base,
204 unsigned writemask, unsigned nc,
205 unsigned bit_size, LLVMValueRef offset,
206 LLVMValueRef val);
207
208 void (*emit_var_decl)(struct lp_build_nir_context *bld_base,
209 nir_variable *var);
210
211 void (*tex)(struct lp_build_nir_context *bld_base,
212 struct lp_sampler_params *params);
213
214 void (*tex_size)(struct lp_build_nir_context *bld_base,
215 struct lp_sampler_size_query_params *params);
216
217 void (*sysval_intrin)(struct lp_build_nir_context *bld_base,
218 nir_intrinsic_instr *instr,
219 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]);
220 void (*discard)(struct lp_build_nir_context *bld_base,
221 LLVMValueRef cond);
222
223 void (*bgnloop)(struct lp_build_nir_context *bld_base);
224 void (*endloop)(struct lp_build_nir_context *bld_base);
225 void (*if_cond)(struct lp_build_nir_context *bld_base, LLVMValueRef cond, bool flatten);
226 void (*else_stmt)(struct lp_build_nir_context *bld_base, bool flatten_then, bool flatten_else);
227 void (*endif_stmt)(struct lp_build_nir_context *bld_base, bool flatten);
228 void (*break_stmt)(struct lp_build_nir_context *bld_base);
229 void (*continue_stmt)(struct lp_build_nir_context *bld_base);
230
231 void (*emit_vertex)(struct lp_build_nir_context *bld_base, uint32_t stream_id);
232 void (*end_primitive)(struct lp_build_nir_context *bld_base, uint32_t stream_id);
233
234 void (*vote)(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef dst[4]);
235 void (*elect)(struct lp_build_nir_context *bld_base, LLVMValueRef dst[4]);
236 void (*reduce)(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef dst[4]);
237 void (*ballot)(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef dst[4]);
238 #if LLVM_VERSION_MAJOR >= 10
239 void (*shuffle)(struct lp_build_nir_context *bld_base,
240 LLVMValueRef src,
241 LLVMValueRef index,
242 nir_intrinsic_instr *instr,
243 LLVMValueRef dst[4]);
244 #endif
245 void (*read_invocation)(struct lp_build_nir_context *bld_base,
246 LLVMValueRef src, unsigned bit_size, LLVMValueRef invoc,
247 LLVMValueRef dst[4]);
248 void (*helper_invocation)(struct lp_build_nir_context *bld_base, LLVMValueRef *dst);
249
250 void (*clock)(struct lp_build_nir_context *bld_Base, LLVMValueRef dst[4]);
251 void (*interp_at)(struct lp_build_nir_context *bld_base,
252 unsigned num_components,
253 nir_variable *var,
254 bool centroid, bool sample,
255 unsigned const_index,
256 LLVMValueRef indir_index,
257 LLVMValueRef offsets[2], LLVMValueRef dst[4]);
258 void (*set_vertex_and_primitive_count)(struct lp_build_nir_context *bld_base,
259 LLVMValueRef vert_count,
260 LLVMValueRef prim_count);
261 void (*launch_mesh_workgroups)(struct lp_build_nir_context *bld_base,
262 LLVMValueRef launch_grid);
263
264 void (*call)(struct lp_build_nir_context *bld_base,
265 struct lp_build_fn *fn,
266 int num_args,
267 LLVMValueRef *args);
268 // LLVMValueRef main_function
269 };
270
271 struct lp_build_nir_soa_context
272 {
273 struct lp_build_nir_context bld_base;
274
275 /* Builder for scalar elements of shader's data type (float) */
276 struct lp_build_context elem_bld;
277 struct lp_build_context uint_elem_bld;
278
279 LLVMValueRef consts_ptr;
280 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];
281 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS];
282 int num_inputs;
283 LLVMTypeRef context_type;
284 LLVMValueRef context_ptr;
285 LLVMTypeRef resources_type;
286 LLVMValueRef resources_ptr;
287 LLVMTypeRef thread_data_type;
288 LLVMValueRef thread_data_ptr;
289
290 LLVMValueRef ssbo_ptr;
291
292 LLVMValueRef shared_ptr;
293 LLVMValueRef payload_ptr;
294 LLVMValueRef scratch_ptr;
295 unsigned scratch_size;
296
297 const struct lp_build_coro_suspend_info *coro;
298
299 const struct lp_build_sampler_soa *sampler;
300 const struct lp_build_image_soa *image;
301
302 const struct lp_build_gs_iface *gs_iface;
303 const struct lp_build_tcs_iface *tcs_iface;
304 const struct lp_build_tes_iface *tes_iface;
305 const struct lp_build_fs_iface *fs_iface;
306 const struct lp_build_mesh_iface *mesh_iface;
307
308 LLVMValueRef emitted_prims_vec_ptr[PIPE_MAX_VERTEX_STREAMS];
309 LLVMValueRef total_emitted_vertices_vec_ptr[PIPE_MAX_VERTEX_STREAMS];
310 LLVMValueRef emitted_vertices_vec_ptr[PIPE_MAX_VERTEX_STREAMS];
311 LLVMValueRef max_output_vertices_vec;
312 struct lp_bld_tgsi_system_values system_values;
313
314 nir_variable_mode indirects;
315 struct lp_build_mask_context *mask;
316 struct lp_exec_mask exec_mask;
317
318 /* We allocate/use this array of inputs if (indirects & nir_var_shader_in) is
319 * set. The inputs[] array above is unused then.
320 */
321 LLVMValueRef inputs_array;
322
323 LLVMValueRef kernel_args_ptr;
324 unsigned gs_vertex_streams;
325
326 LLVMTypeRef call_context_type;
327 LLVMValueRef call_context_ptr;
328 };
329
/*
 * Declaration only; defined elsewhere.
 * NOTE(review): by name, runs preparatory passes over `nir` -- confirm.
 */
void lp_build_nir_prepasses(struct nir_shader *nir);
332
333 bool
334 lp_build_nir_llvm(struct lp_build_nir_context *bld_base,
335 struct nir_shader *nir,
336 nir_function_impl *impl);
337
/*
 * Declaration only; defined elsewhere.
 * NOTE(review): by name, runs optimization passes over `nir` -- confirm.
 */
void lp_build_opt_nir(struct nir_shader *nir);
340
341
342 static inline LLVMValueRef
lp_nir_array_build_gather_values(LLVMBuilderRef builder,LLVMValueRef * values,unsigned value_count)343 lp_nir_array_build_gather_values(LLVMBuilderRef builder,
344 LLVMValueRef * values,
345 unsigned value_count)
346 {
347 LLVMTypeRef arr_type = LLVMArrayType(LLVMTypeOf(values[0]), value_count);
348 LLVMValueRef arr = LLVMGetUndef(arr_type);
349
350 for (unsigned i = 0; i < value_count; i++) {
351 arr = LLVMBuildInsertValue(builder, arr, values[i], i, "");
352 }
353 return arr;
354 }
355
356
357 static inline struct lp_build_context *
get_flt_bld(struct lp_build_nir_context * bld_base,unsigned op_bit_size)358 get_flt_bld(struct lp_build_nir_context *bld_base,
359 unsigned op_bit_size)
360 {
361 switch (op_bit_size) {
362 case 64:
363 return &bld_base->dbl_bld;
364 case 16:
365 return &bld_base->half_bld;
366 default:
367 case 32:
368 return &bld_base->base;
369 }
370 }
371
372
373 static inline struct lp_build_context *
get_int_bld(struct lp_build_nir_context * bld_base,bool is_unsigned,unsigned op_bit_size)374 get_int_bld(struct lp_build_nir_context *bld_base,
375 bool is_unsigned,
376 unsigned op_bit_size)
377 {
378 if (is_unsigned) {
379 switch (op_bit_size) {
380 case 64:
381 return &bld_base->uint64_bld;
382 case 32:
383 default:
384 return &bld_base->uint_bld;
385 case 16:
386 return &bld_base->uint16_bld;
387 case 8:
388 return &bld_base->uint8_bld;
389 }
390 } else {
391 switch (op_bit_size) {
392 case 64:
393 return &bld_base->int64_bld;
394 default:
395 case 32:
396 return &bld_base->int_bld;
397 case 16:
398 return &bld_base->int16_bld;
399 case 8:
400 return &bld_base->int8_bld;
401 }
402 }
403 }
404
405
/*
 * Declaration only; defined elsewhere.  Takes a channel number and
 * returns an unsigned value.
 * NOTE(review): presumably the swizzled channel for the AoS path --
 * confirm against the definition.
 */
unsigned lp_nir_aos_swizzle(struct lp_build_nir_context *bld_base,
                            unsigned chan);
408
409 LLVMAtomicRMWBinOp
410 lp_translate_atomic_op(nir_atomic_op op);
411
412 uint32_t
413 lp_build_nir_sample_key(gl_shader_stage stage, nir_tex_instr *instr);
414
415
416 void lp_img_op_from_intrinsic(struct lp_img_params *params, nir_intrinsic_instr *instr);
417
/**
 * Indices of the call-context arguments.  Values are consecutive starting
 * at 0; LP_NIR_CALL_CONTEXT_MAX_ARGS is one past the last real entry and
 * therefore equals the number of entries.
 */
enum lp_nir_call_context_args {
   LP_NIR_CALL_CONTEXT_CONTEXT = 0,
   LP_NIR_CALL_CONTEXT_RESOURCES,
   LP_NIR_CALL_CONTEXT_SHARED,
   LP_NIR_CALL_CONTEXT_SCRATCH,
   LP_NIR_CALL_CONTEXT_WORK_DIM,

   /* Three components each: thread id, block id, grid size, block size. */
   LP_NIR_CALL_CONTEXT_THREAD_ID_0,
   LP_NIR_CALL_CONTEXT_THREAD_ID_1,
   LP_NIR_CALL_CONTEXT_THREAD_ID_2,

   LP_NIR_CALL_CONTEXT_BLOCK_ID_0,
   LP_NIR_CALL_CONTEXT_BLOCK_ID_1,
   LP_NIR_CALL_CONTEXT_BLOCK_ID_2,

   LP_NIR_CALL_CONTEXT_GRID_SIZE_0,
   LP_NIR_CALL_CONTEXT_GRID_SIZE_1,
   LP_NIR_CALL_CONTEXT_GRID_SIZE_2,

   LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0,
   LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1,
   LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2,

   /* Count of the entries above; keep last. */
   LP_NIR_CALL_CONTEXT_MAX_ARGS,
};
438
439 LLVMTypeRef
440 lp_build_cs_func_call_context(struct gallivm_state *gallivm, int length,
441 LLVMTypeRef context_type, LLVMTypeRef resources_type);
442
443
444
445 #endif
446