/**************************************************************************
 *
 * Copyright 2019 Red Hat.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_memory.h"
#include "util/os_time.h"
#include "util/u_dump.h"
#include "util/u_string.h"
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_pack.h"
#include "gallivm/lp_bld_gather.h"
#include "gallivm/lp_bld_coro.h"
#include "gallivm/lp_bld_nir.h"
#include "gallivm/lp_bld_jit_sample.h"
#include "lp_state_cs.h"
#include "lp_context.h"
#include "lp_setup_context.h"
#include "lp_debug.h"
#include "lp_state.h"
#include "lp_perf.h"
#include "lp_screen.h"
#include "lp_memory.h"
#include "lp_query.h"
#include "lp_cs_tpool.h"
#include "frontend/sw_winsys.h"
#include "nir/nir_to_tgsi_info.h"
#include "nir/tgsi_to_nir.h"
#include "util/mesa-sha1.h"
#include "nir_serialize.h"

#include "draw/draw_context.h"
#include "draw/draw_llvm.h"
#include "draw/draw_mesh_prim.h"

/** Compute/task/mesh shader numbers (for debugging) */
static unsigned cs_no = 0;
static unsigned task_no = 0;
static unsigned mesh_no = 0;

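/**
 * Description of one compute/task/mesh dispatch job: grid/block geometry,
 * shared-memory requirement, and the mesh/task io and payload buffers with
 * their per-iteration strides.
 */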
struct lp_cs_job_info {
   unsigned grid_size[3];
   unsigned iter_size[3];
   unsigned grid_base[3];
   unsigned block_size[3];
   unsigned req_local_mem;
   unsigned work_dim;
   unsigned draw_id;
   bool zero_initialize_shared_memory;
   bool use_iters;
   struct lp_cs_exec *current;
   struct vertex_header *io;
   size_t io_stride;
   void *payload;
   size_t payload_stride;
};

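/*
 * Argument slots for the generated functions.  The outer wrapper function
 * only takes the first CS_ARG_OUTER_COUNT arguments; the per-subgroup
 * coroutine takes the full set, with the coro-only slots starting at
 * CS_ARG_CORO_SUBGROUP_COUNT, and CS_ARG_CORO_OUTPUTS only passed for mesh
 * shaders (hence the "CS_ARG_MAX - !is_mesh" argument counts below).
 */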
enum {
   CS_ARG_CONTEXT,
   CS_ARG_RESOURCES,
   CS_ARG_BLOCK_X_SIZE,
   CS_ARG_BLOCK_Y_SIZE,
   CS_ARG_BLOCK_Z_SIZE,
   CS_ARG_GRID_X,
   CS_ARG_GRID_Y,
   CS_ARG_GRID_Z,
   CS_ARG_GRID_SIZE_X,
   CS_ARG_GRID_SIZE_Y,
   CS_ARG_GRID_SIZE_Z,
   CS_ARG_WORK_DIM,
   CS_ARG_DRAW_ID,
   CS_ARG_VERTEX_DATA,
   CS_ARG_PER_THREAD_DATA,
   CS_ARG_OUTER_COUNT,
   CS_ARG_CORO_SUBGROUP_COUNT = CS_ARG_OUTER_COUNT,
   CS_ARG_CORO_PARTIALS,
   CS_ARG_CORO_BLOCK_X_SIZE,
   CS_ARG_CORO_BLOCK_Y_SIZE,
   CS_ARG_CORO_BLOCK_Z_SIZE,
   CS_ARG_CORO_IDX,
   CS_ARG_CORO_MEM,
   CS_ARG_CORO_OUTPUTS,
   CS_ARG_MAX,
};

struct lp_mesh_llvm_iface {
   struct lp_build_mesh_iface base;

   LLVMValueRef vertex_count;
   LLVMValueRef prim_count;
   LLVMValueRef outputs;
};

static inline const struct lp_mesh_llvm_iface *
lp_mesh_llvm_iface(const struct lp_build_mesh_iface *iface)
{
   return (const struct lp_mesh_llvm_iface *)iface;
}


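/*
 * LLVM type for one vertex (or primitive) worth of mesh outputs:
 * float[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS].
 */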
static LLVMTypeRef
create_mesh_jit_output_type_deref(struct gallivm_state *gallivm)
{
   LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
   LLVMTypeRef output_array;

   output_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
   output_array = LLVMArrayType(output_array, PIPE_MAX_SHADER_OUTPUTS); /* num attrs per vertex */
   return output_array;
}

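/*
 * Store one SoA output value to the mesh output array.  If any of the
 * vertex/attribute/swizzle indices is indirect (per-lane), each lane gets
 * its own GEP; otherwise a single GEP is shared by all lanes.  Either way,
 * each lane's scalar store is guarded by its execution-mask bit.
 */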
static void
lp_mesh_llvm_emit_store_output(const struct lp_build_mesh_iface *mesh_iface,
                               struct lp_build_context *bld,
                               unsigned name,
                               bool is_vindex_indirect,
                               LLVMValueRef vertex_index,
                               bool is_aindex_indirect,
                               LLVMValueRef attrib_index,
                               bool is_sindex_indirect,
                               LLVMValueRef swizzle_index,
                               LLVMValueRef value,
                               LLVMValueRef mask_vec)
{
   const struct lp_mesh_llvm_iface *mesh = lp_mesh_llvm_iface(mesh_iface);
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef indices[3];
   LLVMValueRef res;
   struct lp_type type = bld->type;
   LLVMTypeRef output_type = create_mesh_jit_output_type_deref(gallivm);

   if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
      for (int i = 0; i < type.length; ++i) {
         LLVMValueRef idx = lp_build_const_int32(gallivm, i);
         LLVMValueRef vert_chan_index = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
         LLVMValueRef attr_chan_index = attrib_index;
         LLVMValueRef swiz_chan_index = swizzle_index;
         LLVMValueRef channel_vec;

         if (is_vindex_indirect) {
            vert_chan_index = LLVMBuildExtractElement(builder,
                                                      vertex_index, idx, "");
         }
         if (is_aindex_indirect) {
            attr_chan_index = LLVMBuildExtractElement(builder,
                                                      attrib_index, idx, "");
         }

         if (is_sindex_indirect) {
            swiz_chan_index = LLVMBuildExtractElement(builder,
                                                      swizzle_index, idx, "");
         }

         indices[0] = vert_chan_index;
         indices[1] = attr_chan_index;
         indices[2] = swiz_chan_index;

         channel_vec = LLVMBuildGEP2(builder, output_type, mesh->outputs, indices, 3, "");

         res = LLVMBuildExtractElement(builder, value, idx, "");

         struct lp_build_if_state ifthen;
         LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
         cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
         lp_build_if(&ifthen, gallivm, cond);
         LLVMBuildStore(builder, res, channel_vec);
         lp_build_endif(&ifthen);
      }
   } else {
      indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
      indices[1] = attrib_index;
      indices[2] = swizzle_index;

      res = LLVMBuildGEP2(builder, output_type, mesh->outputs, indices, 3, "");
      for (unsigned i = 0; i < type.length; ++i) {
         LLVMValueRef idx = lp_build_const_int32(gallivm, i);
         LLVMValueRef val = LLVMBuildExtractElement(builder, value, idx, "");

         struct lp_build_if_state ifthen;
         LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
         cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
         lp_build_if(&ifthen, gallivm, cond);
         LLVMBuildStore(builder, val, res);
         lp_build_endif(&ifthen);
      }
   }
}

static void
lp_mesh_emit_vertex_and_primitive_count(const struct lp_build_mesh_iface *mesh_iface,
                                        struct lp_build_context *bld,
                                        LLVMValueRef vertices_count,
                                        LLVMValueRef primitives_count)
{
   const struct lp_mesh_llvm_iface *mesh = lp_mesh_llvm_iface(mesh_iface);
   struct gallivm_state *gallivm = bld->gallivm;

   LLVMBuildStore(gallivm->builder, vertices_count, mesh->vertex_count);
   LLVMBuildStore(gallivm->builder, primitives_count, mesh->prim_count);
}

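/*
 * Convert the mesh shader's SoA output array into the AoS layout the draw
 * module consumes, for one vertex (vert_only) or one primitive (!vert_only)
 * per call.  Per-primitive attributes are rebased so that the first
 * per-primitive slot lands at AoS attribute 0.
 */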
static void
mesh_convert_to_aos(struct gallivm_state *gallivm,
                    nir_shader *nir,
                    bool vert_only,
                    LLVMTypeRef io_type,
                    LLVMValueRef io,
                    LLVMValueRef outputs,
                    LLVMValueRef clipmask,
                    LLVMValueRef vertex_index,
                    struct lp_type soa_type,
                    int primid_slot,
                    bool need_edgeflag)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef inds[3];
   LLVMTypeRef output_type = create_mesh_jit_output_type_deref(gallivm);
#if DEBUG_STORE
   lp_build_printf(gallivm, " # storing begin\n");
#endif
   int first_per_prim_attrib = -1;
   nir_foreach_shader_out_variable(var, nir) {
      if (var->data.per_primitive) {
         first_per_prim_attrib = var->data.driver_location;
         break;
      }
   }
   nir_foreach_shader_out_variable(var, nir) {

      if (vert_only && var->data.per_primitive)
         continue;
      if (!vert_only && !var->data.per_primitive)
         continue;
      int attrib = var->data.driver_location;
      int slots = glsl_count_attribute_slots(glsl_get_array_element(var->type), false);

      for (unsigned s = 0; s < slots; s++) {
         LLVMValueRef soa[TGSI_NUM_CHANNELS];
         LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32];
         for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            inds[0] = vertex_index;
            inds[1] = lp_build_const_int32(gallivm, attrib);
            inds[2] = lp_build_const_int32(gallivm, chan);

            LLVMValueRef res = LLVMBuildGEP2(builder, output_type, outputs, inds, 3, "");
            LLVMTypeRef single_type = (attrib == primid_slot) ? lp_build_int_elem_type(gallivm, soa_type) : lp_build_elem_type(gallivm, soa_type);
            LLVMValueRef out = LLVMBuildLoad2(builder, single_type, res, "");
            lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
#if DEBUG_STORE
            lp_build_printf(gallivm, "output %d : %d ",
                            LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
                                         attrib, 0),
                            LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
                                         chan, 0));
            lp_build_print_value(gallivm, "val = ", out);
            {
               LLVMValueRef iv =
                  LLVMBuildBitCast(builder, out, lp_build_int_elem_type(gallivm, soa_type), "");

               lp_build_print_value(gallivm, " ival = ", iv);
            }
#endif
            soa[chan] = out;
         }
         LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
         aos[0] = LLVMGetUndef(LLVMVectorType(float_type, 4));
         for (unsigned i = 0; i < 4; i++)
            aos[0] = LLVMBuildInsertElement(builder, aos[0], soa[i], lp_build_const_int32(gallivm, i), "");
         int aos_attrib = attrib;
         if (var->data.per_primitive)
            aos_attrib -= first_per_prim_attrib;
         draw_store_aos_array(gallivm,
                              soa_type,
                              io_type,
                              io,
                              NULL,
                              aos,
                              aos_attrib,
                              clipmask,
                              need_edgeflag, var->data.per_primitive);
         attrib++;
      }
   }
#if DEBUG_STORE
   lp_build_printf(gallivm, " # storing end\n");
#endif
}

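/*
 * Generate the two LLVM functions for a variant: an outer wrapper that
 * drives one coroutine per subgroup, and the coroutine containing the
 * actual shader body (stages (a) and (b) in the comment below).
 */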
static void
generate_compute(struct llvmpipe_context *lp,
                 struct lp_compute_shader *shader,
                 struct lp_compute_shader_variant *variant)
{
   struct gallivm_state *gallivm = variant->gallivm;
   struct nir_shader *nir = shader->base.ir.nir;
   const struct lp_compute_shader_variant_key *key = &variant->key;
   char func_name[64], func_name_coro[64];
   LLVMTypeRef arg_types[CS_ARG_MAX];
   LLVMTypeRef func_type, coro_func_type;
   LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef context_ptr, resources_ptr;
   LLVMValueRef block_x_size_arg, block_y_size_arg, block_z_size_arg;
   LLVMValueRef grid_x_arg, grid_y_arg, grid_z_arg;
   LLVMValueRef grid_size_x_arg, grid_size_y_arg, grid_size_z_arg;
   LLVMValueRef work_dim_arg, draw_id_arg, thread_data_ptr, io_ptr;
   LLVMBasicBlockRef block;
   LLVMBuilderRef builder;
   struct lp_build_sampler_soa *sampler;
   struct lp_build_image_soa *image;
   LLVMValueRef function, coro;
   struct lp_type cs_type;
   struct lp_mesh_llvm_iface mesh_iface;
   bool is_mesh = nir->info.stage == MESA_SHADER_MESH;
   unsigned i;

   LLVMValueRef output_array = NULL;

   /*
    * This function has two parts:
    * a) set up the coroutine execution environment loop.
    * b) build the compute shader LLVM IR for use inside the coroutine.
    */
   assert(lp_native_vector_width / 32 >= 4);

   memset(&cs_type, 0, sizeof cs_type);
   cs_type.floating = true;      /* floating point values */
   cs_type.sign = true;          /* values are signed */
   cs_type.norm = false;         /* values are not limited to [0,1] or [-1,1] */
   cs_type.width = 32;           /* 32-bit float */
   cs_type.length = MIN2(lp_native_vector_width / 32, 16); /* n*4 elements per vector */
   snprintf(func_name, sizeof(func_name), "cs_variant");

   snprintf(func_name_coro, sizeof(func_name_coro), "cs_co_variant");

   arg_types[CS_ARG_CONTEXT] = variant->jit_cs_context_ptr_type;       /* context */
   arg_types[CS_ARG_RESOURCES] = variant->jit_resources_ptr_type;
   arg_types[CS_ARG_BLOCK_X_SIZE] = int32_type;                        /* block_x_size */
   arg_types[CS_ARG_BLOCK_Y_SIZE] = int32_type;                        /* block_y_size */
   arg_types[CS_ARG_BLOCK_Z_SIZE] = int32_type;                        /* block_z_size */
   arg_types[CS_ARG_GRID_X] = int32_type;                              /* grid_x */
   arg_types[CS_ARG_GRID_Y] = int32_type;                              /* grid_y */
   arg_types[CS_ARG_GRID_Z] = int32_type;                              /* grid_z */
   arg_types[CS_ARG_GRID_SIZE_X] = int32_type;                         /* grid_size_x */
   arg_types[CS_ARG_GRID_SIZE_Y] = int32_type;                         /* grid_size_y */
   arg_types[CS_ARG_GRID_SIZE_Z] = int32_type;                         /* grid_size_z */
   arg_types[CS_ARG_WORK_DIM] = int32_type;                            /* work dim */
   arg_types[CS_ARG_DRAW_ID] = int32_type;                             /* draw id */
   if (variant->jit_vertex_header_ptr_type)
      arg_types[CS_ARG_VERTEX_DATA] = variant->jit_vertex_header_ptr_type;  /* mesh shaders only */
   else
      arg_types[CS_ARG_VERTEX_DATA] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);  /* mesh shaders only */
   arg_types[CS_ARG_PER_THREAD_DATA] = variant->jit_cs_thread_data_ptr_type;  /* per thread data */
   arg_types[CS_ARG_CORO_SUBGROUP_COUNT] = int32_type;                 /* coro only - subgroup count */
   arg_types[CS_ARG_CORO_PARTIALS] = int32_type;                       /* coro only - partials */
   arg_types[CS_ARG_CORO_BLOCK_X_SIZE] = int32_type;                   /* coro block_x_size */
   arg_types[CS_ARG_CORO_BLOCK_Y_SIZE] = int32_type;                   /* coro block_y_size */
   arg_types[CS_ARG_CORO_BLOCK_Z_SIZE] = int32_type;                   /* coro block_z_size */
   arg_types[CS_ARG_CORO_IDX] = int32_type;                            /* coro idx */
   arg_types[CS_ARG_CORO_MEM] = LLVMPointerType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), 0);
   arg_types[CS_ARG_CORO_OUTPUTS] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);  /* mesh shaders only */

   func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
                                arg_types, CS_ARG_OUTER_COUNT, 0);

   coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0),
                                     arg_types, CS_ARG_MAX - (!is_mesh), 0);

   function = LLVMAddFunction(gallivm->module, func_name, func_type);
   LLVMSetFunctionCallConv(function, LLVMCCallConv);

   coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type);
   LLVMSetFunctionCallConv(coro, LLVMCCallConv);
   lp_build_coro_add_presplit(coro);

   variant->function = function;
   variant->function_name = MALLOC(strlen(func_name) + 1);
   strcpy(variant->function_name, func_name);


   for (i = 0; i < CS_ARG_MAX - !is_mesh; ++i) {
      if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
         lp_add_function_attr(coro, i + 1, LP_FUNC_ATTR_NOALIAS);
         if (i < CS_ARG_OUTER_COUNT)
            lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
      }
   }

   if (variant->gallivm->cache->data_size) {
      gallivm_stub_func(gallivm, function);
      gallivm_stub_func(gallivm, coro);
      return;
   }

   context_ptr = LLVMGetParam(function, CS_ARG_CONTEXT);
   resources_ptr = LLVMGetParam(function, CS_ARG_RESOURCES);
   block_x_size_arg = LLVMGetParam(function, CS_ARG_BLOCK_X_SIZE);
   block_y_size_arg = LLVMGetParam(function, CS_ARG_BLOCK_Y_SIZE);
   block_z_size_arg = LLVMGetParam(function, CS_ARG_BLOCK_Z_SIZE);
   grid_x_arg = LLVMGetParam(function, CS_ARG_GRID_X);
   grid_y_arg = LLVMGetParam(function, CS_ARG_GRID_Y);
   grid_z_arg = LLVMGetParam(function, CS_ARG_GRID_Z);
   grid_size_x_arg = LLVMGetParam(function, CS_ARG_GRID_SIZE_X);
   grid_size_y_arg = LLVMGetParam(function, CS_ARG_GRID_SIZE_Y);
   grid_size_z_arg = LLVMGetParam(function, CS_ARG_GRID_SIZE_Z);
   work_dim_arg = LLVMGetParam(function, CS_ARG_WORK_DIM);
   draw_id_arg = LLVMGetParam(function, CS_ARG_DRAW_ID);
   io_ptr = LLVMGetParam(function, CS_ARG_VERTEX_DATA);
   thread_data_ptr = LLVMGetParam(function, CS_ARG_PER_THREAD_DATA);

   lp_build_name(context_ptr, "context");
   lp_build_name(resources_ptr, "resources");
   lp_build_name(block_x_size_arg, "x_size");
   lp_build_name(block_y_size_arg, "y_size");
   lp_build_name(block_z_size_arg, "z_size");
   lp_build_name(grid_x_arg, "grid_x");
   lp_build_name(grid_y_arg, "grid_y");
   lp_build_name(grid_z_arg, "grid_z");
   lp_build_name(grid_size_x_arg, "grid_size_x");
   lp_build_name(grid_size_y_arg, "grid_size_y");
   lp_build_name(grid_size_z_arg, "grid_size_z");
   lp_build_name(work_dim_arg, "work_dim");
   lp_build_name(draw_id_arg, "draw_id");
   lp_build_name(thread_data_ptr, "thread_data");
   lp_build_name(io_ptr, "vertex_io");

   lp_build_nir_prepasses(nir);
   struct hash_table *fns = _mesa_pointer_hash_table_create(NULL);

   sampler = lp_llvm_sampler_soa_create(lp_cs_variant_key_samplers(key),
                                        MAX2(key->nr_samplers,
                                             key->nr_sampler_views));
   image = lp_bld_llvm_image_soa_create(lp_cs_variant_key_images(key), key->nr_images);

   if (exec_list_length(&nir->functions) > 1) {
      LLVMTypeRef call_context_type = lp_build_cs_func_call_context(gallivm, cs_type.length,
                                                                    variant->jit_cs_context_type,
                                                                    variant->jit_resources_type);
      nir_foreach_function(func, nir) {
         if (func->is_entrypoint)
            continue;

         LLVMTypeRef args[32];
         int num_args;

         num_args = func->num_params + LP_RESV_FUNC_ARGS;

         args[0] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), cs_type.length); /* mask */
         args[1] = LLVMPointerType(call_context_type, 0);
         for (int i = 0; i < func->num_params; i++) {
            args[i + LP_RESV_FUNC_ARGS] = LLVMVectorType(LLVMIntTypeInContext(gallivm->context, func->params[i].bit_size), cs_type.length);
            if (func->params[i].num_components > 1)
               args[i + LP_RESV_FUNC_ARGS] = LLVMArrayType(args[i + LP_RESV_FUNC_ARGS], func->params[i].num_components);
         }

         LLVMTypeRef func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
                                                  args, num_args, 0);
         LLVMValueRef lfunc = LLVMAddFunction(gallivm->module, func->name, func_type);
         LLVMSetFunctionCallConv(lfunc, LLVMCCallConv);

         struct lp_build_fn *new_fn = ralloc(fns, struct lp_build_fn);
         new_fn->fn_type = func_type;
         new_fn->fn = lfunc;
         _mesa_hash_table_insert(fns, func, new_fn);
      }

      nir_foreach_function(func, nir) {
         if (func->is_entrypoint)
            continue;

         struct hash_entry *entry = _mesa_hash_table_search(fns, func);
         assert(entry);
         struct lp_build_fn *new_fn = entry->data;
         LLVMValueRef lfunc = new_fn->fn;
         block = LLVMAppendBasicBlockInContext(gallivm->context, lfunc, "entry");

         builder = gallivm->builder;
         LLVMPositionBuilderAtEnd(builder, block);
         LLVMValueRef mask_param = LLVMGetParam(lfunc, 0);
         LLVMValueRef call_context_ptr = LLVMGetParam(lfunc, 1);
         LLVMValueRef call_context = LLVMBuildLoad2(builder, call_context_type, call_context_ptr, "");
         struct lp_build_mask_context mask;
         struct lp_bld_tgsi_system_values system_values;

         memset(&system_values, 0, sizeof(system_values));

         lp_build_mask_begin(&mask, gallivm, cs_type, mask_param);
         lp_build_mask_check(&mask);

         struct lp_build_tgsi_params params;
         memset(&params, 0, sizeof(params));
         params.type = cs_type;
         params.mask = &mask;
         params.fns = fns;
         params.current_func = lfunc;
         params.context_type = variant->jit_cs_context_type;
         params.resources_type = variant->jit_resources_type;
         params.call_context_ptr = call_context_ptr;
         params.context_ptr = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_CONTEXT, "");
         params.resources_ptr = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_RESOURCES, "");
         params.shared_ptr = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_SHARED, "");
         params.scratch_ptr = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_SCRATCH, "");
         system_values.work_dim = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_WORK_DIM, "");
         system_values.thread_id[0] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_THREAD_ID_0, "");
         system_values.thread_id[1] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_THREAD_ID_1, "");
         system_values.thread_id[2] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_THREAD_ID_2, "");
         system_values.block_id[0] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_BLOCK_ID_0, "");
         system_values.block_id[1] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_BLOCK_ID_1, "");
         system_values.block_id[2] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_BLOCK_ID_2, "");
         system_values.grid_size[0] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_GRID_SIZE_0, "");
         system_values.grid_size[1] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_GRID_SIZE_1, "");
         system_values.grid_size[2] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_GRID_SIZE_2, "");
         system_values.block_size[0] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0, "");
         system_values.block_size[1] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1, "");
         system_values.block_size[2] = LLVMBuildExtractValue(builder, call_context, LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2, "");

         params.system_values = &system_values;

         params.consts_ptr = lp_jit_resources_constants(gallivm,
                                                        variant->jit_resources_type,
                                                        params.resources_ptr);
         params.sampler = sampler;
         params.ssbo_ptr = lp_jit_resources_ssbos(gallivm,
                                                  variant->jit_resources_type,
                                                  params.resources_ptr);
         params.image = image;
         params.aniso_filter_table = lp_jit_resources_aniso_filter_table(gallivm,
                                                                         variant->jit_resources_type,
                                                                         params.resources_ptr);

         lp_build_nir_soa_func(gallivm, shader->base.ir.nir,
                               func->impl,
                               &params,
                               NULL);

         lp_build_mask_end(&mask);

         LLVMBuildRetVoid(builder);
         gallivm_verify_function(gallivm, lfunc);
      }
   }

   block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
   builder = gallivm->builder;
   assert(builder);
   LLVMPositionBuilderAtEnd(builder, block);

   if (is_mesh) {
      LLVMTypeRef output_type = create_mesh_jit_output_type_deref(gallivm);
      output_array = lp_build_array_alloca(gallivm, output_type, lp_build_const_int32(gallivm, align(MAX2(nir->info.mesh.max_primitives_out, nir->info.mesh.max_vertices_out), 8)), "outputs");
   }

   struct lp_build_loop_state loop_state[2];

   LLVMValueRef vec_length = lp_build_const_int32(gallivm, cs_type.length);

   LLVMValueRef invocation_count = LLVMBuildMul(gallivm->builder, block_x_size_arg, block_y_size_arg, "");
   invocation_count = LLVMBuildMul(gallivm->builder, invocation_count, block_z_size_arg, "");

   LLVMValueRef partials = LLVMBuildURem(gallivm->builder, invocation_count, vec_length, "");

   LLVMValueRef num_subgroup_loop = LLVMBuildAdd(gallivm->builder, invocation_count, lp_build_const_int32(gallivm, cs_type.length - 1), "");
   num_subgroup_loop = LLVMBuildUDiv(gallivm->builder, num_subgroup_loop, vec_length, "");
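
   /*
    * So num_subgroup_loop = ceil(invocation_count / vec_length) and partials
    * is the live-lane count of a trailing partial subgroup: e.g. a 5x5x1
    * block with 8-wide vectors gives ceil(25/8) = 4 subgroups, partials = 1.
    */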

   /* Build a pointer slot in memory to store all the coroutine frames in later. */
   LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
   LLVMValueRef coro_mem = LLVMBuildAlloca(gallivm->builder, hdl_ptr_type, "coro_mem");
   LLVMBuildStore(builder, LLVMConstNull(hdl_ptr_type), coro_mem);

   LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_subgroup_loop, "coro_hdls");

   unsigned end_coroutine = INT_MAX;
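   /*
    * Sentinel for the reentry loop: when a coroutine is seen to be complete,
    * the outer counter is forced to end_coroutine - 1 so the loop's
    * "== end_coroutine" exit condition fires after that pass.
    */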

   /*
    * This is the main coroutine execution loop. It iterates over the dimensions
    * and calls the coroutine main entrypoint on the first pass, but in subsequent
    * passes it checks if the coroutine has completed and resumes it if not.
    */
   lp_build_loop_begin(&loop_state[1], gallivm,
                       lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
   lp_build_loop_begin(&loop_state[0], gallivm,
                       lp_build_const_int32(gallivm, 0)); /* subgroup loop */
   {
      LLVMValueRef args[CS_ARG_MAX];
      args[CS_ARG_CONTEXT] = context_ptr;
      args[CS_ARG_RESOURCES] = resources_ptr;
      args[CS_ARG_BLOCK_X_SIZE] = LLVMGetUndef(int32_type);
      args[CS_ARG_BLOCK_Y_SIZE] = LLVMGetUndef(int32_type);
      args[CS_ARG_BLOCK_Z_SIZE] = LLVMGetUndef(int32_type);
      args[CS_ARG_GRID_X] = grid_x_arg;
      args[CS_ARG_GRID_Y] = grid_y_arg;
      args[CS_ARG_GRID_Z] = grid_z_arg;
      args[CS_ARG_GRID_SIZE_X] = grid_size_x_arg;
      args[CS_ARG_GRID_SIZE_Y] = grid_size_y_arg;
      args[CS_ARG_GRID_SIZE_Z] = grid_size_z_arg;
      args[CS_ARG_WORK_DIM] = work_dim_arg;
      args[CS_ARG_DRAW_ID] = draw_id_arg;
      args[CS_ARG_VERTEX_DATA] = io_ptr;
      args[CS_ARG_PER_THREAD_DATA] = thread_data_ptr;
      args[CS_ARG_CORO_SUBGROUP_COUNT] = num_subgroup_loop;
      args[CS_ARG_CORO_PARTIALS] = partials;
      args[CS_ARG_CORO_BLOCK_X_SIZE] = block_x_size_arg;
      args[CS_ARG_CORO_BLOCK_Y_SIZE] = block_y_size_arg;
      args[CS_ARG_CORO_BLOCK_Z_SIZE] = block_z_size_arg;

      args[CS_ARG_CORO_IDX] = loop_state[0].counter;

      args[CS_ARG_CORO_MEM] = coro_mem;

      if (is_mesh)
         args[CS_ARG_CORO_OUTPUTS] = output_array;

      LLVMValueRef coro_entry = LLVMBuildGEP2(gallivm->builder, hdl_ptr_type, coro_hdls, &loop_state[0].counter, 1, "");

      LLVMValueRef coro_hdl = LLVMBuildLoad2(gallivm->builder, hdl_ptr_type, coro_entry, "coro_hdl");

      struct lp_build_if_state ifstate;
      LLVMValueRef cmp = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, loop_state[1].counter,
                                       lp_build_const_int32(gallivm, 0), "");
      /* first time here - call the coroutine function entry point */
      lp_build_if(&ifstate, gallivm, cmp);
      LLVMValueRef coro_ret = LLVMBuildCall2(gallivm->builder, coro_func_type, coro, args, CS_ARG_MAX - !is_mesh, "");
      LLVMBuildStore(gallivm->builder, coro_ret, coro_entry);
      lp_build_else(&ifstate);
      /* subsequent calls for this invocation - check if done. */
      LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl);
      struct lp_build_if_state ifstate2;
      lp_build_if(&ifstate2, gallivm, coro_done);
      /* if done destroy and force loop exit */
      lp_build_coro_destroy(gallivm, coro_hdl);
      lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1));
      lp_build_else(&ifstate2);
      /* otherwise resume the coroutine */
      lp_build_coro_resume(gallivm, coro_hdl);
      lp_build_endif(&ifstate2);
      lp_build_endif(&ifstate);
      lp_build_loop_force_reload_counter(&loop_state[1]);
   }
   lp_build_loop_end_cond(&loop_state[0],
                          num_subgroup_loop,
                          NULL, LLVMIntUGE);
   lp_build_loop_end_cond(&loop_state[1],
                          lp_build_const_int32(gallivm, end_coroutine),
                          NULL, LLVMIntEQ);

   LLVMValueRef coro_mem_ptr = LLVMBuildLoad2(builder, hdl_ptr_type, coro_mem, "");
   LLVMTypeRef mem_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
   LLVMTypeRef free_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), &mem_ptr_type, 1, 0);
   LLVMBuildCall2(gallivm->builder, free_type, gallivm->coro_free_hook, &coro_mem_ptr, 1, "");

   LLVMBuildRetVoid(builder);

   /* This is stage (b) - generate the compute shader code inside the coroutine. */
   context_ptr = LLVMGetParam(coro, CS_ARG_CONTEXT);
   resources_ptr = LLVMGetParam(coro, CS_ARG_RESOURCES);
   grid_x_arg = LLVMGetParam(coro, CS_ARG_GRID_X);
   grid_y_arg = LLVMGetParam(coro, CS_ARG_GRID_Y);
   grid_z_arg = LLVMGetParam(coro, CS_ARG_GRID_Z);
   grid_size_x_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_X);
   grid_size_y_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_Y);
   grid_size_z_arg = LLVMGetParam(coro, CS_ARG_GRID_SIZE_Z);
   work_dim_arg = LLVMGetParam(coro, CS_ARG_WORK_DIM);
   draw_id_arg = LLVMGetParam(coro, CS_ARG_DRAW_ID);
   io_ptr = LLVMGetParam(coro, CS_ARG_VERTEX_DATA);
   thread_data_ptr = LLVMGetParam(coro, CS_ARG_PER_THREAD_DATA);
   num_subgroup_loop = LLVMGetParam(coro, CS_ARG_CORO_SUBGROUP_COUNT);
   partials = LLVMGetParam(coro, CS_ARG_CORO_PARTIALS);
   block_x_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_X_SIZE);
   block_y_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_Y_SIZE);
   block_z_size_arg = LLVMGetParam(coro, CS_ARG_CORO_BLOCK_Z_SIZE);
   LLVMValueRef subgroup_id = LLVMGetParam(coro, CS_ARG_CORO_IDX);
   coro_mem = LLVMGetParam(coro, CS_ARG_CORO_MEM);
   if (is_mesh)
      output_array = LLVMGetParam(coro, CS_ARG_CORO_OUTPUTS);
   block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "entry");
   LLVMPositionBuilderAtEnd(builder, block);
   {
      LLVMValueRef consts_ptr;
      LLVMValueRef ssbo_ptr;
      LLVMValueRef shared_ptr;
      LLVMValueRef payload_ptr;
      LLVMValueRef kernel_args_ptr;
      struct lp_build_mask_context mask;
      struct lp_bld_tgsi_system_values system_values;

      memset(&system_values, 0, sizeof(system_values));
      consts_ptr = lp_jit_resources_constants(gallivm, variant->jit_resources_type, resources_ptr);
      ssbo_ptr = lp_jit_resources_ssbos(gallivm, variant->jit_resources_type, resources_ptr);
      kernel_args_ptr = lp_jit_cs_context_kernel_args(gallivm,
                                                      variant->jit_cs_context_type,
                                                      context_ptr);

      shared_ptr = lp_jit_cs_thread_data_shared(gallivm,
                                                variant->jit_cs_thread_data_type,
                                                thread_data_ptr);
      payload_ptr = lp_jit_cs_thread_data_payload(gallivm,
                                                  variant->jit_cs_thread_data_type,
                                                  thread_data_ptr);

      /* these are coroutine entrypoint necessities */
      LLVMValueRef coro_id = lp_build_coro_id(gallivm);
      LLVMValueRef coro_entry = lp_build_coro_alloc_mem_array(gallivm, coro_mem, subgroup_id, num_subgroup_loop);
      LLVMTypeRef mem_ptr_type = LLVMInt8TypeInContext(gallivm->context);
      LLVMValueRef alloced_ptr = LLVMBuildLoad2(gallivm->builder, hdl_ptr_type, coro_mem, "");
      alloced_ptr = LLVMBuildGEP2(gallivm->builder, mem_ptr_type, alloced_ptr, &coro_entry, 1, "");
      LLVMValueRef coro_hdl = lp_build_coro_begin(gallivm, coro_id, alloced_ptr);
      LLVMValueRef has_partials = LLVMBuildICmp(gallivm->builder, LLVMIntNE, partials, lp_build_const_int32(gallivm, 0), "");

      struct lp_build_context bld;
      lp_build_context_init(&bld, gallivm, lp_uint_type(cs_type));

      LLVMValueRef base_val = LLVMBuildMul(gallivm->builder, subgroup_id, vec_length, "");
      LLVMValueRef invocation_indices[LP_MAX_VECTOR_LENGTH];
      for (i = 0; i < cs_type.length; i++)
         invocation_indices[i] = LLVMBuildAdd(gallivm->builder, base_val, lp_build_const_int32(gallivm, i), "");
      LLVMValueRef invocation_index = lp_build_gather_values(gallivm, invocation_indices, cs_type.length);

      LLVMValueRef block_x_size_vec = lp_build_broadcast_scalar(&bld, block_x_size_arg);
      LLVMValueRef block_y_size_vec = lp_build_broadcast_scalar(&bld, block_y_size_arg);

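      /*
       * Decompose the linear invocation index into 3D local thread ids:
       * x = i % bx, y = (i / bx) % by, z = (i / bx) / by.
       */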
      system_values.thread_id[0] = LLVMBuildURem(gallivm->builder, invocation_index, block_x_size_vec, "");
      system_values.thread_id[1] = LLVMBuildUDiv(gallivm->builder, invocation_index, block_x_size_vec, "");
      system_values.thread_id[1] = LLVMBuildURem(gallivm->builder, system_values.thread_id[1], block_y_size_vec, "");
      system_values.thread_id[2] = LLVMBuildUDiv(gallivm->builder, invocation_index, block_x_size_vec, "");
      system_values.thread_id[2] = LLVMBuildUDiv(gallivm->builder, system_values.thread_id[2], block_y_size_vec, "");

      system_values.block_id[0] = grid_x_arg;
      system_values.block_id[1] = grid_y_arg;
      system_values.block_id[2] = grid_z_arg;

      system_values.grid_size[0] = grid_size_x_arg;
      system_values.grid_size[1] = grid_size_y_arg;
      system_values.grid_size[2] = grid_size_z_arg;

      system_values.work_dim = work_dim_arg;
      system_values.draw_id = draw_id_arg;

      system_values.subgroup_id = subgroup_id;
      system_values.num_subgroups = num_subgroup_loop;

      system_values.block_size[0] = block_x_size_arg;
      system_values.block_size[1] = block_y_size_arg;
      system_values.block_size[2] = block_z_size_arg;

      LLVMValueRef last_loop = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, subgroup_id, LLVMBuildSub(gallivm->builder, num_subgroup_loop, lp_build_const_int32(gallivm, 1), ""), "");
      LLVMValueRef use_partial_mask = LLVMBuildAnd(gallivm->builder, last_loop, has_partials, "");
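      /*
       * Only the last subgroup of a block can be partial; when it is, the
       * loop below zeroes the execution-mask lanes from 'partials' upward so
       * the trailing invocations stay inactive.
       */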
      struct lp_build_if_state if_state;
      LLVMTypeRef mask_type = LLVMVectorType(int32_type, cs_type.length);
      LLVMValueRef mask_val = lp_build_alloca(gallivm, mask_type, "mask");
      LLVMValueRef full_mask_val = lp_build_const_int_vec(gallivm, cs_type, ~0);
      LLVMBuildStore(gallivm->builder, full_mask_val, mask_val);

      lp_build_if(&if_state, gallivm, use_partial_mask);
      struct lp_build_loop_state mask_loop_state;
      lp_build_loop_begin(&mask_loop_state, gallivm, partials);
      LLVMValueRef tmask_val = LLVMBuildLoad2(gallivm->builder, mask_type, mask_val, "");
      tmask_val = LLVMBuildInsertElement(gallivm->builder, tmask_val, lp_build_const_int32(gallivm, 0), mask_loop_state.counter, "");
      LLVMBuildStore(gallivm->builder, tmask_val, mask_val);
      lp_build_loop_end_cond(&mask_loop_state, vec_length, NULL, LLVMIntUGE);
      lp_build_endif(&if_state);

      mask_val = LLVMBuildLoad2(gallivm->builder, mask_type, mask_val, "");
      lp_build_mask_begin(&mask, gallivm, cs_type, mask_val);

      struct lp_build_coro_suspend_info coro_info;

      LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "suspend");
      LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, coro, "cleanup");

      coro_info.suspend = sus_block;
      coro_info.cleanup = clean_block;

      if (is_mesh) {
         LLVMValueRef vertex_count = lp_build_alloca(gallivm, LLVMInt32TypeInContext(gallivm->context), "vertex_count");
         LLVMValueRef primitive_count = lp_build_alloca(gallivm, LLVMInt32TypeInContext(gallivm->context), "prim_count");
         mesh_iface.base.emit_store_output = lp_mesh_llvm_emit_store_output;
         mesh_iface.base.emit_vertex_and_primitive_count = lp_mesh_emit_vertex_and_primitive_count;
         mesh_iface.vertex_count = vertex_count;
         mesh_iface.prim_count = primitive_count;
         mesh_iface.outputs = output_array;
      }

      struct lp_build_tgsi_params params;
      memset(&params, 0, sizeof(params));

      params.type = cs_type;
      params.mask = &mask;
      params.consts_ptr = consts_ptr;
      params.system_values = &system_values;
      params.context_type = variant->jit_cs_context_type;
      params.context_ptr = context_ptr;
      params.resources_type = variant->jit_resources_type;
      params.resources_ptr = resources_ptr;
      params.sampler = sampler;
      params.ssbo_ptr = ssbo_ptr;
      params.image = image;
      params.shared_ptr = shared_ptr;
      params.payload_ptr = payload_ptr;
      params.coro = &coro_info;
      params.kernel_args = kernel_args_ptr;
      params.aniso_filter_table = lp_jit_resources_aniso_filter_table(gallivm,
                                                                      variant->jit_resources_type,
                                                                      resources_ptr);
      params.mesh_iface = &mesh_iface.base;

      params.current_func = NULL;
      params.fns = fns;
      lp_build_nir_soa_func(gallivm, nir,
                            nir_shader_get_entrypoint(nir),
                            &params, NULL);

      if (is_mesh) {
         LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
         LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
                                                        lp_int_type(cs_type), 0);

         struct lp_build_if_state iter0state;
         LLVMValueRef is_iter0 = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, subgroup_id,
                                               lp_build_const_int32(gallivm, 0), "");
         LLVMValueRef vertex_count = LLVMBuildLoad2(gallivm->builder, i32t, mesh_iface.vertex_count, "");
         LLVMValueRef prim_count = LLVMBuildLoad2(gallivm->builder, i32t, mesh_iface.prim_count, "");

         LLVMValueRef vert_count_ptr, prim_count_ptr;
         LLVMValueRef indices = lp_build_const_int32(gallivm, 1);
         vert_count_ptr = LLVMBuildGEP2(gallivm->builder, i32t, io_ptr, &indices, 1, "");
         indices = lp_build_const_int32(gallivm, 2);
         prim_count_ptr = LLVMBuildGEP2(gallivm->builder, i32t, io_ptr, &indices, 1, "");

         lp_build_if(&iter0state, gallivm, is_iter0);
         LLVMBuildStore(gallivm->builder, vertex_count, vert_count_ptr);
         LLVMBuildStore(gallivm->builder, prim_count, prim_count_ptr);
         lp_build_endif(&iter0state);

         LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");

         lp_build_coro_suspend_switch(gallivm, params.coro, resume, false);
         LLVMPositionBuilderAtEnd(gallivm->builder, resume);

         vertex_count = LLVMBuildLoad2(gallivm->builder, i32t, vert_count_ptr, "");
         prim_count = LLVMBuildLoad2(gallivm->builder, i32t, prim_count_ptr, "");

         int per_prim_count = util_bitcount64(nir->info.per_primitive_outputs);
         int out_count = util_bitcount64(nir->info.outputs_written);
         int per_vert_count = out_count - per_prim_count;
         int vsize = (sizeof(struct vertex_header) + per_vert_count * 4 * sizeof(float)) * 8;
         int psize = (per_prim_count * 4 * sizeof(float)) * 8;
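         /*
          * vsize/psize are the per-vertex and per-primitive record sizes used
          * to step through the io buffer below; primitive records start after
          * the (max_vertices_out + 8) vertex slots.
          */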
         struct lp_build_loop_state vertex_loop_state;

         lp_build_loop_begin(&vertex_loop_state, gallivm,
                             lp_build_const_int32(gallivm, 0));
         LLVMValueRef io;
         io = LLVMBuildPtrToInt(gallivm->builder, io_ptr, LLVMInt64TypeInContext(gallivm->context), "");
         io = LLVMBuildAdd(builder, io, LLVMBuildZExt(builder, LLVMBuildMul(builder, vertex_loop_state.counter, lp_build_const_int32(gallivm, vsize), ""), LLVMInt64TypeInContext(gallivm->context), ""), "");
         io = LLVMBuildIntToPtr(gallivm->builder, io, LLVMPointerType(LLVMVoidTypeInContext(gallivm->context), 0), "");
         mesh_convert_to_aos(gallivm, shader->base.ir.nir, true, variant->jit_vertex_header_type,
                             io, output_array, clipmask,
                             vertex_loop_state.counter, lp_elem_type(cs_type), -1, false);
         lp_build_loop_end_cond(&vertex_loop_state,
                                vertex_count,
                                NULL, LLVMIntUGE);

         struct lp_build_loop_state prim_loop_state;
         lp_build_loop_begin(&prim_loop_state, gallivm,
                             lp_build_const_int32(gallivm, 0));
         io = LLVMBuildPtrToInt(gallivm->builder, io_ptr, LLVMInt64TypeInContext(gallivm->context), "");
         LLVMValueRef prim_offset = LLVMBuildMul(builder, prim_loop_state.counter, lp_build_const_int32(gallivm, psize), "");
         prim_offset = LLVMBuildAdd(builder, prim_offset, lp_build_const_int32(gallivm, vsize * (nir->info.mesh.max_vertices_out + 8)), "");
         io = LLVMBuildAdd(builder, io, LLVMBuildZExt(builder, prim_offset, LLVMInt64TypeInContext(gallivm->context), ""), "");
         io = LLVMBuildIntToPtr(gallivm->builder, io, LLVMPointerType(LLVMVoidTypeInContext(gallivm->context), 0), "");
         mesh_convert_to_aos(gallivm, shader->base.ir.nir, false, variant->jit_prim_type,
                             io, output_array, clipmask,
                             prim_loop_state.counter, lp_elem_type(cs_type), -1, false);
         lp_build_loop_end_cond(&prim_loop_state,
                                prim_count,
                                NULL, LLVMIntUGE);
      }

      mask_val = lp_build_mask_end(&mask);

      lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true);
      LLVMPositionBuilderAtEnd(builder, clean_block);

      LLVMBuildBr(builder, sus_block);
      LLVMPositionBuilderAtEnd(builder, sus_block);

      lp_build_coro_end(gallivm, coro_hdl);
      LLVMBuildRet(builder, coro_hdl);
   }

   lp_bld_llvm_sampler_soa_destroy(sampler);
   lp_bld_llvm_image_soa_destroy(image);
   _mesa_hash_table_destroy(fns, NULL);

   gallivm_verify_function(gallivm, coro);
   gallivm_verify_function(gallivm, function);
}


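/*
 * Create-state accepts three IR flavours: TGSI (translated to NIR),
 * serialized NIR (deserialized and finalized), or a NIR shader that is
 * taken over directly.
 */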
static void *
llvmpipe_create_compute_state(struct pipe_context *pipe,
                              const struct pipe_compute_state *templ)
{
   struct lp_compute_shader *shader = CALLOC_STRUCT(lp_compute_shader);
   struct nir_shader *nir = NULL;
   if (!shader)
      return NULL;

   shader->no = cs_no++;

   shader->base.type = PIPE_SHADER_IR_NIR;

   if (templ->ir_type == PIPE_SHADER_IR_TGSI) {
      shader->base.ir.nir = tgsi_to_nir(templ->prog, pipe->screen, false);
   } else if (templ->ir_type == PIPE_SHADER_IR_NIR_SERIALIZED) {
      struct blob_reader reader;
      const struct pipe_binary_program_header *hdr = templ->prog;

      blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
      shader->base.ir.nir = nir_deserialize(NULL, pipe->screen->get_compiler_options(pipe->screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE), &reader);

      pipe->screen->finalize_nir(pipe->screen, shader->base.ir.nir);
   } else if (templ->ir_type == PIPE_SHADER_IR_NIR) {
      shader->base.ir.nir = (struct nir_shader *)templ->prog;
   }

   nir = (struct nir_shader *)shader->base.ir.nir;
   shader->req_local_mem += nir->info.shared_size;
   shader->zero_initialize_shared_memory = nir->info.zero_initialize_shared_memory;

   llvmpipe_register_shader(pipe, &shader->base);

   list_inithead(&shader->variants.list);

   int nr_samplers = BITSET_LAST_BIT(nir->info.samplers_used);
   int nr_sampler_views = BITSET_LAST_BIT(nir->info.textures_used);
   int nr_images = BITSET_LAST_BIT(nir->info.images_used);
   shader->variant_key_size = lp_cs_variant_key_size(MAX2(nr_samplers, nr_sampler_views), nr_images);

   return shader;
}


static void
llvmpipe_bind_compute_state(struct pipe_context *pipe,
                            void *cs)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);

   if (llvmpipe->cs == cs)
      return;

   llvmpipe->cs = (struct lp_compute_shader *)cs;
   llvmpipe->cs_dirty |= LP_CSNEW_CS;
}

static void
llvmpipe_get_compute_state_info(struct pipe_context *pipe, void *cs,
                                struct pipe_compute_state_object_info *info)
{
   struct lp_compute_shader *shader = cs;
   struct nir_shader *nir = shader->base.ir.nir;

   info->max_threads = 1024;
   info->simd_sizes = lp_native_vector_width / 32;
   info->preferred_simd_size = info->simd_sizes;
   // TODO: this is a bad estimate, but not much we can do without actually compiling the shaders
   info->private_memory = nir->scratch_size;
}


/**
 * Remove shader variant from two lists: the shader's variant list
 * and the context's variant list.
 */
static void
llvmpipe_remove_cs_shader_variant(struct llvmpipe_context *lp,
                                  struct lp_compute_shader_variant *variant)
{
   if ((LP_DEBUG & DEBUG_CS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
      debug_printf("llvmpipe: del cs #%u var %u v created %u v cached %u "
                   "v total cached %u inst %u total inst %u\n",
                   variant->shader->no, variant->no,
                   variant->shader->variants_created,
                   variant->shader->variants_cached,
                   lp->nr_cs_variants, variant->nr_instrs, lp->nr_cs_instrs);
   }

   gallivm_destroy(variant->gallivm);

   /* remove from shader's list */
   list_del(&variant->list_item_local.list);
   variant->shader->variants_cached--;

   /* remove from context's list */
   list_del(&variant->list_item_global.list);
   lp->nr_cs_variants--;
   lp->nr_cs_instrs -= variant->nr_instrs;

   if (variant->function_name)
      FREE(variant->function_name);
   FREE(variant);
}


static void
llvmpipe_delete_compute_state(struct pipe_context *pipe,
                              void *cs)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   struct lp_compute_shader *shader = cs;
   struct lp_cs_variant_list_item *li, *next;

   if (llvmpipe->cs == cs)
      llvmpipe->cs = NULL;
   for (unsigned i = 0; i < shader->max_global_buffers; i++)
      pipe_resource_reference(&shader->global_buffers[i], NULL);
   FREE(shader->global_buffers);

   /* Delete all the variants */
   LIST_FOR_EACH_ENTRY_SAFE(li, next, &shader->variants.list, list) {
      llvmpipe_remove_cs_shader_variant(llvmpipe, li->base);
   }
   ralloc_free(shader->base.ir.nir);
   FREE(shader);
}


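/**
 * Build the variant key from current context state: static sampler, texture
 * and image state for each unit the shader uses.  The trailing arrays make
 * the key variable-sized, so callers provide LP_CS_MAX_VARIANT_KEY_SIZE
 * bytes of storage.
 */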
static struct lp_compute_shader_variant_key *
make_variant_key(struct llvmpipe_context *lp,
                 struct lp_compute_shader *shader,
                 enum pipe_shader_type sh_type,
                 char *store)
{
   struct lp_compute_shader_variant_key *key =
      (struct lp_compute_shader_variant_key *)store;
   memset(key, 0, sizeof(*key));

   struct nir_shader *nir = (struct nir_shader *)shader->base.ir.nir;
   /* This value will be the same for all the variants of a given shader:
    */
   key->nr_samplers = BITSET_LAST_BIT(nir->info.samplers_used);
   key->nr_sampler_views = BITSET_LAST_BIT(nir->info.textures_used);
   struct lp_sampler_static_state *cs_sampler;

   cs_sampler = lp_cs_variant_key_samplers(key);

   memset(cs_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *cs_sampler);
   for (unsigned i = 0; i < key->nr_samplers; ++i) {
      if (BITSET_TEST(nir->info.samplers_used, i)) {
         lp_sampler_static_sampler_state(&cs_sampler[i].sampler_state,
                                         lp->samplers[sh_type][i]);
      }
   }

   /*
    * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
    * are dx10-style? Can't really have mixed opcodes, at least not
    * if we want to skip the holes here (without rescanning tgsi).
    */
   if (!BITSET_IS_EMPTY(nir->info.textures_used)) {
      for (unsigned i = 0; i < key->nr_sampler_views; ++i) {
         /*
          * Note sview may exceed what's representable by file_mask.
          * This will still work, the only downside is that not actually
          * used views may be included in the shader key.
          */
         if (BITSET_TEST(nir->info.textures_used, i)) {
            lp_sampler_static_texture_state(&cs_sampler[i].texture_state,
                                            lp->sampler_views[sh_type][i]);
         }
      }
   } else {
      key->nr_sampler_views = key->nr_samplers;
      for (unsigned i = 0; i < key->nr_sampler_views; ++i) {
         if (BITSET_TEST(nir->info.samplers_used, i)) {
            lp_sampler_static_texture_state(&cs_sampler[i].texture_state,
                                            lp->sampler_views[sh_type][i]);
         }
      }
   }

   struct lp_image_static_state *lp_image;
   lp_image = lp_cs_variant_key_images(key);
   key->nr_images = BITSET_LAST_BIT(nir->info.images_used);

   if (key->nr_images)
      memset(lp_image, 0,
             key->nr_images * sizeof *lp_image);
   for (unsigned i = 0; i < key->nr_images; ++i) {
      if (BITSET_TEST(nir->info.images_used, i)) {
         lp_sampler_static_texture_state_image(&lp_image[i].image_state,
                                               &lp->images[sh_type][i]);
      }
   }
   return key;
}


static void
dump_cs_variant_key(const struct lp_compute_shader_variant_key *key)
{
   int i;
   debug_printf("cs variant %p:\n", (void *) key);

   for (i = 0; i < key->nr_samplers; ++i) {
      const struct lp_sampler_static_state *samplers = lp_cs_variant_key_samplers(key);
      const struct lp_static_sampler_state *sampler = &samplers[i].sampler_state;
      debug_printf("sampler[%u] = \n", i);
      debug_printf(" .wrap = %s %s %s\n",
                   util_str_tex_wrap(sampler->wrap_s, true),
                   util_str_tex_wrap(sampler->wrap_t, true),
                   util_str_tex_wrap(sampler->wrap_r, true));
      debug_printf(" .min_img_filter = %s\n",
                   util_str_tex_filter(sampler->min_img_filter, true));
      debug_printf(" .min_mip_filter = %s\n",
                   util_str_tex_mipfilter(sampler->min_mip_filter, true));
      debug_printf(" .mag_img_filter = %s\n",
                   util_str_tex_filter(sampler->mag_img_filter, true));
      if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE)
         debug_printf(" .compare_func = %s\n", util_str_func(sampler->compare_func, true));
      debug_printf(" .normalized_coords = %u\n", sampler->normalized_coords);
      debug_printf(" .min_max_lod_equal = %u\n", sampler->min_max_lod_equal);
      debug_printf(" .lod_bias_non_zero = %u\n", sampler->lod_bias_non_zero);
      debug_printf(" .apply_min_lod = %u\n", sampler->apply_min_lod);
      debug_printf(" .apply_max_lod = %u\n", sampler->apply_max_lod);
      debug_printf(" .aniso = %u\n", sampler->aniso);
   }
   for (i = 0; i < key->nr_sampler_views; ++i) {
      const struct lp_sampler_static_state *samplers = lp_cs_variant_key_samplers(key);
      const struct lp_static_texture_state *texture = &samplers[i].texture_state;
      debug_printf("texture[%u] = \n", i);
      debug_printf(" .format = %s\n",
                   util_format_name(texture->format));
      debug_printf(" .target = %s\n",
                   util_str_tex_target(texture->target, true));
      debug_printf(" .level_zero_only = %u\n",
                   texture->level_zero_only);
      debug_printf(" .pot = %u %u %u\n",
                   texture->pot_width,
                   texture->pot_height,
                   texture->pot_depth);
   }
   struct lp_image_static_state *images = lp_cs_variant_key_images(key);
   for (i = 0; i < key->nr_images; ++i) {
      const struct lp_static_texture_state *image = &images[i].image_state;
      debug_printf("image[%u] = \n", i);
      debug_printf(" .format = %s\n",
                   util_format_name(image->format));
      debug_printf(" .target = %s\n",
                   util_str_tex_target(image->target, true));
      debug_printf(" .level_zero_only = %u\n",
                   image->level_zero_only);
      debug_printf(" .pot = %u %u %u\n",
                   image->pot_width,
                   image->pot_height,
                   image->pot_depth);
   }
}


static void
lp_debug_cs_variant(const struct lp_compute_shader_variant *variant)
{
   debug_printf("llvmpipe: Compute shader #%u variant #%u:\n",
                variant->shader->no, variant->no);
   nir_print_shader(variant->shader->base.ir.nir, stderr);
   dump_cs_variant_key(&variant->key);
   debug_printf("\n");
}


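/*
 * The disk-cache key is a SHA-1 over the serialized NIR plus the entire
 * variant key, so any state that affects codegen also changes the key.
 */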
static void
lp_cs_get_ir_cache_key(struct lp_compute_shader_variant *variant,
                       unsigned char ir_sha1_cache_key[20])
{
   struct blob blob = { 0 };
   unsigned ir_size;
   void *ir_binary;

   blob_init(&blob);
   nir_serialize(&blob, variant->shader->base.ir.nir, true);
   ir_binary = blob.data;
   ir_size = blob.size;

   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);
   _mesa_sha1_update(&ctx, &variant->key, variant->shader->variant_key_size);
   _mesa_sha1_update(&ctx, ir_binary, ir_size);
   _mesa_sha1_final(&ctx, ir_sha1_cache_key);

   blob_finish(&blob);
}


static struct lp_compute_shader_variant *
generate_variant(struct llvmpipe_context *lp,
                 struct lp_compute_shader *shader,
                 enum pipe_shader_type sh_type,
                 const struct lp_compute_shader_variant_key *key)
{
   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);

   struct lp_compute_shader_variant *variant =
      MALLOC(sizeof *variant + shader->variant_key_size - sizeof variant->key);
   if (!variant)
      return NULL;

   memset(variant, 0, sizeof(*variant));

   char module_name[64];
   const char *shname = sh_type == PIPE_SHADER_MESH ? "ms" :
      (sh_type == PIPE_SHADER_TASK ? "ts" : "cs");
   snprintf(module_name, sizeof(module_name), "%s%u_variant%u",
            shname, shader->no, shader->variants_created);

   variant->shader = shader;
   memcpy(&variant->key, key, shader->variant_key_size);

   unsigned char ir_sha1_cache_key[20];
   struct lp_cached_code cached = { 0 };
   bool needs_caching = false;

   lp_cs_get_ir_cache_key(variant, ir_sha1_cache_key);

   lp_disk_cache_find_shader(screen, &cached, ir_sha1_cache_key);
   if (!cached.data_size)
      needs_caching = true;

   variant->gallivm = gallivm_create(module_name, &lp->context, &cached);
   if (!variant->gallivm) {
      FREE(variant);
      return NULL;
   }

   variant->list_item_global.base = variant;
   variant->list_item_local.base = variant;
   variant->no = shader->variants_created++;

   if ((LP_DEBUG & DEBUG_CS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
      lp_debug_cs_variant(variant);
   }

   lp_jit_init_cs_types(variant);

   if (sh_type == PIPE_SHADER_MESH) {
      struct nir_shader *nir = shader->base.ir.nir;
      int per_prim_count = util_bitcount64(nir->info.per_primitive_outputs);
      int out_count = util_bitcount64(nir->info.outputs_written);
      int per_vert_count = out_count - per_prim_count;
      variant->jit_vertex_header_type = lp_build_create_jit_vertex_header_type(variant->gallivm, per_vert_count);
      variant->jit_vertex_header_ptr_type = LLVMPointerType(variant->jit_vertex_header_type, 0);
      variant->jit_prim_type = LLVMArrayType(LLVMArrayType(LLVMFloatTypeInContext(variant->gallivm->context), 4), per_prim_count);
   }

   generate_compute(lp, shader, variant);

#if GALLIVM_USE_ORCJIT
   /* module has been moved into ORCJIT after gallivm_compile_module */
   variant->nr_instrs += lp_build_count_ir_module(variant->gallivm->module);

   gallivm_compile_module(variant->gallivm);
#else
   gallivm_compile_module(variant->gallivm);

   variant->nr_instrs += lp_build_count_ir_module(variant->gallivm->module);
#endif

   variant->jit_function = (lp_jit_cs_func)
      gallivm_jit_function(variant->gallivm, variant->function, variant->function_name);

   if (needs_caching) {
      lp_disk_cache_insert_shader(screen, &cached, ir_sha1_cache_key);
   }
   gallivm_free_ir(variant->gallivm);
   return variant;
}


static void
lp_cs_ctx_set_cs_variant(struct lp_cs_context *csctx,
                         struct lp_compute_shader_variant *variant)
{
   csctx->cs.current.variant = variant;
}


static struct lp_compute_shader_variant *
llvmpipe_update_cs_variant(struct llvmpipe_context *lp,
                           enum pipe_shader_type sh_type,
                           struct lp_compute_shader *shader)
{
   char store[LP_CS_MAX_VARIANT_KEY_SIZE];
   struct lp_compute_shader_variant_key *key =
      make_variant_key(lp, shader, sh_type, store);
   struct lp_compute_shader_variant *variant = NULL;
   struct lp_cs_variant_list_item *li;

   /* Search the variants for one which matches the key */
   LIST_FOR_EACH_ENTRY(li, &shader->variants.list, list) {
      if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) {
         variant = li->base;
         break;
      }
   }

   if (variant) {
1330 /* Move this variant to the head of the list to implement LRU
1331 * deletion of shader's when we have too many.
1332 */
1333 list_move_to(&variant->list_item_global.list,
1334 &lp->cs_variants_list.list);
1335 } else {
1336 /* variant not found, create it now */
1337
1338 if (LP_DEBUG & DEBUG_CS) {
1339 debug_printf("%u variants,\t%u instrs,\t%u instrs/variant\n",
1340 lp->nr_cs_variants,
1341 lp->nr_cs_instrs,
1342 lp->nr_cs_variants
1343 ? lp->nr_cs_instrs / lp->nr_cs_variants : 0);
1344 }
1345
1346 /* First, check if we've exceeded the max number of shader variants.
1347 * If so, free 6.25% of them (the least recently used ones).
1348 */
1349 unsigned variants_to_cull = lp->nr_cs_variants >= LP_MAX_SHADER_VARIANTS
1350 ? LP_MAX_SHADER_VARIANTS / 16 : 0;
1351
1352 if (variants_to_cull ||
1353 lp->nr_cs_instrs >= LP_MAX_SHADER_INSTRUCTIONS) {
1354 if (gallivm_debug & GALLIVM_DEBUG_PERF) {
1355 debug_printf("Evicting CS: %u cs variants,\t%u total variants,"
1356 "\t%u instrs,\t%u instrs/variant\n",
1357 shader->variants_cached,
1358 lp->nr_cs_variants, lp->nr_cs_instrs,
1359 lp->nr_cs_instrs / lp->nr_cs_variants);
1360 }
1361
1362 /*
1363 * We need to re-check lp->nr_cs_variants because an arbitrarily large
1364 * number of shader variants (potentially all of them) could be
1365 * pending for destruction on flush.
1366 */
1367 for (unsigned i = 0;
1368 i < variants_to_cull ||
1369 lp->nr_cs_instrs >= LP_MAX_SHADER_INSTRUCTIONS; i++) {
1370 struct lp_cs_variant_list_item *item;
1371 if (list_is_empty(&lp->cs_variants_list.list)) {
1372 break;
1373 }
1374 item = list_last_entry(&lp->cs_variants_list.list,
1375 struct lp_cs_variant_list_item, list);
1376 assert(item);
1377 assert(item->base);
1378 llvmpipe_remove_cs_shader_variant(lp, item->base);
1379 }
1380 }
1381
1382 /*
1383 * Generate the new variant.
1384 */
1385 int64_t t0, t1, dt;
1386 t0 = os_time_get();
1387 variant = generate_variant(lp, shader, sh_type, key);
1388 t1 = os_time_get();
1389 dt = t1 - t0;
1390 LP_COUNT_ADD(llvm_compile_time, dt);
1391 LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. omit in/out test */
1392
1393 /* Put the new variant into the list */
1394 if (variant) {
1395 list_add(&variant->list_item_local.list, &shader->variants.list);
1396 list_add(&variant->list_item_global.list, &lp->cs_variants_list.list);
1397 lp->nr_cs_variants++;
1398 lp->nr_cs_instrs += variant->nr_instrs;
1399 shader->variants_cached++;
1400 }
1401 }
1402 return variant;
1403 }
1404
1405 static void
llvmpipe_update_cs(struct llvmpipe_context * lp)1406 llvmpipe_update_cs(struct llvmpipe_context *lp)
1407 {
1408 struct lp_compute_shader_variant *variant;
1409 variant = llvmpipe_update_cs_variant(lp, PIPE_SHADER_COMPUTE, lp->cs);
1410 /* Bind this variant */
1411 lp_cs_ctx_set_cs_variant(lp->csctx, variant);
1412 }
1413
1414
1415 /**
1416 * Called during state validation when LP_CSNEW_SAMPLER_VIEW is set.
1417 */
1418 static void
lp_csctx_set_sampler_views(struct lp_cs_context * csctx,unsigned num,struct pipe_sampler_view ** views)1419 lp_csctx_set_sampler_views(struct lp_cs_context *csctx,
1420 unsigned num,
1421 struct pipe_sampler_view **views)
1422 {
1423 LP_DBG(DEBUG_SETUP, "%s\n", __func__);
1424
1425 assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
1426
1427 const unsigned max_tex_num = MAX2(num, csctx->cs.current_tex_num);
1428
1429 for (unsigned i = 0; i < max_tex_num; i++) {
1430 struct pipe_sampler_view *view = i < num ? views[i] : NULL;
1431
1432 /* We are going to overwrite/unref the current texture further below. If
1433 * set, make sure to unmap its resource to avoid leaking previous
1434 * mapping. */
1435 if (csctx->cs.current_tex[i])
1436 llvmpipe_resource_unmap(csctx->cs.current_tex[i], 0, 0);
1437
1438 if (view) {
1439 struct pipe_resource *res = view->texture;
1440 struct lp_jit_texture *jit_tex;
1441 jit_tex = &csctx->cs.current.jit_resources.textures[i];
1442
1443 /* We're referencing the texture's internal data, so save a
1444 * reference to it.
1445 */
1446 pipe_resource_reference(&csctx->cs.current_tex[i], res);
1447
1448 lp_jit_texture_from_pipe(jit_tex, view);
1449 } else {
1450 pipe_resource_reference(&csctx->cs.current_tex[i], NULL);
1451 }
1452 }
1453 csctx->cs.current_tex_num = num;
1454 }
1455
1456
1457 /**
1458 * Called during state validation when LP_NEW_SAMPLER is set.
1459 */
1460 static void
lp_csctx_set_sampler_state(struct lp_cs_context * csctx,unsigned num,struct pipe_sampler_state ** samplers)1461 lp_csctx_set_sampler_state(struct lp_cs_context *csctx,
1462 unsigned num,
1463 struct pipe_sampler_state **samplers)
1464 {
1465 LP_DBG(DEBUG_SETUP, "%s\n", __func__);
1466
1467 assert(num <= PIPE_MAX_SAMPLERS);
1468
1469 for (unsigned i = 0; i < PIPE_MAX_SAMPLERS; i++) {
1470 const struct pipe_sampler_state *sampler = i < num ? samplers[i] : NULL;
1471
1472 if (sampler) {
1473 struct lp_jit_sampler *jit_sam;
1474 jit_sam = &csctx->cs.current.jit_resources.samplers[i];
1475
1476 jit_sam->min_lod = sampler->min_lod;
1477 jit_sam->max_lod = sampler->max_lod;
1478 jit_sam->lod_bias = sampler->lod_bias;
1479 jit_sam->max_aniso = sampler->max_anisotropy;
1480 COPY_4V(jit_sam->border_color, sampler->border_color.f);
1481 }
1482 }
1483 }
1484
1485
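/**
 * Copy the given constant buffers into the context, taking references;
 * any slots past num are cleared.
 */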
static void
lp_csctx_set_cs_constants(struct lp_cs_context *csctx,
                          unsigned num,
                          struct pipe_constant_buffer *buffers)
{
   unsigned i;

   LP_DBG(DEBUG_SETUP, "%s %p\n", __func__, (void *) buffers);

   assert(num <= ARRAY_SIZE(csctx->constants));

   for (i = 0; i < num; ++i) {
      util_copy_constant_buffer(&csctx->constants[i].current, &buffers[i], false);
   }
   for (; i < ARRAY_SIZE(csctx->constants); i++) {
      util_copy_constant_buffer(&csctx->constants[i].current, NULL, false);
   }
}

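/**
 * Copy the given shader buffers (SSBOs) into the context, taking
 * references; any slots past num are cleared.
 */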
static void
lp_csctx_set_cs_ssbos(struct lp_cs_context *csctx,
                      unsigned num,
                      struct pipe_shader_buffer *buffers)
{
   unsigned i;
   LP_DBG(DEBUG_SETUP, "%s %p\n", __func__, (void *)buffers);

   assert(num <= ARRAY_SIZE(csctx->ssbos));

   for (i = 0; i < num; ++i) {
      util_copy_shader_buffer(&csctx->ssbos[i].current, &buffers[i]);
   }
   for (; i < ARRAY_SIZE(csctx->ssbos); i++) {
      util_copy_shader_buffer(&csctx->ssbos[i].current, NULL);
   }
}

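/**
 * Copy the given image views into the context and fill in the
 * corresponding JIT image state; any slots past num are cleared.
 */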
static void
lp_csctx_set_cs_images(struct lp_cs_context *csctx,
                       unsigned num,
                       struct pipe_image_view *images)
{
   unsigned i;

   LP_DBG(DEBUG_SETUP, "%s %p\n", __func__, (void *) images);

   assert(num <= ARRAY_SIZE(csctx->images));

   for (i = 0; i < num; ++i) {
      struct pipe_image_view *image = &images[i];
      util_copy_image_view(&csctx->images[i].current, &images[i]);

      struct pipe_resource *res = image->resource;
      struct llvmpipe_resource *lp_res = llvmpipe_resource(res);
      struct lp_jit_image *jit_image;

      jit_image = &csctx->cs.current.jit_resources.images[i];
      if (!lp_res)
         continue;

      lp_jit_image_from_pipe(jit_image, image);
   }
   for (; i < ARRAY_SIZE(csctx->images); i++) {
      util_copy_image_view(&csctx->images[i].current, NULL);
   }
}

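/**
 * Refresh the JIT constant-buffer state from the pipe constant buffers
 * currently bound to this context.
 */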
static void
update_csctx_consts(struct llvmpipe_context *llvmpipe,
                    struct lp_cs_context *csctx)
{
   for (int i = 0; i < ARRAY_SIZE(csctx->constants); ++i) {
      lp_jit_buffer_from_pipe_const(&csctx->cs.current.jit_resources.constants[i],
                                    &csctx->constants[i].current, llvmpipe->pipe.screen);
   }
}

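/**
 * Refresh the JIT SSBO pointers and sizes from the pipe shader buffers
 * currently bound to this context.
 */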
static void
update_csctx_ssbo(struct llvmpipe_context *llvmpipe,
                  struct lp_cs_context *csctx)
{
   for (int i = 0; i < ARRAY_SIZE(csctx->ssbos); ++i) {
      struct pipe_resource *buffer = csctx->ssbos[i].current.buffer;
      const uint8_t *current_data = NULL;

      /* resource buffer */
      if (buffer)
         current_data = (uint8_t *) llvmpipe_resource_data(buffer);
      if (current_data) {
         current_data += csctx->ssbos[i].current.buffer_offset;

         csctx->cs.current.jit_resources.ssbos[i].u = (const uint32_t *)current_data;
         csctx->cs.current.jit_resources.ssbos[i].num_elements = csctx->ssbos[i].current.buffer_size;
      } else {
         csctx->cs.current.jit_resources.ssbos[i].u = NULL;
         csctx->cs.current.jit_resources.ssbos[i].num_elements = 0;
      }
   }
}

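/**
 * Re-validate whatever derived compute state the cs_dirty flags mark as
 * stale, and (re)select the shader variant if anything affecting it changed.
 */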
static void
llvmpipe_cs_update_derived(struct llvmpipe_context *llvmpipe, const void *input)
{
   if (llvmpipe->cs_dirty & LP_CSNEW_CONSTANTS) {
      lp_csctx_set_cs_constants(llvmpipe->csctx,
                                ARRAY_SIZE(llvmpipe->constants[PIPE_SHADER_COMPUTE]),
                                llvmpipe->constants[PIPE_SHADER_COMPUTE]);
      update_csctx_consts(llvmpipe, llvmpipe->csctx);
   }

   if (llvmpipe->cs_dirty & LP_CSNEW_SSBOS) {
      lp_csctx_set_cs_ssbos(llvmpipe->csctx,
                            ARRAY_SIZE(llvmpipe->ssbos[PIPE_SHADER_COMPUTE]),
                            llvmpipe->ssbos[PIPE_SHADER_COMPUTE]);
      update_csctx_ssbo(llvmpipe, llvmpipe->csctx);
   }

   if (llvmpipe->cs_dirty & LP_CSNEW_SAMPLER_VIEW)
      lp_csctx_set_sampler_views(llvmpipe->csctx,
                                 llvmpipe->num_sampler_views[PIPE_SHADER_COMPUTE],
                                 llvmpipe->sampler_views[PIPE_SHADER_COMPUTE]);

   if (llvmpipe->cs_dirty & LP_CSNEW_SAMPLER)
      lp_csctx_set_sampler_state(llvmpipe->csctx,
                                 llvmpipe->num_samplers[PIPE_SHADER_COMPUTE],
                                 llvmpipe->samplers[PIPE_SHADER_COMPUTE]);

   if (llvmpipe->cs_dirty & LP_CSNEW_IMAGES)
      lp_csctx_set_cs_images(llvmpipe->csctx,
                             ARRAY_SIZE(llvmpipe->images[PIPE_SHADER_COMPUTE]),
                             llvmpipe->images[PIPE_SHADER_COMPUTE]);

   struct lp_cs_context *csctx = llvmpipe->csctx;
   csctx->cs.current.jit_resources.aniso_filter_table = lp_build_sample_aniso_filter_table();
   if (input) {
      csctx->input = input;
      csctx->cs.current.jit_context.kernel_args = input;
   }

   if (llvmpipe->cs_dirty & (LP_CSNEW_CS |
                             LP_CSNEW_IMAGES |
                             LP_CSNEW_SAMPLER_VIEW |
                             LP_CSNEW_SAMPLER))
      llvmpipe_update_cs(llvmpipe);

   llvmpipe->cs_dirty = 0;
}

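/**
 * Thread-pool callback: runs the JITed shader for one workgroup.
 * iter_idx is a linear index that is decomposed back into (x, y, z)
 * grid coordinates; for sizes (sx, sy, sz):
 *   z = idx / (sx * sy), y = (idx % (sx * sy)) / sx, x = idx % sx.
 */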
static void
cs_exec_fn(void *init_data, int iter_idx, struct lp_cs_local_mem *lmem)
{
   struct lp_cs_job_info *job_info = init_data;
   struct lp_jit_cs_thread_data thread_data;

   memset(&thread_data, 0, sizeof(thread_data));

   if (lmem->local_size < job_info->req_local_mem) {
      lmem->local_mem_ptr = REALLOC(lmem->local_mem_ptr, lmem->local_size,
                                    job_info->req_local_mem);
      lmem->local_size = job_info->req_local_mem;
   }
   if (job_info->zero_initialize_shared_memory)
      memset(lmem->local_mem_ptr, 0, job_info->req_local_mem);
   thread_data.shared = lmem->local_mem_ptr;

   thread_data.payload = job_info->payload;

   unsigned grid_z, grid_y, grid_x;

   if (job_info->use_iters) {
      grid_z = iter_idx / (job_info->iter_size[0] * job_info->iter_size[1]);
      grid_y = (iter_idx - (grid_z * (job_info->iter_size[0] * job_info->iter_size[1]))) / job_info->iter_size[0];
      grid_x = (iter_idx - (grid_z * (job_info->iter_size[0] * job_info->iter_size[1])) - (grid_y * job_info->iter_size[0]));
   } else {
      grid_z = iter_idx / (job_info->grid_size[0] * job_info->grid_size[1]);
      grid_y = (iter_idx - (grid_z * (job_info->grid_size[0] * job_info->grid_size[1]))) / job_info->grid_size[0];
      grid_x = (iter_idx - (grid_z * (job_info->grid_size[0] * job_info->grid_size[1])) - (grid_y * job_info->grid_size[0]));
   }

   grid_z += job_info->grid_base[2];
   grid_y += job_info->grid_base[1];
   grid_x += job_info->grid_base[0];
   struct lp_compute_shader_variant *variant = job_info->current->variant;

   void *io_ptr = NULL;
   if (job_info->io) {
      size_t io_offset = job_info->io_stride * iter_idx;
      io_ptr = (char *)job_info->io + io_offset;
   }
   if (thread_data.payload) {
      size_t payload_offset = job_info->payload_stride * iter_idx;
      thread_data.payload = (char *)thread_data.payload + payload_offset;
   }
   variant->jit_function(&job_info->current->jit_context,
                         &job_info->current->jit_resources,
                         job_info->block_size[0], job_info->block_size[1], job_info->block_size[2],
                         grid_x, grid_y, grid_z,
                         job_info->grid_size[0], job_info->grid_size[1], job_info->grid_size[2],
                         job_info->work_dim, job_info->draw_id,
                         io_ptr,
                         &thread_data);
}

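/**
 * Read the grid dimensions for draw idx into grid_size, either directly
 * from info->grid or, for indirect dispatches, from the mapped indirect
 * buffer. On a failed map, grid_size is left untouched (callers zero it).
 */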
static void
fill_grid_size(struct pipe_context *pipe,
               int idx,
               const struct pipe_grid_info *info,
               uint32_t grid_size[3])
{
   struct pipe_transfer *transfer;
   uint32_t *params;
   if (!info->indirect) {
      grid_size[0] = info->grid[0];
      grid_size[1] = info->grid[1];
      grid_size[2] = info->grid[2];
      return;
   }
   params = pipe_buffer_map_range(pipe, info->indirect,
                                  (info->indirect_stride * idx) + info->indirect_offset,
                                  3 * sizeof(uint32_t),
                                  PIPE_MAP_READ,
                                  &transfer);

   if (!transfer)
      return;

   grid_size[0] = params[0];
   grid_size[1] = params[1];
   grid_size[2] = params[2];
   pipe_buffer_unmap(pipe, transfer);
}

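/**
 * pipe_context::launch_grid entry point: validates derived compute state,
 * queues one thread-pool task per workgroup, and waits for completion.
 */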
static void
llvmpipe_launch_grid(struct pipe_context *pipe,
                     const struct pipe_grid_info *info)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
   struct lp_cs_job_info job_info;

   if (!llvmpipe_check_render_cond(llvmpipe))
      return;

   memset(&job_info, 0, sizeof(job_info));

   llvmpipe_cs_update_derived(llvmpipe, info->input);

   fill_grid_size(pipe, 0, info, job_info.grid_size);

   job_info.grid_base[0] = info->grid_base[0];
   job_info.grid_base[1] = info->grid_base[1];
   job_info.grid_base[2] = info->grid_base[2];
   job_info.block_size[0] = info->block[0];
   job_info.block_size[1] = info->block[1];
   job_info.block_size[2] = info->block[2];
   job_info.work_dim = info->work_dim;
   job_info.req_local_mem = llvmpipe->cs->req_local_mem + info->variable_shared_mem;
   job_info.zero_initialize_shared_memory = llvmpipe->cs->zero_initialize_shared_memory;
   job_info.current = &llvmpipe->csctx->cs.current;

   int num_tasks = job_info.grid_size[2] * job_info.grid_size[1] * job_info.grid_size[0];
   if (num_tasks) {
      struct lp_cs_tpool_task *task;
      mtx_lock(&screen->cs_mutex);
      task = lp_cs_tpool_queue_task(screen->cs_tpool, cs_exec_fn, &job_info, num_tasks);
      mtx_unlock(&screen->cs_mutex);

      lp_cs_tpool_wait_for_task(screen->cs_tpool, &task);
   }
   if (!llvmpipe->queries_disabled)
      llvmpipe->pipeline_statistics.cs_invocations += num_tasks * info->block[0] * info->block[1] * info->block[2];
}


static void
llvmpipe_set_compute_resources(struct pipe_context *pipe,
                               unsigned start, unsigned count,
                               struct pipe_surface **resources)
{
   /* nothing to do */
}

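/**
 * Bind global buffers (as used by OpenCL-style kernels): each handle
 * holds a buffer offset on input and is rewritten in place with the CPU
 * virtual address of that location.
 */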
static void
llvmpipe_set_global_binding(struct pipe_context *pipe,
                            unsigned first, unsigned count,
                            struct pipe_resource **resources,
                            uint32_t **handles)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   struct lp_compute_shader *cs = llvmpipe->cs;

   if (first + count > cs->max_global_buffers) {
      unsigned old_max = cs->max_global_buffers;
      struct pipe_resource **new_buffers =
         realloc(cs->global_buffers,
                 (first + count) * sizeof(cs->global_buffers[0]));
      if (!new_buffers) {
         return;
      }
      cs->global_buffers = new_buffers;
      cs->max_global_buffers = first + count;

      memset(&cs->global_buffers[old_max], 0,
             (cs->max_global_buffers - old_max) * sizeof(cs->global_buffers[0]));
   }

   if (!resources) {
      for (unsigned i = 0; i < count; i++)
         pipe_resource_reference(&cs->global_buffers[first + i], NULL);
      return;
   }

   for (unsigned i = 0; i < count; i++) {
      uintptr_t va;
      uint32_t offset;
      pipe_resource_reference(&cs->global_buffers[first + i], resources[i]);
      struct llvmpipe_resource *lp_res = llvmpipe_resource(resources[i]);
      offset = *handles[i];
      va = (uintptr_t)((char *)lp_res->data + offset);
      memcpy(handles[i], &va, sizeof(va));
   }
}

void
llvmpipe_init_compute_funcs(struct llvmpipe_context *llvmpipe)
{
   llvmpipe->pipe.create_compute_state = llvmpipe_create_compute_state;
   llvmpipe->pipe.bind_compute_state = llvmpipe_bind_compute_state;
   llvmpipe->pipe.get_compute_state_info = llvmpipe_get_compute_state_info;
   llvmpipe->pipe.delete_compute_state = llvmpipe_delete_compute_state;
   llvmpipe->pipe.set_compute_resources = llvmpipe_set_compute_resources;
   llvmpipe->pipe.set_global_binding = llvmpipe_set_global_binding;
   llvmpipe->pipe.launch_grid = llvmpipe_launch_grid;
}


void
lp_csctx_destroy(struct lp_cs_context *csctx)
{
   unsigned i;
   for (i = 0; i < ARRAY_SIZE(csctx->cs.current_tex); i++) {
      struct pipe_resource **res_ptr = &csctx->cs.current_tex[i];
      if (*res_ptr)
         llvmpipe_resource_unmap(*res_ptr, 0, 0);
      pipe_resource_reference(res_ptr, NULL);
   }
   for (i = 0; i < ARRAY_SIZE(csctx->constants); i++) {
      pipe_resource_reference(&csctx->constants[i].current.buffer, NULL);
   }
   for (i = 0; i < ARRAY_SIZE(csctx->ssbos); i++) {
      pipe_resource_reference(&csctx->ssbos[i].current.buffer, NULL);
   }
   for (i = 0; i < ARRAY_SIZE(csctx->images); i++) {
      pipe_resource_reference(&csctx->images[i].current.resource, NULL);
   }
   FREE(csctx);
}


struct lp_cs_context *
lp_csctx_create(struct pipe_context *pipe)
{
   struct lp_cs_context *csctx = CALLOC_STRUCT(lp_cs_context);
   if (!csctx)
      return NULL;

   csctx->pipe = pipe;
   return csctx;
}

void
llvmpipe_update_task_shader(struct llvmpipe_context *lp)
{
   if (!lp->tss)
      return;
   struct lp_compute_shader_variant *variant = llvmpipe_update_cs_variant(lp, PIPE_SHADER_TASK, lp->tss);
   lp_cs_ctx_set_cs_variant(lp->task_ctx, variant);
}

static void *
llvmpipe_create_ts_state(struct pipe_context *pipe,
                         const struct pipe_shader_state *templ)
{
   struct lp_compute_shader *shader = CALLOC_STRUCT(lp_compute_shader);
   if (!shader)
      return NULL;

   llvmpipe_register_shader(pipe, templ);

   shader->no = task_no++;
   shader->base.type = templ->type;

   shader->base.ir.nir = templ->ir.nir;
   shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size;
   list_inithead(&shader->variants.list);

   struct nir_shader *nir = shader->base.ir.nir;
   int nr_samplers = BITSET_LAST_BIT(nir->info.samplers_used);
   int nr_sampler_views = BITSET_LAST_BIT(nir->info.textures_used);
   int nr_images = BITSET_LAST_BIT(nir->info.images_used);
   shader->variant_key_size = lp_cs_variant_key_size(MAX2(nr_samplers, nr_sampler_views), nr_images);
   return shader;
}


static void
llvmpipe_bind_ts_state(struct pipe_context *pipe, void *_task)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);

   if (llvmpipe->tss == _task)
      return;

   llvmpipe->tss = (struct lp_compute_shader *)_task;
   llvmpipe->dirty |= LP_NEW_TASK;
}

static void
llvmpipe_delete_ts_state(struct pipe_context *pipe, void *_task)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   struct lp_compute_shader *shader = _task;
   struct lp_cs_variant_list_item *li, *next;

   /* Delete all the variants */
   LIST_FOR_EACH_ENTRY_SAFE(li, next, &shader->variants.list, list) {
      llvmpipe_remove_cs_shader_variant(llvmpipe, li->base);
   }
   ralloc_free(shader->base.ir.nir);
   FREE(shader);
}

void
llvmpipe_init_task_funcs(struct llvmpipe_context *llvmpipe)
{
   llvmpipe->pipe.create_ts_state = llvmpipe_create_ts_state;
   llvmpipe->pipe.bind_ts_state = llvmpipe_bind_ts_state;
   llvmpipe->pipe.delete_ts_state = llvmpipe_delete_ts_state;
}

void
llvmpipe_update_mesh_shader(struct llvmpipe_context *lp)
{
   if (!lp->mhs)
      return;
   struct lp_compute_shader_variant *variant = llvmpipe_update_cs_variant(lp, PIPE_SHADER_MESH, lp->mhs);
   lp_cs_ctx_set_cs_variant(lp->mesh_ctx, variant);
}

static void *
llvmpipe_create_ms_state(struct pipe_context *pipe,
                         const struct pipe_shader_state *templ)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   struct lp_compute_shader *shader = CALLOC_STRUCT(lp_compute_shader);
   if (!shader)
      return NULL;

   llvmpipe_register_shader(pipe, templ);

   shader->no = mesh_no++;
   shader->base.type = templ->type;

   shader->base.ir.nir = templ->ir.nir;
   shader->req_local_mem += ((struct nir_shader *)shader->base.ir.nir)->info.shared_size;
   list_inithead(&shader->variants.list);

   shader->draw_mesh_data = draw_create_mesh_shader(llvmpipe->draw, templ);
   if (shader->draw_mesh_data == NULL) {
      FREE(shader);
      return NULL;
   }

   struct nir_shader *nir = shader->base.ir.nir;
   int nr_samplers = BITSET_LAST_BIT(nir->info.samplers_used);
   int nr_sampler_views = BITSET_LAST_BIT(nir->info.textures_used);
   int nr_images = BITSET_LAST_BIT(nir->info.images_used);
   shader->variant_key_size = lp_cs_variant_key_size(MAX2(nr_samplers, nr_sampler_views), nr_images);
   return shader;
}


static void
llvmpipe_bind_ms_state(struct pipe_context *pipe, void *_mesh)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);

   if (llvmpipe->mhs == _mesh)
      return;

   llvmpipe->mhs = (struct lp_compute_shader *)_mesh;

   draw_bind_mesh_shader(llvmpipe->draw, _mesh ? llvmpipe->mhs->draw_mesh_data : NULL);
   llvmpipe->dirty |= LP_NEW_MESH;
}


static void
llvmpipe_delete_ms_state(struct pipe_context *pipe, void *_mesh)
{
   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
   struct lp_compute_shader *shader = _mesh;
   struct lp_cs_variant_list_item *li, *next;

   /* Delete all the variants */
   LIST_FOR_EACH_ENTRY_SAFE(li, next, &shader->variants.list, list) {
      llvmpipe_remove_cs_shader_variant(llvmpipe, li->base);
   }

   draw_delete_mesh_shader(llvmpipe->draw, shader->draw_mesh_data);
   ralloc_free(shader->base.ir.nir);

   FREE(shader);
}

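/**
 * Feed one mesh workgroup's output through the draw module: rebuild an
 * element list from the written primitive indices, run the per-primitive
 * stage (including primitive culling), and emit the resulting primitives.
 */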
static void
lp_mesh_call_draw(struct llvmpipe_context *lp,
                  enum mesa_prim prim,
                  int prim_out_idx,
                  int cull_prim_idx,
                  int task_idx,
                  void *vbuf, size_t task_out_size,
                  int vsize, int psize, int per_prim_count,
                  size_t prim_offset)
{
   unsigned prim_len = mesa_vertices_per_prim(prim);
   uint32_t *ptr = (uint32_t *)((char *)vbuf + task_out_size * task_idx);
   uint32_t vertex_count = ptr[1];
   uint32_t prim_count = ptr[2];

   if (!vertex_count || !prim_count)
      return;

   struct draw_vertex_info vinfo;
   vinfo.verts = (struct vertex_header *)ptr;
   vinfo.vertex_size = vsize / 8;
   vinfo.stride = vsize;
   vinfo.count = vertex_count;

   unsigned elts_size = prim_len * prim_count;
   unsigned short *elts = calloc(elts_size, sizeof(uint16_t));
   uint32_t *prim_lengths = calloc(prim_count, sizeof(uint32_t));
   int elts_idx = 0;
   char *prim_ptr = (char *)ptr + prim_offset;
   for (unsigned p = 0; p < prim_count; p++) {
      uint32_t *prim_idxs = (uint32_t *)(prim_ptr + p * psize + prim_out_idx * 4 * sizeof(float));
      for (unsigned elt = 0; elt < prim_len; elt++) {
         elts[elts_idx++] = prim_idxs[elt];
      }
      prim_lengths[p] = prim_len;
   }

   struct draw_prim_info prim_info = { 0 };
   prim_info.prim = prim;
   prim_info.linear = false;
   prim_info.elts = elts;
   prim_info.count = prim_count;
   prim_info.primitive_count = prim_count;
   prim_info.primitive_lengths = prim_lengths;

   struct draw_vertex_info vert_out = { 0 };
   struct draw_prim_info prim_out = { 0 };
   draw_mesh_prim_run(lp->draw,
                      per_prim_count,
                      prim_ptr,
                      cull_prim_idx,
                      &prim_info,
                      &vinfo,
                      &prim_out,
                      &vert_out);
   free(elts);
   free(prim_lengths);

   draw_collect_primitives_generated(lp->draw,
                                     lp->active_primgen_queries &&
                                     !lp->queries_disabled);
   draw_mesh(lp->draw, &vert_out, &prim_out);

   free(vert_out.verts);
   free(prim_out.primitive_lengths);
}

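/**
 * pipe_context::draw_mesh_tasks entry point: runs the optional task
 * (amplification) stage, then the mesh stage in bounded chunks, handing
 * each chunk's output to lp_mesh_call_draw.
 */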
static void
llvmpipe_draw_mesh_tasks(struct pipe_context *pipe,
                         unsigned drawid_offset,
                         const struct pipe_grid_info *info)
{
   struct llvmpipe_context *lp = llvmpipe_context(pipe);
   struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
   struct lp_cs_job_info job_info;

   if (!llvmpipe_check_render_cond(lp))
      return;

   memset(&job_info, 0, sizeof(job_info));
   if (lp->dirty)
      llvmpipe_update_derived(lp);

   unsigned draw_count = info->draw_count;
   if (info->indirect && info->indirect_draw_count) {
      struct pipe_transfer *dc_transfer;
      uint32_t *dc_param = pipe_buffer_map_range(pipe,
                                                 info->indirect_draw_count,
                                                 info->indirect_draw_count_offset,
                                                 4, PIPE_MAP_READ, &dc_transfer);
      if (!dc_transfer) {
         debug_printf("%s: failed to map indirect draw count buffer\n", __func__);
         return;
      }
      if (dc_param[0] < draw_count)
         draw_count = dc_param[0];
      pipe_buffer_unmap(pipe, dc_transfer);
   }

   struct nir_shader *mhs_shader = lp->mhs->base.ir.nir;
   int prim_out_idx = -1;
   int first_per_prim_idx = -1;
   int cull_prim_idx = -1;
   nir_foreach_shader_out_variable(var, mhs_shader) {
      if (var->data.per_primitive) {
         first_per_prim_idx = var->data.driver_location;
         break;
      }
   }
   nir_foreach_shader_out_variable(var, mhs_shader) {
      if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES) {
         prim_out_idx = var->data.driver_location;
         break;
      }
   }
   nir_foreach_shader_out_variable(var, mhs_shader) {
      if (var->data.location == VARYING_SLOT_CULL_PRIMITIVE) {
         cull_prim_idx = var->data.driver_location - first_per_prim_idx;
         break;
      }
   }
   int per_prim_count = util_bitcount64(mhs_shader->info.per_primitive_outputs);
   int out_count = util_bitcount64(mhs_shader->info.outputs_written);
   int per_vert_count = out_count - per_prim_count;
   int vsize = (sizeof(struct vertex_header) + per_vert_count * 4 * sizeof(float)) * 8;
   int psize = (per_prim_count * 4 * sizeof(float)) * 8;
   size_t prim_offset = vsize * (mhs_shader->info.mesh.max_vertices_out + 8);
   size_t task_out_size = prim_offset + psize * (mhs_shader->info.mesh.max_primitives_out + 8);

   for (unsigned dr = 0; dr < draw_count; dr++) {
      fill_grid_size(pipe, dr, info, job_info.grid_size);

      job_info.grid_base[0] = info->grid_base[0];
      job_info.grid_base[1] = info->grid_base[1];
      job_info.grid_base[2] = info->grid_base[2];
      job_info.block_size[0] = info->block[0];
      job_info.block_size[1] = info->block[1];
      job_info.block_size[2] = info->block[2];

      void *payload = NULL;
      size_t payload_stride = 0;
      int num_tasks = job_info.grid_size[2] * job_info.grid_size[1] * job_info.grid_size[0];
      int num_mesh_invocs = 1;
      if (lp->tss) {
         struct nir_shader *tsk_shader = lp->tss->base.ir.nir;
         payload_stride = tsk_shader->info.task_payload_size + 3 * sizeof(uint32_t);

         payload = calloc(num_tasks, payload_stride);

         job_info.use_iters = false;
         job_info.payload = payload;
         job_info.payload_stride = payload_stride;
         job_info.work_dim = info->work_dim;
         job_info.draw_id = dr + drawid_offset;
         job_info.req_local_mem = lp->tss->req_local_mem + info->variable_shared_mem;
         job_info.current = &lp->task_ctx->cs.current;

         if (num_tasks) {
            struct lp_cs_tpool_task *task;
            mtx_lock(&screen->cs_mutex);
            task = lp_cs_tpool_queue_task(screen->cs_tpool, cs_exec_fn, &job_info, num_tasks);
            mtx_unlock(&screen->cs_mutex);

            lp_cs_tpool_wait_for_task(screen->cs_tpool, &task);
         }
         if (!lp->queries_disabled)
            lp->pipeline_statistics.ts_invocations += num_tasks * info->block[0] * info->block[1] * info->block[2];
         num_mesh_invocs = num_tasks;
      }

      for (unsigned i = 0; i < num_mesh_invocs; i++) {
         if (payload) {
            void *this_payload = (char *)payload + (payload_stride * i);
            uint32_t *payload_grid = (uint32_t *)this_payload;
            assert(lp->tss);
            job_info.grid_size[0] = payload_grid[0];
            job_info.grid_size[1] = payload_grid[1];
            job_info.grid_size[2] = payload_grid[2];
            job_info.payload = this_payload;
            job_info.block_size[0] = mhs_shader->info.workgroup_size[0];
            job_info.block_size[1] = mhs_shader->info.workgroup_size[1];
            job_info.block_size[2] = mhs_shader->info.workgroup_size[2];
         }

         job_info.req_local_mem = lp->mhs->req_local_mem + info->variable_shared_mem;
         job_info.current = &lp->mesh_ctx->cs.current;
         job_info.payload_stride = 0;
         job_info.draw_id = dr + drawid_offset;
         job_info.io_stride = task_out_size;

         uint32_t job_strides[3] = { job_info.grid_size[0], job_info.grid_size[1], job_info.grid_size[2] };
         uint32_t total_grid[3] = { job_info.grid_size[0], job_info.grid_size[1], job_info.grid_size[2] };
         const unsigned int max_tasks = 4096;
         /* limit how large the memory allocation for vbuf can get */
         for (unsigned g = 0; g < 3; g++) {
            if (job_strides[g] > max_tasks) {
               job_strides[g] = max_tasks;
            }
         }

         for (unsigned grid_z = 0; grid_z < total_grid[2]; grid_z += job_strides[2]) {
            int this_z = MIN2(total_grid[2] - grid_z, max_tasks);
            job_info.grid_base[2] = grid_z;
            for (unsigned grid_y = 0; grid_y < total_grid[1]; grid_y += job_strides[1]) {
               int this_y = MIN2(total_grid[1] - grid_y, max_tasks);
               job_info.grid_base[1] = grid_y;
               for (unsigned grid_x = 0; grid_x < total_grid[0]; grid_x += job_strides[0]) {
                  int this_x = MIN2(total_grid[0] - grid_x, max_tasks);
                  job_info.grid_base[0] = grid_x;
                  num_tasks = this_x * this_y * this_z;

                  job_info.iter_size[0] = this_x;
                  job_info.iter_size[1] = this_y;
                  job_info.iter_size[2] = this_z;
                  job_info.use_iters = true;

                  void *vbuf = CALLOC(num_tasks, task_out_size);
                  if (!vbuf)
                     return;

                  job_info.io = vbuf;
                  if (num_tasks) {
                     struct lp_cs_tpool_task *task;
                     mtx_lock(&screen->cs_mutex);
                     task = lp_cs_tpool_queue_task(screen->cs_tpool, cs_exec_fn, &job_info, num_tasks);
                     mtx_unlock(&screen->cs_mutex);

                     lp_cs_tpool_wait_for_task(screen->cs_tpool, &task);
                  }
                  if (!lp->queries_disabled)
                     lp->pipeline_statistics.ms_invocations += num_tasks * job_info.block_size[0] * job_info.block_size[1] * job_info.block_size[2];

                  for (unsigned t = 0; t < num_tasks; t++)
                     lp_mesh_call_draw(lp,
                                       mhs_shader->info.mesh.primitive_type,
                                       prim_out_idx - first_per_prim_idx,
                                       cull_prim_idx, t, vbuf, task_out_size,
                                       vsize, psize, per_prim_count, prim_offset);
                  free(vbuf);
               }
            }
         }
      }
      free(payload);
   }
   draw_flush(lp->draw);
}

void
llvmpipe_init_mesh_funcs(struct llvmpipe_context *llvmpipe)
{
   llvmpipe->pipe.create_ms_state = llvmpipe_create_ms_state;
   llvmpipe->pipe.bind_ms_state = llvmpipe_bind_ms_state;
   llvmpipe->pipe.delete_ms_state = llvmpipe_delete_ms_state;

   llvmpipe->pipe.draw_mesh_tasks = llvmpipe_draw_mesh_tasks;
}

void
llvmpipe_task_update_derived(struct llvmpipe_context *llvmpipe)
{
   if (llvmpipe->dirty & LP_NEW_TASK_CONSTANTS) {
      lp_csctx_set_cs_constants(llvmpipe->task_ctx,
                                ARRAY_SIZE(llvmpipe->constants[PIPE_SHADER_TASK]),
                                llvmpipe->constants[PIPE_SHADER_TASK]);
      update_csctx_consts(llvmpipe, llvmpipe->task_ctx);
   }

   if (llvmpipe->dirty & LP_NEW_TASK_SSBOS) {
      lp_csctx_set_cs_ssbos(llvmpipe->task_ctx,
                            ARRAY_SIZE(llvmpipe->ssbos[PIPE_SHADER_TASK]),
                            llvmpipe->ssbos[PIPE_SHADER_TASK]);
      update_csctx_ssbo(llvmpipe, llvmpipe->task_ctx);
   }

   if (llvmpipe->dirty & LP_NEW_TASK_SAMPLER_VIEW)
      lp_csctx_set_sampler_views(llvmpipe->task_ctx,
                                 llvmpipe->num_sampler_views[PIPE_SHADER_TASK],
                                 llvmpipe->sampler_views[PIPE_SHADER_TASK]);

   if (llvmpipe->dirty & LP_NEW_TASK_SAMPLER)
      lp_csctx_set_sampler_state(llvmpipe->task_ctx,
                                 llvmpipe->num_samplers[PIPE_SHADER_TASK],
                                 llvmpipe->samplers[PIPE_SHADER_TASK]);

   if (llvmpipe->dirty & LP_NEW_TASK_IMAGES)
      lp_csctx_set_cs_images(llvmpipe->task_ctx,
                             ARRAY_SIZE(llvmpipe->images[PIPE_SHADER_TASK]),
                             llvmpipe->images[PIPE_SHADER_TASK]);

   struct lp_cs_context *csctx = llvmpipe->task_ctx;
   csctx->cs.current.jit_resources.aniso_filter_table = lp_build_sample_aniso_filter_table();
}

void
llvmpipe_mesh_update_derived(struct llvmpipe_context *llvmpipe)
{
   if (llvmpipe->dirty & LP_NEW_MESH_CONSTANTS) {
      lp_csctx_set_cs_constants(llvmpipe->mesh_ctx,
                                ARRAY_SIZE(llvmpipe->constants[PIPE_SHADER_MESH]),
                                llvmpipe->constants[PIPE_SHADER_MESH]);
      update_csctx_consts(llvmpipe, llvmpipe->mesh_ctx);
   }

   if (llvmpipe->dirty & LP_NEW_MESH_SSBOS) {
      lp_csctx_set_cs_ssbos(llvmpipe->mesh_ctx,
                            ARRAY_SIZE(llvmpipe->ssbos[PIPE_SHADER_MESH]),
                            llvmpipe->ssbos[PIPE_SHADER_MESH]);
      update_csctx_ssbo(llvmpipe, llvmpipe->mesh_ctx);
   }

   if (llvmpipe->dirty & LP_NEW_MESH_SAMPLER_VIEW)
      lp_csctx_set_sampler_views(llvmpipe->mesh_ctx,
                                 llvmpipe->num_sampler_views[PIPE_SHADER_MESH],
                                 llvmpipe->sampler_views[PIPE_SHADER_MESH]);

   if (llvmpipe->dirty & LP_NEW_MESH_SAMPLER)
      lp_csctx_set_sampler_state(llvmpipe->mesh_ctx,
                                 llvmpipe->num_samplers[PIPE_SHADER_MESH],
                                 llvmpipe->samplers[PIPE_SHADER_MESH]);

   if (llvmpipe->dirty & LP_NEW_MESH_IMAGES)
      lp_csctx_set_cs_images(llvmpipe->mesh_ctx,
                             ARRAY_SIZE(llvmpipe->images[PIPE_SHADER_MESH]),
                             llvmpipe->images[PIPE_SHADER_MESH]);

   struct lp_cs_context *csctx = llvmpipe->mesh_ctx;
   csctx->cs.current.jit_resources.aniso_filter_table = lp_build_sample_aniso_filter_table();
}