1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "draw_llvm.h"
29
30 #include "draw_context.h"
31 #include "draw_vs.h"
32 #include "draw_gs.h"
33
34 #include "gallivm/lp_bld_arit.h"
35 #include "gallivm/lp_bld_arit_overflow.h"
36 #include "gallivm/lp_bld_bitarit.h"
37 #include "gallivm/lp_bld_gather.h"
38 #include "gallivm/lp_bld_logic.h"
39 #include "gallivm/lp_bld_const.h"
40 #include "gallivm/lp_bld_coro.h"
41 #include "gallivm/lp_bld_swizzle.h"
42 #include "gallivm/lp_bld_struct.h"
43 #include "gallivm/lp_bld_type.h"
44 #include "gallivm/lp_bld_flow.h"
45 #include "gallivm/lp_bld_debug.h"
46 #include "gallivm/lp_bld_tgsi.h"
47 #include "gallivm/lp_bld_nir.h"
48 #include "gallivm/lp_bld_printf.h"
49 #include "gallivm/lp_bld_intr.h"
50 #include "gallivm/lp_bld_init.h"
51 #include "gallivm/lp_bld_type.h"
52 #include "gallivm/lp_bld_pack.h"
53 #include "gallivm/lp_bld_format.h"
54 #include "gallivm/lp_bld_misc.h"
55 #include "gallivm/lp_bld_jit_sample.h"
56 #include "tgsi/tgsi_exec.h"
57 #include "tgsi/tgsi_dump.h"
58
59 #include "util/u_math.h"
60 #include "util/u_pointer.h"
61 #include "util/u_string.h"
62 #include "nir_serialize.h"
63 #include "util/mesa-sha1.h"
64 #define DEBUG_STORE 0
65
66
/*
 * Forward declaration; the definition is not in this part of the file.
 * Called by draw_llvm_create_variant() before the module is compiled.
 */
static void draw_llvm_generate(struct draw_llvm *llvm,
                               struct draw_llvm_variant *variant);
69
70
71 struct draw_gs_llvm_iface {
72 struct lp_build_gs_iface base;
73
74 struct draw_gs_llvm_variant *variant;
75 LLVMValueRef input;
76 };
77
78
/**
 * Reinterpret a generic GS interface pointer as the draw-specific wrapper.
 * This is an unchecked pointer cast; nothing is validated.
 */
static inline const struct draw_gs_llvm_iface *
draw_gs_llvm_iface(const struct lp_build_gs_iface *iface)
{
   const struct draw_gs_llvm_iface *gs_iface =
      (const struct draw_gs_llvm_iface *)iface;

   return gs_iface;
}
84
85
86 struct draw_tcs_llvm_iface {
87 struct lp_build_tcs_iface base;
88
89 struct draw_tcs_llvm_variant *variant;
90 LLVMValueRef input;
91 LLVMValueRef output;
92 };
93
94
/**
 * Reinterpret a generic TCS interface pointer as the draw-specific wrapper.
 * This is an unchecked pointer cast; nothing is validated.
 */
static inline const struct draw_tcs_llvm_iface *
draw_tcs_llvm_iface(const struct lp_build_tcs_iface *iface)
{
   const struct draw_tcs_llvm_iface *tcs_iface =
      (const struct draw_tcs_llvm_iface *)iface;

   return tcs_iface;
}
100
101
102 struct draw_tes_llvm_iface {
103 struct lp_build_tes_iface base;
104
105 struct draw_tes_llvm_variant *variant;
106 LLVMValueRef input;
107 };
108
109
/**
 * Reinterpret a generic TES interface pointer as the draw-specific wrapper.
 * This is an unchecked pointer cast; nothing is validated.
 */
static inline const struct draw_tes_llvm_iface *
draw_tes_llvm_iface(const struct lp_build_tes_iface *iface)
{
   const struct draw_tes_llvm_iface *tes_iface =
      (const struct draw_tes_llvm_iface *)iface;

   return tes_iface;
}
115
116
117 /**
118 * Create LLVM type for draw_vertex_buffer.
119 */
120 static LLVMTypeRef
create_jit_dvbuffer_type(struct gallivm_state * gallivm,const char * struct_name)121 create_jit_dvbuffer_type(struct gallivm_state *gallivm,
122 const char *struct_name)
123 {
124 LLVMTargetDataRef target = gallivm->target;
125 LLVMTypeRef dvbuffer_type;
126 LLVMTypeRef elem_types[DRAW_JIT_DVBUFFER_NUM_FIELDS];
127 LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
128
129 elem_types[DRAW_JIT_DVBUFFER_MAP] =
130 LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
131 elem_types[DRAW_JIT_DVBUFFER_SIZE] = int32_type;
132
133 dvbuffer_type = LLVMStructTypeInContext(gallivm->context, elem_types,
134 ARRAY_SIZE(elem_types), 0);
135
136 (void) target; /* silence unused var warning for non-debug build */
137 LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, map,
138 target, dvbuffer_type,
139 DRAW_JIT_DVBUFFER_MAP);
140 LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, size,
141 target, dvbuffer_type,
142 DRAW_JIT_DVBUFFER_SIZE);
143
144 return dvbuffer_type;
145 }
146
147 /**
148 * Create LLVM type for struct draw_jit_context
149 */
150 static LLVMTypeRef
create_vs_jit_context_type(struct gallivm_state * gallivm,const char * struct_name)151 create_vs_jit_context_type(struct gallivm_state *gallivm, const char *struct_name)
152 {
153 LLVMTargetDataRef target = gallivm->target;
154 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
155 LLVMTypeRef elem_types[DRAW_VS_JIT_CTX_NUM_FIELDS];
156
157 elem_types[DRAW_VS_JIT_CTX_PLANES] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), DRAW_TOTAL_CLIP_PLANES), 0);
158 elem_types[DRAW_VS_JIT_CTX_VIEWPORT] = LLVMPointerType(float_type, 0);
159
160 LLVMTypeRef context_type = LLVMStructTypeInContext(gallivm->context, elem_types, ARRAY_SIZE(elem_types), 0);
161
162 (void) target; /* silence unused var warning for non-debug build */
163 LP_CHECK_MEMBER_OFFSET(struct draw_vs_jit_context, planes,
164 target, context_type, DRAW_VS_JIT_CTX_PLANES);
165 LP_CHECK_MEMBER_OFFSET(struct draw_vs_jit_context, viewports,
166 target, context_type, DRAW_VS_JIT_CTX_VIEWPORT);
167 LP_CHECK_STRUCT_SIZE(struct draw_vs_jit_context,
168 target, context_type);
169
170 return context_type;
171 }
172
173
174 /**
175 * Create LLVM type for struct draw_gs_jit_context
176 */
177 static LLVMTypeRef
create_gs_jit_context_type(struct gallivm_state * gallivm,unsigned vector_length,const char * struct_name)178 create_gs_jit_context_type(struct gallivm_state *gallivm,
179 unsigned vector_length,
180 const char *struct_name)
181 {
182 LLVMTargetDataRef target = gallivm->target;
183 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
184 LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
185 LLVMTypeRef elem_types[DRAW_GS_JIT_CTX_NUM_FIELDS];
186 LLVMTypeRef context_type;
187
188 elem_types[DRAW_GS_JIT_CTX_PLANES] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
189 DRAW_TOTAL_CLIP_PLANES), 0);
190 elem_types[DRAW_GS_JIT_CTX_VIEWPORT] = LLVMPointerType(float_type, 0); /* viewports */
191
192
193 elem_types[DRAW_GS_JIT_CTX_PRIM_LENGTHS] = LLVMPointerType(LLVMPointerType(int_type, 0), 0);
194 elem_types[DRAW_GS_JIT_CTX_EMITTED_VERTICES] = LLVMPointerType(LLVMVectorType(int_type,
195 vector_length), 0);
196 elem_types[DRAW_GS_JIT_CTX_EMITTED_PRIMS] = LLVMPointerType(LLVMVectorType(int_type,
197 vector_length), 0);
198
199 context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
200 ARRAY_SIZE(elem_types), 0);
201
202 (void) target; /* silence unused var warning for non-debug build */
203 LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes,
204 target, context_type, DRAW_GS_JIT_CTX_PLANES);
205 LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewports,
206 target, context_type, DRAW_GS_JIT_CTX_VIEWPORT);
207 LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,
208 target, context_type,
209 DRAW_GS_JIT_CTX_PRIM_LENGTHS);
210 LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,
211 target, context_type,
212 DRAW_GS_JIT_CTX_EMITTED_VERTICES);
213 LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,
214 target, context_type,
215 DRAW_GS_JIT_CTX_EMITTED_PRIMS);
216 LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context,
217 target, context_type);
218 return context_type;
219 }
220
221
222 static LLVMTypeRef
create_gs_jit_input_type_deref(struct gallivm_state * gallivm)223 create_gs_jit_input_type_deref(struct gallivm_state *gallivm)
224 {
225 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
226 LLVMTypeRef input_array;
227
228 input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */
229 input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */
230 input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
231 return input_array;
232 }
233
234
235 static LLVMTypeRef
create_gs_jit_input_type(struct gallivm_state * gallivm)236 create_gs_jit_input_type(struct gallivm_state *gallivm)
237 {
238 return LLVMPointerType(create_gs_jit_input_type_deref(gallivm), 0); /* num vertices per prim */
239 }
240
241
242 /**
243 * Create LLVM type for struct pipe_vertex_buffer
244 */
245 static LLVMTypeRef
create_jit_vertex_buffer_type(struct gallivm_state * gallivm,const char * struct_name)246 create_jit_vertex_buffer_type(struct gallivm_state *gallivm,
247 const char *struct_name)
248 {
249 LLVMTargetDataRef target = gallivm->target;
250 LLVMTypeRef elem_types[3];
251 LLVMTypeRef vb_type;
252
253 elem_types[0] = LLVMInt8TypeInContext(gallivm->context);
254 elem_types[1] = LLVMInt32TypeInContext(gallivm->context);
255 elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
256
257 vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
258 ARRAY_SIZE(elem_types), 0);
259
260 (void) target; /* silence unused var warning for non-debug build */
261 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, is_user_buffer,
262 target, vb_type, 0);
263 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
264 target, vb_type, 1);
265 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer.resource,
266 target, vb_type, 2);
267
268 LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
269
270 return vb_type;
271 }
272
273
274 static LLVMTypeRef
create_tcs_jit_input_type_deref(struct gallivm_state * gallivm)275 create_tcs_jit_input_type_deref(struct gallivm_state *gallivm)
276 {
277 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
278 LLVMTypeRef input_array;
279
280 input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
281 input_array = LLVMArrayType(input_array, NUM_TCS_INPUTS); /* num attrs per vertex */
282 return input_array;
283 }
284
285
286 static LLVMTypeRef
create_tcs_jit_input_type(struct gallivm_state * gallivm)287 create_tcs_jit_input_type(struct gallivm_state *gallivm)
288 {
289 return LLVMPointerType(create_tcs_jit_input_type_deref(gallivm), 0); /* num vertices per prim */
290 }
291
292
293 static LLVMTypeRef
create_tcs_jit_output_type_deref(struct gallivm_state * gallivm)294 create_tcs_jit_output_type_deref(struct gallivm_state *gallivm)
295 {
296 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
297 LLVMTypeRef output_array;
298
299 output_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
300 output_array = LLVMArrayType(output_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
301 return output_array;
302 }
303
304
305 static LLVMTypeRef
create_tcs_jit_output_type(struct gallivm_state * gallivm)306 create_tcs_jit_output_type(struct gallivm_state *gallivm)
307 {
308 return LLVMPointerType(create_tcs_jit_output_type_deref(gallivm), 0); /* num vertices per prim */
309 }
310
311
312 static LLVMTypeRef
create_tes_jit_input_deref_type(struct gallivm_state * gallivm)313 create_tes_jit_input_deref_type(struct gallivm_state *gallivm)
314 {
315 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
316 LLVMTypeRef input_array;
317
318 input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
319 input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
320
321 return input_array;
322 }
323
324
325 /**
326 * Create LLVM types for various structures.
327 */
328 static void
create_vs_jit_types(struct draw_llvm_variant * variant)329 create_vs_jit_types(struct draw_llvm_variant *variant)
330 {
331 struct gallivm_state *gallivm = variant->gallivm;
332
333 variant->context_type = create_vs_jit_context_type(gallivm, "draw_vs_jit_context");
334 variant->context_ptr_type = LLVMPointerType(variant->context_type, 0);
335
336 variant->resources_type = lp_build_jit_resources_type(gallivm);
337 variant->resources_ptr_type = LLVMPointerType(variant->resources_type, 0);
338
339 variant->buffer_type = create_jit_dvbuffer_type(gallivm, "draw_vertex_buffer");
340 variant->buffer_ptr_type = LLVMPointerType(variant->buffer_type, 0);
341
342 variant->vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");
343 variant->vb_ptr_type = LLVMPointerType(variant->vb_type, 0);
344 }
345
346
347 static LLVMTypeRef
get_context_ptr_type(struct draw_llvm_variant * variant)348 get_context_ptr_type(struct draw_llvm_variant *variant)
349 {
350 if (!variant->context_ptr_type)
351 create_vs_jit_types(variant);
352 return variant->context_ptr_type;
353 }
354
355
356 static LLVMTypeRef
get_buffer_ptr_type(struct draw_llvm_variant * variant)357 get_buffer_ptr_type(struct draw_llvm_variant *variant)
358 {
359 if (!variant->buffer_ptr_type)
360 create_vs_jit_types(variant);
361 return variant->buffer_ptr_type;
362 }
363
364
365 static LLVMTypeRef
get_vb_ptr_type(struct draw_llvm_variant * variant)366 get_vb_ptr_type(struct draw_llvm_variant *variant)
367 {
368 if (!variant->vb_ptr_type)
369 create_vs_jit_types(variant);
370 return variant->vb_ptr_type;
371 }
372
373 static LLVMTypeRef
get_vertex_header_ptr_type(struct draw_llvm_variant * variant)374 get_vertex_header_ptr_type(struct draw_llvm_variant *variant)
375 {
376 assert(variant->vertex_header_ptr_type);
377 return variant->vertex_header_ptr_type;
378 }
379
380
381 /**
382 * Create per-context LLVM info.
383 */
384 struct draw_llvm *
draw_llvm_create(struct draw_context * draw,lp_context_ref * context)385 draw_llvm_create(struct draw_context *draw, lp_context_ref *context)
386 {
387 struct draw_llvm *llvm;
388
389 if (!lp_build_init())
390 return NULL;
391
392 llvm = CALLOC_STRUCT(draw_llvm);
393 if (!llvm)
394 return NULL;
395
396 llvm->draw = draw;
397
398 if (context) {
399 llvm->context = *context;
400 llvm->context.owned = false;
401 }
402 if (!llvm->context.ref) {
403 lp_context_create(&llvm->context);
404 }
405 if (!llvm->context.ref)
406 goto fail;
407
408 llvm->nr_variants = 0;
409 list_inithead(&llvm->vs_variants_list.list);
410
411 llvm->nr_gs_variants = 0;
412 list_inithead(&llvm->gs_variants_list.list);
413
414 llvm->nr_tcs_variants = 0;
415 list_inithead(&llvm->tcs_variants_list.list);
416
417 llvm->nr_tes_variants = 0;
418 list_inithead(&llvm->tes_variants_list.list);
419
420 return llvm;
421
422 fail:
423 draw_llvm_destroy(llvm);
424 return NULL;
425 }
426
427
428 /**
429 * Free per-context LLVM info.
430 */
431 void
draw_llvm_destroy(struct draw_llvm * llvm)432 draw_llvm_destroy(struct draw_llvm *llvm)
433 {
434 lp_context_destroy(&llvm->context);
435
436 /* XXX free other draw_llvm data? */
437 FREE(llvm);
438 }
439
440
441 static void
draw_get_ir_cache_key(struct nir_shader * nir,const void * key,size_t key_size,uint32_t val_32bit,unsigned char ir_sha1_cache_key[20])442 draw_get_ir_cache_key(struct nir_shader *nir,
443 const void *key, size_t key_size,
444 uint32_t val_32bit,
445 unsigned char ir_sha1_cache_key[20])
446 {
447 struct blob blob = { 0 };
448 unsigned ir_size;
449 void *ir_binary;
450
451 blob_init(&blob);
452 nir_serialize(&blob, nir, true);
453 ir_binary = blob.data;
454 ir_size = blob.size;
455
456 struct mesa_sha1 ctx;
457 _mesa_sha1_init(&ctx);
458 _mesa_sha1_update(&ctx, key, key_size);
459 _mesa_sha1_update(&ctx, ir_binary, ir_size);
460 _mesa_sha1_update(&ctx, &val_32bit, 4);
461 _mesa_sha1_final(&ctx, ir_sha1_cache_key);
462
463 blob_finish(&blob);
464 }
465
466
467 /**
468 * Create LLVM-generated code for a vertex shader.
469 */
470 struct draw_llvm_variant *
draw_llvm_create_variant(struct draw_llvm * llvm,unsigned num_inputs,const struct draw_llvm_variant_key * key)471 draw_llvm_create_variant(struct draw_llvm *llvm,
472 unsigned num_inputs,
473 const struct draw_llvm_variant_key *key)
474 {
475 struct draw_llvm_variant *variant;
476 struct llvm_vertex_shader *shader =
477 llvm_vertex_shader(llvm->draw->vs.vertex_shader);
478 char module_name[64];
479 unsigned char ir_sha1_cache_key[20];
480 struct lp_cached_code cached = { 0 };
481 bool needs_caching = false;
482 variant = MALLOC(sizeof *variant +
483 shader->variant_key_size -
484 sizeof variant->key);
485 if (!variant)
486 return NULL;
487
488 variant->llvm = llvm;
489 variant->shader = shader;
490 memcpy(&variant->key, key, shader->variant_key_size);
491
492 snprintf(module_name, sizeof(module_name), "draw_llvm_vs_variant%u",
493 variant->shader->variants_cached);
494
495 if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
496 draw_get_ir_cache_key(shader->base.state.ir.nir,
497 key,
498 shader->variant_key_size,
499 num_inputs,
500 ir_sha1_cache_key);
501
502 llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
503 &cached,
504 ir_sha1_cache_key);
505 if (!cached.data_size)
506 needs_caching = true;
507 }
508 variant->gallivm = gallivm_create(module_name, &llvm->context, &cached);
509
510 create_vs_jit_types(variant);
511
512 if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
513 if (llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_TGSI)
514 tgsi_dump(llvm->draw->vs.vertex_shader->state.tokens, 0);
515 else
516 nir_print_shader(llvm->draw->vs.vertex_shader->state.ir.nir, stderr);
517 draw_llvm_dump_variant_key(&variant->key);
518 }
519
520 variant->vertex_header_type = lp_build_create_jit_vertex_header_type(variant->gallivm, num_inputs);
521 variant->vertex_header_ptr_type = LLVMPointerType(variant->vertex_header_type, 0);
522
523 draw_llvm_generate(llvm, variant);
524
525 gallivm_compile_module(variant->gallivm);
526
527 variant->jit_func = (draw_jit_vert_func)
528 gallivm_jit_function(variant->gallivm, variant->function, variant->function_name);
529
530 if (needs_caching)
531 llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
532 &cached,
533 ir_sha1_cache_key);
534 gallivm_free_ir(variant->gallivm);
535
536 variant->list_item_global.base = variant;
537 variant->list_item_local.base = variant;
538 /*variant->no = */shader->variants_created++;
539 variant->list_item_global.base = variant;
540
541 return variant;
542 }
543
544
545 static void
do_clamp_vertex_color(struct gallivm_state * gallivm,struct lp_type type,const struct tgsi_shader_info * info,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS])546 do_clamp_vertex_color(struct gallivm_state *gallivm,
547 struct lp_type type,
548 const struct tgsi_shader_info *info,
549 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
550 {
551 LLVMBuilderRef builder = gallivm->builder;
552 LLVMValueRef out;
553 unsigned chan, attrib;
554 struct lp_build_context bld;
555 lp_build_context_init(&bld, gallivm, type);
556
557 for (attrib = 0; attrib < info->num_outputs; ++attrib) {
558 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
559 if (outputs[attrib][chan]) {
560 switch (info->output_semantic_name[attrib]) {
561 case TGSI_SEMANTIC_COLOR:
562 case TGSI_SEMANTIC_BCOLOR:
563 out = LLVMBuildLoad2(builder, LLVMTypeOf(bld.zero), outputs[attrib][chan], "");
564 out = lp_build_clamp(&bld, out, bld.zero, bld.one);
565 LLVMBuildStore(builder, out, outputs[attrib][chan]);
566 break;
567 }
568 }
569 }
570 }
571 }
572
573
574 static void
generate_vs(struct draw_llvm_variant * variant,LLVMBuilderRef builder,struct lp_type vs_type,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],const LLVMValueRef (* inputs)[TGSI_NUM_CHANNELS],const struct lp_bld_tgsi_system_values * system_values,LLVMValueRef context_ptr,LLVMValueRef resources_ptr,const struct lp_build_sampler_soa * draw_sampler,const struct lp_build_image_soa * draw_image,bool clamp_vertex_color,struct lp_build_mask_context * bld_mask)575 generate_vs(struct draw_llvm_variant *variant,
576 LLVMBuilderRef builder,
577 struct lp_type vs_type,
578 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
579 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
580 const struct lp_bld_tgsi_system_values *system_values,
581 LLVMValueRef context_ptr,
582 LLVMValueRef resources_ptr,
583 const struct lp_build_sampler_soa *draw_sampler,
584 const struct lp_build_image_soa *draw_image,
585 bool clamp_vertex_color,
586 struct lp_build_mask_context *bld_mask)
587 {
588 struct draw_llvm *llvm = variant->llvm;
589 const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
590 LLVMValueRef consts_ptr =
591 lp_jit_resources_constants(variant->gallivm, variant->resources_type, resources_ptr);
592 LLVMValueRef ssbos_ptr =
593 lp_jit_resources_ssbos(variant->gallivm, variant->resources_type, resources_ptr);
594 struct draw_llvm_variant_key *key = &variant->key;
595
596 struct lp_build_tgsi_params params;
597 memset(¶ms, 0, sizeof(params));
598
599 params.type = vs_type;
600 params.mask = bld_mask;
601 params.consts_ptr = consts_ptr;
602 params.system_values = system_values;
603 params.inputs = inputs;
604 params.num_inputs = key->nr_vertex_elements;
605 params.context_type = variant->context_type;
606 params.context_ptr = context_ptr;
607 params.resources_type = variant->resources_type;
608 params.resources_ptr = resources_ptr;
609 params.sampler = draw_sampler;
610 params.info = &llvm->draw->vs.vertex_shader->info;
611 params.ssbo_ptr = ssbos_ptr;
612 params.image = draw_image;
613 params.aniso_filter_table = lp_jit_resources_aniso_filter_table(variant->gallivm,
614 variant->resources_type,
615 resources_ptr);
616
617 if (llvm->draw->vs.vertex_shader->state.ir.nir &&
618 llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_NIR) {
619 lp_build_nir_soa(variant->gallivm,
620 llvm->draw->vs.vertex_shader->state.ir.nir,
621 ¶ms,
622 outputs);
623 } else {
624 lp_build_tgsi_soa(variant->gallivm,
625 tokens,
626 ¶ms,
627 outputs);
628 }
629
630 if (clamp_vertex_color) {
631 const struct tgsi_shader_info *info = &llvm->draw->vs.vertex_shader->info;
632 do_clamp_vertex_color(variant->gallivm,
633 vs_type, info,
634 outputs);
635 }
636 }
637
638
639 static void
fetch_instanced(struct gallivm_state * gallivm,const struct util_format_description * format_desc,struct lp_type vs_type,LLVMValueRef vb_stride,LLVMValueRef map_ptr,LLVMValueRef buffer_size_adj,LLVMValueRef * inputs,LLVMValueRef index)640 fetch_instanced(struct gallivm_state *gallivm,
641 const struct util_format_description *format_desc,
642 struct lp_type vs_type,
643 LLVMValueRef vb_stride,
644 LLVMValueRef map_ptr,
645 LLVMValueRef buffer_size_adj,
646 LLVMValueRef *inputs,
647 LLVMValueRef index)
648 {
649 LLVMTypeRef i32_t = LLVMInt32TypeInContext(gallivm->context);
650 LLVMTypeRef aosf_t, aosi_t;
651 LLVMValueRef zero = LLVMConstNull(i32_t);
652 LLVMBuilderRef builder = gallivm->builder;
653 LLVMValueRef stride, buffer_overflowed, aos, index_valid;
654 unsigned i;
655
656 aosf_t = lp_build_vec_type(gallivm, lp_float32_vec4_type());
657 aosi_t = lp_build_vec_type(gallivm, lp_int32_vec4_type());
658
659 /* This mul can overflow. Wraparound is ok. */
660 stride = LLVMBuildMul(builder, vb_stride, index, "");
661
662 buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
663 stride, buffer_size_adj,
664 "buffer_overflowed");
665
666 if (0) {
667 lp_build_print_value(gallivm, " instance index = ", index);
668 lp_build_print_value(gallivm, " buffer overflowed = ", buffer_overflowed);
669 }
670
671 index_valid = LLVMBuildNot(builder, buffer_overflowed, "");
672 index_valid = LLVMBuildSExt(builder, index_valid, i32_t, "");
673 stride = LLVMBuildAnd(builder, stride, index_valid, "");
674
675 aos = lp_build_fetch_rgba_aos(gallivm,
676 format_desc,
677 lp_float32_vec4_type(),
678 false,
679 map_ptr,
680 stride, zero, zero,
681 NULL);
682
683 index_valid = lp_build_broadcast(gallivm, aosi_t, index_valid);
684 aos = LLVMBuildBitCast(builder, aos, aosi_t, "");
685 aos = LLVMBuildAnd(builder, aos, index_valid, "");
686 aos = LLVMBuildBitCast(builder, aos, aosf_t, "");
687
688 for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
689 LLVMValueRef index = lp_build_const_int32(gallivm, i);
690 inputs[i] = lp_build_extract_broadcast(gallivm,
691 lp_float32_vec4_type(),
692 vs_type, aos, index);
693 }
694 }
695
696
697 static void
fetch_vector(struct gallivm_state * gallivm,const struct util_format_description * format_desc,struct lp_type vs_type,LLVMValueRef vb_stride,LLVMValueRef map_ptr,LLVMValueRef buffer_size_adj,LLVMValueRef * inputs,LLVMValueRef indices)698 fetch_vector(struct gallivm_state *gallivm,
699 const struct util_format_description *format_desc,
700 struct lp_type vs_type,
701 LLVMValueRef vb_stride,
702 LLVMValueRef map_ptr,
703 LLVMValueRef buffer_size_adj,
704 LLVMValueRef *inputs,
705 LLVMValueRef indices)
706 {
707 LLVMBuilderRef builder = gallivm->builder;
708 struct lp_build_context blduivec;
709 struct lp_type fetch_type = vs_type;
710 LLVMValueRef offset, valid_mask;
711 unsigned i;
712
713 lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
714
715 vb_stride = lp_build_broadcast_scalar(&blduivec, vb_stride);
716 buffer_size_adj = lp_build_broadcast_scalar(&blduivec, buffer_size_adj);
717
718 /* This mul can overflow. Wraparound is ok. */
719 offset = lp_build_mul(&blduivec, vb_stride, indices);
720
721 valid_mask = lp_build_compare(gallivm, blduivec.type,
722 PIPE_FUNC_LESS, offset, buffer_size_adj);
723
724 /* not valid elements use offset 0 */
725 offset = LLVMBuildAnd(builder, offset, valid_mask, "");
726
727 if (0) {
728 lp_build_print_value(gallivm, " indices = ", indices);
729 lp_build_print_value(gallivm, " offsets = ", offset);
730 lp_build_print_value(gallivm, " valid_mask = ", valid_mask);
731 }
732
733 /*
734 * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches.
735 * This should always produce better code.
736 */
737
738 /* The type handling is annoying here... */
739 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
740 format_desc->channel[0].pure_integer) {
741 if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
742 fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length);
743 } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
744 fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length);
745 }
746 }
747
748 lp_build_fetch_rgba_soa(gallivm, format_desc,
749 fetch_type, false, map_ptr, offset,
750 blduivec.zero, blduivec.zero,
751 NULL, inputs);
752
753 for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
754 inputs[i] = LLVMBuildBitCast(builder, inputs[i],
755 lp_build_vec_type(gallivm, vs_type), "");
756 }
757
758 /* out-of-bound fetches return all zeros */
759 for (i = 0; i < format_desc->nr_channels; i++) {
760 inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, "");
761 inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, "");
762 inputs[i] = LLVMBuildBitCast(builder, inputs[i],
763 lp_build_vec_type(gallivm, vs_type), "");
764 }
765 }
766
767
768 static void
store_aos(struct gallivm_state * gallivm,bool is_per_prim,LLVMTypeRef io_type,LLVMValueRef io_ptr,LLVMValueRef index,LLVMValueRef value)769 store_aos(struct gallivm_state *gallivm,
770 bool is_per_prim,
771 LLVMTypeRef io_type,
772 LLVMValueRef io_ptr,
773 LLVMValueRef index,
774 LLVMValueRef value)
775 {
776 LLVMTypeRef data_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, lp_float32_vec4_type()), 0);
777 LLVMBuilderRef builder = gallivm->builder;
778 LLVMValueRef data_ptr;
779 LLVMTypeRef data_type;
780 LLVMValueRef indices[3];
781
782 indices[0] = lp_build_const_int32(gallivm, 0);
783 indices[1] = index;
784 indices[2] = lp_build_const_int32(gallivm, 0);
785
786 if (!is_per_prim) {
787 data_ptr = lp_jit_vertex_header_data(gallivm, io_type, io_ptr);
788 data_type = LLVMStructGetTypeAtIndex(io_type, LP_JIT_VERTEX_HEADER_DATA);
789 } else {
790 data_ptr = io_ptr;
791 data_type = io_type;
792 }
793
794 data_ptr = LLVMBuildGEP2(builder, data_type, data_ptr, indices, 3, "");
795 data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, "");
796
797 #if DEBUG_STORE
798 if (is_per_prim)
799 lp_build_printf(gallivm, " ---- %p storing prim attribute %d (io = %p)\n", data_ptr, index, io_ptr);
800 else
801 lp_build_printf(gallivm, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
802 #endif
803
804 /* Unaligned store due to the vertex header */
805 LLVMSetAlignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float));
806 }
807
808
809 /**
810 * Adjust the mask to architecture endianess. The mask will the store in struct:
811 *
812 * struct vertex_header {
813 * unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
814 * unsigned edgeflag:1;
815 * unsigned pad:1;
816 * unsigned vertex_id:16;
817 * [...]
818 * }
819 *
820 * On little-endian machine nothing needs to done, however on bit-endian machine
821 * the mask's fields need to be adjusted with the algorithm:
822 *
823 * uint32_t reverse (uint32_t x)
824 * {
825 * return (x >> 16) | // vertex_id
826 * ((x & 0x3fff) << 18) | // clipmask
827 * ((x & 0x4000) << 3) | // edgeflag
828 * ((x & 0x8000) << 1); // pad
829 * }
830 */
831 static LLVMValueRef
adjust_mask(struct gallivm_state * gallivm,LLVMValueRef mask)832 adjust_mask(struct gallivm_state *gallivm,
833 LLVMValueRef mask)
834 {
835 #if UTIL_ARCH_BIG_ENDIAN
836 LLVMBuilderRef builder = gallivm->builder;
837 LLVMValueRef vertex_id;
838 LLVMValueRef clipmask;
839 LLVMValueRef pad;
840 LLVMValueRef edgeflag;
841
842 vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), "");
843 clipmask = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), "");
844 clipmask = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), "");
845 if (0) {
846 pad = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), "");
847 pad = LLVMBuildShl(builder, pad, lp_build_const_int32(gallivm, 1), "");
848 }
849 edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
850 edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 3), "");
851
852 mask = LLVMBuildOr(builder, vertex_id, clipmask, "");
853 if (0) {
854 mask = LLVMBuildOr(builder, mask, pad, "");
855 }
856 mask = LLVMBuildOr(builder, mask, edgeflag, "");
857 #endif
858 return mask;
859 }
860
861
862 void
draw_store_aos_array(struct gallivm_state * gallivm,struct lp_type soa_type,LLVMTypeRef io_type,LLVMValueRef io_ptr,LLVMValueRef * indices,LLVMValueRef * aos,int attrib,LLVMValueRef clipmask,bool need_edgeflag,bool is_per_prim)863 draw_store_aos_array(struct gallivm_state *gallivm,
864 struct lp_type soa_type,
865 LLVMTypeRef io_type,
866 LLVMValueRef io_ptr,
867 LLVMValueRef *indices,
868 LLVMValueRef* aos,
869 int attrib,
870 LLVMValueRef clipmask,
871 bool need_edgeflag, bool is_per_prim)
872 {
873 LLVMBuilderRef builder = gallivm->builder;
874 LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
875 LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
876 LLVMValueRef linear_inds[LP_MAX_VECTOR_WIDTH / 32];
877 LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
878 int vector_length = soa_type.length;
879
880 assert(TGSI_NUM_CHANNELS == 4);
881
882 for (int i = 0; i < vector_length; i++) {
883 linear_inds[i] = lp_build_const_int32(gallivm, i);
884 if (indices) {
885 inds[i] = indices[i];
886 } else {
887 inds[i] = linear_inds[i];
888 }
889 io_ptrs[i] = LLVMBuildGEP2(builder, io_type, io_ptr, &inds[i], 1, "");
890 }
891
892 if (attrib == 0 && !is_per_prim) {
893 /* store vertex header for each of the n vertices */
894 LLVMValueRef val, cliptmp;
895 int vertex_id_pad_edgeflag;
896
897 /* If this assertion fails, it means we need to update the bit twidding
898 * code here. See struct vertex_header in draw_private.h.
899 */
900 assert(DRAW_TOTAL_CLIP_PLANES==14);
901 /* initialize vertex id:16 = 0xffff, pad:1 = 0, edgeflag:1 = 1 */
902 if (!need_edgeflag) {
903 vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
904 } else {
905 vertex_id_pad_edgeflag = (0xffff << 16);
906 }
907 if (vector_length == 1)
908 val = lp_build_const_int32(gallivm, vertex_id_pad_edgeflag);
909 else
910 val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type),
911 vertex_id_pad_edgeflag);
912
913 /* OR with the clipmask */
914 cliptmp = LLVMBuildOr(builder, val, clipmask, "");
915 for (unsigned i = 0; i < vector_length; i++) {
916 LLVMValueRef id_ptr = lp_jit_vertex_header_id(gallivm, io_type, io_ptrs[i]);
917 if (vector_length > 1)
918 val = LLVMBuildExtractElement(builder, cliptmp, linear_inds[i], "");
919 else
920 val = cliptmp;
921 val = adjust_mask(gallivm, val);
922 #if DEBUG_STORE
923 lp_build_printf(gallivm, "io = %p, index %d, clipmask = %x\n",
924 io_ptrs[i], inds[i], val);
925 #endif
926 LLVMBuildStore(builder, val, id_ptr);
927 }
928 }
929
930 /* store for each of the n vertices */
931 for (int i = 0; i < vector_length; i++) {
932 store_aos(gallivm, is_per_prim, io_type, io_ptrs[i], attr_index, aos[i]);
933 }
934 }
935
936
937 static void
convert_to_aos(struct gallivm_state * gallivm,LLVMTypeRef io_type,LLVMValueRef io,LLVMValueRef * indices,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],LLVMValueRef clipmask,int num_outputs,struct lp_type soa_type,int primid_slot,bool need_edgeflag)938 convert_to_aos(struct gallivm_state *gallivm,
939 LLVMTypeRef io_type,
940 LLVMValueRef io,
941 LLVMValueRef *indices,
942 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
943 LLVMValueRef clipmask,
944 int num_outputs,
945 struct lp_type soa_type,
946 int primid_slot,
947 bool need_edgeflag)
948 {
949 LLVMBuilderRef builder = gallivm->builder;
950
951 #if DEBUG_STORE
952 lp_build_printf(gallivm, " # storing begin\n");
953 #endif
954 for (unsigned attrib = 0; attrib < num_outputs; ++attrib) {
955 LLVMValueRef soa[TGSI_NUM_CHANNELS];
956 LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32];
957 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
958 if (outputs[attrib][chan]) {
959 LLVMTypeRef single_type = (attrib == primid_slot) ? lp_build_int_vec_type(gallivm, soa_type) : lp_build_vec_type(gallivm, soa_type);
960 LLVMValueRef out = LLVMBuildLoad2(builder, single_type, outputs[attrib][chan], "");
961 lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
962 #if DEBUG_STORE
963 lp_build_printf(gallivm, "output %d : %d ",
964 LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
965 attrib, 0),
966 LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
967 chan, 0));
968 lp_build_print_value(gallivm, "val = ", out);
969 {
970 LLVMValueRef iv =
971 LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), "");
972
973 lp_build_print_value(gallivm, " ival = ", iv);
974 }
975 #endif
976 soa[chan] = out;
977 } else {
978 soa[chan] = 0;
979 }
980 }
981
982
983 if (soa_type.length == TGSI_NUM_CHANNELS) {
984 lp_build_transpose_aos(gallivm, soa_type, soa, aos);
985 } else {
986 lp_build_transpose_aos(gallivm, soa_type, soa, soa);
987
988 for (unsigned i = 0; i < soa_type.length; ++i) {
989 aos[i] = lp_build_extract_range(gallivm,
990 soa[i % TGSI_NUM_CHANNELS],
991 (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
992 TGSI_NUM_CHANNELS);
993 }
994 }
995
996 draw_store_aos_array(gallivm,
997 soa_type,
998 io_type,
999 io,
1000 indices,
1001 aos,
1002 attrib,
1003 clipmask,
1004 need_edgeflag, false);
1005 }
1006 #if DEBUG_STORE
1007 lp_build_printf(gallivm, " # storing end\n");
1008 #endif
1009 }
1010
1011
1012 /**
1013 * Stores original vertex positions in clip coordinates
1014 */
1015 static void
store_clip(struct gallivm_state * gallivm,const struct lp_type vs_type,LLVMTypeRef io_type,LLVMValueRef io_ptr,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],int idx)1016 store_clip(struct gallivm_state *gallivm,
1017 const struct lp_type vs_type,
1018 LLVMTypeRef io_type,
1019 LLVMValueRef io_ptr,
1020 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1021 int idx)
1022 {
1023 LLVMBuilderRef builder = gallivm->builder;
1024 LLVMValueRef soa[4];
1025 LLVMValueRef aos[LP_MAX_VECTOR_LENGTH];
1026 LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1027 LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
1028 LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1029 LLVMTypeRef clip_ptr_type =
1030 LLVMPointerType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context),
1031 4), 0);
1032
1033 for (int i = 0; i < vs_type.length; i++) {
1034 inds[i] = lp_build_const_int32(gallivm, i);
1035 io_ptrs[i] = LLVMBuildGEP2(builder, io_type, io_ptr, &inds[i], 1, "");
1036 }
1037
1038 LLVMTypeRef single_type = lp_build_vec_type(gallivm, vs_type);
1039 soa[0] = LLVMBuildLoad2(builder, single_type, outputs[idx][0], ""); /*x0 x1 .. xn*/
1040 soa[1] = LLVMBuildLoad2(builder, single_type, outputs[idx][1], ""); /*y0 y1 .. yn*/
1041 soa[2] = LLVMBuildLoad2(builder, single_type, outputs[idx][2], ""); /*z0 z1 .. zn*/
1042 soa[3] = LLVMBuildLoad2(builder, single_type, outputs[idx][3], ""); /*w0 w1 .. wn*/
1043
1044 for (int i = 0; i < vs_type.length; i++) {
1045 clip_ptrs[i] = lp_jit_vertex_header_clip_pos(gallivm, io_type, io_ptrs[i]);
1046 }
1047
1048 lp_build_transpose_aos(gallivm, vs_type, soa, soa);
1049 for (int i = 0; i < vs_type.length; ++i) {
1050 aos[i] = lp_build_extract_range(gallivm,
1051 soa[i % TGSI_NUM_CHANNELS],
1052 (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
1053 TGSI_NUM_CHANNELS);
1054 }
1055
1056 for (int j = 0; j < vs_type.length; j++) {
1057 LLVMValueRef clip_ptr;
1058
1059 clip_ptr = LLVMBuildPointerCast(builder, clip_ptrs[j], clip_ptr_type, "");
1060
1061 /* Unaligned store */
1062 LLVMSetAlignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
1063 }
1064 }
1065
1066
1067 /**
1068 * Transforms the outputs for viewport mapping
1069 */
1070 static void
generate_viewport(struct draw_llvm_variant * variant,LLVMBuilderRef builder,struct lp_type vs_type,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],LLVMValueRef context_ptr)1071 generate_viewport(struct draw_llvm_variant *variant,
1072 LLVMBuilderRef builder,
1073 struct lp_type vs_type,
1074 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1075 LLVMValueRef context_ptr)
1076 {
1077 struct gallivm_state *gallivm = variant->gallivm;
1078 struct lp_type f32_type = vs_type;
1079 const unsigned pos = variant->llvm->draw->vs.position_output;
1080 LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1081 LLVMValueRef out3 = LLVMBuildLoad2(builder, vs_type_llvm, outputs[pos][3], ""); /*w0 w1 .. wn*/
1082 LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0); /*1.0 1.0 1.0 1.0*/
1083 LLVMValueRef vp_ptr = draw_vs_jit_context_viewports(variant, context_ptr);
1084
1085 /* We treat pipe_viewport_state as a float array */
1086 const int scale_index_offset = offsetof(struct pipe_viewport_state, scale) / sizeof(float);
1087 const int trans_index_offset = offsetof(struct pipe_viewport_state, translate) / sizeof(float);
1088
1089 /* for 1/w convention*/
1090 out3 = LLVMBuildFDiv(builder, const1, out3, "");
1091 LLVMBuildStore(builder, out3, outputs[pos][3]);
1092
1093 LLVMTypeRef elem_type = lp_build_elem_type(gallivm, vs_type);
1094
1095 /* Viewport Mapping */
1096 for (unsigned i = 0; i < 3; i++) {
1097 LLVMValueRef out = LLVMBuildLoad2(builder, vs_type_llvm, outputs[pos][i], ""); /*x0 x1 .. xn*/
1098 LLVMValueRef scale;
1099 LLVMValueRef trans;
1100 LLVMValueRef scale_i;
1101 LLVMValueRef trans_i;
1102 LLVMValueRef index;
1103
1104 index = lp_build_const_int32(gallivm, i + scale_index_offset);
1105 scale_i = LLVMBuildGEP2(builder, elem_type, vp_ptr, &index, 1, "");
1106
1107 index = lp_build_const_int32(gallivm, i + trans_index_offset);
1108 trans_i = LLVMBuildGEP2(builder, elem_type, vp_ptr, &index, 1, "");
1109
1110 scale = lp_build_broadcast(gallivm, vs_type_llvm,
1111 LLVMBuildLoad2(builder, elem_type, scale_i, "scale"));
1112 trans = lp_build_broadcast(gallivm, vs_type_llvm,
1113 LLVMBuildLoad2(builder, elem_type, trans_i, "trans"));
1114
1115 /* divide by w */
1116 out = LLVMBuildFMul(builder, out, out3, "");
1117 /* mult by scale, add translation */
1118 out = lp_build_fmuladd(builder, out, scale, trans);
1119
1120 /* store transformed outputs */
1121 LLVMBuildStore(builder, out, outputs[pos][i]);
1122 }
1123
1124 }
1125
1126
1127 /**
1128 * Returns clipmask as nxi32 bitmask for the n vertices
1129 */
1130 static LLVMValueRef
generate_clipmask(struct draw_llvm * llvm,struct gallivm_state * gallivm,struct lp_type vs_type,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],struct draw_llvm_variant_key * key,LLVMTypeRef context_type,LLVMValueRef context_ptr,bool * have_clipdist)1131 generate_clipmask(struct draw_llvm *llvm,
1132 struct gallivm_state *gallivm,
1133 struct lp_type vs_type,
1134 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1135 struct draw_llvm_variant_key *key,
1136 LLVMTypeRef context_type,
1137 LLVMValueRef context_ptr,
1138 bool *have_clipdist)
1139 {
1140 LLVMBuilderRef builder = gallivm->builder;
1141 LLVMValueRef mask; /* stores the <nxi32> clipmasks */
1142 LLVMValueRef test, temp;
1143 LLVMValueRef zero, shift;
1144 LLVMValueRef pos_x, pos_y, pos_z, pos_w;
1145 LLVMValueRef cv_x, cv_y, cv_z, cv_w;
1146 LLVMValueRef plane1, planes, plane_ptr;
1147 struct lp_type f32_type = vs_type;
1148 struct lp_type i32_type = lp_int_type(vs_type);
1149 const unsigned pos = llvm->draw->vs.position_output;
1150 const unsigned cv = llvm->draw->vs.clipvertex_output;
1151 int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
1152 bool have_cd = false;
1153 bool clip_user = key->clip_user;
1154 unsigned ucp_enable = key->ucp_enable;
1155 unsigned cd[2];
1156
1157 cd[0] = llvm->draw->vs.ccdistance_output[0];
1158 cd[1] = llvm->draw->vs.ccdistance_output[1];
1159
1160 if (cd[0] != pos || cd[1] != pos)
1161 have_cd = true;
1162
1163 if (num_written_clipdistance && !clip_user) {
1164 clip_user = true;
1165 ucp_enable = (1 << num_written_clipdistance) - 1;
1166 }
1167
1168 mask = lp_build_const_int_vec(gallivm, i32_type, 0);
1169 temp = lp_build_const_int_vec(gallivm, i32_type, 0);
1170 zero = lp_build_const_vec(gallivm, f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */
1171 shift = lp_build_const_int_vec(gallivm, i32_type, 1); /* 1 1 1 1 */
1172
1173 LLVMTypeRef vec_type = LLVMTypeOf(zero);
1174
1175 /*
1176 * load clipvertex and position from correct locations.
1177 * if they are the same just load them once.
1178 */
1179 pos_x = LLVMBuildLoad2(builder, vec_type, outputs[pos][0], ""); /*x0 x1 .. xn */
1180 pos_y = LLVMBuildLoad2(builder, vec_type, outputs[pos][1], ""); /*y0 y1 .. yn */
1181 pos_z = LLVMBuildLoad2(builder, vec_type, outputs[pos][2], ""); /*z0 z1 .. zn */
1182 pos_w = LLVMBuildLoad2(builder, vec_type, outputs[pos][3], ""); /*w0 w1 .. wn */
1183
1184 if (clip_user && cv != pos) {
1185 cv_x = LLVMBuildLoad2(builder, vec_type, outputs[cv][0], ""); /*x0 x1 .. xn */
1186 cv_y = LLVMBuildLoad2(builder, vec_type, outputs[cv][1], ""); /*y0 y1 .. yn */
1187 cv_z = LLVMBuildLoad2(builder, vec_type, outputs[cv][2], ""); /*z0 z1 .. zn */
1188 cv_w = LLVMBuildLoad2(builder, vec_type, outputs[cv][3], ""); /*w0 w1 .. wn */
1189 } else {
1190 cv_x = pos_x;
1191 cv_y = pos_y;
1192 cv_z = pos_z;
1193 cv_w = pos_w;
1194 }
1195
1196 /*
1197 * Be careful with the comparisons and NaNs (using llvm's unordered
1198 * comparisons here).
1199 */
1200 /* Cliptest, for hardwired planes */
1201 /*
1202 * XXX should take guardband into account (currently not in key).
1203 * Otherwise might run the draw pipeline stages for nothing.
1204 */
1205 if (key->clip_xy) {
1206 /* plane 1 */
1207 test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
1208 temp = shift;
1209 test = LLVMBuildAnd(builder, test, temp, "");
1210 mask = test;
1211
1212 /* plane 2 */
1213 test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
1214 test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1215 temp = LLVMBuildShl(builder, temp, shift, "");
1216 test = LLVMBuildAnd(builder, test, temp, "");
1217 mask = LLVMBuildOr(builder, mask, test, "");
1218
1219 /* plane 3 */
1220 test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
1221 temp = LLVMBuildShl(builder, temp, shift, "");
1222 test = LLVMBuildAnd(builder, test, temp, "");
1223 mask = LLVMBuildOr(builder, mask, test, "");
1224
1225 /* plane 4 */
1226 test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
1227 test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1228 temp = LLVMBuildShl(builder, temp, shift, "");
1229 test = LLVMBuildAnd(builder, test, temp, "");
1230 mask = LLVMBuildOr(builder, mask, test, "");
1231 }
1232
1233 if (key->clip_z) {
1234 temp = lp_build_const_int_vec(gallivm, i32_type, 16);
1235 if (key->clip_halfz) {
1236 /* plane 5 */
1237 test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
1238 test = LLVMBuildAnd(builder, test, temp, "");
1239 mask = LLVMBuildOr(builder, mask, test, "");
1240 } else {
1241 /* plane 5 */
1242 test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
1243 test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1244 test = LLVMBuildAnd(builder, test, temp, "");
1245 mask = LLVMBuildOr(builder, mask, test, "");
1246 }
1247 /* plane 6 */
1248 test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
1249 temp = LLVMBuildShl(builder, temp, shift, "");
1250 test = LLVMBuildAnd(builder, test, temp, "");
1251 mask = LLVMBuildOr(builder, mask, test, "");
1252 }
1253
1254 if (clip_user) {
1255 LLVMValueRef planes_ptr = draw_vs_jit_context_planes(gallivm, context_type, context_ptr);
1256 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
1257 LLVMTypeRef planes_type = LLVMArrayType(LLVMArrayType(float_type, 4), DRAW_TOTAL_CLIP_PLANES);
1258 LLVMValueRef indices[3];
1259 LLVMValueRef is_nan_or_inf;
1260
1261 /* userclip planes */
1262 while (ucp_enable) {
1263 unsigned plane_idx = ffs(ucp_enable)-1;
1264 ucp_enable &= ~(1 << plane_idx);
1265 plane_idx += 6;
1266
1267 if (have_cd && num_written_clipdistance) {
1268 LLVMValueRef clipdist;
1269 int i;
1270 i = plane_idx - 6;
1271
1272 *have_clipdist = true;
1273 if (i < 4) {
1274 clipdist = LLVMBuildLoad2(builder, vec_type, outputs[cd[0]][i], "");
1275 } else {
1276 clipdist = LLVMBuildLoad2(builder, vec_type, outputs[cd[1]][i-4], "");
1277 }
1278 test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist);
1279 is_nan_or_inf = lp_build_is_inf_or_nan(gallivm, vs_type, clipdist);
1280 test = LLVMBuildOr(builder, test, is_nan_or_inf, "");
1281 temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
1282 test = LLVMBuildAnd(builder, test, temp, "");
1283 mask = LLVMBuildOr(builder, mask, test, "");
1284 } else {
1285 LLVMTypeRef vs_elem_type = lp_build_elem_type(gallivm, vs_type);
1286 LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1287 LLVMValueRef sum = NULL;
1288 indices[0] = lp_build_const_int32(gallivm, 0);
1289 indices[1] = lp_build_const_int32(gallivm, plane_idx);
1290
1291 for (int i = 0; i < 4; ++i) {
1292 indices[2] = lp_build_const_int32(gallivm, i);
1293 plane_ptr = LLVMBuildGEP2(builder, planes_type, planes_ptr, indices, 3, "");
1294 plane1 = LLVMBuildLoad2(builder, vs_elem_type, plane_ptr,
1295 (const char *[]){"plane_x", "plane_y", "plane_z", "plane_w"}[i]);
1296 planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
1297 if (i == 0) {
1298 sum = LLVMBuildFMul(builder, planes, cv_x, "");
1299 } else {
1300 sum = lp_build_fmuladd(builder, planes,
1301 (LLVMValueRef[]){cv_x, cv_y, cv_z, cv_w}[i], sum);
1302 }
1303 }
1304
1305 test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
1306 temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
1307 test = LLVMBuildAnd(builder, test, temp, "");
1308 mask = LLVMBuildOr(builder, mask, test, "");
1309 }
1310 }
1311 }
1312 if (key->need_edgeflags) {
1313 /*
1314 * This isn't really part of clipmask but stored the same in vertex
1315 * header later, so do it here.
1316 */
1317 unsigned edge_attr = llvm->draw->vs.edgeflag_output;
1318 LLVMValueRef one = lp_build_const_vec(gallivm, f32_type, 1.0);
1319 LLVMValueRef edgeflag = LLVMBuildLoad2(builder, vec_type, outputs[edge_attr][0], "");
1320 test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_EQUAL, one, edgeflag);
1321 temp = lp_build_const_int_vec(gallivm, i32_type,
1322 1LL << DRAW_TOTAL_CLIP_PLANES);
1323 test = LLVMBuildAnd(builder, test, temp, "");
1324 mask = LLVMBuildOr(builder, mask, test, "");
1325 }
1326 return mask;
1327 }
1328
1329
1330 /**
1331 * Returns boolean if any clipping has occurred
1332 * Used zero/one i8 value to represent boolean
1333 */
1334 static LLVMValueRef
clipmask_booli8(struct gallivm_state * gallivm,const struct lp_type vs_type,LLVMTypeRef clipmask_bool_type,LLVMValueRef clipmask_bool_ptr,bool edgeflag_in_clipmask)1335 clipmask_booli8(struct gallivm_state *gallivm,
1336 const struct lp_type vs_type,
1337 LLVMTypeRef clipmask_bool_type,
1338 LLVMValueRef clipmask_bool_ptr,
1339 bool edgeflag_in_clipmask)
1340 {
1341 LLVMBuilderRef builder = gallivm->builder;
1342 LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
1343 LLVMValueRef clipmask_bool = LLVMBuildLoad2(builder, clipmask_bool_type, clipmask_bool_ptr, "");
1344 LLVMValueRef ret;
1345 struct lp_build_context bldivec;
1346
1347 lp_build_context_init(&bldivec, gallivm, lp_int_type(vs_type));
1348
1349 /*
1350 * We need to invert the edgeflag bit from the clipmask here
1351 * (because the result is really if we want to run the pipeline or not
1352 * and we (may) need it if edgeflag was 0).
1353 */
1354 if (edgeflag_in_clipmask) {
1355 LLVMValueRef edge = lp_build_const_int_vec(gallivm, bldivec.type,
1356 1LL << DRAW_TOTAL_CLIP_PLANES);
1357 clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, "");
1358 }
1359
1360 /*
1361 * XXX: probably should mask off bits from the mask which come from
1362 * vertices which were beyond the count (i.e. indices_valid for
1363 * linear fetches, for elts ones we don't have the correct mask
1364 * right now). Otherwise might run the pipeline for nothing,
1365 * though everything should still work.
1366 */
1367 ret = lp_build_any_true_range(&bldivec, vs_type.length, clipmask_bool);
1368 ret = LLVMBuildZExt(builder, ret, int8_type, "");
1369 return ret;
1370 }
1371
1372
1373 static LLVMValueRef
draw_gs_llvm_fetch_input(const struct lp_build_gs_iface * gs_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,LLVMValueRef swizzle_index)1374 draw_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
1375 struct lp_build_context * bld,
1376 bool is_vindex_indirect,
1377 LLVMValueRef vertex_index,
1378 bool is_aindex_indirect,
1379 LLVMValueRef attrib_index,
1380 LLVMValueRef swizzle_index)
1381 {
1382 const struct draw_gs_llvm_iface *gs = draw_gs_llvm_iface(gs_iface);
1383 struct gallivm_state *gallivm = bld->gallivm;
1384 LLVMBuilderRef builder = gallivm->builder;
1385 LLVMValueRef indices[3];
1386 LLVMValueRef res;
1387 struct lp_type type = bld->type;
1388
1389 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
1390 LLVMTypeRef channel_vec_type = LLVMVectorType(float_type, TGSI_NUM_CHANNELS);
1391 LLVMTypeRef input_array_type = create_gs_jit_input_type_deref(gallivm);
1392
1393 if (is_vindex_indirect || is_aindex_indirect) {
1394 res = bld->zero;
1395 for (int i = 0; i < type.length; ++i) {
1396 LLVMValueRef idx = lp_build_const_int32(gallivm, i);
1397 LLVMValueRef vert_chan_index = vertex_index;
1398 LLVMValueRef attr_chan_index = attrib_index;
1399 LLVMValueRef channel_vec, value;
1400
1401 if (is_vindex_indirect) {
1402 vert_chan_index = LLVMBuildExtractElement(builder,
1403 vertex_index, idx, "");
1404 }
1405 if (is_aindex_indirect) {
1406 attr_chan_index = LLVMBuildExtractElement(builder,
1407 attrib_index, idx, "");
1408 }
1409
1410 indices[0] = vert_chan_index;
1411 indices[1] = attr_chan_index;
1412 indices[2] = swizzle_index;
1413
1414 channel_vec = LLVMBuildGEP2(builder, input_array_type, gs->input, indices, 3, "");
1415 channel_vec = LLVMBuildLoad2(builder, channel_vec_type, channel_vec, "");
1416 value = LLVMBuildExtractElement(builder, channel_vec, idx, "");
1417
1418 res = LLVMBuildInsertElement(builder, res, value, idx, "");
1419 }
1420 } else {
1421 indices[0] = vertex_index;
1422 indices[1] = attrib_index;
1423 indices[2] = swizzle_index;
1424
1425 res = LLVMBuildGEP2(builder, input_array_type, gs->input, indices, 3, "");
1426 res = LLVMBuildLoad2(builder, channel_vec_type, res, "");
1427 }
1428
1429 return res;
1430 }
1431
1432
1433 static void
draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface * gs_base,struct lp_build_context * bld,LLVMValueRef (* outputs)[4],LLVMValueRef emitted_vertices_vec,LLVMValueRef mask_vec,LLVMValueRef stream_id)1434 draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
1435 struct lp_build_context * bld,
1436 LLVMValueRef (*outputs)[4],
1437 LLVMValueRef emitted_vertices_vec,
1438 LLVMValueRef mask_vec, LLVMValueRef stream_id)
1439 {
1440 const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1441 struct draw_gs_llvm_variant *variant = gs_iface->variant;
1442 struct gallivm_state *gallivm = variant->gallivm;
1443 LLVMBuilderRef builder = gallivm->builder;
1444 struct lp_type gs_type = bld->type;
1445 LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
1446 lp_int_type(gs_type), 0);
1447 LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
1448 LLVMValueRef next_prim_offset =
1449 lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);
1450 LLVMValueRef io = variant->io_ptr;
1451 const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
1452
1453 LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
1454 for (unsigned i = 0; i < gs_type.length; ++i) {
1455 LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1456 LLVMValueRef currently_emitted =
1457 LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
1458 indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
1459 indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
1460 indices[i] = LLVMBuildSelect(builder, LLVMBuildExtractElement(builder, cond, ind, ""), indices[i],
1461 lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary - 1), "");
1462 }
1463
1464 LLVMValueRef stream_idx = LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), "");
1465 LLVMValueRef cnd = LLVMBuildICmp(builder, LLVMIntULT, stream_idx, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");
1466 struct lp_build_if_state if_ctx;
1467 lp_build_if(&if_ctx, gallivm, cnd);
1468 io = lp_build_pointer_get2(builder, variant->vertex_header_ptr_type,
1469 io, LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), ""));
1470
1471 if (variant->key.clamp_vertex_color) {
1472 do_clamp_vertex_color(gallivm, gs_type,
1473 gs_info, outputs);
1474 }
1475 convert_to_aos(gallivm, variant->vertex_header_type,
1476 io, indices,
1477 outputs, clipmask,
1478 gs_info->num_outputs, gs_type,
1479 -1,
1480 false);
1481 lp_build_endif(&if_ctx);
1482 }
1483
1484
1485 static void
draw_gs_llvm_end_primitive(const struct lp_build_gs_iface * gs_base,struct lp_build_context * bld,LLVMValueRef total_emitted_vertices_vec_ptr,LLVMValueRef verts_per_prim_vec,LLVMValueRef emitted_prims_vec,LLVMValueRef mask_vec,unsigned stream)1486 draw_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
1487 struct lp_build_context * bld,
1488 LLVMValueRef total_emitted_vertices_vec_ptr,
1489 LLVMValueRef verts_per_prim_vec,
1490 LLVMValueRef emitted_prims_vec,
1491 LLVMValueRef mask_vec, unsigned stream)
1492 {
1493 const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1494 struct draw_gs_llvm_variant *variant = gs_iface->variant;
1495 struct gallivm_state *gallivm = variant->gallivm;
1496 LLVMBuilderRef builder = gallivm->builder;
1497 LLVMValueRef prim_lengts_ptr =
1498 draw_gs_jit_prim_lengths(variant, variant->context_ptr);
1499
1500 LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
1501 for (unsigned i = 0; i < bld->type.length; ++i) {
1502 LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1503 LLVMValueRef prims_emitted =
1504 LLVMBuildExtractElement(builder, emitted_prims_vec, ind, "");
1505 LLVMValueRef store_ptr;
1506 LLVMValueRef num_vertices =
1507 LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, "");
1508
1509 LLVMValueRef this_cond = LLVMBuildExtractElement(gallivm->builder, cond, ind, "");
1510 struct lp_build_if_state ifthen;
1511 lp_build_if(&ifthen, gallivm, this_cond);
1512 prims_emitted = LLVMBuildMul(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");
1513 prims_emitted = LLVMBuildAdd(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, stream), "");
1514 LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
1515 LLVMTypeRef prim_lengths_type = LLVMPointerType(int_type, 0);
1516 store_ptr = LLVMBuildGEP2(builder, prim_lengths_type, prim_lengts_ptr, &prims_emitted, 1, "");
1517 store_ptr = LLVMBuildLoad2(builder, prim_lengths_type, store_ptr, "");
1518 store_ptr = LLVMBuildGEP2(builder, int_type, store_ptr, &ind, 1, "");
1519 LLVMBuildStore(builder, num_vertices, store_ptr);
1520 lp_build_endif(&ifthen);
1521 }
1522 }
1523
1524
1525 static void
draw_gs_llvm_epilogue(const struct lp_build_gs_iface * gs_base,LLVMValueRef total_emitted_vertices_vec,LLVMValueRef emitted_prims_vec,unsigned stream)1526 draw_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
1527 LLVMValueRef total_emitted_vertices_vec,
1528 LLVMValueRef emitted_prims_vec, unsigned stream)
1529 {
1530 const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1531 struct draw_gs_llvm_variant *variant = gs_iface->variant;
1532 struct gallivm_state *gallivm = variant->gallivm;
1533 LLVMBuilderRef builder = gallivm->builder;
1534 LLVMValueRef emitted_verts_ptr =
1535 draw_gs_jit_emitted_vertices(variant, variant->context_ptr);
1536 LLVMValueRef emitted_prims_ptr =
1537 draw_gs_jit_emitted_prims(variant, variant->context_ptr);
1538 LLVMValueRef stream_val = lp_build_const_int32(gallivm, stream);
1539
1540 emitted_verts_ptr = LLVMBuildGEP2(builder, LLVMTypeOf(total_emitted_vertices_vec), emitted_verts_ptr, &stream_val, 1, "");
1541 emitted_prims_ptr = LLVMBuildGEP2(builder, LLVMTypeOf(emitted_prims_vec), emitted_prims_ptr, &stream_val, 1, "");
1542
1543 LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr);
1544 LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr);
1545 }
1546
1547
1548 static void
draw_llvm_generate(struct draw_llvm * llvm,struct draw_llvm_variant * variant)1549 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1550 {
1551 struct gallivm_state *gallivm = variant->gallivm;
1552 LLVMContextRef context = gallivm->context;
1553 LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
1554 LLVMTypeRef arg_types[14];
1555 unsigned num_arg_types = ARRAY_SIZE(arg_types);
1556 LLVMTypeRef func_type;
1557 LLVMValueRef context_ptr;
1558 LLVMValueRef resources_ptr;
1559 LLVMBasicBlockRef block;
1560 LLVMBuilderRef builder;
1561 char func_name[64];
1562 struct lp_type vs_type;
1563 LLVMValueRef count, fetch_elts, start;
1564 LLVMValueRef vertex_id_offset;
1565 LLVMValueRef stride, step, io_itr;
1566 LLVMValueRef ind_vec, start_vec, have_elts, fetch_max, tmp;
1567 LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1568 LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS];
1569 LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS];
1570 LLVMValueRef buffer_size_adj[PIPE_MAX_ATTRIBS];
1571 LLVMValueRef instance_index[PIPE_MAX_ATTRIBS];
1572 LLVMValueRef fake_buf_ptr, fake_buf;
1573
1574 struct draw_context *draw = llvm->draw;
1575 const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
1576 unsigned i, j;
1577 struct lp_build_context bld, blduivec;
1578 struct lp_build_loop_state lp_loop;
1579 struct lp_build_if_state if_ctx;
1580 const int vector_length = lp_native_vector_width / 32;
1581 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1582 struct lp_build_sampler_soa *sampler = 0;
1583 struct lp_build_image_soa *image = NULL;
1584 LLVMValueRef ret, clipmask_bool_ptr;
1585 struct draw_llvm_variant_key *key = &variant->key;
1586 /* If geometry shader is present we need to skip both the viewport
1587 * transformation and clipping otherwise the inputs to the geometry
1588 * shader will be incorrect.
1589 * The code can't handle vp transform when vs writes vp index neither
1590 * (though this would be fixable here, but couldn't just broadcast
1591 * the values).
1592 */
1593 const bool bypass_viewport = key->has_gs_or_tes || key->bypass_viewport ||
1594 vs_info->writes_viewport_index;
1595 const bool enable_cliptest = !key->has_gs_or_tes && (key->clip_xy ||
1596 key->clip_z ||
1597 key->clip_user ||
1598 key->need_edgeflags);
1599 LLVMValueRef variant_func;
1600 const unsigned pos = draw->vs.position_output;
1601 const unsigned cv = draw->vs.clipvertex_output;
1602 bool have_clipdist = false;
1603 struct lp_bld_tgsi_system_values system_values;
1604
1605 memset(&system_values, 0, sizeof(system_values));
1606 memset(&outputs, 0, sizeof(outputs));
1607 snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant");
1608
1609 i = 0;
1610 arg_types[i++] = get_context_ptr_type(variant); /* context */
1611 arg_types[i++] = variant->resources_ptr_type; /* context */
1612 arg_types[i++] = get_vertex_header_ptr_type(variant); /* vertex_header */
1613 arg_types[i++] = get_buffer_ptr_type(variant); /* vbuffers */
1614 arg_types[i++] = int32_type; /* count */
1615 arg_types[i++] = int32_type; /* start/fetch_elt_max */
1616 arg_types[i++] = int32_type; /* stride */
1617 arg_types[i++] = get_vb_ptr_type(variant); /* pipe_vertex_buffer's */
1618 arg_types[i++] = int32_type; /* instance_id */
1619 arg_types[i++] = int32_type; /* vertex_id_offset */
1620 arg_types[i++] = int32_type; /* start_instance */
1621 arg_types[i++] = LLVMPointerType(int32_type, 0); /* fetch_elts */
1622 arg_types[i++] = int32_type; /* draw_id */
1623 arg_types[i++] = int32_type; /* view_id */
1624 assert(i == ARRAY_SIZE(arg_types));
1625
1626 func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
1627 arg_types, num_arg_types, 0);
1628
1629 variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
1630 variant->function = variant_func;
1631 variant->function_name = MALLOC(strlen(func_name)+1);
1632 strcpy(variant->function_name, func_name);
1633
1634 LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
1635 for (i = 0; i < num_arg_types; ++i)
1636 if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1637 lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
1638
1639 if (gallivm->cache && gallivm->cache->data_size) {
1640 gallivm_stub_func(gallivm, variant_func);
1641 return;
1642 }
1643
1644 context_ptr = LLVMGetParam(variant_func, 0);
1645 resources_ptr = LLVMGetParam(variant_func, 1);
1646 io_ptr = LLVMGetParam(variant_func, 2);
1647 vbuffers_ptr = LLVMGetParam(variant_func, 3);
1648 count = LLVMGetParam(variant_func, 4);
1649 start = LLVMGetParam(variant_func, 5);
1650 /*
1651 * XXX: stride is actually unused. The stride we use is strictly calculated
1652 * from the number of outputs (including the draw_extra outputs).
1653 * Should probably fix some day (we need a new vs just because of extra
1654 * outputs which the generated vs won't touch).
1655 */
1656 stride = LLVMGetParam(variant_func, 6);
1657 vb_ptr = LLVMGetParam(variant_func, 7);
1658 system_values.instance_id = LLVMGetParam(variant_func, 8);
1659 vertex_id_offset = LLVMGetParam(variant_func, 9);
1660 system_values.base_instance = LLVMGetParam(variant_func, 10);
1661 fetch_elts = LLVMGetParam(variant_func, 11);
1662 system_values.draw_id = LLVMGetParam(variant_func, 12);
1663 system_values.view_index = LLVMGetParam(variant_func, 13);
1664
1665 lp_build_name(context_ptr, "context");
1666 lp_build_name(resources_ptr, "resources");
1667 lp_build_name(io_ptr, "io");
1668 lp_build_name(vbuffers_ptr, "vbuffers");
1669 lp_build_name(count, "count");
1670 lp_build_name(start, "start");
1671 lp_build_name(stride, "stride");
1672 lp_build_name(vb_ptr, "vb");
1673 lp_build_name(system_values.instance_id, "instance_id");
1674 lp_build_name(vertex_id_offset, "vertex_id_offset");
1675 lp_build_name(system_values.base_instance, "start_instance");
1676 lp_build_name(fetch_elts, "fetch_elts");
1677 lp_build_name(system_values.draw_id, "draw_id");
1678
1679 /*
1680 * Function body
1681 */
1682
1683 block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
1684 builder = gallivm->builder;
1685 LLVMPositionBuilderAtEnd(builder, block);
1686
1687 memset(&vs_type, 0, sizeof vs_type);
1688 vs_type.floating = true; /* floating point values */
1689 vs_type.sign = true; /* values are signed */
1690 vs_type.norm = false; /* values are not limited to [0,1] or [-1,1] */
1691 vs_type.width = 32; /* 32-bit float */
1692 vs_type.length = vector_length;
1693
1694 lp_build_context_init(&bld, gallivm, lp_type_uint(32));
1695 lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
1696
1697 /* hold temporary "bool" clipmask */
1698 clipmask_bool_ptr = lp_build_alloca(gallivm, blduivec.vec_type, "");
1699
1700 fake_buf = lp_build_alloca_undef(gallivm,
1701 LLVMVectorType(LLVMInt64TypeInContext(context), 4), "");
1702 fake_buf = LLVMBuildBitCast(builder, fake_buf,
1703 LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
1704 fake_buf_ptr = LLVMBuildGEP2(builder, LLVMInt8TypeInContext(context), fake_buf, &bld.zero, 1, "");
1705
1706 /* code generated texture sampling */
1707 sampler = lp_bld_llvm_sampler_soa_create(draw_llvm_variant_key_samplers(key),
1708 MAX2(key->nr_samplers,
1709 key->nr_sampler_views));
1710 image = lp_bld_llvm_image_soa_create(draw_llvm_variant_key_images(key),
1711 key->nr_images);
1712
1713 step = lp_build_const_int32(gallivm, vector_length);
1714
1715 ind_vec = blduivec.undef;
1716 for (i = 0; i < vs_type.length; i++) {
1717 LLVMValueRef index = lp_build_const_int32(gallivm, i);
1718 ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, "");
1719 }
1720
1721 have_elts = LLVMBuildICmp(builder, LLVMIntNE,
1722 LLVMConstPointerNull(arg_types[11]), fetch_elts, "");
1723
1724 fetch_max = LLVMBuildSub(builder, count, bld.one, "fetch_max");
1725 fetch_max = lp_build_broadcast_scalar(&blduivec, fetch_max);
1726 /*
1727 * Only needed for non-indexed path.
1728 */
1729 start_vec = lp_build_broadcast_scalar(&blduivec, start);
1730
1731 /*
1732 * Pre-calculate everything which is constant per shader invocation.
1733 */
1734 for (j = 0; j < key->nr_vertex_elements; ++j) {
1735 LLVMValueRef vb_buffer_offset, buffer_size, temp_ptr;
1736 LLVMValueRef vb_info, vbuffer_ptr, buf_offset, ofbit;
1737 struct pipe_vertex_element *velem = &key->vertex_element[j];
1738 LLVMValueRef vb_index =
1739 lp_build_const_int32(gallivm, velem->vertex_buffer_index);
1740 LLVMValueRef bsize = lp_build_const_int32(gallivm,
1741 util_format_get_blocksize(velem->src_format));
1742 LLVMValueRef src_offset = lp_build_const_int32(gallivm,
1743 velem->src_offset);
1744 LLVMValueRef src_stride = lp_build_const_int32(gallivm,
1745 velem->src_stride);
1746 struct lp_build_if_state if_ctx;
1747
1748 if (velem->src_format != PIPE_FORMAT_NONE) {
1749 vbuffer_ptr = LLVMBuildGEP2(builder, variant->buffer_type, vbuffers_ptr, &vb_index, 1, "");
1750 vb_info = LLVMBuildGEP2(builder, variant->vb_type, vb_ptr, &vb_index, 1, "");
1751 vb_stride[j] = src_stride;
1752 vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, variant->vb_type, vb_info);
1753 map_ptr[j] = draw_jit_dvbuffer_map(gallivm, variant->buffer_type, vbuffer_ptr);
1754 buffer_size = draw_jit_dvbuffer_size(gallivm, variant->buffer_type, vbuffer_ptr);
1755
1756 ofbit = NULL;
1757 /*
1758 * We'll set buffer_size_adj to zero if we have of, so it will
1759 * always overflow later automatically without having to keep ofbit.
1760 * Overflows (with normal wraparound) doing the actual offset
1761 * calculation should be ok, just not for the buffer size calc.
1762 * It would also be possible to detect such overflows and return
1763 * zeros if that happens, but this would be more complex.
1764 */
1765 buf_offset = lp_build_add(&bld, vb_buffer_offset, src_offset);
1766 tmp = lp_build_sub(&bld, bsize, bld.one);
1767 buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, tmp,
1768 &ofbit);
1769 buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size_adj[j],
1770 buf_offset, &ofbit);
1771
1772 /*
1773 * We can't easily set fake vertex buffers outside the generated code.
1774 * Hence, set fake vertex buffers here instead basically, so fetch
1775 * code can always fetch using offset 0, eliminating all control flow
1776 * inside the main loop.
1777 * (Alternatively, could have control flow per vector skipping fetch
1778 * if ofbit is true.)
1779 */
1780 if (velem->instance_divisor) {
1781 /*
1782 * Index is equal to the start instance plus the number of current
1783 * instance divided by the divisor. In this case we compute it as:
1784 * index = start_instance + (instance_id / divisor).
1785 * Note we could actually do the fetch here, outside the loop -
1786 * it's all constant, hopefully llvm recognizes this.
1787 */
1788 LLVMValueRef current_instance;
1789 current_instance = LLVMBuildUDiv(builder, system_values.instance_id,
1790 lp_build_const_int32(gallivm,
1791 velem->instance_divisor),
1792 "instance_divisor");
1793 instance_index[j] = lp_build_uadd_overflow(gallivm, system_values.base_instance,
1794 current_instance, &ofbit);
1795 }
1796
1797 buffer_size_adj[j] = LLVMBuildSelect(builder, ofbit, bld.zero,
1798 buffer_size_adj[j], "");
1799
1800 LLVMTypeRef byte_type = LLVMInt8TypeInContext(context);
1801 LLVMTypeRef byte_ptr_type = LLVMPointerType(byte_type, 0);
1802 temp_ptr = lp_build_alloca_undef(gallivm, byte_ptr_type, "");
1803
1804 lp_build_if(&if_ctx, gallivm, ofbit);
1805 {
1806 LLVMBuildStore(builder, fake_buf_ptr, temp_ptr);
1807 }
1808 lp_build_else(&if_ctx);
1809 {
1810 map_ptr[j] = LLVMBuildGEP2(builder, byte_type, map_ptr[j], &buf_offset, 1, "");
1811 LLVMBuildStore(builder, map_ptr[j], temp_ptr);
1812 }
1813 lp_build_endif(&if_ctx);
1814 map_ptr[j] = LLVMBuildLoad2(builder, byte_ptr_type, temp_ptr, "map_ptr");
1815
1816 if (0) {
1817 lp_build_printf(gallivm, "velem %d, vbuf index = %u, vb_stride = %u\n",
1818 lp_build_const_int32(gallivm, j),
1819 vb_index, vb_stride[j]);
1820 lp_build_printf(gallivm,
1821 " vb_buffer_offset = %u, src_offset = %u, buf_offset = %u\n",
1822 vb_buffer_offset, src_offset, buf_offset);
1823 lp_build_printf(gallivm, " buffer size = %u, blocksize = %u\n",
1824 buffer_size, bsize);
1825 lp_build_printf(gallivm, " instance_id = %u\n", system_values.instance_id);
1826 }
1827 }
1828 }
1829
1830 lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
1831 {
1832 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
1833 LLVMValueRef io;
1834 LLVMValueRef clipmask; /* holds the clipmask value */
1835 LLVMValueRef true_index_array, index_store;
1836 const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
1837
1838 io_itr = lp_loop.counter;
1839
1840 io = LLVMBuildGEP2(builder, variant->vertex_header_type, io_ptr, &io_itr, 1, "");
1841 #if DEBUG_STORE
1842 lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n",
1843 io_itr, io, lp_loop.counter);
1844 #endif
1845
1846 true_index_array = lp_build_broadcast_scalar(&blduivec, lp_loop.counter);
1847 true_index_array = LLVMBuildAdd(builder, true_index_array, ind_vec, "");
1848
1849 LLVMValueRef exec_mask = lp_build_cmp(&blduivec, PIPE_FUNC_LEQUAL, true_index_array, fetch_max);
1850 /*
1851 * Limit indices to fetch_max, otherwise might try to access indices
1852 * beyond index buffer (or rather vsplit elt buffer) size.
1853 * Could probably safely (?) skip this for non-indexed draws and
1854 * simplify things minimally (by removing it could combine the ind_vec
1855 * and start_vec adds). I think the only effect for non-indexed draws will
1856 * be that for the invalid elements they will be all fetched from the
1857 * same location as the last valid one, but noone should really care.
1858 */
1859 true_index_array = lp_build_min(&blduivec, true_index_array, fetch_max);
1860
1861 index_store = lp_build_alloca_undef(gallivm, blduivec.vec_type, "index_store");
1862
1863 lp_build_if(&if_ctx, gallivm, have_elts);
1864 {
1865 /*
1866 * Note: you'd expect some comparison/clamp against fetch_elt_max
1867 * here.
1868 * There used to be one here but it was incorrect: overflow was
1869 * detected if index > fetch_elt_max - but the correct condition
1870 * would be index >= fetch_elt_max (since this is just size of elts
1871 * buffer / element size).
1872 * Using the correct condition however will cause failures - due to
1873 * vsplit/vcache code which rebases indices. So, as an example, if
1874 * fetch_elt_max is just 1 and fetch_count 2, vsplit cache will
1875 * replace all invalid indices with 0 - which in case of elt_bias
1876 * not being zero will get a different fetch index than the valid
1877 * index 0. So, just rely on vsplit code preventing out-of-bounds
1878 * fetches. This is also why it's safe to do elts fetch even if there
1879 * was no index buffer bound - the real buffer is never seen here, at
1880 * least not if there are index buffer overflows...
1881 */
1882
1883 /*
1884 * XXX should not have to do this, as scale can be handled
1885 * natively by loads (hits asserts though).
1886 */
1887 tmp = lp_build_shl_imm(&blduivec, true_index_array, 2);
1888 fetch_elts = LLVMBuildBitCast(builder, fetch_elts,
1889 LLVMPointerType(LLVMInt8TypeInContext(context),
1890 0), "");
1891 tmp = lp_build_gather(gallivm, vs_type.length,
1892 32, bld.type, true,
1893 fetch_elts, tmp, false);
1894 LLVMBuildStore(builder, tmp, index_store);
1895 }
1896 lp_build_else(&if_ctx);
1897 {
1898 tmp = LLVMBuildAdd(builder, true_index_array, start_vec, "");
1899 LLVMBuildStore(builder, tmp, index_store);
1900 }
1901 lp_build_endif(&if_ctx);
1902
1903 true_index_array = LLVMBuildLoad2(builder, blduivec.vec_type, index_store, "");
1904
1905 for (j = 0; j < key->nr_vertex_elements; ++j) {
1906 struct pipe_vertex_element *velem = &key->vertex_element[j];
1907 const struct util_format_description *format_desc =
1908 util_format_description(velem->src_format);
1909
1910 if (format_desc->format == PIPE_FORMAT_NONE) {
1911 for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
1912 inputs[j][i] = lp_build_zero(gallivm, vs_type);
1913 }
1914 } else if (velem->instance_divisor) {
1915 fetch_instanced(gallivm, format_desc, vs_type,
1916 vb_stride[j], map_ptr[j],
1917 buffer_size_adj[j],
1918 inputs[j], instance_index[j]);
1919 } else {
1920 fetch_vector(gallivm, format_desc, vs_type,
1921 vb_stride[j], map_ptr[j],
1922 buffer_size_adj[j],
1923 inputs[j], true_index_array);
1924 }
1925 }
1926
1927 struct lp_build_mask_context mask;
1928
1929 lp_build_mask_begin(&mask, gallivm, vs_type, exec_mask);
1930 /* In the paths with elts vertex id has to be unaffected by the
1931 * index bias and because indices inside our elements array have
1932 * already had index bias applied we need to subtract it here to
1933 * get back to the original index.
1934 * In the linear paths vertex id has to be unaffected by the
1935 * original start index and because we abuse the 'start' variable
1936 * to either represent the actual start index or the index at which
1937 * the primitive was split (we split rendering into chunks of at
1938 * most 4095-vertices) we need to back out the original start
1939 * index out of our vertex id here.
1940 * for ARB_shader_draw_parameters, base_vertex should be 0 for
1941 * non-indexed draws.
1942 */
1943 LLVMValueRef base_vertex = lp_build_select(&bld, have_elts, vertex_id_offset, lp_build_const_int32(gallivm, 0));
1944 system_values.basevertex = lp_build_broadcast_scalar(&blduivec, base_vertex);
1945
1946 /* first vertex is for Vulkan base vertex support */
1947 LLVMValueRef first_vertex = vertex_id_offset;
1948 system_values.firstvertex = lp_build_broadcast_scalar(&blduivec, first_vertex);
1949
1950 system_values.vertex_id = true_index_array;
1951 system_values.vertex_id_nobase = LLVMBuildSub(builder, true_index_array,
1952 lp_build_broadcast_scalar(&blduivec, vertex_id_offset), "");
1953
1954 ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs;
1955 generate_vs(variant,
1956 builder,
1957 vs_type,
1958 outputs,
1959 ptr_aos,
1960 &system_values,
1961 context_ptr,
1962 resources_ptr,
1963 sampler,
1964 image,
1965 key->clamp_vertex_color,
1966 &mask);
1967
1968 lp_build_mask_end(&mask);
1969 if (pos != -1 && cv != -1) {
1970 /* store original positions in clip before further manipulation */
1971 store_clip(gallivm, vs_type, variant->vertex_header_type, io, outputs, pos);
1972
1973 /* do cliptest */
1974 if (enable_cliptest) {
1975 LLVMValueRef temp = LLVMBuildLoad2(builder, blduivec.vec_type, clipmask_bool_ptr, "");
1976 /* allocate clipmask, assign it integer type */
1977 clipmask = generate_clipmask(llvm,
1978 gallivm,
1979 vs_type,
1980 outputs,
1981 key,
1982 variant->context_type,
1983 context_ptr, &have_clipdist);
1984 temp = LLVMBuildOr(builder, clipmask, temp, "");
1985 /* store temporary clipping boolean value */
1986 LLVMBuildStore(builder, temp, clipmask_bool_ptr);
1987 } else {
1988 clipmask = blduivec.zero;
1989 }
1990
1991 /* do viewport mapping */
1992 if (!bypass_viewport) {
1993 generate_viewport(variant, builder, vs_type, outputs, context_ptr);
1994 }
1995 } else {
1996 clipmask = blduivec.zero;
1997 }
1998
1999 /* store clipmask in vertex header,
2000 * original positions in clip
2001 * and transformed positions in data
2002 */
2003 convert_to_aos(gallivm, variant->vertex_header_type, io, NULL, outputs, clipmask,
2004 vs_info->num_outputs, vs_type, -1,
2005 enable_cliptest && key->need_edgeflags);
2006 }
2007 lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);
2008
2009 lp_bld_llvm_sampler_soa_destroy(sampler);
2010 lp_bld_llvm_image_soa_destroy(image);
2011
2012 /* return clipping boolean value for function */
2013 ret = clipmask_booli8(gallivm, vs_type, blduivec.vec_type, clipmask_bool_ptr,
2014 enable_cliptest && key->need_edgeflags);
2015
2016 LLVMBuildRet(builder, ret);
2017
2018 gallivm_verify_function(gallivm, variant_func);
2019 }
2020
2021
2022 struct draw_llvm_variant_key *
draw_llvm_make_variant_key(struct draw_llvm * llvm,char * store)2023 draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
2024 {
2025 struct draw_llvm_variant_key *key;
2026 struct lp_sampler_static_state *draw_sampler;
2027 struct lp_image_static_state *draw_image;
2028
2029 key = (struct draw_llvm_variant_key *)store;
2030
2031 memset(key, 0, offsetof(struct draw_llvm_variant_key, vertex_element[0]));
2032
2033
2034 /* will have to rig this up properly later */
2035 key->clip_xy = llvm->draw->clip_xy;
2036 key->clip_z = llvm->draw->clip_z;
2037 key->clip_user = llvm->draw->clip_user;
2038 key->bypass_viewport = llvm->draw->bypass_viewport;
2039 key->clip_halfz = llvm->draw->rasterizer->clip_halfz;
2040 /* XXX assumes edgeflag output not at 0 */
2041 key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? true : false);
2042 key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
2043 key->has_gs_or_tes = llvm->draw->gs.geometry_shader != NULL || llvm->draw->tes.tess_eval_shader != NULL;
2044 key->num_outputs = draw_total_vs_outputs(llvm->draw);
2045
2046 key->clamp_vertex_color = !key->has_gs_or_tes &&
2047 llvm->draw->rasterizer->clamp_vertex_color;
2048
2049 /* All variants of this shader will have the same value for
2050 * nr_samplers. Not yet trying to compact away holes in the
2051 * sampler array.
2052 */
2053 key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
2054 if (llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
2055 key->nr_sampler_views =
2056 llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2057 } else {
2058 key->nr_sampler_views = key->nr_samplers;
2059 }
2060
2061 key->nr_images = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
2062
2063 /* Presumably all variants of the shader should have the same
2064 * number of vertex elements - ie the number of shader inputs.
2065 * NOTE: we NEED to store the needed number of needed inputs
2066 * here, not the number of provided elements to match keysize
2067 * (and the offset of sampler state in the key).
2068 * If we have excess number of vertex elements, this is valid,
2069 * but the excess ones don't matter.
2070 * If we don't have enough vertex elements (which looks not really
2071 * valid but we'll handle it gracefully) fill out missing ones with
2072 * zero (we'll recognize these later by PIPE_FORMAT_NONE).
2073 */
2074 key->nr_vertex_elements =
2075 llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 1;
2076
2077 if (llvm->draw->pt.nr_vertex_elements < key->nr_vertex_elements) {
2078 debug_printf("draw: vs with %d inputs but only have %d vertex elements\n",
2079 key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements);
2080 memset(key->vertex_element, 0,
2081 sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
2082 }
2083 memcpy(key->vertex_element,
2084 llvm->draw->pt.vertex_element,
2085 sizeof(struct pipe_vertex_element) *
2086 MIN2(key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements));
2087
2088 draw_sampler = draw_llvm_variant_key_samplers(key);
2089 memset(draw_sampler, 0,
2090 MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
2091
2092 for (unsigned i = 0 ; i < key->nr_samplers; i++) {
2093 lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
2094 llvm->draw->samplers[PIPE_SHADER_VERTEX][i]);
2095 }
2096 for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2097 lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
2098 llvm->draw->sampler_views[PIPE_SHADER_VERTEX][i]);
2099 }
2100
2101 draw_image = draw_llvm_variant_key_images(key);
2102 memset(draw_image, 0,
2103 key->nr_images * sizeof *draw_image);
2104 for (unsigned i = 0; i < key->nr_images; i++) {
2105 lp_sampler_static_texture_state_image(&draw_image[i].image_state,
2106 llvm->draw->images[PIPE_SHADER_VERTEX][i]);
2107 }
2108 return key;
2109 }
2110
2111
2112 void
draw_llvm_dump_variant_key(struct draw_llvm_variant_key * key)2113 draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
2114 {
2115 struct lp_sampler_static_state *sampler = draw_llvm_variant_key_samplers(key);
2116 struct lp_image_static_state *image = draw_llvm_variant_key_images(key);
2117 debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
2118 debug_printf("clip_xy = %u\n", key->clip_xy);
2119 debug_printf("clip_z = %u\n", key->clip_z);
2120 debug_printf("clip_user = %u\n", key->clip_user);
2121 debug_printf("bypass_viewport = %u\n", key->bypass_viewport);
2122 debug_printf("clip_halfz = %u\n", key->clip_halfz);
2123 debug_printf("need_edgeflags = %u\n", key->need_edgeflags);
2124 debug_printf("has_gs_or_tes = %u\n", key->has_gs_or_tes);
2125 debug_printf("ucp_enable = %u\n", key->ucp_enable);
2126
2127 for (unsigned i = 0 ; i < key->nr_vertex_elements; i++) {
2128 debug_printf("vertex_element[%i].src_offset = %u\n", i, key->vertex_element[i].src_offset);
2129 debug_printf("vertex_element[%i].instance_divisor = %u\n", i, key->vertex_element[i].instance_divisor);
2130 debug_printf("vertex_element[%i].vertex_buffer_index = %u\n", i, key->vertex_element[i].vertex_buffer_index);
2131 debug_printf("vertex_element[%i].src_format = %s\n", i, util_format_name(key->vertex_element[i].src_format));
2132 }
2133
2134 for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2135 debug_printf("sampler[%i].src_format = %s\n", i, util_format_name(sampler[i].texture_state.format));
2136 }
2137
2138 for (unsigned i = 0 ; i < key->nr_images; i++)
2139 debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
2140 }
2141
2142
2143 void
draw_llvm_set_mapped_texture(struct draw_context * draw,enum pipe_shader_type shader_stage,unsigned sview_idx,uint32_t width,uint32_t height,uint32_t depth,uint32_t first_level,uint32_t last_level,uint32_t num_samples,uint32_t sample_stride,const void * base_ptr,uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])2144 draw_llvm_set_mapped_texture(struct draw_context *draw,
2145 enum pipe_shader_type shader_stage,
2146 unsigned sview_idx,
2147 uint32_t width, uint32_t height, uint32_t depth,
2148 uint32_t first_level, uint32_t last_level,
2149 uint32_t num_samples,
2150 uint32_t sample_stride,
2151 const void *base_ptr,
2152 uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
2153 uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
2154 uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])
2155 {
2156 struct lp_jit_texture *jit_tex;
2157
2158 assert(shader_stage < DRAW_MAX_SHADER_STAGE);
2159 assert(sview_idx < ARRAY_SIZE(draw->llvm->jit_resources[shader_stage].textures));
2160
2161 jit_tex = &draw->llvm->jit_resources[shader_stage].textures[sview_idx];
2162 jit_tex->width = width;
2163 jit_tex->height = height;
2164 jit_tex->depth = depth;
2165 jit_tex->first_level = first_level;
2166 jit_tex->last_level = last_level;
2167 jit_tex->base = base_ptr;
2168 jit_tex->mip_offsets[0] = 0;
2169 if (num_samples > 1) {
2170 jit_tex->mip_offsets[0] = mip_offsets[0];
2171 jit_tex->mip_offsets[LP_JIT_TEXTURE_SAMPLE_STRIDE] = sample_stride;
2172 jit_tex->row_stride[0] = row_stride[0];
2173 jit_tex->img_stride[0] = img_stride[0];
2174 jit_tex->last_level = num_samples;
2175 } else {
2176 for (unsigned j = first_level; j <= last_level; j++) {
2177 jit_tex->mip_offsets[j] = mip_offsets[j];
2178 jit_tex->row_stride[j] = row_stride[j];
2179 jit_tex->img_stride[j] = img_stride[j];
2180 }
2181 }
2182 }
2183
2184
2185 void
draw_llvm_set_mapped_image(struct draw_context * draw,enum pipe_shader_type shader_stage,unsigned idx,uint32_t width,uint32_t height,uint32_t depth,const void * base_ptr,uint32_t row_stride,uint32_t img_stride,uint32_t num_samples,uint32_t sample_stride)2186 draw_llvm_set_mapped_image(struct draw_context *draw,
2187 enum pipe_shader_type shader_stage,
2188 unsigned idx,
2189 uint32_t width, uint32_t height, uint32_t depth,
2190 const void *base_ptr,
2191 uint32_t row_stride,
2192 uint32_t img_stride,
2193 uint32_t num_samples,
2194 uint32_t sample_stride)
2195 {
2196 struct lp_jit_image *jit_image;
2197
2198 assert(shader_stage < DRAW_MAX_SHADER_STAGE);
2199 assert(idx < ARRAY_SIZE(draw->llvm->jit_resources[shader_stage].images));
2200
2201 jit_image = &draw->llvm->jit_resources[shader_stage].images[idx];
2202
2203 jit_image->width = width;
2204 jit_image->height = height;
2205 jit_image->depth = depth;
2206 jit_image->base = base_ptr;
2207
2208 jit_image->row_stride = row_stride;
2209 jit_image->img_stride = img_stride;
2210 jit_image->num_samples = num_samples;
2211 jit_image->sample_stride = sample_stride;
2212 }
2213
2214
2215 void
draw_llvm_set_sampler_state(struct draw_context * draw,enum pipe_shader_type shader_type)2216 draw_llvm_set_sampler_state(struct draw_context *draw,
2217 enum pipe_shader_type shader_type)
2218 {
2219 assert(shader_type < DRAW_MAX_SHADER_STAGE);
2220 for (unsigned i = 0; i < draw->num_samplers[shader_type]; i++) {
2221 struct lp_jit_sampler *jit_sam = &draw->llvm->jit_resources[shader_type].samplers[i];
2222
2223 if (draw->samplers[shader_type][i]) {
2224 const struct pipe_sampler_state *s
2225 = draw->samplers[shader_type][i];
2226 jit_sam->min_lod = s->min_lod;
2227 jit_sam->max_lod = s->max_lod;
2228 jit_sam->lod_bias = s->lod_bias;
2229 jit_sam->max_aniso = s->max_anisotropy;
2230 COPY_4V(jit_sam->border_color, s->border_color.f);
2231 }
2232 }
2233 }
2234
2235
2236 void
draw_llvm_destroy_variant(struct draw_llvm_variant * variant)2237 draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
2238 {
2239 struct draw_llvm *llvm = variant->llvm;
2240
2241 if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2242 debug_printf("Deleting VS variant: %u vs variants,\t%u total variants\n",
2243 variant->shader->variants_cached, llvm->nr_variants);
2244 }
2245
2246 gallivm_destroy(variant->gallivm);
2247
2248 list_del(&variant->list_item_local.list);
2249 variant->shader->variants_cached--;
2250 list_del(&variant->list_item_global.list);
2251 llvm->nr_variants--;
2252 if(variant->function_name)
2253 FREE(variant->function_name);
2254 FREE(variant);
2255 }
2256
2257
2258 /**
2259 * Create LLVM types for various structures.
2260 */
2261 static void
create_gs_jit_types(struct draw_gs_llvm_variant * var)2262 create_gs_jit_types(struct draw_gs_llvm_variant *var)
2263 {
2264 struct gallivm_state *gallivm = var->gallivm;
2265
2266 var->context_type = create_gs_jit_context_type(gallivm,
2267 var->shader->base.vector_length,
2268 "draw_gs_jit_context");
2269 var->context_ptr_type = LLVMPointerType(var->context_type, 0);
2270
2271 var->resources_type = lp_build_jit_resources_type(gallivm);
2272 var->resources_ptr_type = LLVMPointerType(var->resources_type, 0);
2273 var->input_array_type = create_gs_jit_input_type(gallivm);
2274 }
2275
2276
2277 static LLVMTypeRef
get_gs_context_ptr_type(struct draw_gs_llvm_variant * variant)2278 get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant)
2279 {
2280 if (!variant->context_ptr_type)
2281 create_gs_jit_types(variant);
2282 return variant->context_ptr_type;
2283 }
2284
2285
2286 static LLVMValueRef
generate_mask_value(struct draw_gs_llvm_variant * variant,struct lp_type gs_type)2287 generate_mask_value(struct draw_gs_llvm_variant *variant,
2288 struct lp_type gs_type)
2289 {
2290 struct gallivm_state *gallivm = variant->gallivm;
2291 LLVMBuilderRef builder = gallivm->builder;
2292 struct lp_type mask_type = lp_int_type(gs_type);
2293 LLVMValueRef num_prims;
2294 LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
2295
2296 num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),
2297 variant->num_prims);
2298 for (unsigned i = 0; i < gs_type.length; i++) {
2299 LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2300 mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
2301 }
2302 mask_val = lp_build_compare(gallivm, mask_type,
2303 PIPE_FUNC_GREATER, num_prims, mask_val);
2304
2305 return mask_val;
2306 }
2307
2308
2309 static void
draw_gs_llvm_generate(struct draw_llvm * llvm,struct draw_gs_llvm_variant * variant)2310 draw_gs_llvm_generate(struct draw_llvm *llvm,
2311 struct draw_gs_llvm_variant *variant)
2312 {
2313 struct gallivm_state *gallivm = variant->gallivm;
2314 LLVMContextRef context = gallivm->context;
2315 LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
2316 LLVMTypeRef arg_types[9];
2317 LLVMTypeRef func_type;
2318 LLVMValueRef variant_func;
2319 LLVMValueRef context_ptr;
2320 LLVMValueRef resources_ptr;
2321 LLVMValueRef prim_id_ptr;
2322 LLVMBasicBlockRef block;
2323 LLVMBuilderRef builder;
2324 LLVMValueRef io_ptr, input_array, num_prims, mask_val;
2325 struct lp_build_sampler_soa *sampler = 0;
2326 struct lp_build_image_soa *image = NULL;
2327 struct lp_build_context bld;
2328 struct lp_bld_tgsi_system_values system_values;
2329 char func_name[64];
2330 struct lp_type gs_type;
2331 struct draw_gs_llvm_iface gs_iface;
2332 const struct tgsi_token *tokens = variant->shader->base.state.tokens;
2333 LLVMValueRef consts_ptr;
2334 LLVMValueRef ssbos_ptr;
2335 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
2336 struct lp_build_mask_context mask;
2337 const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
2338 unsigned vector_length = variant->shader->base.vector_length;
2339
2340 memset(&system_values, 0, sizeof(system_values));
2341 memset(&outputs, 0, sizeof(outputs));
2342
2343 snprintf(func_name, sizeof(func_name), "draw_llvm_gs_variant");
2344
2345 assert(variant->vertex_header_ptr_type);
2346
2347 LLVMTypeRef prim_id_type = LLVMVectorType(int32_type, vector_length);
2348 arg_types[0] = get_gs_context_ptr_type(variant); /* context */
2349 arg_types[1] = variant->resources_ptr_type;
2350 arg_types[2] = variant->input_array_type; /* input */
2351 arg_types[3] = LLVMPointerType(variant->vertex_header_ptr_type, 0); /* vertex_header */
2352 arg_types[4] = int32_type; /* num_prims */
2353 arg_types[5] = int32_type; /* instance_id */
2354 arg_types[6] = LLVMPointerType(prim_id_type, 0); /* prim_id_ptr */
2355 arg_types[7] = int32_type;
2356 arg_types[8] = int32_type;
2357
2358 func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
2359
2360 variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
2361
2362 variant->function = variant_func;
2363 variant->function_name = MALLOC(strlen(func_name)+1);
2364 strcpy(variant->function_name, func_name);
2365
2366 LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
2367
2368 for (unsigned i = 0; i < ARRAY_SIZE(arg_types); ++i)
2369 if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
2370 lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
2371
2372 if (gallivm->cache && gallivm->cache->data_size) {
2373 gallivm_stub_func(gallivm, variant_func);
2374 return;
2375 }
2376
2377 context_ptr = LLVMGetParam(variant_func, 0);
2378 resources_ptr = LLVMGetParam(variant_func, 1);
2379 input_array = LLVMGetParam(variant_func, 2);
2380 io_ptr = LLVMGetParam(variant_func, 3);
2381 num_prims = LLVMGetParam(variant_func, 4);
2382 system_values.instance_id = LLVMGetParam(variant_func, 5);
2383 prim_id_ptr = LLVMGetParam(variant_func, 6);
2384 system_values.invocation_id = LLVMGetParam(variant_func, 7);
2385 system_values.view_index = LLVMGetParam(variant_func, 8);
2386
2387 lp_build_name(context_ptr, "context");
2388 lp_build_name(resources_ptr, "resources");
2389 lp_build_name(input_array, "input");
2390 lp_build_name(io_ptr, "io");
2391 lp_build_name(num_prims, "num_prims");
2392 lp_build_name(system_values.instance_id, "instance_id");
2393 lp_build_name(prim_id_ptr, "prim_id_ptr");
2394 lp_build_name(system_values.invocation_id, "invocation_id");
2395 lp_build_name(system_values.view_index, "view_index");
2396
2397 variant->context_ptr = context_ptr;
2398 variant->io_ptr = io_ptr;
2399 variant->num_prims = num_prims;
2400
2401 gs_iface.base.fetch_input = draw_gs_llvm_fetch_input;
2402 gs_iface.base.emit_vertex = draw_gs_llvm_emit_vertex;
2403 gs_iface.base.end_primitive = draw_gs_llvm_end_primitive;
2404 gs_iface.base.gs_epilogue = draw_gs_llvm_epilogue;
2405 gs_iface.input = input_array;
2406 gs_iface.variant = variant;
2407
2408 /*
2409 * Function body
2410 */
2411
2412 block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
2413 builder = gallivm->builder;
2414 LLVMPositionBuilderAtEnd(builder, block);
2415
2416 lp_build_context_init(&bld, gallivm, lp_type_int(32));
2417
2418 memset(&gs_type, 0, sizeof gs_type);
2419 gs_type.floating = true; /* floating point values */
2420 gs_type.sign = true; /* values are signed */
2421 gs_type.norm = false; /* values are not limited to [0,1] or [-1,1] */
2422 gs_type.width = 32; /* 32-bit float */
2423 gs_type.length = vector_length;
2424
2425 consts_ptr = lp_jit_resources_constants(gallivm, variant->resources_type, resources_ptr);
2426
2427 ssbos_ptr = lp_jit_resources_ssbos(gallivm, variant->resources_type, resources_ptr);
2428
2429 /* code generated texture sampling */
2430 sampler = lp_bld_llvm_sampler_soa_create(variant->key.samplers,
2431 MAX2(variant->key.nr_samplers,
2432 variant->key.nr_sampler_views));
2433 image = lp_bld_llvm_image_soa_create(draw_gs_llvm_variant_key_images(&variant->key),
2434 variant->key.nr_images);
2435 mask_val = generate_mask_value(variant, gs_type);
2436 lp_build_mask_begin(&mask, gallivm, gs_type, mask_val);
2437
2438 if (gs_info->uses_primid) {
2439 system_values.prim_id = LLVMBuildLoad2(builder, prim_id_type, prim_id_ptr, "prim_id");
2440 }
2441
2442 if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2443 if (llvm->draw->gs.geometry_shader->state.type == PIPE_SHADER_IR_TGSI)
2444 tgsi_dump(tokens, 0);
2445 else
2446 nir_print_shader(llvm->draw->gs.geometry_shader->state.ir.nir, stderr);
2447 draw_gs_llvm_dump_variant_key(&variant->key);
2448 }
2449
2450 struct lp_build_tgsi_params params;
2451 memset(¶ms, 0, sizeof(params));
2452
2453 params.type = gs_type;
2454 params.mask = &mask;
2455 params.consts_ptr = consts_ptr;
2456 params.system_values = &system_values;
2457 params.context_type = variant->context_type;
2458 params.context_ptr = context_ptr;
2459 params.resources_type = variant->resources_type;
2460 params.resources_ptr = resources_ptr;
2461 params.sampler = sampler;
2462 params.info = &llvm->draw->gs.geometry_shader->info;
2463 params.gs_iface = (const struct lp_build_gs_iface *)&gs_iface;
2464 params.ssbo_ptr = ssbos_ptr;
2465 params.image = image;
2466 params.gs_vertex_streams = variant->shader->base.num_vertex_streams;
2467 params.aniso_filter_table = lp_jit_resources_aniso_filter_table(gallivm,
2468 variant->resources_type,
2469 resources_ptr);
2470
2471
2472 if (llvm->draw->gs.geometry_shader->state.type == PIPE_SHADER_IR_TGSI)
2473 lp_build_tgsi_soa(variant->gallivm,
2474 tokens,
2475 ¶ms,
2476 outputs);
2477 else
2478 lp_build_nir_soa(variant->gallivm,
2479 llvm->draw->gs.geometry_shader->state.ir.nir,
2480 ¶ms,
2481 outputs);
2482
2483 lp_bld_llvm_sampler_soa_destroy(sampler);
2484 lp_bld_llvm_image_soa_destroy(image);
2485
2486 lp_build_mask_end(&mask);
2487
2488 LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
2489
2490 gallivm_verify_function(gallivm, variant_func);
2491 }
2492
2493
2494 struct draw_gs_llvm_variant *
draw_gs_llvm_create_variant(struct draw_llvm * llvm,unsigned num_outputs,const struct draw_gs_llvm_variant_key * key)2495 draw_gs_llvm_create_variant(struct draw_llvm *llvm,
2496 unsigned num_outputs,
2497 const struct draw_gs_llvm_variant_key *key)
2498 {
2499 struct draw_gs_llvm_variant *variant;
2500 struct llvm_geometry_shader *shader =
2501 llvm_geometry_shader(llvm->draw->gs.geometry_shader);
2502 char module_name[64];
2503 unsigned char ir_sha1_cache_key[20];
2504 struct lp_cached_code cached = { 0 };
2505 bool needs_caching = false;
2506
2507 variant = MALLOC(sizeof *variant +
2508 shader->variant_key_size -
2509 sizeof variant->key);
2510 if (!variant)
2511 return NULL;
2512
2513 variant->llvm = llvm;
2514 variant->shader = shader;
2515
2516 snprintf(module_name, sizeof(module_name), "draw_llvm_gs_variant%u",
2517 variant->shader->variants_cached);
2518
2519 memcpy(&variant->key, key, shader->variant_key_size);
2520
2521 if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
2522 draw_get_ir_cache_key(shader->base.state.ir.nir,
2523 key,
2524 shader->variant_key_size,
2525 num_outputs,
2526 ir_sha1_cache_key);
2527
2528 llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
2529 &cached,
2530 ir_sha1_cache_key);
2531 if (!cached.data_size)
2532 needs_caching = true;
2533 }
2534 variant->gallivm = gallivm_create(module_name, &llvm->context, &cached);
2535
2536 create_gs_jit_types(variant);
2537
2538 variant->vertex_header_type = lp_build_create_jit_vertex_header_type(variant->gallivm, num_outputs);
2539 variant->vertex_header_ptr_type = LLVMPointerType(variant->vertex_header_type, 0);
2540
2541 draw_gs_llvm_generate(llvm, variant);
2542
2543 gallivm_compile_module(variant->gallivm);
2544
2545 variant->jit_func = (draw_gs_jit_func)
2546 gallivm_jit_function(variant->gallivm, variant->function, variant->function_name);
2547
2548 if (needs_caching)
2549 llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
2550 &cached,
2551 ir_sha1_cache_key);
2552 gallivm_free_ir(variant->gallivm);
2553
2554 variant->list_item_global.base = variant;
2555 variant->list_item_local.base = variant;
2556 /*variant->no = */shader->variants_created++;
2557 variant->list_item_global.base = variant;
2558
2559 return variant;
2560 }
2561
2562
2563 void
draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant * variant)2564 draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant)
2565 {
2566 struct draw_llvm *llvm = variant->llvm;
2567
2568 if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2569 debug_printf("Deleting GS variant: %u gs variants,\t%u total variants\n",
2570 variant->shader->variants_cached, llvm->nr_gs_variants);
2571 }
2572
2573 gallivm_destroy(variant->gallivm);
2574
2575 list_del(&variant->list_item_local.list);
2576 variant->shader->variants_cached--;
2577 list_del(&variant->list_item_global.list);
2578 llvm->nr_gs_variants--;
2579 if(variant->function_name)
2580 FREE(variant->function_name);
2581 FREE(variant);
2582 }
2583
2584
2585 struct draw_gs_llvm_variant_key *
draw_gs_llvm_make_variant_key(struct draw_llvm * llvm,char * store)2586 draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
2587 {
2588 struct draw_gs_llvm_variant_key *key;
2589 struct lp_sampler_static_state *draw_sampler;
2590 struct lp_image_static_state *draw_image;
2591
2592 key = (struct draw_gs_llvm_variant_key *)store;
2593
2594 memset(key, 0, offsetof(struct draw_gs_llvm_variant_key, samplers[0]));
2595
2596 key->num_outputs = draw_total_gs_outputs(llvm->draw);
2597
2598 key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color;
2599
2600 /* All variants of this shader will have the same value for
2601 * nr_samplers. Not yet trying to compact away holes in the
2602 * sampler array.
2603 */
2604 key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
2605 if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
2606 key->nr_sampler_views =
2607 llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2608 } else {
2609 key->nr_sampler_views = key->nr_samplers;
2610 }
2611
2612 key->nr_images = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
2613
2614 draw_sampler = key->samplers;
2615
2616 memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
2617
2618 for (unsigned i = 0 ; i < key->nr_samplers; i++) {
2619 lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
2620 llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]);
2621 }
2622 for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2623 lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
2624 llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]);
2625 }
2626
2627 draw_image = draw_gs_llvm_variant_key_images(key);
2628 memset(draw_image, 0,
2629 key->nr_images * sizeof *draw_image);
2630 for (unsigned i = 0; i < key->nr_images; i++) {
2631 lp_sampler_static_texture_state_image(&draw_image[i].image_state,
2632 llvm->draw->images[PIPE_SHADER_GEOMETRY][i]);
2633 }
2634 return key;
2635 }
2636
2637
2638 void
draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key * key)2639 draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key)
2640 {
2641 struct lp_sampler_static_state *sampler = key->samplers;
2642 struct lp_image_static_state *image = draw_gs_llvm_variant_key_images(key);
2643
2644 debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
2645 for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2646 debug_printf("sampler[%i].src_format = %s\n", i,
2647 util_format_name(sampler[i].texture_state.format));
2648 }
2649
2650 for (unsigned i = 0 ; i < key->nr_images; i++)
2651 debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
2652
2653 }
2654
2655
2656 static void
create_tcs_jit_types(struct draw_tcs_llvm_variant * var)2657 create_tcs_jit_types(struct draw_tcs_llvm_variant *var)
2658 {
2659 struct gallivm_state *gallivm = var->gallivm;
2660
2661 var->resources_type = lp_build_jit_resources_type(gallivm);
2662 var->resources_ptr_type = LLVMPointerType(var->resources_type, 0);
2663 var->input_array_type = create_tcs_jit_input_type(gallivm);
2664 var->output_array_type = create_tcs_jit_output_type(gallivm);
2665 }
2666
2667
2668 static LLVMTypeRef
get_tcs_resources_ptr_type(struct draw_tcs_llvm_variant * variant)2669 get_tcs_resources_ptr_type(struct draw_tcs_llvm_variant *variant)
2670 {
2671 if (!variant->resources_ptr_type)
2672 create_tcs_jit_types(variant);
2673 return variant->resources_ptr_type;
2674 }
2675
2676
2677 static LLVMValueRef
draw_tcs_llvm_emit_fetch_input(const struct lp_build_tcs_iface * tes_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index)2678 draw_tcs_llvm_emit_fetch_input(const struct lp_build_tcs_iface *tes_iface,
2679 struct lp_build_context *bld,
2680 bool is_vindex_indirect,
2681 LLVMValueRef vertex_index,
2682 bool is_aindex_indirect,
2683 LLVMValueRef attrib_index,
2684 bool is_sindex_indirect,
2685 LLVMValueRef swizzle_index)
2686 {
2687 const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
2688 struct gallivm_state *gallivm = bld->gallivm;
2689 LLVMBuilderRef builder = gallivm->builder;
2690 LLVMValueRef indices[3];
2691 LLVMValueRef res;
2692 struct lp_type type = bld->type;
2693 LLVMTypeRef input_type = create_tcs_jit_input_type_deref(gallivm);
2694 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
2695
2696 if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
2697 res = bld->zero;
2698 for (int i = 0; i < type.length; ++i) {
2699 LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2700 LLVMValueRef vert_chan_index = vertex_index;
2701 LLVMValueRef attr_chan_index = attrib_index;
2702 LLVMValueRef swiz_chan_index = swizzle_index;
2703 LLVMValueRef channel_vec;
2704
2705 if (is_vindex_indirect) {
2706 vert_chan_index = LLVMBuildExtractElement(builder,
2707 vertex_index, idx, "");
2708 }
2709 if (is_aindex_indirect) {
2710 attr_chan_index = LLVMBuildExtractElement(builder,
2711 attrib_index, idx, "");
2712 }
2713 if (is_sindex_indirect) {
2714 swiz_chan_index = LLVMBuildExtractElement(builder,
2715 swizzle_index, idx, "");
2716 }
2717
2718 indices[0] = vert_chan_index;
2719 indices[1] = attr_chan_index;
2720 indices[2] = swiz_chan_index;
2721
2722 channel_vec = LLVMBuildGEP2(builder, input_type, tcs->input, indices, 3, "");
2723 channel_vec = LLVMBuildLoad2(builder, float_type, channel_vec, "");
2724 res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
2725 }
2726 } else {
2727 indices[0] = vertex_index;
2728 indices[1] = attrib_index;
2729 indices[2] = swizzle_index;
2730 res = LLVMBuildGEP2(builder, input_type, tcs->input, indices, 3, "");
2731 res = LLVMBuildLoad2(builder, float_type, res, "");
2732 res = lp_build_broadcast_scalar(bld, res);
2733 }
2734 return res;
2735 }
2736
2737
2738 static LLVMValueRef
draw_tcs_llvm_emit_fetch_output(const struct lp_build_tcs_iface * tes_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index,uint32_t name)2739 draw_tcs_llvm_emit_fetch_output(const struct lp_build_tcs_iface *tes_iface,
2740 struct lp_build_context *bld,
2741 bool is_vindex_indirect,
2742 LLVMValueRef vertex_index,
2743 bool is_aindex_indirect,
2744 LLVMValueRef attrib_index,
2745 bool is_sindex_indirect,
2746 LLVMValueRef swizzle_index,
2747 uint32_t name)
2748 {
2749 const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
2750 struct gallivm_state *gallivm = bld->gallivm;
2751 LLVMBuilderRef builder = gallivm->builder;
2752 LLVMValueRef indices[3];
2753 LLVMValueRef res;
2754 struct lp_type type = bld->type;
2755 LLVMTypeRef output_type = create_tcs_jit_output_type_deref(gallivm);
2756 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
2757
2758 if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
2759 res = bld->zero;
2760 for (int i = 0; i < type.length; ++i) {
2761 LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2762 LLVMValueRef vert_chan_index = vertex_index;
2763 LLVMValueRef attr_chan_index = attrib_index;
2764 LLVMValueRef swiz_chan_index = swizzle_index;
2765 LLVMValueRef channel_vec;
2766
2767 if (is_vindex_indirect) {
2768 vert_chan_index = LLVMBuildExtractElement(builder,
2769 vertex_index, idx, "");
2770 }
2771 if (is_aindex_indirect) {
2772 attr_chan_index = LLVMBuildExtractElement(builder,
2773 attrib_index, idx, "");
2774 }
2775 if (is_sindex_indirect) {
2776 swiz_chan_index = LLVMBuildExtractElement(builder,
2777 swizzle_index, idx, "");
2778 }
2779
2780 indices[0] = vert_chan_index;
2781 indices[1] = attr_chan_index;
2782 indices[2] = swiz_chan_index;
2783
2784 channel_vec = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2785 channel_vec = LLVMBuildLoad2(builder, float_type, channel_vec, "");
2786
2787 res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
2788 }
2789 } else {
2790 indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
2791 indices[1] = attrib_index;
2792 indices[2] = swizzle_index;
2793
2794 res = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2795 res = LLVMBuildLoad2(builder, float_type, res, "");
2796 res = lp_build_broadcast_scalar(bld, res);
2797 }
2798 return res;
2799 }
2800
2801
2802 static void
draw_tcs_llvm_emit_store_output(const struct lp_build_tcs_iface * tes_iface,struct lp_build_context * bld,unsigned name,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index,LLVMValueRef value,LLVMValueRef mask_vec)2803 draw_tcs_llvm_emit_store_output(const struct lp_build_tcs_iface *tes_iface,
2804 struct lp_build_context *bld,
2805 unsigned name,
2806 bool is_vindex_indirect,
2807 LLVMValueRef vertex_index,
2808 bool is_aindex_indirect,
2809 LLVMValueRef attrib_index,
2810 bool is_sindex_indirect,
2811 LLVMValueRef swizzle_index,
2812 LLVMValueRef value,
2813 LLVMValueRef mask_vec)
2814 {
2815 const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
2816 struct gallivm_state *gallivm = bld->gallivm;
2817 LLVMBuilderRef builder = gallivm->builder;
2818 LLVMValueRef indices[3];
2819 LLVMValueRef res;
2820 struct lp_type type = bld->type;
2821 LLVMTypeRef output_type = create_tcs_jit_output_type_deref(gallivm);
2822
2823 if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
2824 for (int i = 0; i < type.length; ++i) {
2825 LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2826 LLVMValueRef vert_chan_index = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
2827 LLVMValueRef attr_chan_index = attrib_index;
2828 LLVMValueRef swiz_chan_index = swizzle_index;
2829 LLVMValueRef channel_vec;
2830
2831 if (is_vindex_indirect) {
2832 vert_chan_index = LLVMBuildExtractElement(builder,
2833 vertex_index, idx, "");
2834 }
2835 if (is_aindex_indirect) {
2836 attr_chan_index = LLVMBuildExtractElement(builder,
2837 attrib_index, idx, "");
2838 }
2839
2840 if (is_sindex_indirect) {
2841 swiz_chan_index = LLVMBuildExtractElement(builder,
2842 swizzle_index, idx, "");
2843 }
2844
2845 indices[0] = vert_chan_index;
2846 indices[1] = attr_chan_index;
2847 indices[2] = swiz_chan_index;
2848
2849 channel_vec = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2850
2851 res = LLVMBuildExtractElement(builder, value, idx, "");
2852
2853 struct lp_build_if_state ifthen;
2854 LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
2855 cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
2856 lp_build_if(&ifthen, gallivm, cond);
2857 LLVMBuildStore(builder, res, channel_vec);
2858 lp_build_endif(&ifthen);
2859 }
2860 } else {
2861 indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
2862 indices[1] = attrib_index;
2863 indices[2] = swizzle_index;
2864
2865 res = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2866 for (unsigned i = 0; i < type.length; ++i) {
2867 LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2868 LLVMValueRef val = LLVMBuildExtractElement(builder, value, idx, "");
2869
2870 struct lp_build_if_state ifthen;
2871 LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
2872 cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
2873 lp_build_if(&ifthen, gallivm, cond);
2874 LLVMBuildStore(builder, val, res);
2875 lp_build_endif(&ifthen);
2876 }
2877 }
2878 }
2879
2880
2881 static LLVMValueRef
generate_tcs_mask_value(struct draw_tcs_llvm_variant * variant,struct lp_type tcs_type,LLVMValueRef limit,LLVMValueRef loop_counter)2882 generate_tcs_mask_value(struct draw_tcs_llvm_variant *variant,
2883 struct lp_type tcs_type, LLVMValueRef limit, LLVMValueRef loop_counter)
2884 {
2885 struct gallivm_state *gallivm = variant->gallivm;
2886 LLVMBuilderRef builder = gallivm->builder;
2887 struct lp_type mask_type = lp_int_type(tcs_type);
2888 LLVMValueRef num_vecs;
2889 LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
2890
2891 num_vecs = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);
2892 for (unsigned i = 0; i < tcs_type.length; i++) {
2893 LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2894 mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");
2895 }
2896 mask_val = lp_build_compare(gallivm, mask_type,
2897 PIPE_FUNC_GREATER, num_vecs, mask_val);
2898
2899 return mask_val;
2900 }
2901
2902
2903 static void
draw_tcs_llvm_generate(struct draw_llvm * llvm,struct draw_tcs_llvm_variant * variant)2904 draw_tcs_llvm_generate(struct draw_llvm *llvm,
2905 struct draw_tcs_llvm_variant *variant)
2906 {
2907 struct gallivm_state *gallivm = variant->gallivm;
2908 LLVMContextRef context = gallivm->context;
2909 LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
2910 LLVMTypeRef arg_types[7];
2911 LLVMTypeRef func_type, coro_func_type;
2912 LLVMValueRef variant_func, variant_coro;
2913 LLVMValueRef resources_ptr;
2914 LLVMValueRef view_index;
2915 LLVMValueRef input_array, output_array, prim_id, patch_vertices_in;
2916 LLVMValueRef mask_val;
2917 LLVMBasicBlockRef block;
2918 LLVMBuilderRef builder;
2919 struct lp_build_context bld, bldvec;
2920 struct lp_build_sampler_soa *sampler = 0;
2921 struct lp_build_image_soa *image = NULL;
2922 struct lp_bld_tgsi_system_values system_values;
2923 char func_name[64], func_name_coro[64];
2924 struct draw_tcs_llvm_iface tcs_iface;
2925 struct lp_build_mask_context mask;
2926 LLVMValueRef consts_ptr;
2927 LLVMValueRef ssbos_ptr;
2928 struct lp_type tcs_type;
2929 unsigned vector_length = variant->shader->base.vector_length;
2930
2931 memset(&system_values, 0, sizeof(system_values));
2932
2933 snprintf(func_name, sizeof(func_name), "draw_llvm_tcs_variant");
2934
2935 snprintf(func_name_coro, sizeof(func_name_coro), "draw_llvm_tcs_coro_variant");
2936
2937 arg_types[0] = get_tcs_resources_ptr_type(variant); /* context */
2938 arg_types[1] = variant->input_array_type; /* input */
2939 arg_types[2] = variant->output_array_type;
2940 arg_types[3] = int32_type;
2941 arg_types[4] = int32_type;
2942 arg_types[5] = int32_type;
2943 arg_types[6] = int32_type; /* coroutine only */
2944
2945 func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types) - 1, 0);
2946
2947 coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), arg_types, ARRAY_SIZE(arg_types), 0);
2948
2949 variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
2950
2951 variant_coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type);
2952
2953 variant->function = variant_func;
2954 variant->function_name = MALLOC(strlen(func_name)+1);
2955 strcpy(variant->function_name, func_name);
2956 LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
2957
2958 LLVMSetFunctionCallConv(variant_coro, LLVMCCallConv);
2959
2960 lp_build_coro_add_presplit(variant_coro);
2961
2962 for (unsigned i = 0; i < ARRAY_SIZE(arg_types); ++i) {
2963 if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
2964 lp_add_function_attr(variant_coro, i + 1, LP_FUNC_ATTR_NOALIAS);
2965 lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
2966 }
2967 }
2968
2969 if (gallivm->cache && gallivm->cache->data_size) {
2970 gallivm_stub_func(gallivm, variant_func);
2971 gallivm_stub_func(gallivm, variant_coro);
2972 return;
2973 }
2974
2975 resources_ptr = LLVMGetParam(variant_func, 0);
2976 input_array = LLVMGetParam(variant_func, 1);
2977 output_array = LLVMGetParam(variant_func, 2);
2978 prim_id = LLVMGetParam(variant_func, 3);
2979 patch_vertices_in = LLVMGetParam(variant_func, 4);
2980 view_index = LLVMGetParam(variant_func, 5);
2981
2982 lp_build_name(resources_ptr, "resources");
2983 lp_build_name(input_array, "input");
2984 lp_build_name(output_array, "output");
2985 lp_build_name(prim_id, "prim_id");
2986 lp_build_name(patch_vertices_in, "patch_vertices_in");
2987 lp_build_name(view_index, "view_index");
2988
2989 block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
2990 builder = gallivm->builder;
2991 LLVMPositionBuilderAtEnd(builder, block);
2992
2993 lp_build_context_init(&bld, gallivm, lp_type_int(32));
2994
2995 memset(&tcs_type, 0, sizeof tcs_type);
2996 tcs_type.floating = true; /* floating point values */
2997 tcs_type.sign = true; /* values are signed */
2998 tcs_type.norm = false; /* values are not limited to [0,1] or [-1,1] */
2999 tcs_type.width = 32; /* 32-bit float */
3000 tcs_type.length = vector_length;
3001
3002 lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tcs_type));
3003
3004 LLVMValueRef count = lp_build_const_int32(gallivm, variant->shader->base.vertices_out);
3005 LLVMValueRef step = lp_build_const_int32(gallivm, vector_length);
3006
3007 struct lp_build_loop_state loop_state[2];
3008 LLVMValueRef num_inner_loop;
3009 unsigned count_align = util_align_npot(variant->shader->base.vertices_out, tcs_type.length);
3010 num_inner_loop = lp_build_const_int32(gallivm, count_align / tcs_type.length);
3011 LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
3012 LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_inner_loop, "coro_hdls");
3013 unsigned end_coroutine = INT_MAX;
3014 lp_build_loop_begin(&loop_state[1], gallivm,
3015 lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
3016 lp_build_loop_begin(&loop_state[0], gallivm,
3017 lp_build_const_int32(gallivm, 0)); /* inner loop */
3018 {
3019 LLVMValueRef args[7];
3020 args[0] = resources_ptr;
3021 args[1] = input_array;
3022 args[2] = output_array;
3023 args[3] = prim_id;
3024 args[4] = patch_vertices_in;
3025 args[5] = view_index;
3026 args[6] = loop_state[0].counter;
3027 LLVMValueRef coro_entry = LLVMBuildGEP2(builder, hdl_ptr_type, coro_hdls, &loop_state[0].counter, 1, "");
3028 LLVMValueRef coro_hdl = LLVMBuildLoad2(builder, hdl_ptr_type, coro_entry, "coro_hdl");
3029
3030 struct lp_build_if_state ifstate;
3031 LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntEQ, loop_state[1].counter,
3032 lp_build_const_int32(gallivm, 0), "");
3033 /* first time here - call the coroutine function entry point */
3034 lp_build_if(&ifstate, gallivm, cmp);
3035 LLVMValueRef coro_ret = LLVMBuildCall2(builder, coro_func_type, variant_coro, args, 7, "");
3036 LLVMBuildStore(builder, coro_ret, coro_entry);
3037 lp_build_else(&ifstate);
3038 /* subsequent calls for this invocation - check if done. */
3039 LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl);
3040 struct lp_build_if_state ifstate2;
3041 lp_build_if(&ifstate2, gallivm, coro_done);
3042 /* if done destroy and force loop exit */
3043 lp_build_coro_destroy(gallivm, coro_hdl);
3044 lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1));
3045 lp_build_else(&ifstate2);
3046 /* otherwise resume the coroutine */
3047 lp_build_coro_resume(gallivm, coro_hdl);
3048 lp_build_endif(&ifstate2);
3049 lp_build_endif(&ifstate);
3050 lp_build_loop_force_reload_counter(&loop_state[1]);
3051 }
3052 lp_build_loop_end_cond(&loop_state[0],
3053 num_inner_loop,
3054 NULL, LLVMIntUGE);
3055 lp_build_loop_end_cond(&loop_state[1],
3056 lp_build_const_int32(gallivm, end_coroutine),
3057 NULL, LLVMIntEQ);
3058 LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
3059
3060 block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "entry");
3061 LLVMPositionBuilderAtEnd(builder, block);
3062
3063 resources_ptr = LLVMGetParam(variant_coro, 0);
3064 input_array = LLVMGetParam(variant_coro, 1);
3065 output_array = LLVMGetParam(variant_coro, 2);
3066 prim_id = LLVMGetParam(variant_coro, 3);
3067 patch_vertices_in = LLVMGetParam(variant_coro, 4);
3068 view_index = LLVMGetParam(variant_coro, 5);
3069
3070 consts_ptr = lp_jit_resources_constants(gallivm, variant->resources_type, resources_ptr);
3071
3072 ssbos_ptr = lp_jit_resources_ssbos(gallivm, variant->resources_type, resources_ptr);
3073 sampler = lp_bld_llvm_sampler_soa_create(variant->key.samplers,
3074 MAX2(variant->key.nr_samplers,
3075 variant->key.nr_sampler_views));
3076 image = lp_bld_llvm_image_soa_create(draw_tcs_llvm_variant_key_images(&variant->key),
3077 variant->key.nr_images);
3078
3079 LLVMValueRef counter = LLVMGetParam(variant_coro, 6);
3080 LLVMValueRef invocvec = LLVMGetUndef(LLVMVectorType(int32_type, vector_length));
3081 for (unsigned i = 0; i < vector_length; i++) {
3082 LLVMValueRef loop_iter = lp_build_const_int32(gallivm, i);
3083 LLVMValueRef idx = LLVMBuildAdd(builder, LLVMBuildMul(builder, counter, step, ""), loop_iter, "");
3084 invocvec = LLVMBuildInsertElement(builder, invocvec, idx, loop_iter, "");
3085 }
3086
3087 system_values.invocation_id = invocvec;
3088 system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);
3089 system_values.view_index = view_index;
3090 system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);
3091 tcs_iface.input = input_array;
3092 tcs_iface.output = output_array;
3093 tcs_iface.base.emit_fetch_input = draw_tcs_llvm_emit_fetch_input;
3094 tcs_iface.base.emit_fetch_output = draw_tcs_llvm_emit_fetch_output;
3095 tcs_iface.base.emit_store_output = draw_tcs_llvm_emit_store_output;
3096
3097
3098 {
3099 LLVMValueRef coro_id = lp_build_coro_id(gallivm);
3100 LLVMValueRef coro_hdl = lp_build_coro_begin_alloc_mem(gallivm, coro_id);
3101
3102 mask_val = generate_tcs_mask_value(variant, tcs_type, count, LLVMBuildMul(builder, counter, step, ""));
3103 lp_build_mask_begin(&mask, gallivm, tcs_type, mask_val);
3104
3105 struct lp_build_coro_suspend_info coro_info;
3106
3107 LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "suspend");
3108 LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "cleanup");
3109
3110 coro_info.suspend = sus_block;
3111 coro_info.cleanup = clean_block;
3112
3113 struct lp_build_tgsi_params params;
3114 memset(¶ms, 0, sizeof(params));
3115
3116 params.type = tcs_type;
3117 params.mask = &mask;
3118 params.consts_ptr = consts_ptr;
3119 params.system_values = &system_values;
3120 params.resources_type = variant->resources_type;
3121 params.resources_ptr = resources_ptr;
3122 params.sampler = sampler;
3123 params.info = &llvm->draw->tcs.tess_ctrl_shader->info;
3124 params.ssbo_ptr = ssbos_ptr;
3125 params.image = image;
3126 params.coro = &coro_info;
3127 params.tcs_iface = &tcs_iface.base;
3128 params.aniso_filter_table = lp_jit_resources_aniso_filter_table(gallivm,
3129 variant->resources_type,
3130 resources_ptr);
3131
3132 lp_build_nir_soa(variant->gallivm,
3133 llvm->draw->tcs.tess_ctrl_shader->state.ir.nir,
3134 ¶ms, NULL);
3135
3136 lp_build_mask_end(&mask);
3137
3138 lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true);
3139 LLVMPositionBuilderAtEnd(builder, clean_block);
3140
3141 lp_build_coro_free_mem(gallivm, coro_id, coro_hdl);
3142
3143 LLVMBuildBr(builder, sus_block);
3144 LLVMPositionBuilderAtEnd(builder, sus_block);
3145
3146 lp_build_coro_end(gallivm, coro_hdl);
3147 LLVMBuildRet(builder, coro_hdl);
3148 }
3149
3150 lp_bld_llvm_sampler_soa_destroy(sampler);
3151 lp_bld_llvm_image_soa_destroy(image);
3152 gallivm_verify_function(gallivm, variant_func);
3153 gallivm_verify_function(gallivm, variant_coro);
3154 }
3155
3156
3157 struct draw_tcs_llvm_variant *
draw_tcs_llvm_create_variant(struct draw_llvm * llvm,unsigned num_outputs,const struct draw_tcs_llvm_variant_key * key)3158 draw_tcs_llvm_create_variant(struct draw_llvm *llvm,
3159 unsigned num_outputs,
3160 const struct draw_tcs_llvm_variant_key *key)
3161 {
3162 struct draw_tcs_llvm_variant *variant;
3163 struct llvm_tess_ctrl_shader *shader = llvm_tess_ctrl_shader(llvm->draw->tcs.tess_ctrl_shader);
3164 char module_name[64];
3165 unsigned char ir_sha1_cache_key[20];
3166 struct lp_cached_code cached = { 0 };
3167 bool needs_caching = false;
3168
3169 variant = MALLOC(sizeof *variant +
3170 shader->variant_key_size - sizeof variant->key);
3171 if (!variant)
3172 return NULL;
3173
3174 variant->llvm = llvm;
3175 variant->shader = shader;
3176
3177 snprintf(module_name, sizeof(module_name), "draw_llvm_tcs_variant%u",
3178 variant->shader->variants_cached);
3179
3180 memcpy(&variant->key, key, shader->variant_key_size);
3181
3182 if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
3183 draw_get_ir_cache_key(shader->base.state.ir.nir,
3184 key,
3185 shader->variant_key_size,
3186 num_outputs,
3187 ir_sha1_cache_key);
3188
3189 llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
3190 &cached,
3191 ir_sha1_cache_key);
3192 if (!cached.data_size)
3193 needs_caching = true;
3194 }
3195
3196 variant->gallivm = gallivm_create(module_name, &llvm->context, &cached);
3197
3198 create_tcs_jit_types(variant);
3199
3200 if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3201 nir_print_shader(llvm->draw->tcs.tess_ctrl_shader->state.ir.nir, stderr);
3202 draw_tcs_llvm_dump_variant_key(&variant->key);
3203 }
3204
3205 draw_tcs_llvm_generate(llvm, variant);
3206
3207 gallivm_compile_module(variant->gallivm);
3208
3209 variant->jit_func = (draw_tcs_jit_func)
3210 gallivm_jit_function(variant->gallivm, variant->function, variant->function_name);
3211
3212 if (needs_caching)
3213 llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
3214 &cached,
3215 ir_sha1_cache_key);
3216 gallivm_free_ir(variant->gallivm);
3217
3218 variant->list_item_global.base = variant;
3219 variant->list_item_local.base = variant;
3220 /*variant->no = */shader->variants_created++;
3221 variant->list_item_global.base = variant;
3222
3223 return variant;
3224 }
3225
3226
3227 void
draw_tcs_llvm_destroy_variant(struct draw_tcs_llvm_variant * variant)3228 draw_tcs_llvm_destroy_variant(struct draw_tcs_llvm_variant *variant)
3229 {
3230 struct draw_llvm *llvm = variant->llvm;
3231
3232 if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3233 debug_printf("Deleting TCS variant: %u tcs variants,\t%u total variants\n",
3234 variant->shader->variants_cached, llvm->nr_tcs_variants);
3235 }
3236
3237 gallivm_destroy(variant->gallivm);
3238
3239 list_del(&variant->list_item_local.list);
3240 variant->shader->variants_cached--;
3241 list_del(&variant->list_item_global.list);
3242 llvm->nr_tcs_variants--;
3243 if(variant->function_name)
3244 FREE(variant->function_name);
3245 FREE(variant);
3246 }
3247
3248
3249 struct draw_tcs_llvm_variant_key *
draw_tcs_llvm_make_variant_key(struct draw_llvm * llvm,char * store)3250 draw_tcs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
3251 {
3252 unsigned i;
3253 struct draw_tcs_llvm_variant_key *key;
3254 struct lp_sampler_static_state *draw_sampler;
3255 struct lp_image_static_state *draw_image;
3256
3257 key = (struct draw_tcs_llvm_variant_key *)store;
3258
3259 memset(key, 0, offsetof(struct draw_tcs_llvm_variant_key, samplers[0]));
3260
3261 /* All variants of this shader will have the same value for
3262 * nr_samplers. Not yet trying to compact away holes in the
3263 * sampler array.
3264 */
3265 key->nr_samplers = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
3266 if (llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
3267 key->nr_sampler_views =
3268 llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
3269 } else {
3270 key->nr_sampler_views = key->nr_samplers;
3271 }
3272
3273 key->nr_images = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
3274
3275 draw_sampler = key->samplers;
3276
3277 memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
3278
3279 for (i = 0 ; i < key->nr_samplers; i++) {
3280 lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
3281 llvm->draw->samplers[PIPE_SHADER_TESS_CTRL][i]);
3282 }
3283 for (i = 0 ; i < key->nr_sampler_views; i++) {
3284 lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
3285 llvm->draw->sampler_views[PIPE_SHADER_TESS_CTRL][i]);
3286 }
3287
3288 draw_image = draw_tcs_llvm_variant_key_images(key);
3289 memset(draw_image, 0,
3290 key->nr_images * sizeof *draw_image);
3291 for (i = 0; i < key->nr_images; i++) {
3292 lp_sampler_static_texture_state_image(&draw_image[i].image_state,
3293 llvm->draw->images[PIPE_SHADER_TESS_CTRL][i]);
3294 }
3295 return key;
3296 }
3297
3298
3299 void
draw_tcs_llvm_dump_variant_key(struct draw_tcs_llvm_variant_key * key)3300 draw_tcs_llvm_dump_variant_key(struct draw_tcs_llvm_variant_key *key)
3301 {
3302 struct lp_sampler_static_state *sampler = key->samplers;
3303 struct lp_image_static_state *image = draw_tcs_llvm_variant_key_images(key);
3304 for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
3305 debug_printf("sampler[%i].src_format = %s\n", i,
3306 util_format_name(sampler[i].texture_state.format));
3307 }
3308
3309 for (unsigned i = 0 ; i < key->nr_images; i++)
3310 debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
3311 }
3312
3313
3314 static void
create_tes_jit_types(struct draw_tes_llvm_variant * var)3315 create_tes_jit_types(struct draw_tes_llvm_variant *var)
3316 {
3317 struct gallivm_state *gallivm = var->gallivm;
3318
3319 var->resources_type = lp_build_jit_resources_type(gallivm);
3320 var->resources_ptr_type = LLVMPointerType(var->resources_type, 0);
3321 var->input_array_deref_type = create_tes_jit_input_deref_type(gallivm);
3322 var->input_array_type = LLVMPointerType(var->input_array_deref_type, 0); /* num vertices per prim */
3323 }
3324
3325
3326 static LLVMTypeRef
get_tes_resources_ptr_type(struct draw_tes_llvm_variant * variant)3327 get_tes_resources_ptr_type(struct draw_tes_llvm_variant *variant)
3328 {
3329 if (!variant->resources_ptr_type)
3330 create_tes_jit_types(variant);
3331 return variant->resources_ptr_type;
3332 }
3333
3334
3335 static LLVMValueRef
generate_tes_mask_value(struct draw_tes_llvm_variant * variant,struct lp_type tes_type,LLVMValueRef limit,LLVMValueRef loop_counter)3336 generate_tes_mask_value(struct draw_tes_llvm_variant *variant,
3337 struct lp_type tes_type, LLVMValueRef limit, LLVMValueRef loop_counter)
3338 {
3339 struct gallivm_state *gallivm = variant->gallivm;
3340 LLVMBuilderRef builder = gallivm->builder;
3341 struct lp_type mask_type = lp_int_type(tes_type);
3342 LLVMValueRef num_prims;
3343 LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
3344 unsigned i;
3345
3346 num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);
3347 for (i = 0; i < tes_type.length; i++) {
3348 LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3349 mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");
3350 }
3351 mask_val = lp_build_compare(gallivm, mask_type,
3352 PIPE_FUNC_GREATER, num_prims, mask_val);
3353
3354 return mask_val;
3355 }
3356
3357
3358 static LLVMValueRef
draw_tes_llvm_fetch_vertex_input(const struct lp_build_tes_iface * tes_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index)3359 draw_tes_llvm_fetch_vertex_input(const struct lp_build_tes_iface *tes_iface,
3360 struct lp_build_context *bld,
3361 bool is_vindex_indirect,
3362 LLVMValueRef vertex_index,
3363 bool is_aindex_indirect,
3364 LLVMValueRef attrib_index,
3365 bool is_sindex_indirect,
3366 LLVMValueRef swizzle_index)
3367 {
3368 const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
3369 struct gallivm_state *gallivm = bld->gallivm;
3370 LLVMBuilderRef builder = gallivm->builder;
3371 LLVMValueRef indices[3];
3372 LLVMValueRef res;
3373 struct lp_type type = bld->type;
3374
3375 if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
3376 res = bld->zero;
3377
3378 for (int i = 0; i < type.length; ++i) {
3379 LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3380 LLVMValueRef vert_chan_index = vertex_index;
3381 LLVMValueRef attr_chan_index = attrib_index;
3382 LLVMValueRef swiz_chan_index = swizzle_index;
3383 LLVMValueRef channel_vec;
3384
3385 if (is_vindex_indirect) {
3386 vert_chan_index = LLVMBuildExtractElement(builder,
3387 vertex_index, idx, "");
3388 }
3389 if (is_aindex_indirect) {
3390 attr_chan_index = LLVMBuildExtractElement(builder,
3391 attrib_index, idx, "");
3392 }
3393 if (is_sindex_indirect) {
3394 swiz_chan_index = LLVMBuildExtractElement(builder,
3395 swizzle_index, idx, "");
3396 }
3397
3398 indices[0] = vert_chan_index;
3399 indices[1] = attr_chan_index;
3400 indices[2] = swiz_chan_index;
3401
3402 channel_vec = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3403 channel_vec = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), channel_vec, "");
3404
3405 res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3406 }
3407 } else {
3408 indices[0] = vertex_index;
3409 indices[1] = attrib_index;
3410 indices[2] = swizzle_index;
3411
3412 res = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3413 res = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), res, "");
3414 res = lp_build_broadcast_scalar(bld, res);
3415 }
3416 return res;
3417 }
3418
3419
3420 static LLVMValueRef
draw_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface * tes_iface,struct lp_build_context * bld,bool is_aindex_indirect,LLVMValueRef attrib_index,LLVMValueRef swizzle_index)3421 draw_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
3422 struct lp_build_context *bld,
3423 bool is_aindex_indirect,
3424 LLVMValueRef attrib_index,
3425 LLVMValueRef swizzle_index)
3426 {
3427 const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
3428 struct gallivm_state *gallivm = bld->gallivm;
3429 LLVMBuilderRef builder = gallivm->builder;
3430 LLVMValueRef indices[3];
3431 LLVMValueRef res;
3432 struct lp_type type = bld->type;
3433
3434 if (is_aindex_indirect) {
3435 res = bld->zero;
3436
3437 for (int i = 0; i < type.length; ++i) {
3438 LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3439 LLVMValueRef attr_chan_index = attrib_index;
3440 LLVMValueRef channel_vec;
3441
3442 if (is_aindex_indirect) {
3443 attr_chan_index = LLVMBuildExtractElement(builder,
3444 attrib_index, idx, "");
3445 }
3446
3447 indices[0] = lp_build_const_int32(gallivm, 0);
3448 indices[1] = attr_chan_index;
3449 indices[2] = swizzle_index;
3450
3451 channel_vec = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3452 channel_vec = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), channel_vec, "");
3453
3454 res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3455 }
3456 } else {
3457 indices[0] = lp_build_const_int32(gallivm, 0);
3458 indices[1] = attrib_index;
3459 indices[2] = swizzle_index;
3460
3461 res = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3462 res = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), res, "");
3463 res = lp_build_broadcast_scalar(bld, res);
3464 }
3465 return res;
3466 }
3467
3468
3469 static void
draw_tes_llvm_generate(struct draw_llvm * llvm,struct draw_tes_llvm_variant * variant)3470 draw_tes_llvm_generate(struct draw_llvm *llvm,
3471 struct draw_tes_llvm_variant *variant)
3472 {
3473 struct gallivm_state *gallivm = variant->gallivm;
3474 LLVMContextRef context = gallivm->context;
3475 LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
3476 LLVMTypeRef flt_type = LLVMFloatTypeInContext(context);
3477 LLVMTypeRef arg_types[11];
3478 LLVMTypeRef func_type;
3479 LLVMValueRef variant_func;
3480 LLVMValueRef resources_ptr;
3481 LLVMValueRef tess_coord[2], io_ptr, input_array, num_tess_coord;
3482 LLVMValueRef view_index;
3483 LLVMValueRef tess_inner, tess_outer, prim_id, patch_vertices_in;
3484 LLVMBasicBlockRef block;
3485 LLVMBuilderRef builder;
3486 LLVMValueRef mask_val;
3487 struct lp_build_context bld, bldvec;
3488 struct lp_build_sampler_soa *sampler = 0;
3489 struct lp_build_image_soa *image = NULL;
3490 struct lp_bld_tgsi_system_values system_values;
3491 char func_name[64];
3492 unsigned i;
3493 struct draw_tes_llvm_iface tes_iface;
3494 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
3495 struct lp_build_mask_context mask;
3496 LLVMValueRef consts_ptr;
3497 LLVMValueRef ssbos_ptr;
3498 LLVMValueRef step;
3499 struct lp_type tes_type;
3500 unsigned vector_length = variant->shader->base.vector_length;
3501 int primid_slot = -1;
3502
3503 memset(&system_values, 0, sizeof(system_values));
3504 memset(&outputs, 0, sizeof(outputs));
3505
3506 snprintf(func_name, sizeof(func_name), "draw_llvm_tes_variant");
3507
3508 LLVMTypeRef tess_outer_deref_type = LLVMArrayType(flt_type, 4);
3509 LLVMTypeRef tess_inner_deref_type = LLVMArrayType(flt_type, 2);
3510
3511 arg_types[0] = get_tes_resources_ptr_type(variant); /* context */
3512 arg_types[1] = variant->input_array_type; /* input */
3513 arg_types[2] = variant->vertex_header_ptr_type;
3514 arg_types[3] = int32_type;
3515 arg_types[4] = int32_type;
3516 arg_types[5] = LLVMPointerType(flt_type, 0);
3517 arg_types[6] = LLVMPointerType(flt_type, 0);
3518 arg_types[7] = LLVMPointerType(tess_outer_deref_type, 0);
3519 arg_types[8] = LLVMPointerType(tess_inner_deref_type, 0);
3520 arg_types[9] = int32_type;
3521 arg_types[10] = int32_type;
3522
3523 func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
3524 variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
3525
3526 variant->function = variant_func;
3527 variant->function_name = MALLOC(strlen(func_name)+1);
3528 strcpy(variant->function_name, func_name);
3529 LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
3530
3531 for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
3532 if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
3533 lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
3534
3535 if (gallivm->cache && gallivm->cache->data_size) {
3536 gallivm_stub_func(gallivm, variant_func);
3537 return;
3538 }
3539
3540 resources_ptr = LLVMGetParam(variant_func, 0);
3541 input_array = LLVMGetParam(variant_func, 1);
3542 io_ptr = LLVMGetParam(variant_func, 2);
3543 prim_id = LLVMGetParam(variant_func, 3);
3544 num_tess_coord = LLVMGetParam(variant_func, 4);
3545 tess_coord[0] = LLVMGetParam(variant_func, 5);
3546 tess_coord[1] = LLVMGetParam(variant_func, 6);
3547 tess_outer = LLVMGetParam(variant_func, 7);
3548 tess_inner = LLVMGetParam(variant_func, 8);
3549 patch_vertices_in = LLVMGetParam(variant_func, 9);
3550 view_index = LLVMGetParam(variant_func, 10);
3551
3552 lp_build_name(resources_ptr, "resources");
3553 lp_build_name(input_array, "input");
3554 lp_build_name(io_ptr, "io");
3555 lp_build_name(prim_id, "prim_id");
3556 lp_build_name(num_tess_coord, "num_tess_coord");
3557 lp_build_name(tess_coord[0], "tess_coord[0]");
3558 lp_build_name(tess_coord[1], "tess_coord[1]");
3559 lp_build_name(tess_outer, "tess_outer");
3560 lp_build_name(tess_inner, "tess_inner");
3561 lp_build_name(patch_vertices_in, "patch_vertices_in");
3562 lp_build_name(view_index, "view_index");
3563
3564 tes_iface.base.fetch_vertex_input = draw_tes_llvm_fetch_vertex_input;
3565 tes_iface.base.fetch_patch_input = draw_tes_llvm_fetch_patch_input;
3566 tes_iface.input = input_array;
3567 tes_iface.variant = variant;
3568
3569 block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
3570 builder = gallivm->builder;
3571 LLVMPositionBuilderAtEnd(builder, block);
3572
3573 lp_build_context_init(&bld, gallivm, lp_type_int(32));
3574
3575 memset(&tes_type, 0, sizeof tes_type);
3576 tes_type.floating = true; /* floating point values */
3577 tes_type.sign = true; /* values are signed */
3578 tes_type.norm = false; /* values are not limited to [0,1] or [-1,1] */
3579 tes_type.width = 32; /* 32-bit float */
3580 tes_type.length = vector_length;
3581
3582 lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tes_type));
3583 consts_ptr = lp_jit_resources_constants(gallivm, variant->resources_type, resources_ptr);
3584
3585 ssbos_ptr = lp_jit_resources_ssbos(gallivm, variant->resources_type, resources_ptr);
3586
3587 sampler = lp_bld_llvm_sampler_soa_create(variant->key.samplers,
3588 MAX2(variant->key.nr_samplers,
3589 variant->key.nr_sampler_views));
3590 image = lp_bld_llvm_image_soa_create(draw_tes_llvm_variant_key_images(&variant->key),
3591 variant->key.nr_images);
3592 step = lp_build_const_int32(gallivm, vector_length);
3593
3594 system_values.tess_outer = LLVMBuildLoad2(builder, tess_outer_deref_type, tess_outer, "");
3595 system_values.tess_inner = LLVMBuildLoad2(builder, tess_inner_deref_type, tess_inner, "");
3596
3597 system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);
3598
3599 system_values.view_index = view_index;
3600
3601 system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);
3602
3603 if (variant->key.primid_needed) {
3604 int slot = variant->key.primid_output;
3605 for (unsigned i = 0; i < 4; i++) {
3606 outputs[slot][i] = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, tes_type), "primid");
3607 LLVMBuildStore(builder, system_values.prim_id, outputs[slot][i]);
3608 }
3609 primid_slot = slot;
3610 }
3611 struct lp_build_loop_state lp_loop;
3612 lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
3613 {
3614 LLVMValueRef io;
3615
3616 io = LLVMBuildGEP2(builder, variant->vertex_header_type, io_ptr, &lp_loop.counter, 1, "");
3617 mask_val = generate_tes_mask_value(variant, tes_type, num_tess_coord, lp_loop.counter);
3618 lp_build_mask_begin(&mask, gallivm, tes_type, mask_val);
3619
3620 system_values.tess_coord = LLVMGetUndef(LLVMArrayType(LLVMVectorType(flt_type, vector_length), 3));
3621 for (i = 0; i < 3; i++) {
3622 LLVMValueRef tess_coord_chan = LLVMGetUndef(LLVMVectorType(flt_type, vector_length));
3623 for (unsigned j = 0; j < vector_length; j++) {
3624 LLVMValueRef idx = LLVMBuildAdd(builder, lp_loop.counter, lp_build_const_int32(gallivm, j), "");
3625 LLVMValueRef tc_val;
3626 if (i == 2) {
3627 if (variant->shader->base.prim_mode == MESA_PRIM_TRIANGLES) {
3628 tc_val = lp_build_const_float(gallivm, 1.0);
3629 tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get2(builder, flt_type, tess_coord[0], idx), "");
3630 tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get2(builder, flt_type, tess_coord[1], idx), "");
3631 } else
3632 tc_val = lp_build_const_float(gallivm, 0.0);
3633 } else
3634 tc_val = lp_build_pointer_get2(builder, flt_type, tess_coord[i], idx);
3635
3636 tess_coord_chan = LLVMBuildInsertElement(builder, tess_coord_chan, tc_val, lp_build_const_int32(gallivm, j), "");
3637 }
3638 system_values.tess_coord = LLVMBuildInsertValue(builder, system_values.tess_coord, tess_coord_chan, i, "");
3639 }
3640
3641 struct lp_build_tgsi_params params;
3642 memset(¶ms, 0, sizeof(params));
3643
3644 params.type = tes_type;
3645 params.mask = &mask;
3646 params.consts_ptr = consts_ptr;
3647 params.system_values = &system_values;
3648 params.resources_type = variant->resources_type;
3649 params.resources_ptr = resources_ptr;
3650 params.sampler = sampler;
3651 params.info = &llvm->draw->tes.tess_eval_shader->info;
3652 params.ssbo_ptr = ssbos_ptr;
3653 params.image = image;
3654 params.tes_iface = &tes_iface.base;
3655 params.aniso_filter_table = lp_jit_resources_aniso_filter_table(gallivm, variant->resources_type, resources_ptr);
3656
3657 lp_build_nir_soa(variant->gallivm,
3658 llvm->draw->tes.tess_eval_shader->state.ir.nir,
3659 ¶ms,
3660 outputs);
3661
3662 lp_build_mask_end(&mask);
3663
3664 if (variant->key.clamp_vertex_color) {
3665 const struct tgsi_shader_info *info = &llvm->draw->tes.tess_eval_shader->info;
3666 do_clamp_vertex_color(variant->gallivm,
3667 tes_type, info,
3668 outputs);
3669 }
3670 LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
3671 lp_int_type(tes_type), 0);
3672
3673 convert_to_aos(gallivm, variant->vertex_header_type, io, NULL, outputs, clipmask,
3674 draw_total_tes_outputs(llvm->draw), tes_type, primid_slot, false);
3675 }
3676 lp_build_loop_end_cond(&lp_loop, num_tess_coord, step, LLVMIntUGE);
3677 lp_bld_llvm_sampler_soa_destroy(sampler);
3678 lp_bld_llvm_image_soa_destroy(image);
3679
3680 LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
3681 gallivm_verify_function(gallivm, variant_func);
3682 }
3683
3684
3685 struct draw_tes_llvm_variant *
draw_tes_llvm_create_variant(struct draw_llvm * llvm,unsigned num_outputs,const struct draw_tes_llvm_variant_key * key)3686 draw_tes_llvm_create_variant(struct draw_llvm *llvm,
3687 unsigned num_outputs,
3688 const struct draw_tes_llvm_variant_key *key)
3689 {
3690 struct draw_tes_llvm_variant *variant;
3691 struct llvm_tess_eval_shader *shader = llvm_tess_eval_shader(llvm->draw->tes.tess_eval_shader);
3692 char module_name[64];
3693 unsigned char ir_sha1_cache_key[20];
3694 struct lp_cached_code cached = { 0 };
3695 bool needs_caching = false;
3696
3697 variant = MALLOC(sizeof *variant +
3698 shader->variant_key_size - sizeof variant->key);
3699 if (!variant)
3700 return NULL;
3701
3702 variant->llvm = llvm;
3703 variant->shader = shader;
3704
3705 snprintf(module_name, sizeof(module_name), "draw_llvm_tes_variant%u",
3706 variant->shader->variants_cached);
3707
3708 memcpy(&variant->key, key, shader->variant_key_size);
3709 if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
3710 draw_get_ir_cache_key(shader->base.state.ir.nir,
3711 key,
3712 shader->variant_key_size,
3713 num_outputs,
3714 ir_sha1_cache_key);
3715
3716 llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
3717 &cached,
3718 ir_sha1_cache_key);
3719 if (!cached.data_size)
3720 needs_caching = true;
3721 }
3722 variant->gallivm = gallivm_create(module_name, &llvm->context, &cached);
3723
3724 create_tes_jit_types(variant);
3725
3726 variant->vertex_header_type = lp_build_create_jit_vertex_header_type(variant->gallivm, num_outputs);
3727 variant->vertex_header_ptr_type = LLVMPointerType(variant->vertex_header_type, 0);
3728
3729 if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3730 nir_print_shader(llvm->draw->tes.tess_eval_shader->state.ir.nir, stderr);
3731 draw_tes_llvm_dump_variant_key(&variant->key);
3732 }
3733
3734 draw_tes_llvm_generate(llvm, variant);
3735
3736 gallivm_compile_module(variant->gallivm);
3737
3738 variant->jit_func = (draw_tes_jit_func)
3739 gallivm_jit_function(variant->gallivm, variant->function, variant->function_name);
3740
3741 if (needs_caching)
3742 llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
3743 &cached,
3744 ir_sha1_cache_key);
3745 gallivm_free_ir(variant->gallivm);
3746
3747 variant->list_item_global.base = variant;
3748 variant->list_item_local.base = variant;
3749 /*variant->no = */shader->variants_created++;
3750 variant->list_item_global.base = variant;
3751
3752 return variant;
3753 }
3754
3755
3756 void
draw_tes_llvm_destroy_variant(struct draw_tes_llvm_variant * variant)3757 draw_tes_llvm_destroy_variant(struct draw_tes_llvm_variant *variant)
3758 {
3759 struct draw_llvm *llvm = variant->llvm;
3760
3761 if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3762 debug_printf("Deleting TES variant: %u tes variants,\t%u total variants\n",
3763 variant->shader->variants_cached, llvm->nr_tes_variants);
3764 }
3765
3766 gallivm_destroy(variant->gallivm);
3767
3768 list_del(&variant->list_item_local.list);
3769 variant->shader->variants_cached--;
3770 list_del(&variant->list_item_global.list);
3771 llvm->nr_tes_variants--;
3772 if(variant->function_name)
3773 FREE(variant->function_name);
3774 FREE(variant);
3775 }
3776
3777
3778 struct draw_tes_llvm_variant_key *
draw_tes_llvm_make_variant_key(struct draw_llvm * llvm,char * store)3779 draw_tes_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
3780 {
3781 struct draw_tes_llvm_variant_key *key;
3782 struct lp_sampler_static_state *draw_sampler;
3783 struct lp_image_static_state *draw_image;
3784
3785 key = (struct draw_tes_llvm_variant_key *)store;
3786
3787 memset(key, 0, offsetof(struct draw_tes_llvm_variant_key, samplers[0]));
3788
3789 int primid_output = draw_find_shader_output(llvm->draw, TGSI_SEMANTIC_PRIMID, 0);
3790 if (primid_output >= 0) {
3791 key->primid_output = primid_output;
3792 key->primid_needed = true;
3793 }
3794
3795 key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color &&
3796 llvm->draw->gs.geometry_shader == NULL;
3797
3798 /* All variants of this shader will have the same value for
3799 * nr_samplers. Not yet trying to compact away holes in the
3800 * sampler array.
3801 */
3802 key->nr_samplers = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
3803 if (llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
3804 key->nr_sampler_views =
3805 llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
3806 } else {
3807 key->nr_sampler_views = key->nr_samplers;
3808 }
3809
3810 key->nr_images = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
3811
3812 draw_sampler = key->samplers;
3813
3814 memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
3815
3816 for (unsigned i = 0 ; i < key->nr_samplers; i++) {
3817 lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
3818 llvm->draw->samplers[PIPE_SHADER_TESS_EVAL][i]);
3819 }
3820 for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
3821 lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
3822 llvm->draw->sampler_views[PIPE_SHADER_TESS_EVAL][i]);
3823 }
3824
3825 draw_image = draw_tes_llvm_variant_key_images(key);
3826 memset(draw_image, 0,
3827 key->nr_images * sizeof *draw_image);
3828 for (unsigned i = 0; i < key->nr_images; i++) {
3829 lp_sampler_static_texture_state_image(&draw_image[i].image_state,
3830 llvm->draw->images[PIPE_SHADER_TESS_EVAL][i]);
3831 }
3832 return key;
3833 }
3834
3835
3836 void
draw_tes_llvm_dump_variant_key(struct draw_tes_llvm_variant_key * key)3837 draw_tes_llvm_dump_variant_key(struct draw_tes_llvm_variant_key *key)
3838 {
3839 struct lp_sampler_static_state *sampler = key->samplers;
3840 struct lp_image_static_state *image = draw_tes_llvm_variant_key_images(key);
3841
3842 if (key->primid_needed)
3843 debug_printf("prim id output %d\n", key->primid_output);
3844 debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
3845 for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
3846 debug_printf("sampler[%i].src_format = %s\n", i,
3847 util_format_name(sampler[i].texture_state.format));
3848 }
3849
3850 for (unsigned i = 0 ; i < key->nr_images; i++)
3851 debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
3852
3853 }
3854