xref: /aosp_15_r20/external/mesa3d/src/gallium/auxiliary/draw/draw_llvm.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include "draw_llvm.h"
29 
30 #include "draw_context.h"
31 #include "draw_vs.h"
32 #include "draw_gs.h"
33 
34 #include "gallivm/lp_bld_arit.h"
35 #include "gallivm/lp_bld_arit_overflow.h"
36 #include "gallivm/lp_bld_bitarit.h"
37 #include "gallivm/lp_bld_gather.h"
38 #include "gallivm/lp_bld_logic.h"
39 #include "gallivm/lp_bld_const.h"
40 #include "gallivm/lp_bld_coro.h"
41 #include "gallivm/lp_bld_swizzle.h"
42 #include "gallivm/lp_bld_struct.h"
43 #include "gallivm/lp_bld_type.h"
44 #include "gallivm/lp_bld_flow.h"
45 #include "gallivm/lp_bld_debug.h"
46 #include "gallivm/lp_bld_tgsi.h"
47 #include "gallivm/lp_bld_nir.h"
48 #include "gallivm/lp_bld_printf.h"
49 #include "gallivm/lp_bld_intr.h"
50 #include "gallivm/lp_bld_init.h"
51 #include "gallivm/lp_bld_type.h"
52 #include "gallivm/lp_bld_pack.h"
53 #include "gallivm/lp_bld_format.h"
54 #include "gallivm/lp_bld_misc.h"
55 #include "gallivm/lp_bld_jit_sample.h"
56 #include "tgsi/tgsi_exec.h"
57 #include "tgsi/tgsi_dump.h"
58 
59 #include "util/u_math.h"
60 #include "util/u_pointer.h"
61 #include "util/u_string.h"
62 #include "nir_serialize.h"
63 #include "util/mesa-sha1.h"
64 #define DEBUG_STORE 0
65 
66 
67 static void
68 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
69 
70 
71 struct draw_gs_llvm_iface {
72    struct lp_build_gs_iface base;
73 
74    struct draw_gs_llvm_variant *variant;
75    LLVMValueRef input;
76 };
77 
78 
/**
 * Downcast a generic GS interface pointer to the draw-specific wrapper.
 *
 * This is a plain pointer cast; it is only meaningful when 'iface' is the
 * 'base' member embedded at the start of a struct draw_gs_llvm_iface.
 */
static inline const struct draw_gs_llvm_iface *
draw_gs_llvm_iface(const struct lp_build_gs_iface *iface)
{
   const struct draw_gs_llvm_iface *gs_iface =
      (const struct draw_gs_llvm_iface *)iface;

   return gs_iface;
}
84 
85 
86 struct draw_tcs_llvm_iface {
87    struct lp_build_tcs_iface base;
88 
89    struct draw_tcs_llvm_variant *variant;
90    LLVMValueRef input;
91    LLVMValueRef output;
92 };
93 
94 
/**
 * Downcast a generic TCS interface pointer to the draw-specific wrapper.
 *
 * This is a plain pointer cast; it is only meaningful when 'iface' is the
 * 'base' member embedded at the start of a struct draw_tcs_llvm_iface.
 */
static inline const struct draw_tcs_llvm_iface *
draw_tcs_llvm_iface(const struct lp_build_tcs_iface *iface)
{
   const struct draw_tcs_llvm_iface *tcs_iface =
      (const struct draw_tcs_llvm_iface *)iface;

   return tcs_iface;
}
100 
101 
102 struct draw_tes_llvm_iface {
103    struct lp_build_tes_iface base;
104 
105    struct draw_tes_llvm_variant *variant;
106    LLVMValueRef input;
107 };
108 
109 
/**
 * Downcast a generic TES interface pointer to the draw-specific wrapper.
 *
 * This is a plain pointer cast; it is only meaningful when 'iface' is the
 * 'base' member embedded at the start of a struct draw_tes_llvm_iface.
 */
static inline const struct draw_tes_llvm_iface *
draw_tes_llvm_iface(const struct lp_build_tes_iface *iface)
{
   const struct draw_tes_llvm_iface *tes_iface =
      (const struct draw_tes_llvm_iface *)iface;

   return tes_iface;
}
115 
116 
117 /**
118  * Create LLVM type for draw_vertex_buffer.
119  */
120 static LLVMTypeRef
create_jit_dvbuffer_type(struct gallivm_state * gallivm,const char * struct_name)121 create_jit_dvbuffer_type(struct gallivm_state *gallivm,
122                          const char *struct_name)
123 {
124    LLVMTargetDataRef target = gallivm->target;
125    LLVMTypeRef dvbuffer_type;
126    LLVMTypeRef elem_types[DRAW_JIT_DVBUFFER_NUM_FIELDS];
127    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
128 
129    elem_types[DRAW_JIT_DVBUFFER_MAP] =
130       LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
131    elem_types[DRAW_JIT_DVBUFFER_SIZE] = int32_type;
132 
133    dvbuffer_type = LLVMStructTypeInContext(gallivm->context, elem_types,
134                                            ARRAY_SIZE(elem_types), 0);
135 
136    (void) target; /* silence unused var warning for non-debug build */
137    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, map,
138                           target, dvbuffer_type,
139                           DRAW_JIT_DVBUFFER_MAP);
140    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, size,
141                           target, dvbuffer_type,
142                           DRAW_JIT_DVBUFFER_SIZE);
143 
144    return dvbuffer_type;
145 }
146 
147 /**
148  * Create LLVM type for struct draw_jit_context
149  */
150 static LLVMTypeRef
create_vs_jit_context_type(struct gallivm_state * gallivm,const char * struct_name)151 create_vs_jit_context_type(struct gallivm_state *gallivm, const char *struct_name)
152 {
153    LLVMTargetDataRef target = gallivm->target;
154    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
155    LLVMTypeRef elem_types[DRAW_VS_JIT_CTX_NUM_FIELDS];
156 
157    elem_types[DRAW_VS_JIT_CTX_PLANES] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), DRAW_TOTAL_CLIP_PLANES), 0);
158    elem_types[DRAW_VS_JIT_CTX_VIEWPORT] = LLVMPointerType(float_type, 0);
159 
160    LLVMTypeRef context_type = LLVMStructTypeInContext(gallivm->context, elem_types, ARRAY_SIZE(elem_types), 0);
161 
162    (void) target; /* silence unused var warning for non-debug build */
163    LP_CHECK_MEMBER_OFFSET(struct draw_vs_jit_context, planes,
164                           target, context_type, DRAW_VS_JIT_CTX_PLANES);
165    LP_CHECK_MEMBER_OFFSET(struct draw_vs_jit_context, viewports,
166                           target, context_type, DRAW_VS_JIT_CTX_VIEWPORT);
167    LP_CHECK_STRUCT_SIZE(struct draw_vs_jit_context,
168                         target, context_type);
169 
170    return context_type;
171 }
172 
173 
174 /**
175  * Create LLVM type for struct draw_gs_jit_context
176  */
177 static LLVMTypeRef
create_gs_jit_context_type(struct gallivm_state * gallivm,unsigned vector_length,const char * struct_name)178 create_gs_jit_context_type(struct gallivm_state *gallivm,
179                            unsigned vector_length,
180                            const char *struct_name)
181 {
182    LLVMTargetDataRef target = gallivm->target;
183    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
184    LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
185    LLVMTypeRef elem_types[DRAW_GS_JIT_CTX_NUM_FIELDS];
186    LLVMTypeRef context_type;
187 
188    elem_types[DRAW_GS_JIT_CTX_PLANES] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
189                                                                       DRAW_TOTAL_CLIP_PLANES), 0);
190    elem_types[DRAW_GS_JIT_CTX_VIEWPORT] = LLVMPointerType(float_type, 0); /* viewports */
191 
192 
193    elem_types[DRAW_GS_JIT_CTX_PRIM_LENGTHS] = LLVMPointerType(LLVMPointerType(int_type, 0), 0);
194    elem_types[DRAW_GS_JIT_CTX_EMITTED_VERTICES] = LLVMPointerType(LLVMVectorType(int_type,
195                                                                                  vector_length), 0);
196    elem_types[DRAW_GS_JIT_CTX_EMITTED_PRIMS] = LLVMPointerType(LLVMVectorType(int_type,
197                                                                               vector_length), 0);
198 
199    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
200                                           ARRAY_SIZE(elem_types), 0);
201 
202    (void) target; /* silence unused var warning for non-debug build */
203    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes,
204                           target, context_type, DRAW_GS_JIT_CTX_PLANES);
205    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewports,
206                           target, context_type, DRAW_GS_JIT_CTX_VIEWPORT);
207    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,
208                           target, context_type,
209                           DRAW_GS_JIT_CTX_PRIM_LENGTHS);
210    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,
211                           target, context_type,
212                           DRAW_GS_JIT_CTX_EMITTED_VERTICES);
213    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,
214                           target, context_type,
215                           DRAW_GS_JIT_CTX_EMITTED_PRIMS);
216    LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context,
217                         target, context_type);
218    return context_type;
219 }
220 
221 
222 static LLVMTypeRef
create_gs_jit_input_type_deref(struct gallivm_state * gallivm)223 create_gs_jit_input_type_deref(struct gallivm_state *gallivm)
224 {
225    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
226    LLVMTypeRef input_array;
227 
228    input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */
229    input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */
230    input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
231    return input_array;
232 }
233 
234 
235 static LLVMTypeRef
create_gs_jit_input_type(struct gallivm_state * gallivm)236 create_gs_jit_input_type(struct gallivm_state *gallivm)
237 {
238    return LLVMPointerType(create_gs_jit_input_type_deref(gallivm), 0); /* num vertices per prim */
239 }
240 
241 
242 /**
243  * Create LLVM type for struct pipe_vertex_buffer
244  */
245 static LLVMTypeRef
create_jit_vertex_buffer_type(struct gallivm_state * gallivm,const char * struct_name)246 create_jit_vertex_buffer_type(struct gallivm_state *gallivm,
247                               const char *struct_name)
248 {
249    LLVMTargetDataRef target = gallivm->target;
250    LLVMTypeRef elem_types[3];
251    LLVMTypeRef vb_type;
252 
253    elem_types[0] = LLVMInt8TypeInContext(gallivm->context);
254    elem_types[1] = LLVMInt32TypeInContext(gallivm->context);
255    elem_types[2] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
256 
257    vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
258                                      ARRAY_SIZE(elem_types), 0);
259 
260    (void) target; /* silence unused var warning for non-debug build */
261    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, is_user_buffer,
262                           target, vb_type, 0);
263    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
264                           target, vb_type, 1);
265    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer.resource,
266                           target, vb_type, 2);
267 
268    LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
269 
270    return vb_type;
271 }
272 
273 
274 static LLVMTypeRef
create_tcs_jit_input_type_deref(struct gallivm_state * gallivm)275 create_tcs_jit_input_type_deref(struct gallivm_state *gallivm)
276 {
277    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
278    LLVMTypeRef input_array;
279 
280    input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
281    input_array = LLVMArrayType(input_array, NUM_TCS_INPUTS); /* num attrs per vertex */
282    return input_array;
283 }
284 
285 
286 static LLVMTypeRef
create_tcs_jit_input_type(struct gallivm_state * gallivm)287 create_tcs_jit_input_type(struct gallivm_state *gallivm)
288 {
289    return LLVMPointerType(create_tcs_jit_input_type_deref(gallivm), 0); /* num vertices per prim */
290 }
291 
292 
293 static LLVMTypeRef
create_tcs_jit_output_type_deref(struct gallivm_state * gallivm)294 create_tcs_jit_output_type_deref(struct gallivm_state *gallivm)
295 {
296    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
297    LLVMTypeRef output_array;
298 
299    output_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
300    output_array = LLVMArrayType(output_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
301    return output_array;
302 }
303 
304 
305 static LLVMTypeRef
create_tcs_jit_output_type(struct gallivm_state * gallivm)306 create_tcs_jit_output_type(struct gallivm_state *gallivm)
307 {
308    return LLVMPointerType(create_tcs_jit_output_type_deref(gallivm), 0); /* num vertices per prim */
309 }
310 
311 
312 static LLVMTypeRef
create_tes_jit_input_deref_type(struct gallivm_state * gallivm)313 create_tes_jit_input_deref_type(struct gallivm_state *gallivm)
314 {
315    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
316    LLVMTypeRef input_array;
317 
318    input_array = LLVMArrayType(float_type, TGSI_NUM_CHANNELS); /* num channels */
319    input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
320 
321    return input_array;
322 }
323 
324 
325 /**
326  * Create LLVM types for various structures.
327  */
328 static void
create_vs_jit_types(struct draw_llvm_variant * variant)329 create_vs_jit_types(struct draw_llvm_variant *variant)
330 {
331    struct gallivm_state *gallivm = variant->gallivm;
332 
333    variant->context_type = create_vs_jit_context_type(gallivm, "draw_vs_jit_context");
334    variant->context_ptr_type = LLVMPointerType(variant->context_type, 0);
335 
336    variant->resources_type = lp_build_jit_resources_type(gallivm);
337    variant->resources_ptr_type = LLVMPointerType(variant->resources_type, 0);
338 
339    variant->buffer_type = create_jit_dvbuffer_type(gallivm, "draw_vertex_buffer");
340    variant->buffer_ptr_type = LLVMPointerType(variant->buffer_type, 0);
341 
342    variant->vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");
343    variant->vb_ptr_type = LLVMPointerType(variant->vb_type, 0);
344 }
345 
346 
347 static LLVMTypeRef
get_context_ptr_type(struct draw_llvm_variant * variant)348 get_context_ptr_type(struct draw_llvm_variant *variant)
349 {
350    if (!variant->context_ptr_type)
351       create_vs_jit_types(variant);
352    return variant->context_ptr_type;
353 }
354 
355 
356 static LLVMTypeRef
get_buffer_ptr_type(struct draw_llvm_variant * variant)357 get_buffer_ptr_type(struct draw_llvm_variant *variant)
358 {
359    if (!variant->buffer_ptr_type)
360       create_vs_jit_types(variant);
361    return variant->buffer_ptr_type;
362 }
363 
364 
365 static LLVMTypeRef
get_vb_ptr_type(struct draw_llvm_variant * variant)366 get_vb_ptr_type(struct draw_llvm_variant *variant)
367 {
368    if (!variant->vb_ptr_type)
369       create_vs_jit_types(variant);
370    return variant->vb_ptr_type;
371 }
372 
373 static LLVMTypeRef
get_vertex_header_ptr_type(struct draw_llvm_variant * variant)374 get_vertex_header_ptr_type(struct draw_llvm_variant *variant)
375 {
376    assert(variant->vertex_header_ptr_type);
377    return variant->vertex_header_ptr_type;
378 }
379 
380 
381 /**
382  * Create per-context LLVM info.
383  */
384 struct draw_llvm *
draw_llvm_create(struct draw_context * draw,lp_context_ref * context)385 draw_llvm_create(struct draw_context *draw, lp_context_ref *context)
386 {
387    struct draw_llvm *llvm;
388 
389    if (!lp_build_init())
390       return NULL;
391 
392    llvm = CALLOC_STRUCT(draw_llvm);
393    if (!llvm)
394       return NULL;
395 
396    llvm->draw = draw;
397 
398    if (context) {
399       llvm->context = *context;
400       llvm->context.owned = false;
401    }
402    if (!llvm->context.ref) {
403       lp_context_create(&llvm->context);
404    }
405    if (!llvm->context.ref)
406       goto fail;
407 
408    llvm->nr_variants = 0;
409    list_inithead(&llvm->vs_variants_list.list);
410 
411    llvm->nr_gs_variants = 0;
412    list_inithead(&llvm->gs_variants_list.list);
413 
414    llvm->nr_tcs_variants = 0;
415    list_inithead(&llvm->tcs_variants_list.list);
416 
417    llvm->nr_tes_variants = 0;
418    list_inithead(&llvm->tes_variants_list.list);
419 
420    return llvm;
421 
422 fail:
423    draw_llvm_destroy(llvm);
424    return NULL;
425 }
426 
427 
428 /**
429  * Free per-context LLVM info.
430  */
431 void
draw_llvm_destroy(struct draw_llvm * llvm)432 draw_llvm_destroy(struct draw_llvm *llvm)
433 {
434    lp_context_destroy(&llvm->context);
435 
436    /* XXX free other draw_llvm data? */
437    FREE(llvm);
438 }
439 
440 
441 static void
draw_get_ir_cache_key(struct nir_shader * nir,const void * key,size_t key_size,uint32_t val_32bit,unsigned char ir_sha1_cache_key[20])442 draw_get_ir_cache_key(struct nir_shader *nir,
443                       const void *key, size_t key_size,
444                       uint32_t val_32bit,
445                       unsigned char ir_sha1_cache_key[20])
446 {
447    struct blob blob = { 0 };
448    unsigned ir_size;
449    void *ir_binary;
450 
451    blob_init(&blob);
452    nir_serialize(&blob, nir, true);
453    ir_binary = blob.data;
454    ir_size = blob.size;
455 
456    struct mesa_sha1 ctx;
457    _mesa_sha1_init(&ctx);
458    _mesa_sha1_update(&ctx, key, key_size);
459    _mesa_sha1_update(&ctx, ir_binary, ir_size);
460    _mesa_sha1_update(&ctx, &val_32bit, 4);
461    _mesa_sha1_final(&ctx, ir_sha1_cache_key);
462 
463    blob_finish(&blob);
464 }
465 
466 
467 /**
468  * Create LLVM-generated code for a vertex shader.
469  */
470 struct draw_llvm_variant *
draw_llvm_create_variant(struct draw_llvm * llvm,unsigned num_inputs,const struct draw_llvm_variant_key * key)471 draw_llvm_create_variant(struct draw_llvm *llvm,
472                          unsigned num_inputs,
473                          const struct draw_llvm_variant_key *key)
474 {
475    struct draw_llvm_variant *variant;
476    struct llvm_vertex_shader *shader =
477       llvm_vertex_shader(llvm->draw->vs.vertex_shader);
478    char module_name[64];
479    unsigned char ir_sha1_cache_key[20];
480    struct lp_cached_code cached = { 0 };
481    bool needs_caching = false;
482    variant = MALLOC(sizeof *variant +
483                     shader->variant_key_size -
484                     sizeof variant->key);
485    if (!variant)
486       return NULL;
487 
488    variant->llvm = llvm;
489    variant->shader = shader;
490    memcpy(&variant->key, key, shader->variant_key_size);
491 
492    snprintf(module_name, sizeof(module_name), "draw_llvm_vs_variant%u",
493             variant->shader->variants_cached);
494 
495    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
496       draw_get_ir_cache_key(shader->base.state.ir.nir,
497                             key,
498                             shader->variant_key_size,
499                             num_inputs,
500                             ir_sha1_cache_key);
501 
502       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
503                                          &cached,
504                                          ir_sha1_cache_key);
505       if (!cached.data_size)
506          needs_caching = true;
507    }
508    variant->gallivm = gallivm_create(module_name, &llvm->context, &cached);
509 
510    create_vs_jit_types(variant);
511 
512    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
513       if (llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_TGSI)
514          tgsi_dump(llvm->draw->vs.vertex_shader->state.tokens, 0);
515       else
516          nir_print_shader(llvm->draw->vs.vertex_shader->state.ir.nir, stderr);
517       draw_llvm_dump_variant_key(&variant->key);
518    }
519 
520    variant->vertex_header_type = lp_build_create_jit_vertex_header_type(variant->gallivm, num_inputs);
521    variant->vertex_header_ptr_type = LLVMPointerType(variant->vertex_header_type, 0);
522 
523    draw_llvm_generate(llvm, variant);
524 
525    gallivm_compile_module(variant->gallivm);
526 
527    variant->jit_func = (draw_jit_vert_func)
528          gallivm_jit_function(variant->gallivm, variant->function, variant->function_name);
529 
530    if (needs_caching)
531       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
532                                            &cached,
533                                            ir_sha1_cache_key);
534    gallivm_free_ir(variant->gallivm);
535 
536    variant->list_item_global.base = variant;
537    variant->list_item_local.base = variant;
538    /*variant->no = */shader->variants_created++;
539    variant->list_item_global.base = variant;
540 
541    return variant;
542 }
543 
544 
545 static void
do_clamp_vertex_color(struct gallivm_state * gallivm,struct lp_type type,const struct tgsi_shader_info * info,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS])546 do_clamp_vertex_color(struct gallivm_state *gallivm,
547                       struct lp_type type,
548                       const struct tgsi_shader_info *info,
549                       LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
550 {
551    LLVMBuilderRef builder = gallivm->builder;
552    LLVMValueRef out;
553    unsigned chan, attrib;
554    struct lp_build_context bld;
555    lp_build_context_init(&bld, gallivm, type);
556 
557    for (attrib = 0; attrib < info->num_outputs; ++attrib) {
558       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
559          if (outputs[attrib][chan]) {
560             switch (info->output_semantic_name[attrib]) {
561             case TGSI_SEMANTIC_COLOR:
562             case TGSI_SEMANTIC_BCOLOR:
563                out = LLVMBuildLoad2(builder, LLVMTypeOf(bld.zero), outputs[attrib][chan], "");
564                out = lp_build_clamp(&bld, out, bld.zero, bld.one);
565                LLVMBuildStore(builder, out, outputs[attrib][chan]);
566                break;
567             }
568          }
569       }
570    }
571 }
572 
573 
574 static void
generate_vs(struct draw_llvm_variant * variant,LLVMBuilderRef builder,struct lp_type vs_type,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],const LLVMValueRef (* inputs)[TGSI_NUM_CHANNELS],const struct lp_bld_tgsi_system_values * system_values,LLVMValueRef context_ptr,LLVMValueRef resources_ptr,const struct lp_build_sampler_soa * draw_sampler,const struct lp_build_image_soa * draw_image,bool clamp_vertex_color,struct lp_build_mask_context * bld_mask)575 generate_vs(struct draw_llvm_variant *variant,
576             LLVMBuilderRef builder,
577             struct lp_type vs_type,
578             LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
579             const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
580             const struct lp_bld_tgsi_system_values *system_values,
581             LLVMValueRef context_ptr,
582             LLVMValueRef resources_ptr,
583             const struct lp_build_sampler_soa *draw_sampler,
584             const struct lp_build_image_soa *draw_image,
585             bool clamp_vertex_color,
586             struct lp_build_mask_context *bld_mask)
587 {
588    struct draw_llvm *llvm = variant->llvm;
589    const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
590    LLVMValueRef consts_ptr =
591       lp_jit_resources_constants(variant->gallivm, variant->resources_type, resources_ptr);
592    LLVMValueRef ssbos_ptr =
593       lp_jit_resources_ssbos(variant->gallivm, variant->resources_type, resources_ptr);
594    struct draw_llvm_variant_key *key = &variant->key;
595 
596    struct lp_build_tgsi_params params;
597    memset(&params, 0, sizeof(params));
598 
599    params.type = vs_type;
600    params.mask = bld_mask;
601    params.consts_ptr = consts_ptr;
602    params.system_values = system_values;
603    params.inputs = inputs;
604    params.num_inputs = key->nr_vertex_elements;
605    params.context_type = variant->context_type;
606    params.context_ptr = context_ptr;
607    params.resources_type = variant->resources_type;
608    params.resources_ptr = resources_ptr;
609    params.sampler = draw_sampler;
610    params.info = &llvm->draw->vs.vertex_shader->info;
611    params.ssbo_ptr = ssbos_ptr;
612    params.image = draw_image;
613    params.aniso_filter_table = lp_jit_resources_aniso_filter_table(variant->gallivm,
614                                                                    variant->resources_type,
615                                                                    resources_ptr);
616 
617    if (llvm->draw->vs.vertex_shader->state.ir.nir &&
618        llvm->draw->vs.vertex_shader->state.type == PIPE_SHADER_IR_NIR) {
619       lp_build_nir_soa(variant->gallivm,
620                        llvm->draw->vs.vertex_shader->state.ir.nir,
621                        &params,
622                        outputs);
623    } else {
624       lp_build_tgsi_soa(variant->gallivm,
625                         tokens,
626                         &params,
627                         outputs);
628    }
629 
630    if (clamp_vertex_color) {
631       const struct tgsi_shader_info *info = &llvm->draw->vs.vertex_shader->info;
632       do_clamp_vertex_color(variant->gallivm,
633                             vs_type, info,
634                             outputs);
635    }
636 }
637 
638 
639 static void
fetch_instanced(struct gallivm_state * gallivm,const struct util_format_description * format_desc,struct lp_type vs_type,LLVMValueRef vb_stride,LLVMValueRef map_ptr,LLVMValueRef buffer_size_adj,LLVMValueRef * inputs,LLVMValueRef index)640 fetch_instanced(struct gallivm_state *gallivm,
641                 const struct util_format_description *format_desc,
642                 struct lp_type vs_type,
643                 LLVMValueRef vb_stride,
644                 LLVMValueRef map_ptr,
645                 LLVMValueRef buffer_size_adj,
646                 LLVMValueRef *inputs,
647                 LLVMValueRef index)
648 {
649    LLVMTypeRef i32_t = LLVMInt32TypeInContext(gallivm->context);
650    LLVMTypeRef aosf_t, aosi_t;
651    LLVMValueRef zero = LLVMConstNull(i32_t);
652    LLVMBuilderRef builder = gallivm->builder;
653    LLVMValueRef stride, buffer_overflowed, aos, index_valid;
654    unsigned i;
655 
656    aosf_t = lp_build_vec_type(gallivm, lp_float32_vec4_type());
657    aosi_t = lp_build_vec_type(gallivm, lp_int32_vec4_type());
658 
659    /* This mul can overflow. Wraparound is ok. */
660    stride = LLVMBuildMul(builder, vb_stride, index, "");
661 
662    buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
663                                      stride, buffer_size_adj,
664                                      "buffer_overflowed");
665 
666    if (0) {
667       lp_build_print_value(gallivm, "   instance index = ", index);
668       lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
669    }
670 
671    index_valid = LLVMBuildNot(builder, buffer_overflowed, "");
672    index_valid = LLVMBuildSExt(builder, index_valid, i32_t, "");
673    stride = LLVMBuildAnd(builder, stride, index_valid, "");
674 
675    aos = lp_build_fetch_rgba_aos(gallivm,
676                                  format_desc,
677                                  lp_float32_vec4_type(),
678                                  false,
679                                  map_ptr,
680                                  stride, zero, zero,
681                                  NULL);
682 
683    index_valid = lp_build_broadcast(gallivm, aosi_t, index_valid);
684    aos = LLVMBuildBitCast(builder, aos, aosi_t, "");
685    aos = LLVMBuildAnd(builder, aos, index_valid, "");
686    aos = LLVMBuildBitCast(builder, aos, aosf_t, "");
687 
688    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
689       LLVMValueRef index = lp_build_const_int32(gallivm, i);
690       inputs[i] = lp_build_extract_broadcast(gallivm,
691                                              lp_float32_vec4_type(),
692                                              vs_type, aos, index);
693    }
694 }
695 
696 
697 static void
fetch_vector(struct gallivm_state * gallivm,const struct util_format_description * format_desc,struct lp_type vs_type,LLVMValueRef vb_stride,LLVMValueRef map_ptr,LLVMValueRef buffer_size_adj,LLVMValueRef * inputs,LLVMValueRef indices)698 fetch_vector(struct gallivm_state *gallivm,
699              const struct util_format_description *format_desc,
700              struct lp_type vs_type,
701              LLVMValueRef vb_stride,
702              LLVMValueRef map_ptr,
703              LLVMValueRef buffer_size_adj,
704              LLVMValueRef *inputs,
705              LLVMValueRef indices)
706 {
707    LLVMBuilderRef builder = gallivm->builder;
708    struct lp_build_context blduivec;
709    struct lp_type fetch_type = vs_type;
710    LLVMValueRef offset, valid_mask;
711    unsigned i;
712 
713    lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
714 
715    vb_stride = lp_build_broadcast_scalar(&blduivec, vb_stride);
716    buffer_size_adj = lp_build_broadcast_scalar(&blduivec, buffer_size_adj);
717 
718    /* This mul can overflow. Wraparound is ok. */
719    offset = lp_build_mul(&blduivec, vb_stride, indices);
720 
721    valid_mask = lp_build_compare(gallivm, blduivec.type,
722                                  PIPE_FUNC_LESS, offset, buffer_size_adj);
723 
724    /* not valid elements use offset 0 */
725    offset = LLVMBuildAnd(builder, offset, valid_mask, "");
726 
727    if (0) {
728       lp_build_print_value(gallivm, "   indices = ", indices);
729       lp_build_print_value(gallivm, "   offsets = ", offset);
730       lp_build_print_value(gallivm, "   valid_mask = ", valid_mask);
731    }
732 
733    /*
734     * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches.
735     * This should always produce better code.
736     */
737 
738    /* The type handling is annoying here... */
739    if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
740        format_desc->channel[0].pure_integer) {
741       if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
742          fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length);
743       } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
744          fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length);
745       }
746    }
747 
748    lp_build_fetch_rgba_soa(gallivm, format_desc,
749                            fetch_type, false, map_ptr, offset,
750                            blduivec.zero, blduivec.zero,
751                            NULL, inputs);
752 
753    for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
754       inputs[i] = LLVMBuildBitCast(builder, inputs[i],
755                                    lp_build_vec_type(gallivm, vs_type), "");
756    }
757 
758    /* out-of-bound fetches return all zeros */
759    for (i = 0; i < format_desc->nr_channels; i++) {
760       inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, "");
761       inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, "");
762       inputs[i] = LLVMBuildBitCast(builder, inputs[i],
763                                    lp_build_vec_type(gallivm, vs_type), "");
764    }
765 }
766 
767 
768 static void
store_aos(struct gallivm_state * gallivm,bool is_per_prim,LLVMTypeRef io_type,LLVMValueRef io_ptr,LLVMValueRef index,LLVMValueRef value)769 store_aos(struct gallivm_state *gallivm,
770           bool is_per_prim,
771           LLVMTypeRef io_type,
772           LLVMValueRef io_ptr,
773           LLVMValueRef index,
774           LLVMValueRef value)
775 {
776    LLVMTypeRef data_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, lp_float32_vec4_type()), 0);
777    LLVMBuilderRef builder = gallivm->builder;
778    LLVMValueRef data_ptr;
779    LLVMTypeRef data_type;
780    LLVMValueRef indices[3];
781 
782    indices[0] = lp_build_const_int32(gallivm, 0);
783    indices[1] = index;
784    indices[2] = lp_build_const_int32(gallivm, 0);
785 
786    if (!is_per_prim) {
787       data_ptr = lp_jit_vertex_header_data(gallivm, io_type, io_ptr);
788       data_type = LLVMStructGetTypeAtIndex(io_type, LP_JIT_VERTEX_HEADER_DATA);
789    } else {
790       data_ptr = io_ptr;
791       data_type = io_type;
792    }
793 
794    data_ptr = LLVMBuildGEP2(builder, data_type, data_ptr, indices, 3, "");
795    data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, "");
796 
797 #if DEBUG_STORE
798    if (is_per_prim)
799       lp_build_printf(gallivm, "    ---- %p storing prim attribute %d (io = %p)\n", data_ptr, index, io_ptr);
800    else
801       lp_build_printf(gallivm, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
802 #endif
803 
804    /* Unaligned store due to the vertex header */
805    LLVMSetAlignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float));
806 }
807 
808 
809 /**
810  * Adjust the mask to architecture endianess. The mask will the store in struct:
811  *
812  * struct vertex_header {
813  *    unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
814  *    unsigned edgeflag:1;
815  *    unsigned pad:1;
816  *    unsigned vertex_id:16;
817  *    [...]
818  * }
819  *
820  * On little-endian machine nothing needs to done, however on bit-endian machine
821  * the mask's fields need to be adjusted with the algorithm:
822  *
823  * uint32_t reverse (uint32_t x)
824  * {
825  *   return (x >> 16) |              // vertex_id
826  *          ((x & 0x3fff) << 18) |   // clipmask
827  *          ((x & 0x4000) << 3) |    // edgeflag
828  *          ((x & 0x8000) << 1);     // pad
829  * }
830  */
831 static LLVMValueRef
adjust_mask(struct gallivm_state * gallivm,LLVMValueRef mask)832 adjust_mask(struct gallivm_state *gallivm,
833             LLVMValueRef mask)
834 {
835 #if UTIL_ARCH_BIG_ENDIAN
836    LLVMBuilderRef builder = gallivm->builder;
837    LLVMValueRef vertex_id;
838    LLVMValueRef clipmask;
839    LLVMValueRef pad;
840    LLVMValueRef edgeflag;
841 
842    vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), "");
843    clipmask  = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), "");
844    clipmask  = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), "");
845    if (0) {
846       pad = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), "");
847       pad = LLVMBuildShl(builder, pad, lp_build_const_int32(gallivm, 1), "");
848    }
849    edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
850    edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 3), "");
851 
852    mask = LLVMBuildOr(builder, vertex_id, clipmask, "");
853    if (0) {
854       mask = LLVMBuildOr(builder, mask, pad, "");
855    }
856    mask = LLVMBuildOr(builder, mask, edgeflag, "");
857 #endif
858    return mask;
859 }
860 
861 
862 void
draw_store_aos_array(struct gallivm_state * gallivm,struct lp_type soa_type,LLVMTypeRef io_type,LLVMValueRef io_ptr,LLVMValueRef * indices,LLVMValueRef * aos,int attrib,LLVMValueRef clipmask,bool need_edgeflag,bool is_per_prim)863 draw_store_aos_array(struct gallivm_state *gallivm,
864                      struct lp_type soa_type,
865                      LLVMTypeRef io_type,
866                      LLVMValueRef io_ptr,
867                      LLVMValueRef *indices,
868                      LLVMValueRef* aos,
869                      int attrib,
870                      LLVMValueRef clipmask,
871                      bool need_edgeflag, bool is_per_prim)
872 {
873    LLVMBuilderRef builder = gallivm->builder;
874    LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
875    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
876    LLVMValueRef linear_inds[LP_MAX_VECTOR_WIDTH / 32];
877    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
878    int vector_length = soa_type.length;
879 
880    assert(TGSI_NUM_CHANNELS == 4);
881 
882    for (int i = 0; i < vector_length; i++) {
883       linear_inds[i] = lp_build_const_int32(gallivm, i);
884       if (indices) {
885          inds[i] = indices[i];
886       } else {
887          inds[i] = linear_inds[i];
888       }
889       io_ptrs[i] = LLVMBuildGEP2(builder, io_type, io_ptr, &inds[i], 1, "");
890    }
891 
892    if (attrib == 0 && !is_per_prim) {
893       /* store vertex header for each of the n vertices */
894       LLVMValueRef val, cliptmp;
895       int vertex_id_pad_edgeflag;
896 
897       /* If this assertion fails, it means we need to update the bit twidding
898        * code here.  See struct vertex_header in draw_private.h.
899        */
900       assert(DRAW_TOTAL_CLIP_PLANES==14);
901       /* initialize vertex id:16 = 0xffff, pad:1 = 0, edgeflag:1 = 1 */
902       if (!need_edgeflag) {
903          vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
904       } else {
905          vertex_id_pad_edgeflag = (0xffff << 16);
906       }
907       if (vector_length == 1)
908          val = lp_build_const_int32(gallivm, vertex_id_pad_edgeflag);
909       else
910          val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type),
911                                       vertex_id_pad_edgeflag);
912 
913       /* OR with the clipmask */
914       cliptmp = LLVMBuildOr(builder, val, clipmask, "");
915       for (unsigned i = 0; i < vector_length; i++) {
916          LLVMValueRef id_ptr = lp_jit_vertex_header_id(gallivm, io_type, io_ptrs[i]);
917          if (vector_length > 1)
918             val = LLVMBuildExtractElement(builder, cliptmp, linear_inds[i], "");
919          else
920             val = cliptmp;
921          val = adjust_mask(gallivm, val);
922 #if DEBUG_STORE
923          lp_build_printf(gallivm, "io = %p, index %d, clipmask = %x\n",
924                          io_ptrs[i], inds[i], val);
925 #endif
926          LLVMBuildStore(builder, val, id_ptr);
927       }
928    }
929 
930    /* store for each of the n vertices */
931    for (int i = 0; i < vector_length; i++) {
932       store_aos(gallivm, is_per_prim, io_type, io_ptrs[i], attr_index, aos[i]);
933    }
934 }
935 
936 
937 static void
convert_to_aos(struct gallivm_state * gallivm,LLVMTypeRef io_type,LLVMValueRef io,LLVMValueRef * indices,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],LLVMValueRef clipmask,int num_outputs,struct lp_type soa_type,int primid_slot,bool need_edgeflag)938 convert_to_aos(struct gallivm_state *gallivm,
939                LLVMTypeRef io_type,
940                LLVMValueRef io,
941                LLVMValueRef *indices,
942                LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
943                LLVMValueRef clipmask,
944                int num_outputs,
945                struct lp_type soa_type,
946                int primid_slot,
947                bool need_edgeflag)
948 {
949    LLVMBuilderRef builder = gallivm->builder;
950 
951 #if DEBUG_STORE
952    lp_build_printf(gallivm, "   # storing begin\n");
953 #endif
954    for (unsigned attrib = 0; attrib < num_outputs; ++attrib) {
955       LLVMValueRef soa[TGSI_NUM_CHANNELS];
956       LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32];
957       for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
958          if (outputs[attrib][chan]) {
959             LLVMTypeRef single_type = (attrib == primid_slot) ? lp_build_int_vec_type(gallivm, soa_type) : lp_build_vec_type(gallivm, soa_type);
960             LLVMValueRef out = LLVMBuildLoad2(builder, single_type, outputs[attrib][chan], "");
961             lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
962 #if DEBUG_STORE
963             lp_build_printf(gallivm, "output %d : %d ",
964                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
965                                          attrib, 0),
966                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
967                                          chan, 0));
968             lp_build_print_value(gallivm, "val = ", out);
969             {
970                LLVMValueRef iv =
971                   LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), "");
972 
973                lp_build_print_value(gallivm, "  ival = ", iv);
974             }
975 #endif
976             soa[chan] = out;
977          } else {
978             soa[chan] = 0;
979          }
980       }
981 
982 
983       if (soa_type.length == TGSI_NUM_CHANNELS) {
984          lp_build_transpose_aos(gallivm, soa_type, soa, aos);
985       } else {
986          lp_build_transpose_aos(gallivm, soa_type, soa, soa);
987 
988          for (unsigned i = 0; i < soa_type.length; ++i) {
989             aos[i] = lp_build_extract_range(gallivm,
990                                             soa[i % TGSI_NUM_CHANNELS],
991                                             (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
992                                             TGSI_NUM_CHANNELS);
993          }
994       }
995 
996       draw_store_aos_array(gallivm,
997                            soa_type,
998                            io_type,
999                            io,
1000                            indices,
1001                            aos,
1002                            attrib,
1003                            clipmask,
1004                            need_edgeflag, false);
1005    }
1006 #if DEBUG_STORE
1007    lp_build_printf(gallivm, "   # storing end\n");
1008 #endif
1009 }
1010 
1011 
1012 /**
1013  * Stores original vertex positions in clip coordinates
1014  */
1015 static void
store_clip(struct gallivm_state * gallivm,const struct lp_type vs_type,LLVMTypeRef io_type,LLVMValueRef io_ptr,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],int idx)1016 store_clip(struct gallivm_state *gallivm,
1017            const struct lp_type vs_type,
1018            LLVMTypeRef io_type,
1019            LLVMValueRef io_ptr,
1020            LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1021            int idx)
1022 {
1023    LLVMBuilderRef builder = gallivm->builder;
1024    LLVMValueRef soa[4];
1025    LLVMValueRef aos[LP_MAX_VECTOR_LENGTH];
1026    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1027    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
1028    LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32];
1029    LLVMTypeRef clip_ptr_type =
1030       LLVMPointerType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context),
1031                                      4), 0);
1032 
1033    for (int i = 0; i < vs_type.length; i++) {
1034       inds[i] = lp_build_const_int32(gallivm, i);
1035       io_ptrs[i] = LLVMBuildGEP2(builder, io_type, io_ptr, &inds[i], 1, "");
1036    }
1037 
1038    LLVMTypeRef single_type = lp_build_vec_type(gallivm, vs_type);
1039    soa[0] = LLVMBuildLoad2(builder, single_type, outputs[idx][0], ""); /*x0 x1 .. xn*/
1040    soa[1] = LLVMBuildLoad2(builder, single_type, outputs[idx][1], ""); /*y0 y1 .. yn*/
1041    soa[2] = LLVMBuildLoad2(builder, single_type, outputs[idx][2], ""); /*z0 z1 .. zn*/
1042    soa[3] = LLVMBuildLoad2(builder, single_type, outputs[idx][3], ""); /*w0 w1 .. wn*/
1043 
1044    for (int i = 0; i < vs_type.length; i++) {
1045       clip_ptrs[i] = lp_jit_vertex_header_clip_pos(gallivm, io_type, io_ptrs[i]);
1046    }
1047 
1048    lp_build_transpose_aos(gallivm, vs_type, soa, soa);
1049    for (int i = 0; i < vs_type.length; ++i) {
1050       aos[i] = lp_build_extract_range(gallivm,
1051                                       soa[i % TGSI_NUM_CHANNELS],
1052                                       (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
1053                                       TGSI_NUM_CHANNELS);
1054    }
1055 
1056    for (int j = 0; j < vs_type.length; j++) {
1057       LLVMValueRef clip_ptr;
1058 
1059       clip_ptr = LLVMBuildPointerCast(builder, clip_ptrs[j], clip_ptr_type, "");
1060 
1061       /* Unaligned store */
1062       LLVMSetAlignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
1063    }
1064 }
1065 
1066 
1067 /**
1068  * Transforms the outputs for viewport mapping
1069  */
1070 static void
generate_viewport(struct draw_llvm_variant * variant,LLVMBuilderRef builder,struct lp_type vs_type,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],LLVMValueRef context_ptr)1071 generate_viewport(struct draw_llvm_variant *variant,
1072                   LLVMBuilderRef builder,
1073                   struct lp_type vs_type,
1074                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1075                   LLVMValueRef context_ptr)
1076 {
1077    struct gallivm_state *gallivm = variant->gallivm;
1078    struct lp_type f32_type = vs_type;
1079    const unsigned pos = variant->llvm->draw->vs.position_output;
1080    LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1081    LLVMValueRef out3 = LLVMBuildLoad2(builder, vs_type_llvm, outputs[pos][3], ""); /*w0 w1 .. wn*/
1082    LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
1083    LLVMValueRef vp_ptr = draw_vs_jit_context_viewports(variant, context_ptr);
1084 
1085    /* We treat pipe_viewport_state as a float array */
1086    const int scale_index_offset = offsetof(struct pipe_viewport_state, scale) / sizeof(float);
1087    const int trans_index_offset = offsetof(struct pipe_viewport_state, translate) / sizeof(float);
1088 
1089    /* for 1/w convention*/
1090    out3 = LLVMBuildFDiv(builder, const1, out3, "");
1091    LLVMBuildStore(builder, out3, outputs[pos][3]);
1092 
1093    LLVMTypeRef elem_type = lp_build_elem_type(gallivm, vs_type);
1094 
1095    /* Viewport Mapping */
1096    for (unsigned i = 0; i < 3; i++) {
1097       LLVMValueRef out = LLVMBuildLoad2(builder, vs_type_llvm, outputs[pos][i], ""); /*x0 x1 .. xn*/
1098       LLVMValueRef scale;
1099       LLVMValueRef trans;
1100       LLVMValueRef scale_i;
1101       LLVMValueRef trans_i;
1102       LLVMValueRef index;
1103 
1104       index = lp_build_const_int32(gallivm, i + scale_index_offset);
1105       scale_i = LLVMBuildGEP2(builder, elem_type, vp_ptr, &index, 1, "");
1106 
1107       index = lp_build_const_int32(gallivm, i + trans_index_offset);
1108       trans_i = LLVMBuildGEP2(builder, elem_type, vp_ptr, &index, 1, "");
1109 
1110       scale = lp_build_broadcast(gallivm, vs_type_llvm,
1111                                  LLVMBuildLoad2(builder, elem_type, scale_i, "scale"));
1112       trans = lp_build_broadcast(gallivm, vs_type_llvm,
1113                                  LLVMBuildLoad2(builder, elem_type, trans_i, "trans"));
1114 
1115       /* divide by w */
1116       out = LLVMBuildFMul(builder, out, out3, "");
1117       /* mult by scale, add translation */
1118       out = lp_build_fmuladd(builder, out, scale, trans);
1119 
1120       /* store transformed outputs */
1121       LLVMBuildStore(builder, out, outputs[pos][i]);
1122    }
1123 
1124 }
1125 
1126 
1127 /**
1128  * Returns clipmask as nxi32 bitmask for the n vertices
1129  */
1130 static LLVMValueRef
generate_clipmask(struct draw_llvm * llvm,struct gallivm_state * gallivm,struct lp_type vs_type,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS],struct draw_llvm_variant_key * key,LLVMTypeRef context_type,LLVMValueRef context_ptr,bool * have_clipdist)1131 generate_clipmask(struct draw_llvm *llvm,
1132                   struct gallivm_state *gallivm,
1133                   struct lp_type vs_type,
1134                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
1135                   struct draw_llvm_variant_key *key,
1136                   LLVMTypeRef context_type,
1137                   LLVMValueRef context_ptr,
1138                   bool *have_clipdist)
1139 {
1140    LLVMBuilderRef builder = gallivm->builder;
1141    LLVMValueRef mask; /* stores the <nxi32> clipmasks */
1142    LLVMValueRef test, temp;
1143    LLVMValueRef zero, shift;
1144    LLVMValueRef pos_x, pos_y, pos_z, pos_w;
1145    LLVMValueRef cv_x, cv_y, cv_z, cv_w;
1146    LLVMValueRef plane1, planes, plane_ptr;
1147    struct lp_type f32_type = vs_type;
1148    struct lp_type i32_type = lp_int_type(vs_type);
1149    const unsigned pos = llvm->draw->vs.position_output;
1150    const unsigned cv = llvm->draw->vs.clipvertex_output;
1151    int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
1152    bool have_cd = false;
1153    bool clip_user = key->clip_user;
1154    unsigned ucp_enable = key->ucp_enable;
1155    unsigned cd[2];
1156 
1157    cd[0] = llvm->draw->vs.ccdistance_output[0];
1158    cd[1] = llvm->draw->vs.ccdistance_output[1];
1159 
1160    if (cd[0] != pos || cd[1] != pos)
1161       have_cd = true;
1162 
1163    if (num_written_clipdistance && !clip_user) {
1164       clip_user = true;
1165       ucp_enable = (1 << num_written_clipdistance) - 1;
1166    }
1167 
1168    mask = lp_build_const_int_vec(gallivm, i32_type, 0);
1169    temp = lp_build_const_int_vec(gallivm, i32_type, 0);
1170    zero = lp_build_const_vec(gallivm, f32_type, 0);         /* 0.0f 0.0f 0.0f 0.0f */
1171    shift = lp_build_const_int_vec(gallivm, i32_type, 1);    /* 1 1 1 1 */
1172 
1173    LLVMTypeRef vec_type = LLVMTypeOf(zero);
1174 
1175    /*
1176     * load clipvertex and position from correct locations.
1177     * if they are the same just load them once.
1178     */
1179    pos_x = LLVMBuildLoad2(builder, vec_type, outputs[pos][0], ""); /*x0 x1 .. xn */
1180    pos_y = LLVMBuildLoad2(builder, vec_type, outputs[pos][1], ""); /*y0 y1 .. yn */
1181    pos_z = LLVMBuildLoad2(builder, vec_type, outputs[pos][2], ""); /*z0 z1 .. zn */
1182    pos_w = LLVMBuildLoad2(builder, vec_type, outputs[pos][3], ""); /*w0 w1 .. wn */
1183 
1184    if (clip_user && cv != pos) {
1185       cv_x = LLVMBuildLoad2(builder, vec_type, outputs[cv][0], ""); /*x0 x1 .. xn */
1186       cv_y = LLVMBuildLoad2(builder, vec_type, outputs[cv][1], ""); /*y0 y1 .. yn */
1187       cv_z = LLVMBuildLoad2(builder, vec_type, outputs[cv][2], ""); /*z0 z1 .. zn */
1188       cv_w = LLVMBuildLoad2(builder, vec_type, outputs[cv][3], ""); /*w0 w1 .. wn */
1189    } else {
1190       cv_x = pos_x;
1191       cv_y = pos_y;
1192       cv_z = pos_z;
1193       cv_w = pos_w;
1194    }
1195 
1196    /*
1197     * Be careful with the comparisons and NaNs (using llvm's unordered
1198     * comparisons here).
1199     */
1200    /* Cliptest, for hardwired planes */
1201    /*
1202     * XXX should take guardband into account (currently not in key).
1203     * Otherwise might run the draw pipeline stages for nothing.
1204     */
1205    if (key->clip_xy) {
1206       /* plane 1 */
1207       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
1208       temp = shift;
1209       test = LLVMBuildAnd(builder, test, temp, "");
1210       mask = test;
1211 
1212       /* plane 2 */
1213       test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
1214       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1215       temp = LLVMBuildShl(builder, temp, shift, "");
1216       test = LLVMBuildAnd(builder, test, temp, "");
1217       mask = LLVMBuildOr(builder, mask, test, "");
1218 
1219       /* plane 3 */
1220       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
1221       temp = LLVMBuildShl(builder, temp, shift, "");
1222       test = LLVMBuildAnd(builder, test, temp, "");
1223       mask = LLVMBuildOr(builder, mask, test, "");
1224 
1225       /* plane 4 */
1226       test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
1227       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1228       temp = LLVMBuildShl(builder, temp, shift, "");
1229       test = LLVMBuildAnd(builder, test, temp, "");
1230       mask = LLVMBuildOr(builder, mask, test, "");
1231    }
1232 
1233    if (key->clip_z) {
1234       temp = lp_build_const_int_vec(gallivm, i32_type, 16);
1235       if (key->clip_halfz) {
1236          /* plane 5 */
1237          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
1238          test = LLVMBuildAnd(builder, test, temp, "");
1239          mask = LLVMBuildOr(builder, mask, test, "");
1240       } else {
1241          /* plane 5 */
1242          test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
1243          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
1244          test = LLVMBuildAnd(builder, test, temp, "");
1245          mask = LLVMBuildOr(builder, mask, test, "");
1246       }
1247       /* plane 6 */
1248       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
1249       temp = LLVMBuildShl(builder, temp, shift, "");
1250       test = LLVMBuildAnd(builder, test, temp, "");
1251       mask = LLVMBuildOr(builder, mask, test, "");
1252    }
1253 
1254    if (clip_user) {
1255       LLVMValueRef planes_ptr = draw_vs_jit_context_planes(gallivm, context_type, context_ptr);
1256       LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
1257       LLVMTypeRef planes_type = LLVMArrayType(LLVMArrayType(float_type, 4), DRAW_TOTAL_CLIP_PLANES);
1258       LLVMValueRef indices[3];
1259       LLVMValueRef is_nan_or_inf;
1260 
1261       /* userclip planes */
1262       while (ucp_enable) {
1263          unsigned plane_idx = ffs(ucp_enable)-1;
1264          ucp_enable &= ~(1 << plane_idx);
1265          plane_idx += 6;
1266 
1267          if (have_cd && num_written_clipdistance) {
1268             LLVMValueRef clipdist;
1269             int i;
1270             i = plane_idx - 6;
1271 
1272             *have_clipdist = true;
1273             if (i < 4) {
1274                clipdist = LLVMBuildLoad2(builder, vec_type, outputs[cd[0]][i], "");
1275             } else {
1276                clipdist = LLVMBuildLoad2(builder, vec_type, outputs[cd[1]][i-4], "");
1277             }
1278             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist);
1279             is_nan_or_inf = lp_build_is_inf_or_nan(gallivm, vs_type, clipdist);
1280             test = LLVMBuildOr(builder, test, is_nan_or_inf, "");
1281             temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
1282             test = LLVMBuildAnd(builder, test, temp, "");
1283             mask = LLVMBuildOr(builder, mask, test, "");
1284          } else {
1285             LLVMTypeRef vs_elem_type = lp_build_elem_type(gallivm, vs_type);
1286             LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
1287             LLVMValueRef sum = NULL;
1288             indices[0] = lp_build_const_int32(gallivm, 0);
1289             indices[1] = lp_build_const_int32(gallivm, plane_idx);
1290 
1291             for (int i = 0; i < 4; ++i) {
1292                indices[2] = lp_build_const_int32(gallivm, i);
1293                plane_ptr = LLVMBuildGEP2(builder, planes_type, planes_ptr, indices, 3, "");
1294                plane1 = LLVMBuildLoad2(builder, vs_elem_type, plane_ptr,
1295                                        (const char *[]){"plane_x", "plane_y", "plane_z", "plane_w"}[i]);
1296                planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
1297                if (i == 0) {
1298                   sum = LLVMBuildFMul(builder, planes, cv_x, "");
1299                } else {
1300                   sum = lp_build_fmuladd(builder, planes,
1301                                          (LLVMValueRef[]){cv_x, cv_y, cv_z, cv_w}[i], sum);
1302                }
1303             }
1304 
1305             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
1306             temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
1307             test = LLVMBuildAnd(builder, test, temp, "");
1308             mask = LLVMBuildOr(builder, mask, test, "");
1309          }
1310       }
1311    }
1312    if (key->need_edgeflags) {
1313       /*
1314        * This isn't really part of clipmask but stored the same in vertex
1315        * header later, so do it here.
1316        */
1317       unsigned edge_attr = llvm->draw->vs.edgeflag_output;
1318       LLVMValueRef one = lp_build_const_vec(gallivm, f32_type, 1.0);
1319       LLVMValueRef edgeflag = LLVMBuildLoad2(builder, vec_type, outputs[edge_attr][0], "");
1320       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_EQUAL, one, edgeflag);
1321       temp = lp_build_const_int_vec(gallivm, i32_type,
1322                                     1LL << DRAW_TOTAL_CLIP_PLANES);
1323       test = LLVMBuildAnd(builder, test, temp, "");
1324       mask = LLVMBuildOr(builder, mask, test, "");
1325    }
1326    return mask;
1327 }
1328 
1329 
1330 /**
1331  * Returns boolean if any clipping has occurred
1332  * Used zero/one i8 value to represent boolean
1333  */
1334 static LLVMValueRef
clipmask_booli8(struct gallivm_state * gallivm,const struct lp_type vs_type,LLVMTypeRef clipmask_bool_type,LLVMValueRef clipmask_bool_ptr,bool edgeflag_in_clipmask)1335 clipmask_booli8(struct gallivm_state *gallivm,
1336                 const struct lp_type vs_type,
1337                 LLVMTypeRef clipmask_bool_type,
1338                 LLVMValueRef clipmask_bool_ptr,
1339                 bool edgeflag_in_clipmask)
1340 {
1341    LLVMBuilderRef builder = gallivm->builder;
1342    LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
1343    LLVMValueRef clipmask_bool = LLVMBuildLoad2(builder, clipmask_bool_type, clipmask_bool_ptr, "");
1344    LLVMValueRef ret;
1345    struct lp_build_context bldivec;
1346 
1347    lp_build_context_init(&bldivec, gallivm, lp_int_type(vs_type));
1348 
1349    /*
1350     * We need to invert the edgeflag bit from the clipmask here
1351     * (because the result is really if we want to run the pipeline or not
1352     * and we (may) need it if edgeflag was 0).
1353     */
1354    if (edgeflag_in_clipmask) {
1355       LLVMValueRef edge = lp_build_const_int_vec(gallivm, bldivec.type,
1356                                                  1LL << DRAW_TOTAL_CLIP_PLANES);
1357       clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, "");
1358    }
1359 
1360    /*
1361     * XXX: probably should mask off bits from the mask which come from
1362     * vertices which were beyond the count (i.e. indices_valid for
1363     * linear fetches, for elts ones we don't have the correct mask
1364     * right now). Otherwise might run the pipeline for nothing,
1365     * though everything should still work.
1366     */
1367    ret = lp_build_any_true_range(&bldivec, vs_type.length, clipmask_bool);
1368    ret = LLVMBuildZExt(builder, ret, int8_type, "");
1369    return ret;
1370 }
1371 
1372 
1373 static LLVMValueRef
draw_gs_llvm_fetch_input(const struct lp_build_gs_iface * gs_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,LLVMValueRef swizzle_index)1374 draw_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
1375                          struct lp_build_context * bld,
1376                          bool is_vindex_indirect,
1377                          LLVMValueRef vertex_index,
1378                          bool is_aindex_indirect,
1379                          LLVMValueRef attrib_index,
1380                          LLVMValueRef swizzle_index)
1381 {
1382    const struct draw_gs_llvm_iface *gs = draw_gs_llvm_iface(gs_iface);
1383    struct gallivm_state *gallivm = bld->gallivm;
1384    LLVMBuilderRef builder = gallivm->builder;
1385    LLVMValueRef indices[3];
1386    LLVMValueRef res;
1387    struct lp_type type = bld->type;
1388 
1389    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
1390    LLVMTypeRef channel_vec_type = LLVMVectorType(float_type, TGSI_NUM_CHANNELS);
1391    LLVMTypeRef input_array_type = create_gs_jit_input_type_deref(gallivm);
1392 
1393    if (is_vindex_indirect || is_aindex_indirect) {
1394       res = bld->zero;
1395       for (int i = 0; i < type.length; ++i) {
1396          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
1397          LLVMValueRef vert_chan_index = vertex_index;
1398          LLVMValueRef attr_chan_index = attrib_index;
1399          LLVMValueRef channel_vec, value;
1400 
1401          if (is_vindex_indirect) {
1402             vert_chan_index = LLVMBuildExtractElement(builder,
1403                                                       vertex_index, idx, "");
1404          }
1405          if (is_aindex_indirect) {
1406             attr_chan_index = LLVMBuildExtractElement(builder,
1407                                                       attrib_index, idx, "");
1408          }
1409 
1410          indices[0] = vert_chan_index;
1411          indices[1] = attr_chan_index;
1412          indices[2] = swizzle_index;
1413 
1414          channel_vec = LLVMBuildGEP2(builder, input_array_type, gs->input, indices, 3, "");
1415          channel_vec = LLVMBuildLoad2(builder, channel_vec_type, channel_vec, "");
1416          value = LLVMBuildExtractElement(builder, channel_vec, idx, "");
1417 
1418          res = LLVMBuildInsertElement(builder, res, value, idx, "");
1419       }
1420    } else {
1421       indices[0] = vertex_index;
1422       indices[1] = attrib_index;
1423       indices[2] = swizzle_index;
1424 
1425       res = LLVMBuildGEP2(builder, input_array_type, gs->input, indices, 3, "");
1426       res = LLVMBuildLoad2(builder, channel_vec_type, res, "");
1427    }
1428 
1429    return res;
1430 }
1431 
1432 
1433 static void
draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface * gs_base,struct lp_build_context * bld,LLVMValueRef (* outputs)[4],LLVMValueRef emitted_vertices_vec,LLVMValueRef mask_vec,LLVMValueRef stream_id)1434 draw_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
1435                          struct lp_build_context * bld,
1436                          LLVMValueRef (*outputs)[4],
1437                          LLVMValueRef emitted_vertices_vec,
1438                          LLVMValueRef mask_vec, LLVMValueRef stream_id)
1439 {
1440    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1441    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1442    struct gallivm_state *gallivm = variant->gallivm;
1443    LLVMBuilderRef builder = gallivm->builder;
1444    struct lp_type gs_type = bld->type;
1445    LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
1446                                                   lp_int_type(gs_type), 0);
1447    LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
1448    LLVMValueRef next_prim_offset =
1449       lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);
1450    LLVMValueRef io = variant->io_ptr;
1451    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
1452 
1453    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
1454    for (unsigned i = 0; i < gs_type.length; ++i) {
1455       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1456       LLVMValueRef currently_emitted =
1457          LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
1458       indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
1459       indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
1460       indices[i] = LLVMBuildSelect(builder, LLVMBuildExtractElement(builder, cond, ind, ""), indices[i],
1461                                    lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary - 1), "");
1462    }
1463 
1464    LLVMValueRef stream_idx = LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), "");
1465    LLVMValueRef cnd = LLVMBuildICmp(builder, LLVMIntULT, stream_idx, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");
1466    struct lp_build_if_state if_ctx;
1467    lp_build_if(&if_ctx, gallivm, cnd);
1468    io = lp_build_pointer_get2(builder, variant->vertex_header_ptr_type,
1469                               io, LLVMBuildExtractElement(builder, stream_id, lp_build_const_int32(gallivm, 0), ""));
1470 
1471    if (variant->key.clamp_vertex_color) {
1472       do_clamp_vertex_color(gallivm, gs_type,
1473                             gs_info, outputs);
1474    }
1475    convert_to_aos(gallivm, variant->vertex_header_type,
1476                   io, indices,
1477                   outputs, clipmask,
1478                   gs_info->num_outputs, gs_type,
1479                   -1,
1480                   false);
1481    lp_build_endif(&if_ctx);
1482 }
1483 
1484 
1485 static void
draw_gs_llvm_end_primitive(const struct lp_build_gs_iface * gs_base,struct lp_build_context * bld,LLVMValueRef total_emitted_vertices_vec_ptr,LLVMValueRef verts_per_prim_vec,LLVMValueRef emitted_prims_vec,LLVMValueRef mask_vec,unsigned stream)1486 draw_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
1487                            struct lp_build_context * bld,
1488                            LLVMValueRef total_emitted_vertices_vec_ptr,
1489                            LLVMValueRef verts_per_prim_vec,
1490                            LLVMValueRef emitted_prims_vec,
1491                            LLVMValueRef mask_vec, unsigned stream)
1492 {
1493    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1494    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1495    struct gallivm_state *gallivm = variant->gallivm;
1496    LLVMBuilderRef builder = gallivm->builder;
1497    LLVMValueRef prim_lengts_ptr =
1498       draw_gs_jit_prim_lengths(variant, variant->context_ptr);
1499 
1500    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
1501    for (unsigned i = 0; i < bld->type.length; ++i) {
1502       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
1503       LLVMValueRef prims_emitted =
1504          LLVMBuildExtractElement(builder, emitted_prims_vec, ind, "");
1505       LLVMValueRef store_ptr;
1506       LLVMValueRef num_vertices =
1507          LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, "");
1508 
1509       LLVMValueRef this_cond = LLVMBuildExtractElement(gallivm->builder, cond, ind, "");
1510       struct lp_build_if_state ifthen;
1511       lp_build_if(&ifthen, gallivm, this_cond);
1512       prims_emitted = LLVMBuildMul(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, variant->shader->base.num_vertex_streams), "");
1513       prims_emitted = LLVMBuildAdd(gallivm->builder, prims_emitted, lp_build_const_int32(gallivm, stream), "");
1514       LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
1515       LLVMTypeRef prim_lengths_type = LLVMPointerType(int_type, 0);
1516       store_ptr = LLVMBuildGEP2(builder, prim_lengths_type, prim_lengts_ptr, &prims_emitted, 1, "");
1517       store_ptr = LLVMBuildLoad2(builder, prim_lengths_type, store_ptr, "");
1518       store_ptr = LLVMBuildGEP2(builder, int_type, store_ptr, &ind, 1, "");
1519       LLVMBuildStore(builder, num_vertices, store_ptr);
1520       lp_build_endif(&ifthen);
1521    }
1522 }
1523 
1524 
1525 static void
draw_gs_llvm_epilogue(const struct lp_build_gs_iface * gs_base,LLVMValueRef total_emitted_vertices_vec,LLVMValueRef emitted_prims_vec,unsigned stream)1526 draw_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
1527                       LLVMValueRef total_emitted_vertices_vec,
1528                       LLVMValueRef emitted_prims_vec, unsigned stream)
1529 {
1530    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
1531    struct draw_gs_llvm_variant *variant = gs_iface->variant;
1532    struct gallivm_state *gallivm = variant->gallivm;
1533    LLVMBuilderRef builder = gallivm->builder;
1534    LLVMValueRef emitted_verts_ptr =
1535       draw_gs_jit_emitted_vertices(variant, variant->context_ptr);
1536    LLVMValueRef emitted_prims_ptr =
1537       draw_gs_jit_emitted_prims(variant, variant->context_ptr);
1538    LLVMValueRef stream_val = lp_build_const_int32(gallivm, stream);
1539 
1540    emitted_verts_ptr = LLVMBuildGEP2(builder, LLVMTypeOf(total_emitted_vertices_vec), emitted_verts_ptr, &stream_val, 1, "");
1541    emitted_prims_ptr = LLVMBuildGEP2(builder, LLVMTypeOf(emitted_prims_vec), emitted_prims_ptr, &stream_val, 1, "");
1542 
1543    LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr);
1544    LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr);
1545 }
1546 
1547 
1548 static void
draw_llvm_generate(struct draw_llvm * llvm,struct draw_llvm_variant * variant)1549 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1550 {
1551    struct gallivm_state *gallivm = variant->gallivm;
1552    LLVMContextRef context = gallivm->context;
1553    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
1554    LLVMTypeRef arg_types[14];
1555    unsigned num_arg_types = ARRAY_SIZE(arg_types);
1556    LLVMTypeRef func_type;
1557    LLVMValueRef context_ptr;
1558    LLVMValueRef resources_ptr;
1559    LLVMBasicBlockRef block;
1560    LLVMBuilderRef builder;
1561    char func_name[64];
1562    struct lp_type vs_type;
1563    LLVMValueRef count, fetch_elts, start;
1564    LLVMValueRef vertex_id_offset;
1565    LLVMValueRef stride, step, io_itr;
1566    LLVMValueRef ind_vec, start_vec, have_elts, fetch_max, tmp;
1567    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1568    LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS];
1569    LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS];
1570    LLVMValueRef buffer_size_adj[PIPE_MAX_ATTRIBS];
1571    LLVMValueRef instance_index[PIPE_MAX_ATTRIBS];
1572    LLVMValueRef fake_buf_ptr, fake_buf;
1573 
1574    struct draw_context *draw = llvm->draw;
1575    const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
1576    unsigned i, j;
1577    struct lp_build_context bld, blduivec;
1578    struct lp_build_loop_state lp_loop;
1579    struct lp_build_if_state if_ctx;
1580    const int vector_length = lp_native_vector_width / 32;
1581    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1582    struct lp_build_sampler_soa *sampler = 0;
1583    struct lp_build_image_soa *image = NULL;
1584    LLVMValueRef ret, clipmask_bool_ptr;
1585    struct draw_llvm_variant_key *key = &variant->key;
1586    /* If geometry shader is present we need to skip both the viewport
1587     * transformation and clipping otherwise the inputs to the geometry
1588     * shader will be incorrect.
1589     * The code can't handle vp transform when vs writes vp index neither
1590     * (though this would be fixable here, but couldn't just broadcast
1591     * the values).
1592     */
1593    const bool bypass_viewport = key->has_gs_or_tes || key->bypass_viewport ||
1594                                 vs_info->writes_viewport_index;
1595    const bool enable_cliptest = !key->has_gs_or_tes && (key->clip_xy ||
1596                                                     key->clip_z ||
1597                                                     key->clip_user ||
1598                                                     key->need_edgeflags);
1599    LLVMValueRef variant_func;
1600    const unsigned pos = draw->vs.position_output;
1601    const unsigned cv = draw->vs.clipvertex_output;
1602    bool have_clipdist = false;
1603    struct lp_bld_tgsi_system_values system_values;
1604 
1605    memset(&system_values, 0, sizeof(system_values));
1606    memset(&outputs, 0, sizeof(outputs));
1607    snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant");
1608 
1609    i = 0;
1610    arg_types[i++] = get_context_ptr_type(variant);       /* context */
1611    arg_types[i++] = variant->resources_ptr_type;       /* context */
1612    arg_types[i++] = get_vertex_header_ptr_type(variant); /* vertex_header */
1613    arg_types[i++] = get_buffer_ptr_type(variant);        /* vbuffers */
1614    arg_types[i++] = int32_type;                          /* count */
1615    arg_types[i++] = int32_type;                          /* start/fetch_elt_max */
1616    arg_types[i++] = int32_type;                          /* stride */
1617    arg_types[i++] = get_vb_ptr_type(variant);            /* pipe_vertex_buffer's */
1618    arg_types[i++] = int32_type;                          /* instance_id */
1619    arg_types[i++] = int32_type;                          /* vertex_id_offset */
1620    arg_types[i++] = int32_type;                          /* start_instance */
1621    arg_types[i++] = LLVMPointerType(int32_type, 0);      /* fetch_elts  */
1622    arg_types[i++] = int32_type;                          /* draw_id */
1623    arg_types[i++] = int32_type;                          /* view_id */
1624    assert(i == ARRAY_SIZE(arg_types));
1625 
1626    func_type = LLVMFunctionType(LLVMInt8TypeInContext(context),
1627                                 arg_types, num_arg_types, 0);
1628 
1629    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
1630    variant->function = variant_func;
1631    variant->function_name = MALLOC(strlen(func_name)+1);
1632    strcpy(variant->function_name, func_name);
1633 
1634    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
1635    for (i = 0; i < num_arg_types; ++i)
1636       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1637          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
1638 
1639    if (gallivm->cache && gallivm->cache->data_size) {
1640       gallivm_stub_func(gallivm, variant_func);
1641       return;
1642    }
1643 
1644    context_ptr               = LLVMGetParam(variant_func, 0);
1645    resources_ptr             = LLVMGetParam(variant_func, 1);
1646    io_ptr                    = LLVMGetParam(variant_func, 2);
1647    vbuffers_ptr              = LLVMGetParam(variant_func, 3);
1648    count                     = LLVMGetParam(variant_func, 4);
1649    start                     = LLVMGetParam(variant_func, 5);
1650    /*
1651     * XXX: stride is actually unused. The stride we use is strictly calculated
1652     * from the number of outputs (including the draw_extra outputs).
1653     * Should probably fix some day (we need a new vs just because of extra
1654     * outputs which the generated vs won't touch).
1655     */
1656    stride                    = LLVMGetParam(variant_func, 6);
1657    vb_ptr                    = LLVMGetParam(variant_func, 7);
1658    system_values.instance_id = LLVMGetParam(variant_func, 8);
1659    vertex_id_offset          = LLVMGetParam(variant_func, 9);
1660    system_values.base_instance = LLVMGetParam(variant_func, 10);
1661    fetch_elts                = LLVMGetParam(variant_func, 11);
1662    system_values.draw_id     = LLVMGetParam(variant_func, 12);
1663    system_values.view_index  = LLVMGetParam(variant_func, 13);
1664 
1665    lp_build_name(context_ptr, "context");
1666    lp_build_name(resources_ptr, "resources");
1667    lp_build_name(io_ptr, "io");
1668    lp_build_name(vbuffers_ptr, "vbuffers");
1669    lp_build_name(count, "count");
1670    lp_build_name(start, "start");
1671    lp_build_name(stride, "stride");
1672    lp_build_name(vb_ptr, "vb");
1673    lp_build_name(system_values.instance_id, "instance_id");
1674    lp_build_name(vertex_id_offset, "vertex_id_offset");
1675    lp_build_name(system_values.base_instance, "start_instance");
1676    lp_build_name(fetch_elts, "fetch_elts");
1677    lp_build_name(system_values.draw_id, "draw_id");
1678 
1679    /*
1680     * Function body
1681     */
1682 
1683    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
1684    builder = gallivm->builder;
1685    LLVMPositionBuilderAtEnd(builder, block);
1686 
1687    memset(&vs_type, 0, sizeof vs_type);
1688    vs_type.floating = true; /* floating point values */
1689    vs_type.sign = true;     /* values are signed */
1690    vs_type.norm = false;    /* values are not limited to [0,1] or [-1,1] */
1691    vs_type.width = 32;      /* 32-bit float */
1692    vs_type.length = vector_length;
1693 
1694    lp_build_context_init(&bld, gallivm, lp_type_uint(32));
1695    lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type));
1696 
1697    /* hold temporary "bool" clipmask */
1698    clipmask_bool_ptr = lp_build_alloca(gallivm, blduivec.vec_type, "");
1699 
1700    fake_buf = lp_build_alloca_undef(gallivm,
1701                  LLVMVectorType(LLVMInt64TypeInContext(context), 4), "");
1702    fake_buf = LLVMBuildBitCast(builder, fake_buf,
1703                  LLVMPointerType(LLVMInt8TypeInContext(context), 0), "");
1704    fake_buf_ptr = LLVMBuildGEP2(builder, LLVMInt8TypeInContext(context), fake_buf, &bld.zero, 1, "");
1705 
1706    /* code generated texture sampling */
1707    sampler = lp_bld_llvm_sampler_soa_create(draw_llvm_variant_key_samplers(key),
1708                                           MAX2(key->nr_samplers,
1709                                                key->nr_sampler_views));
1710    image = lp_bld_llvm_image_soa_create(draw_llvm_variant_key_images(key),
1711                                       key->nr_images);
1712 
1713    step = lp_build_const_int32(gallivm, vector_length);
1714 
1715    ind_vec = blduivec.undef;
1716    for (i = 0; i < vs_type.length; i++) {
1717       LLVMValueRef index = lp_build_const_int32(gallivm, i);
1718       ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, "");
1719    }
1720 
1721    have_elts = LLVMBuildICmp(builder, LLVMIntNE,
1722                              LLVMConstPointerNull(arg_types[11]), fetch_elts, "");
1723 
1724    fetch_max = LLVMBuildSub(builder, count, bld.one, "fetch_max");
1725    fetch_max = lp_build_broadcast_scalar(&blduivec, fetch_max);
1726    /*
1727     * Only needed for non-indexed path.
1728     */
1729    start_vec = lp_build_broadcast_scalar(&blduivec, start);
1730 
1731    /*
1732     * Pre-calculate everything which is constant per shader invocation.
1733     */
1734    for (j = 0; j < key->nr_vertex_elements; ++j) {
1735       LLVMValueRef vb_buffer_offset, buffer_size, temp_ptr;
1736       LLVMValueRef vb_info, vbuffer_ptr, buf_offset, ofbit;
1737       struct pipe_vertex_element *velem = &key->vertex_element[j];
1738       LLVMValueRef vb_index =
1739          lp_build_const_int32(gallivm, velem->vertex_buffer_index);
1740       LLVMValueRef bsize = lp_build_const_int32(gallivm,
1741                                                 util_format_get_blocksize(velem->src_format));
1742       LLVMValueRef src_offset = lp_build_const_int32(gallivm,
1743                                                      velem->src_offset);
1744       LLVMValueRef src_stride = lp_build_const_int32(gallivm,
1745                                                      velem->src_stride);
1746       struct lp_build_if_state if_ctx;
1747 
1748       if (velem->src_format != PIPE_FORMAT_NONE) {
1749          vbuffer_ptr = LLVMBuildGEP2(builder, variant->buffer_type, vbuffers_ptr, &vb_index, 1, "");
1750          vb_info = LLVMBuildGEP2(builder, variant->vb_type, vb_ptr, &vb_index, 1, "");
1751          vb_stride[j] = src_stride;
1752          vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, variant->vb_type, vb_info);
1753          map_ptr[j] = draw_jit_dvbuffer_map(gallivm, variant->buffer_type, vbuffer_ptr);
1754          buffer_size = draw_jit_dvbuffer_size(gallivm, variant->buffer_type, vbuffer_ptr);
1755 
1756          ofbit = NULL;
1757          /*
1758           * We'll set buffer_size_adj to zero if we have of, so it will
1759           * always overflow later automatically without having to keep ofbit.
1760           * Overflows (with normal wraparound) doing the actual offset
1761           * calculation should be ok, just not for the buffer size calc.
1762           * It would also be possible to detect such overflows and return
1763           * zeros if that happens, but this would be more complex.
1764           */
1765          buf_offset = lp_build_add(&bld, vb_buffer_offset, src_offset);
1766          tmp = lp_build_sub(&bld, bsize, bld.one);
1767          buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, tmp,
1768                                                      &ofbit);
1769          buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size_adj[j],
1770                                                      buf_offset, &ofbit);
1771 
1772          /*
1773           * We can't easily set fake vertex buffers outside the generated code.
1774           * Hence, set fake vertex buffers here instead basically, so fetch
1775           * code can always fetch using offset 0, eliminating all control flow
1776           * inside the main loop.
1777           * (Alternatively, could have control flow per vector skipping fetch
1778           * if ofbit is true.)
1779           */
1780          if (velem->instance_divisor) {
1781             /*
1782              * Index is equal to the start instance plus the number of current
1783              * instance divided by the divisor. In this case we compute it as:
1784              * index = start_instance + (instance_id  / divisor).
1785              * Note we could actually do the fetch here, outside the loop -
1786              * it's all constant, hopefully llvm recognizes this.
1787              */
1788             LLVMValueRef current_instance;
1789             current_instance = LLVMBuildUDiv(builder, system_values.instance_id,
1790                                              lp_build_const_int32(gallivm,
1791                                                                   velem->instance_divisor),
1792                                              "instance_divisor");
1793             instance_index[j] = lp_build_uadd_overflow(gallivm, system_values.base_instance,
1794                                                        current_instance, &ofbit);
1795          }
1796 
1797          buffer_size_adj[j] = LLVMBuildSelect(builder, ofbit, bld.zero,
1798                                               buffer_size_adj[j], "");
1799 
1800          LLVMTypeRef byte_type = LLVMInt8TypeInContext(context);
1801          LLVMTypeRef byte_ptr_type = LLVMPointerType(byte_type, 0);
1802          temp_ptr = lp_build_alloca_undef(gallivm, byte_ptr_type, "");
1803 
1804          lp_build_if(&if_ctx, gallivm, ofbit);
1805          {
1806             LLVMBuildStore(builder, fake_buf_ptr, temp_ptr);
1807          }
1808          lp_build_else(&if_ctx);
1809          {
1810             map_ptr[j] = LLVMBuildGEP2(builder, byte_type, map_ptr[j], &buf_offset, 1, "");
1811             LLVMBuildStore(builder, map_ptr[j], temp_ptr);
1812          }
1813          lp_build_endif(&if_ctx);
1814          map_ptr[j] = LLVMBuildLoad2(builder, byte_ptr_type, temp_ptr, "map_ptr");
1815 
1816          if (0) {
1817             lp_build_printf(gallivm, "velem %d, vbuf index = %u, vb_stride = %u\n",
1818                             lp_build_const_int32(gallivm, j),
1819                             vb_index, vb_stride[j]);
1820             lp_build_printf(gallivm,
1821                             "   vb_buffer_offset = %u, src_offset = %u, buf_offset = %u\n",
1822                             vb_buffer_offset, src_offset, buf_offset);
1823             lp_build_printf(gallivm, "   buffer size = %u, blocksize = %u\n",
1824                             buffer_size, bsize);
1825             lp_build_printf(gallivm, "   instance_id = %u\n", system_values.instance_id);
1826          }
1827       }
1828    }
1829 
1830    lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
1831    {
1832       LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
1833       LLVMValueRef io;
1834       LLVMValueRef clipmask;   /* holds the clipmask value */
1835       LLVMValueRef true_index_array, index_store;
1836       const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
1837 
1838       io_itr = lp_loop.counter;
1839 
1840       io = LLVMBuildGEP2(builder, variant->vertex_header_type, io_ptr, &io_itr, 1, "");
1841 #if DEBUG_STORE
1842       lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n",
1843                       io_itr, io, lp_loop.counter);
1844 #endif
1845 
1846       true_index_array = lp_build_broadcast_scalar(&blduivec, lp_loop.counter);
1847       true_index_array = LLVMBuildAdd(builder, true_index_array, ind_vec, "");
1848 
1849       LLVMValueRef exec_mask = lp_build_cmp(&blduivec, PIPE_FUNC_LEQUAL, true_index_array, fetch_max);
1850       /*
1851        * Limit indices to fetch_max, otherwise might try to access indices
1852        * beyond index buffer (or rather vsplit elt buffer) size.
1853        * Could probably safely (?) skip this for non-indexed draws and
1854        * simplify things minimally (by removing it could combine the ind_vec
1855        * and start_vec adds). I think the only effect for non-indexed draws will
1856        * be that for the invalid elements they will be all fetched from the
1857        * same location as the last valid one, but noone should really care.
1858        */
1859       true_index_array = lp_build_min(&blduivec, true_index_array, fetch_max);
1860 
1861       index_store = lp_build_alloca_undef(gallivm, blduivec.vec_type, "index_store");
1862 
1863       lp_build_if(&if_ctx, gallivm, have_elts);
1864       {
1865          /*
1866           * Note: you'd expect some comparison/clamp against fetch_elt_max
1867           * here.
1868           * There used to be one here but it was incorrect: overflow was
1869           * detected if index > fetch_elt_max - but the correct condition
1870           * would be index >= fetch_elt_max (since this is just size of elts
1871           * buffer / element size).
1872           * Using the correct condition however will cause failures - due to
1873           * vsplit/vcache code which rebases indices. So, as an example, if
1874           * fetch_elt_max is just 1 and fetch_count 2, vsplit cache will
1875           * replace all invalid indices with 0 - which in case of elt_bias
1876           * not being zero will get a different fetch index than the valid
1877           * index 0. So, just rely on vsplit code preventing out-of-bounds
1878           * fetches. This is also why it's safe to do elts fetch even if there
1879           * was no index buffer bound - the real buffer is never seen here, at
1880           * least not if there are index buffer overflows...
1881           */
1882 
1883          /*
1884           * XXX should not have to do this, as scale can be handled
1885           * natively by loads (hits asserts though).
1886           */
1887          tmp = lp_build_shl_imm(&blduivec, true_index_array, 2);
1888          fetch_elts = LLVMBuildBitCast(builder, fetch_elts,
1889                                        LLVMPointerType(LLVMInt8TypeInContext(context),
1890                                                        0), "");
1891          tmp = lp_build_gather(gallivm, vs_type.length,
1892                                32, bld.type, true,
1893                                fetch_elts, tmp, false);
1894          LLVMBuildStore(builder, tmp, index_store);
1895       }
1896       lp_build_else(&if_ctx);
1897       {
1898          tmp = LLVMBuildAdd(builder, true_index_array, start_vec, "");
1899          LLVMBuildStore(builder, tmp, index_store);
1900       }
1901       lp_build_endif(&if_ctx);
1902 
1903       true_index_array = LLVMBuildLoad2(builder, blduivec.vec_type, index_store, "");
1904 
1905       for (j = 0; j < key->nr_vertex_elements; ++j) {
1906          struct pipe_vertex_element *velem = &key->vertex_element[j];
1907          const struct util_format_description *format_desc =
1908             util_format_description(velem->src_format);
1909 
1910          if (format_desc->format == PIPE_FORMAT_NONE) {
1911             for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
1912                inputs[j][i] = lp_build_zero(gallivm, vs_type);
1913             }
1914          } else if (velem->instance_divisor) {
1915             fetch_instanced(gallivm, format_desc, vs_type,
1916                             vb_stride[j], map_ptr[j],
1917                             buffer_size_adj[j],
1918                             inputs[j], instance_index[j]);
1919          } else {
1920             fetch_vector(gallivm, format_desc, vs_type,
1921                          vb_stride[j], map_ptr[j],
1922                          buffer_size_adj[j],
1923                          inputs[j], true_index_array);
1924          }
1925       }
1926 
1927       struct lp_build_mask_context mask;
1928 
1929       lp_build_mask_begin(&mask, gallivm, vs_type, exec_mask);
1930       /* In the paths with elts vertex id has to be unaffected by the
1931        * index bias and because indices inside our elements array have
1932        * already had index bias applied we need to subtract it here to
1933        * get back to the original index.
1934        * In the linear paths vertex id has to be unaffected by the
1935        * original start index and because we abuse the 'start' variable
1936        * to either represent the actual start index or the index at which
1937        * the primitive was split (we split rendering into chunks of at
1938        * most 4095-vertices) we need to back out the original start
1939        * index out of our vertex id here.
1940        * for ARB_shader_draw_parameters, base_vertex should be 0 for
1941        * non-indexed draws.
1942        */
1943       LLVMValueRef base_vertex = lp_build_select(&bld, have_elts, vertex_id_offset, lp_build_const_int32(gallivm, 0));
1944       system_values.basevertex = lp_build_broadcast_scalar(&blduivec, base_vertex);
1945 
1946       /* first vertex is for Vulkan base vertex support */
1947       LLVMValueRef first_vertex = vertex_id_offset;
1948       system_values.firstvertex = lp_build_broadcast_scalar(&blduivec, first_vertex);
1949 
1950       system_values.vertex_id = true_index_array;
1951       system_values.vertex_id_nobase = LLVMBuildSub(builder, true_index_array,
1952                                                     lp_build_broadcast_scalar(&blduivec, vertex_id_offset), "");
1953 
1954       ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs;
1955       generate_vs(variant,
1956                   builder,
1957                   vs_type,
1958                   outputs,
1959                   ptr_aos,
1960                   &system_values,
1961                   context_ptr,
1962                   resources_ptr,
1963                   sampler,
1964                   image,
1965                   key->clamp_vertex_color,
1966                   &mask);
1967 
1968       lp_build_mask_end(&mask);
1969       if (pos != -1 && cv != -1) {
1970          /* store original positions in clip before further manipulation */
1971          store_clip(gallivm, vs_type, variant->vertex_header_type, io, outputs, pos);
1972 
1973          /* do cliptest */
1974          if (enable_cliptest) {
1975             LLVMValueRef temp = LLVMBuildLoad2(builder, blduivec.vec_type, clipmask_bool_ptr, "");
1976             /* allocate clipmask, assign it integer type */
1977             clipmask = generate_clipmask(llvm,
1978                                          gallivm,
1979                                          vs_type,
1980                                          outputs,
1981                                          key,
1982                                          variant->context_type,
1983                                          context_ptr, &have_clipdist);
1984             temp = LLVMBuildOr(builder, clipmask, temp, "");
1985             /* store temporary clipping boolean value */
1986             LLVMBuildStore(builder, temp, clipmask_bool_ptr);
1987          } else {
1988             clipmask = blduivec.zero;
1989          }
1990 
1991          /* do viewport mapping */
1992          if (!bypass_viewport) {
1993             generate_viewport(variant, builder, vs_type, outputs, context_ptr);
1994          }
1995       } else {
1996          clipmask = blduivec.zero;
1997       }
1998 
1999       /* store clipmask in vertex header,
2000        * original positions in clip
2001        * and transformed positions in data
2002        */
2003       convert_to_aos(gallivm, variant->vertex_header_type, io, NULL, outputs, clipmask,
2004                      vs_info->num_outputs, vs_type, -1,
2005                      enable_cliptest && key->need_edgeflags);
2006    }
2007    lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);
2008 
2009    lp_bld_llvm_sampler_soa_destroy(sampler);
2010    lp_bld_llvm_image_soa_destroy(image);
2011 
2012    /* return clipping boolean value for function */
2013    ret = clipmask_booli8(gallivm, vs_type, blduivec.vec_type, clipmask_bool_ptr,
2014                          enable_cliptest && key->need_edgeflags);
2015 
2016    LLVMBuildRet(builder, ret);
2017 
2018    gallivm_verify_function(gallivm, variant_func);
2019 }
2020 
2021 
2022 struct draw_llvm_variant_key *
draw_llvm_make_variant_key(struct draw_llvm * llvm,char * store)2023 draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
2024 {
2025    struct draw_llvm_variant_key *key;
2026    struct lp_sampler_static_state *draw_sampler;
2027    struct lp_image_static_state *draw_image;
2028 
2029    key = (struct draw_llvm_variant_key *)store;
2030 
2031    memset(key, 0, offsetof(struct draw_llvm_variant_key, vertex_element[0]));
2032 
2033 
2034    /* will have to rig this up properly later */
2035    key->clip_xy = llvm->draw->clip_xy;
2036    key->clip_z = llvm->draw->clip_z;
2037    key->clip_user = llvm->draw->clip_user;
2038    key->bypass_viewport = llvm->draw->bypass_viewport;
2039    key->clip_halfz = llvm->draw->rasterizer->clip_halfz;
2040    /* XXX assumes edgeflag output not at 0 */
2041    key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? true : false);
2042    key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
2043    key->has_gs_or_tes = llvm->draw->gs.geometry_shader != NULL || llvm->draw->tes.tess_eval_shader != NULL;
2044    key->num_outputs = draw_total_vs_outputs(llvm->draw);
2045 
2046    key->clamp_vertex_color = !key->has_gs_or_tes &&
2047       llvm->draw->rasterizer->clamp_vertex_color;
2048 
2049    /* All variants of this shader will have the same value for
2050     * nr_samplers.  Not yet trying to compact away holes in the
2051     * sampler array.
2052     */
2053    key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
2054    if (llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
2055       key->nr_sampler_views =
2056          llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2057    } else {
2058       key->nr_sampler_views = key->nr_samplers;
2059    }
2060 
2061    key->nr_images = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
2062 
2063    /* Presumably all variants of the shader should have the same
2064     * number of vertex elements - ie the number of shader inputs.
2065     * NOTE: we NEED to store the needed number of needed inputs
2066     * here, not the number of provided elements to match keysize
2067     * (and the offset of sampler state in the key).
2068     * If we have excess number of vertex elements, this is valid,
2069     * but the excess ones don't matter.
2070     * If we don't have enough vertex elements (which looks not really
2071     * valid but we'll handle it gracefully) fill out missing ones with
2072     * zero (we'll recognize these later by PIPE_FORMAT_NONE).
2073     */
2074    key->nr_vertex_elements =
2075       llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 1;
2076 
2077    if (llvm->draw->pt.nr_vertex_elements < key->nr_vertex_elements) {
2078       debug_printf("draw: vs with %d inputs but only have %d vertex elements\n",
2079                    key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements);
2080       memset(key->vertex_element, 0,
2081              sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
2082    }
2083    memcpy(key->vertex_element,
2084           llvm->draw->pt.vertex_element,
2085           sizeof(struct pipe_vertex_element) *
2086              MIN2(key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements));
2087 
2088    draw_sampler = draw_llvm_variant_key_samplers(key);
2089    memset(draw_sampler, 0,
2090           MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
2091 
2092    for (unsigned i = 0 ; i < key->nr_samplers; i++) {
2093       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
2094                                       llvm->draw->samplers[PIPE_SHADER_VERTEX][i]);
2095    }
2096    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2097       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
2098                                       llvm->draw->sampler_views[PIPE_SHADER_VERTEX][i]);
2099    }
2100 
2101    draw_image = draw_llvm_variant_key_images(key);
2102    memset(draw_image, 0,
2103           key->nr_images * sizeof *draw_image);
2104    for (unsigned i = 0; i < key->nr_images; i++) {
2105       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
2106                                             llvm->draw->images[PIPE_SHADER_VERTEX][i]);
2107    }
2108    return key;
2109 }
2110 
2111 
2112 void
draw_llvm_dump_variant_key(struct draw_llvm_variant_key * key)2113 draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
2114 {
2115    struct lp_sampler_static_state *sampler = draw_llvm_variant_key_samplers(key);
2116    struct lp_image_static_state *image = draw_llvm_variant_key_images(key);
2117    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
2118    debug_printf("clip_xy = %u\n", key->clip_xy);
2119    debug_printf("clip_z = %u\n", key->clip_z);
2120    debug_printf("clip_user = %u\n", key->clip_user);
2121    debug_printf("bypass_viewport = %u\n", key->bypass_viewport);
2122    debug_printf("clip_halfz = %u\n", key->clip_halfz);
2123    debug_printf("need_edgeflags = %u\n", key->need_edgeflags);
2124    debug_printf("has_gs_or_tes = %u\n", key->has_gs_or_tes);
2125    debug_printf("ucp_enable = %u\n", key->ucp_enable);
2126 
2127    for (unsigned i = 0 ; i < key->nr_vertex_elements; i++) {
2128       debug_printf("vertex_element[%i].src_offset = %u\n", i, key->vertex_element[i].src_offset);
2129       debug_printf("vertex_element[%i].instance_divisor = %u\n", i, key->vertex_element[i].instance_divisor);
2130       debug_printf("vertex_element[%i].vertex_buffer_index = %u\n", i, key->vertex_element[i].vertex_buffer_index);
2131       debug_printf("vertex_element[%i].src_format = %s\n", i, util_format_name(key->vertex_element[i].src_format));
2132    }
2133 
2134    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2135       debug_printf("sampler[%i].src_format = %s\n", i, util_format_name(sampler[i].texture_state.format));
2136    }
2137 
2138    for (unsigned i = 0 ; i < key->nr_images; i++)
2139       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
2140 }
2141 
2142 
2143 void
draw_llvm_set_mapped_texture(struct draw_context * draw,enum pipe_shader_type shader_stage,unsigned sview_idx,uint32_t width,uint32_t height,uint32_t depth,uint32_t first_level,uint32_t last_level,uint32_t num_samples,uint32_t sample_stride,const void * base_ptr,uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])2144 draw_llvm_set_mapped_texture(struct draw_context *draw,
2145                              enum pipe_shader_type shader_stage,
2146                              unsigned sview_idx,
2147                              uint32_t width, uint32_t height, uint32_t depth,
2148                              uint32_t first_level, uint32_t last_level,
2149                              uint32_t num_samples,
2150                              uint32_t sample_stride,
2151                              const void *base_ptr,
2152                              uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
2153                              uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
2154                              uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])
2155 {
2156    struct lp_jit_texture *jit_tex;
2157 
2158    assert(shader_stage < DRAW_MAX_SHADER_STAGE);
2159    assert(sview_idx < ARRAY_SIZE(draw->llvm->jit_resources[shader_stage].textures));
2160 
2161    jit_tex = &draw->llvm->jit_resources[shader_stage].textures[sview_idx];
2162    jit_tex->width = width;
2163    jit_tex->height = height;
2164    jit_tex->depth = depth;
2165    jit_tex->first_level = first_level;
2166    jit_tex->last_level = last_level;
2167    jit_tex->base = base_ptr;
2168    jit_tex->mip_offsets[0] = 0;
2169    if (num_samples > 1) {
2170       jit_tex->mip_offsets[0] = mip_offsets[0];
2171       jit_tex->mip_offsets[LP_JIT_TEXTURE_SAMPLE_STRIDE] = sample_stride;
2172       jit_tex->row_stride[0] = row_stride[0];
2173       jit_tex->img_stride[0] = img_stride[0];
2174       jit_tex->last_level = num_samples;
2175    } else {
2176       for (unsigned j = first_level; j <= last_level; j++) {
2177          jit_tex->mip_offsets[j] = mip_offsets[j];
2178          jit_tex->row_stride[j] = row_stride[j];
2179          jit_tex->img_stride[j] = img_stride[j];
2180       }
2181    }
2182 }
2183 
2184 
2185 void
draw_llvm_set_mapped_image(struct draw_context * draw,enum pipe_shader_type shader_stage,unsigned idx,uint32_t width,uint32_t height,uint32_t depth,const void * base_ptr,uint32_t row_stride,uint32_t img_stride,uint32_t num_samples,uint32_t sample_stride)2186 draw_llvm_set_mapped_image(struct draw_context *draw,
2187                            enum pipe_shader_type shader_stage,
2188                            unsigned idx,
2189                            uint32_t width, uint32_t height, uint32_t depth,
2190                            const void *base_ptr,
2191                            uint32_t row_stride,
2192                            uint32_t img_stride,
2193                            uint32_t num_samples,
2194                            uint32_t sample_stride)
2195 {
2196    struct lp_jit_image *jit_image;
2197 
2198    assert(shader_stage < DRAW_MAX_SHADER_STAGE);
2199    assert(idx < ARRAY_SIZE(draw->llvm->jit_resources[shader_stage].images));
2200 
2201    jit_image = &draw->llvm->jit_resources[shader_stage].images[idx];
2202 
2203    jit_image->width = width;
2204    jit_image->height = height;
2205    jit_image->depth = depth;
2206    jit_image->base = base_ptr;
2207 
2208    jit_image->row_stride = row_stride;
2209    jit_image->img_stride = img_stride;
2210    jit_image->num_samples = num_samples;
2211    jit_image->sample_stride = sample_stride;
2212 }
2213 
2214 
2215 void
draw_llvm_set_sampler_state(struct draw_context * draw,enum pipe_shader_type shader_type)2216 draw_llvm_set_sampler_state(struct draw_context *draw,
2217                             enum pipe_shader_type shader_type)
2218 {
2219    assert(shader_type < DRAW_MAX_SHADER_STAGE);
2220    for (unsigned i = 0; i < draw->num_samplers[shader_type]; i++) {
2221       struct lp_jit_sampler *jit_sam = &draw->llvm->jit_resources[shader_type].samplers[i];
2222 
2223       if (draw->samplers[shader_type][i]) {
2224          const struct pipe_sampler_state *s
2225             = draw->samplers[shader_type][i];
2226          jit_sam->min_lod = s->min_lod;
2227          jit_sam->max_lod = s->max_lod;
2228          jit_sam->lod_bias = s->lod_bias;
2229          jit_sam->max_aniso = s->max_anisotropy;
2230          COPY_4V(jit_sam->border_color, s->border_color.f);
2231       }
2232    }
2233 }
2234 
2235 
2236 void
draw_llvm_destroy_variant(struct draw_llvm_variant * variant)2237 draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
2238 {
2239    struct draw_llvm *llvm = variant->llvm;
2240 
2241    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2242       debug_printf("Deleting VS variant: %u vs variants,\t%u total variants\n",
2243                     variant->shader->variants_cached, llvm->nr_variants);
2244    }
2245 
2246    gallivm_destroy(variant->gallivm);
2247 
2248    list_del(&variant->list_item_local.list);
2249    variant->shader->variants_cached--;
2250    list_del(&variant->list_item_global.list);
2251    llvm->nr_variants--;
2252    if(variant->function_name)
2253       FREE(variant->function_name);
2254    FREE(variant);
2255 }
2256 
2257 
2258 /**
2259  * Create LLVM types for various structures.
2260  */
2261 static void
create_gs_jit_types(struct draw_gs_llvm_variant * var)2262 create_gs_jit_types(struct draw_gs_llvm_variant *var)
2263 {
2264    struct gallivm_state *gallivm = var->gallivm;
2265 
2266    var->context_type = create_gs_jit_context_type(gallivm,
2267                                              var->shader->base.vector_length,
2268                                              "draw_gs_jit_context");
2269    var->context_ptr_type = LLVMPointerType(var->context_type, 0);
2270 
2271    var->resources_type = lp_build_jit_resources_type(gallivm);
2272    var->resources_ptr_type = LLVMPointerType(var->resources_type, 0);
2273    var->input_array_type = create_gs_jit_input_type(gallivm);
2274 }
2275 
2276 
2277 static LLVMTypeRef
get_gs_context_ptr_type(struct draw_gs_llvm_variant * variant)2278 get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant)
2279 {
2280    if (!variant->context_ptr_type)
2281       create_gs_jit_types(variant);
2282    return variant->context_ptr_type;
2283 }
2284 
2285 
2286 static LLVMValueRef
generate_mask_value(struct draw_gs_llvm_variant * variant,struct lp_type gs_type)2287 generate_mask_value(struct draw_gs_llvm_variant *variant,
2288                     struct lp_type gs_type)
2289 {
2290    struct gallivm_state *gallivm = variant->gallivm;
2291    LLVMBuilderRef builder = gallivm->builder;
2292    struct lp_type mask_type = lp_int_type(gs_type);
2293    LLVMValueRef num_prims;
2294    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
2295 
2296    num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),
2297                                   variant->num_prims);
2298    for (unsigned i = 0; i < gs_type.length; i++) {
2299       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2300       mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
2301    }
2302    mask_val = lp_build_compare(gallivm, mask_type,
2303                                PIPE_FUNC_GREATER, num_prims, mask_val);
2304 
2305    return mask_val;
2306 }
2307 
2308 
2309 static void
draw_gs_llvm_generate(struct draw_llvm * llvm,struct draw_gs_llvm_variant * variant)2310 draw_gs_llvm_generate(struct draw_llvm *llvm,
2311                       struct draw_gs_llvm_variant *variant)
2312 {
2313    struct gallivm_state *gallivm = variant->gallivm;
2314    LLVMContextRef context = gallivm->context;
2315    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
2316    LLVMTypeRef arg_types[9];
2317    LLVMTypeRef func_type;
2318    LLVMValueRef variant_func;
2319    LLVMValueRef context_ptr;
2320    LLVMValueRef resources_ptr;
2321    LLVMValueRef prim_id_ptr;
2322    LLVMBasicBlockRef block;
2323    LLVMBuilderRef builder;
2324    LLVMValueRef io_ptr, input_array, num_prims, mask_val;
2325    struct lp_build_sampler_soa *sampler = 0;
2326    struct lp_build_image_soa *image = NULL;
2327    struct lp_build_context bld;
2328    struct lp_bld_tgsi_system_values system_values;
2329    char func_name[64];
2330    struct lp_type gs_type;
2331    struct draw_gs_llvm_iface gs_iface;
2332    const struct tgsi_token *tokens = variant->shader->base.state.tokens;
2333    LLVMValueRef consts_ptr;
2334    LLVMValueRef ssbos_ptr;
2335    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
2336    struct lp_build_mask_context mask;
2337    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
2338    unsigned vector_length = variant->shader->base.vector_length;
2339 
2340    memset(&system_values, 0, sizeof(system_values));
2341    memset(&outputs, 0, sizeof(outputs));
2342 
2343    snprintf(func_name, sizeof(func_name), "draw_llvm_gs_variant");
2344 
2345    assert(variant->vertex_header_ptr_type);
2346 
2347    LLVMTypeRef prim_id_type = LLVMVectorType(int32_type, vector_length);
2348    arg_types[0] = get_gs_context_ptr_type(variant);    /* context */
2349    arg_types[1] = variant->resources_ptr_type;
2350    arg_types[2] = variant->input_array_type;           /* input */
2351    arg_types[3] = LLVMPointerType(variant->vertex_header_ptr_type, 0);     /* vertex_header */
2352    arg_types[4] = int32_type;                          /* num_prims */
2353    arg_types[5] = int32_type;                          /* instance_id */
2354    arg_types[6] = LLVMPointerType(prim_id_type, 0);    /* prim_id_ptr */
2355    arg_types[7] = int32_type;
2356    arg_types[8] = int32_type;
2357 
2358    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
2359 
2360    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
2361 
2362    variant->function = variant_func;
2363    variant->function_name = MALLOC(strlen(func_name)+1);
2364    strcpy(variant->function_name, func_name);
2365 
2366    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
2367 
2368    for (unsigned i = 0; i < ARRAY_SIZE(arg_types); ++i)
2369       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
2370          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
2371 
2372    if (gallivm->cache && gallivm->cache->data_size) {
2373       gallivm_stub_func(gallivm, variant_func);
2374       return;
2375    }
2376 
2377    context_ptr               = LLVMGetParam(variant_func, 0);
2378    resources_ptr             = LLVMGetParam(variant_func, 1);
2379    input_array               = LLVMGetParam(variant_func, 2);
2380    io_ptr                    = LLVMGetParam(variant_func, 3);
2381    num_prims                 = LLVMGetParam(variant_func, 4);
2382    system_values.instance_id = LLVMGetParam(variant_func, 5);
2383    prim_id_ptr               = LLVMGetParam(variant_func, 6);
2384    system_values.invocation_id = LLVMGetParam(variant_func, 7);
2385    system_values.view_index  = LLVMGetParam(variant_func, 8);
2386 
2387    lp_build_name(context_ptr, "context");
2388    lp_build_name(resources_ptr, "resources");
2389    lp_build_name(input_array, "input");
2390    lp_build_name(io_ptr, "io");
2391    lp_build_name(num_prims, "num_prims");
2392    lp_build_name(system_values.instance_id, "instance_id");
2393    lp_build_name(prim_id_ptr, "prim_id_ptr");
2394    lp_build_name(system_values.invocation_id, "invocation_id");
2395    lp_build_name(system_values.view_index, "view_index");
2396 
2397    variant->context_ptr = context_ptr;
2398    variant->io_ptr = io_ptr;
2399    variant->num_prims = num_prims;
2400 
2401    gs_iface.base.fetch_input = draw_gs_llvm_fetch_input;
2402    gs_iface.base.emit_vertex = draw_gs_llvm_emit_vertex;
2403    gs_iface.base.end_primitive = draw_gs_llvm_end_primitive;
2404    gs_iface.base.gs_epilogue = draw_gs_llvm_epilogue;
2405    gs_iface.input = input_array;
2406    gs_iface.variant = variant;
2407 
2408    /*
2409     * Function body
2410     */
2411 
2412    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
2413    builder = gallivm->builder;
2414    LLVMPositionBuilderAtEnd(builder, block);
2415 
2416    lp_build_context_init(&bld, gallivm, lp_type_int(32));
2417 
2418    memset(&gs_type, 0, sizeof gs_type);
2419    gs_type.floating = true; /* floating point values */
2420    gs_type.sign = true;     /* values are signed */
2421    gs_type.norm = false;    /* values are not limited to [0,1] or [-1,1] */
2422    gs_type.width = 32;      /* 32-bit float */
2423    gs_type.length = vector_length;
2424 
2425    consts_ptr = lp_jit_resources_constants(gallivm, variant->resources_type, resources_ptr);
2426 
2427    ssbos_ptr = lp_jit_resources_ssbos(gallivm, variant->resources_type, resources_ptr);
2428 
2429    /* code generated texture sampling */
2430    sampler = lp_bld_llvm_sampler_soa_create(variant->key.samplers,
2431                                           MAX2(variant->key.nr_samplers,
2432                                                variant->key.nr_sampler_views));
2433    image = lp_bld_llvm_image_soa_create(draw_gs_llvm_variant_key_images(&variant->key),
2434                                       variant->key.nr_images);
2435    mask_val = generate_mask_value(variant, gs_type);
2436    lp_build_mask_begin(&mask, gallivm, gs_type, mask_val);
2437 
2438    if (gs_info->uses_primid) {
2439       system_values.prim_id = LLVMBuildLoad2(builder, prim_id_type, prim_id_ptr, "prim_id");
2440    }
2441 
2442    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2443       if (llvm->draw->gs.geometry_shader->state.type == PIPE_SHADER_IR_TGSI)
2444          tgsi_dump(tokens, 0);
2445       else
2446          nir_print_shader(llvm->draw->gs.geometry_shader->state.ir.nir, stderr);
2447       draw_gs_llvm_dump_variant_key(&variant->key);
2448    }
2449 
2450    struct lp_build_tgsi_params params;
2451    memset(&params, 0, sizeof(params));
2452 
2453    params.type = gs_type;
2454    params.mask = &mask;
2455    params.consts_ptr = consts_ptr;
2456    params.system_values = &system_values;
2457    params.context_type = variant->context_type;
2458    params.context_ptr = context_ptr;
2459    params.resources_type = variant->resources_type;
2460    params.resources_ptr = resources_ptr;
2461    params.sampler = sampler;
2462    params.info = &llvm->draw->gs.geometry_shader->info;
2463    params.gs_iface = (const struct lp_build_gs_iface *)&gs_iface;
2464    params.ssbo_ptr = ssbos_ptr;
2465    params.image = image;
2466    params.gs_vertex_streams = variant->shader->base.num_vertex_streams;
2467    params.aniso_filter_table = lp_jit_resources_aniso_filter_table(gallivm,
2468                                                                    variant->resources_type,
2469                                                                    resources_ptr);
2470 
2471 
2472    if (llvm->draw->gs.geometry_shader->state.type == PIPE_SHADER_IR_TGSI)
2473       lp_build_tgsi_soa(variant->gallivm,
2474                         tokens,
2475                         &params,
2476                         outputs);
2477    else
2478       lp_build_nir_soa(variant->gallivm,
2479                        llvm->draw->gs.geometry_shader->state.ir.nir,
2480                        &params,
2481                        outputs);
2482 
2483    lp_bld_llvm_sampler_soa_destroy(sampler);
2484    lp_bld_llvm_image_soa_destroy(image);
2485 
2486    lp_build_mask_end(&mask);
2487 
2488    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
2489 
2490    gallivm_verify_function(gallivm, variant_func);
2491 }
2492 
2493 
2494 struct draw_gs_llvm_variant *
draw_gs_llvm_create_variant(struct draw_llvm * llvm,unsigned num_outputs,const struct draw_gs_llvm_variant_key * key)2495 draw_gs_llvm_create_variant(struct draw_llvm *llvm,
2496                             unsigned num_outputs,
2497                             const struct draw_gs_llvm_variant_key *key)
2498 {
2499    struct draw_gs_llvm_variant *variant;
2500    struct llvm_geometry_shader *shader =
2501       llvm_geometry_shader(llvm->draw->gs.geometry_shader);
2502    char module_name[64];
2503    unsigned char ir_sha1_cache_key[20];
2504    struct lp_cached_code cached = { 0 };
2505    bool needs_caching = false;
2506 
2507    variant = MALLOC(sizeof *variant +
2508                     shader->variant_key_size -
2509                     sizeof variant->key);
2510    if (!variant)
2511       return NULL;
2512 
2513    variant->llvm = llvm;
2514    variant->shader = shader;
2515 
2516    snprintf(module_name, sizeof(module_name), "draw_llvm_gs_variant%u",
2517             variant->shader->variants_cached);
2518 
2519    memcpy(&variant->key, key, shader->variant_key_size);
2520 
2521    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
2522       draw_get_ir_cache_key(shader->base.state.ir.nir,
2523                             key,
2524                             shader->variant_key_size,
2525                             num_outputs,
2526                             ir_sha1_cache_key);
2527 
2528       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
2529                                          &cached,
2530                                          ir_sha1_cache_key);
2531       if (!cached.data_size)
2532          needs_caching = true;
2533    }
2534    variant->gallivm = gallivm_create(module_name, &llvm->context, &cached);
2535 
2536    create_gs_jit_types(variant);
2537 
2538    variant->vertex_header_type = lp_build_create_jit_vertex_header_type(variant->gallivm, num_outputs);
2539    variant->vertex_header_ptr_type = LLVMPointerType(variant->vertex_header_type, 0);
2540 
2541    draw_gs_llvm_generate(llvm, variant);
2542 
2543    gallivm_compile_module(variant->gallivm);
2544 
2545    variant->jit_func = (draw_gs_jit_func)
2546          gallivm_jit_function(variant->gallivm, variant->function, variant->function_name);
2547 
2548    if (needs_caching)
2549       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
2550                                            &cached,
2551                                            ir_sha1_cache_key);
2552    gallivm_free_ir(variant->gallivm);
2553 
2554    variant->list_item_global.base = variant;
2555    variant->list_item_local.base = variant;
2556    /*variant->no = */shader->variants_created++;
2557    variant->list_item_global.base = variant;
2558 
2559    return variant;
2560 }
2561 
2562 
2563 void
draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant * variant)2564 draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant)
2565 {
2566    struct draw_llvm *llvm = variant->llvm;
2567 
2568    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
2569       debug_printf("Deleting GS variant: %u gs variants,\t%u total variants\n",
2570                     variant->shader->variants_cached, llvm->nr_gs_variants);
2571    }
2572 
2573    gallivm_destroy(variant->gallivm);
2574 
2575    list_del(&variant->list_item_local.list);
2576    variant->shader->variants_cached--;
2577    list_del(&variant->list_item_global.list);
2578    llvm->nr_gs_variants--;
2579    if(variant->function_name)
2580       FREE(variant->function_name);
2581    FREE(variant);
2582 }
2583 
2584 
2585 struct draw_gs_llvm_variant_key *
draw_gs_llvm_make_variant_key(struct draw_llvm * llvm,char * store)2586 draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
2587 {
2588    struct draw_gs_llvm_variant_key *key;
2589    struct lp_sampler_static_state *draw_sampler;
2590    struct lp_image_static_state *draw_image;
2591 
2592    key = (struct draw_gs_llvm_variant_key *)store;
2593 
2594    memset(key, 0, offsetof(struct draw_gs_llvm_variant_key, samplers[0]));
2595 
2596    key->num_outputs = draw_total_gs_outputs(llvm->draw);
2597 
2598    key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color;
2599 
2600    /* All variants of this shader will have the same value for
2601     * nr_samplers.  Not yet trying to compact away holes in the
2602     * sampler array.
2603     */
2604    key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
2605    if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
2606       key->nr_sampler_views =
2607          llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2608    } else {
2609       key->nr_sampler_views = key->nr_samplers;
2610    }
2611 
2612    key->nr_images = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
2613 
2614    draw_sampler = key->samplers;
2615 
2616    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
2617 
2618    for (unsigned i = 0 ; i < key->nr_samplers; i++) {
2619       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
2620                                       llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]);
2621    }
2622    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2623       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
2624                                       llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]);
2625    }
2626 
2627    draw_image = draw_gs_llvm_variant_key_images(key);
2628    memset(draw_image, 0,
2629           key->nr_images * sizeof *draw_image);
2630    for (unsigned i = 0; i < key->nr_images; i++) {
2631       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
2632                                             llvm->draw->images[PIPE_SHADER_GEOMETRY][i]);
2633    }
2634    return key;
2635 }
2636 
2637 
2638 void
draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key * key)2639 draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key)
2640 {
2641    struct lp_sampler_static_state *sampler = key->samplers;
2642    struct lp_image_static_state *image = draw_gs_llvm_variant_key_images(key);
2643 
2644    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
2645    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
2646       debug_printf("sampler[%i].src_format = %s\n", i,
2647                    util_format_name(sampler[i].texture_state.format));
2648    }
2649 
2650    for (unsigned i = 0 ; i < key->nr_images; i++)
2651       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
2652 
2653 }
2654 
2655 
2656 static void
create_tcs_jit_types(struct draw_tcs_llvm_variant * var)2657 create_tcs_jit_types(struct draw_tcs_llvm_variant *var)
2658 {
2659    struct gallivm_state *gallivm = var->gallivm;
2660 
2661    var->resources_type = lp_build_jit_resources_type(gallivm);
2662    var->resources_ptr_type = LLVMPointerType(var->resources_type, 0);
2663    var->input_array_type = create_tcs_jit_input_type(gallivm);
2664    var->output_array_type = create_tcs_jit_output_type(gallivm);
2665 }
2666 
2667 
2668 static LLVMTypeRef
get_tcs_resources_ptr_type(struct draw_tcs_llvm_variant * variant)2669 get_tcs_resources_ptr_type(struct draw_tcs_llvm_variant *variant)
2670 {
2671    if (!variant->resources_ptr_type)
2672       create_tcs_jit_types(variant);
2673    return variant->resources_ptr_type;
2674 }
2675 
2676 
2677 static LLVMValueRef
draw_tcs_llvm_emit_fetch_input(const struct lp_build_tcs_iface * tes_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index)2678 draw_tcs_llvm_emit_fetch_input(const struct lp_build_tcs_iface *tes_iface,
2679                                struct lp_build_context *bld,
2680                                bool is_vindex_indirect,
2681                                LLVMValueRef vertex_index,
2682                                bool is_aindex_indirect,
2683                                LLVMValueRef attrib_index,
2684                                bool is_sindex_indirect,
2685                                LLVMValueRef swizzle_index)
2686 {
2687    const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
2688    struct gallivm_state *gallivm = bld->gallivm;
2689    LLVMBuilderRef builder = gallivm->builder;
2690    LLVMValueRef indices[3];
2691    LLVMValueRef res;
2692    struct lp_type type = bld->type;
2693    LLVMTypeRef input_type = create_tcs_jit_input_type_deref(gallivm);
2694    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
2695 
2696    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
2697       res = bld->zero;
2698       for (int i = 0; i < type.length; ++i) {
2699          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2700          LLVMValueRef vert_chan_index = vertex_index;
2701          LLVMValueRef attr_chan_index = attrib_index;
2702          LLVMValueRef swiz_chan_index = swizzle_index;
2703          LLVMValueRef channel_vec;
2704 
2705          if (is_vindex_indirect) {
2706             vert_chan_index = LLVMBuildExtractElement(builder,
2707                                                       vertex_index, idx, "");
2708          }
2709          if (is_aindex_indirect) {
2710             attr_chan_index = LLVMBuildExtractElement(builder,
2711                                                       attrib_index, idx, "");
2712          }
2713          if (is_sindex_indirect) {
2714             swiz_chan_index = LLVMBuildExtractElement(builder,
2715                                                       swizzle_index, idx, "");
2716          }
2717 
2718          indices[0] = vert_chan_index;
2719          indices[1] = attr_chan_index;
2720          indices[2] = swiz_chan_index;
2721 
2722          channel_vec = LLVMBuildGEP2(builder, input_type, tcs->input, indices, 3, "");
2723          channel_vec = LLVMBuildLoad2(builder, float_type, channel_vec, "");
2724          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
2725       }
2726    } else {
2727       indices[0] = vertex_index;
2728       indices[1] = attrib_index;
2729       indices[2] = swizzle_index;
2730       res = LLVMBuildGEP2(builder, input_type, tcs->input, indices, 3, "");
2731       res = LLVMBuildLoad2(builder, float_type, res, "");
2732       res = lp_build_broadcast_scalar(bld, res);
2733    }
2734    return res;
2735 }
2736 
2737 
2738 static LLVMValueRef
draw_tcs_llvm_emit_fetch_output(const struct lp_build_tcs_iface * tes_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index,uint32_t name)2739 draw_tcs_llvm_emit_fetch_output(const struct lp_build_tcs_iface *tes_iface,
2740                                 struct lp_build_context *bld,
2741                                 bool is_vindex_indirect,
2742                                 LLVMValueRef vertex_index,
2743                                 bool is_aindex_indirect,
2744                                 LLVMValueRef attrib_index,
2745                                 bool is_sindex_indirect,
2746                                 LLVMValueRef swizzle_index,
2747                                 uint32_t name)
2748 {
2749    const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
2750    struct gallivm_state *gallivm = bld->gallivm;
2751    LLVMBuilderRef builder = gallivm->builder;
2752    LLVMValueRef indices[3];
2753    LLVMValueRef res;
2754    struct lp_type type = bld->type;
2755    LLVMTypeRef output_type = create_tcs_jit_output_type_deref(gallivm);
2756    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
2757 
2758    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
2759       res = bld->zero;
2760       for (int i = 0; i < type.length; ++i) {
2761          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2762          LLVMValueRef vert_chan_index = vertex_index;
2763          LLVMValueRef attr_chan_index = attrib_index;
2764          LLVMValueRef swiz_chan_index = swizzle_index;
2765          LLVMValueRef channel_vec;
2766 
2767          if (is_vindex_indirect) {
2768             vert_chan_index = LLVMBuildExtractElement(builder,
2769                                                       vertex_index, idx, "");
2770          }
2771          if (is_aindex_indirect) {
2772             attr_chan_index = LLVMBuildExtractElement(builder,
2773                                                       attrib_index, idx, "");
2774          }
2775          if (is_sindex_indirect) {
2776             swiz_chan_index = LLVMBuildExtractElement(builder,
2777                                                       swizzle_index, idx, "");
2778          }
2779 
2780          indices[0] = vert_chan_index;
2781          indices[1] = attr_chan_index;
2782          indices[2] = swiz_chan_index;
2783 
2784          channel_vec = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2785          channel_vec = LLVMBuildLoad2(builder, float_type, channel_vec, "");
2786 
2787          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
2788       }
2789    } else {
2790       indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
2791       indices[1] = attrib_index;
2792       indices[2] = swizzle_index;
2793 
2794       res = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2795       res = LLVMBuildLoad2(builder, float_type, res, "");
2796       res = lp_build_broadcast_scalar(bld, res);
2797    }
2798    return res;
2799 }
2800 
2801 
2802 static void
draw_tcs_llvm_emit_store_output(const struct lp_build_tcs_iface * tes_iface,struct lp_build_context * bld,unsigned name,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index,LLVMValueRef value,LLVMValueRef mask_vec)2803 draw_tcs_llvm_emit_store_output(const struct lp_build_tcs_iface *tes_iface,
2804                                 struct lp_build_context *bld,
2805                                 unsigned name,
2806                                 bool is_vindex_indirect,
2807                                 LLVMValueRef vertex_index,
2808                                 bool is_aindex_indirect,
2809                                 LLVMValueRef attrib_index,
2810                                 bool is_sindex_indirect,
2811                                 LLVMValueRef swizzle_index,
2812                                 LLVMValueRef value,
2813                                 LLVMValueRef mask_vec)
2814 {
2815    const struct draw_tcs_llvm_iface *tcs = draw_tcs_llvm_iface(tes_iface);
2816    struct gallivm_state *gallivm = bld->gallivm;
2817    LLVMBuilderRef builder = gallivm->builder;
2818    LLVMValueRef indices[3];
2819    LLVMValueRef res;
2820    struct lp_type type = bld->type;
2821    LLVMTypeRef output_type = create_tcs_jit_output_type_deref(gallivm);
2822 
2823    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
2824       for (int i = 0; i < type.length; ++i) {
2825          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2826          LLVMValueRef vert_chan_index = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
2827          LLVMValueRef attr_chan_index = attrib_index;
2828          LLVMValueRef swiz_chan_index = swizzle_index;
2829          LLVMValueRef channel_vec;
2830 
2831          if (is_vindex_indirect) {
2832             vert_chan_index = LLVMBuildExtractElement(builder,
2833                                                       vertex_index, idx, "");
2834          }
2835          if (is_aindex_indirect) {
2836             attr_chan_index = LLVMBuildExtractElement(builder,
2837                                                       attrib_index, idx, "");
2838          }
2839 
2840          if (is_sindex_indirect) {
2841             swiz_chan_index = LLVMBuildExtractElement(builder,
2842                                                       swizzle_index, idx, "");
2843          }
2844 
2845          indices[0] = vert_chan_index;
2846          indices[1] = attr_chan_index;
2847          indices[2] = swiz_chan_index;
2848 
2849          channel_vec = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2850 
2851          res = LLVMBuildExtractElement(builder, value, idx, "");
2852 
2853          struct lp_build_if_state ifthen;
2854          LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
2855          cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
2856          lp_build_if(&ifthen, gallivm, cond);
2857          LLVMBuildStore(builder, res, channel_vec);
2858          lp_build_endif(&ifthen);
2859       }
2860    } else {
2861       indices[0] = vertex_index ? vertex_index : lp_build_const_int32(gallivm, 0);
2862       indices[1] = attrib_index;
2863       indices[2] = swizzle_index;
2864 
2865       res = LLVMBuildGEP2(builder, output_type, tcs->output, indices, 3, "");
2866       for (unsigned i = 0; i < type.length; ++i) {
2867          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2868          LLVMValueRef val = LLVMBuildExtractElement(builder, value, idx, "");
2869 
2870          struct lp_build_if_state ifthen;
2871          LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, mask_vec, lp_build_const_int_vec(gallivm, bld->type, 0), "");
2872          cond = LLVMBuildExtractElement(gallivm->builder, cond, idx, "");
2873          lp_build_if(&ifthen, gallivm, cond);
2874          LLVMBuildStore(builder, val, res);
2875          lp_build_endif(&ifthen);
2876       }
2877    }
2878 }
2879 
2880 
2881 static LLVMValueRef
generate_tcs_mask_value(struct draw_tcs_llvm_variant * variant,struct lp_type tcs_type,LLVMValueRef limit,LLVMValueRef loop_counter)2882 generate_tcs_mask_value(struct draw_tcs_llvm_variant *variant,
2883                         struct lp_type tcs_type, LLVMValueRef limit, LLVMValueRef loop_counter)
2884 {
2885    struct gallivm_state *gallivm = variant->gallivm;
2886    LLVMBuilderRef builder = gallivm->builder;
2887    struct lp_type mask_type = lp_int_type(tcs_type);
2888    LLVMValueRef num_vecs;
2889    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
2890 
2891    num_vecs = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);
2892    for (unsigned i = 0; i < tcs_type.length; i++) {
2893       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
2894       mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");
2895    }
2896    mask_val = lp_build_compare(gallivm, mask_type,
2897                                PIPE_FUNC_GREATER, num_vecs, mask_val);
2898 
2899    return mask_val;
2900 }
2901 
2902 
/**
 * Emit the LLVM IR for one tessellation-control shader variant.
 *
 * Two functions are added to the variant's module:
 *  - "draw_llvm_tcs_variant", stored in variant->function.  It takes six
 *    arguments and runs two nested loops: the inner one walks the vectors of
 *    invocations, the outer one is the coroutine re-entry loop.  On the first
 *    outer pass the coroutine is called and its handle stored; on later
 *    passes the handle is either resumed or, once done, destroyed.  The
 *    function returns zero.
 *  - "draw_llvm_tcs_coro_variant", the coroutine.  It takes the same
 *    arguments plus the inner-loop counter and contains the shader body
 *    produced by lp_build_nir_soa().
 *
 * When gallivm->cache already holds data, both functions are passed to
 * gallivm_stub_func() and no bodies are emitted.
 *
 * \param llvm     draw LLVM state; supplies the current TCS (info and NIR)
 * \param variant  variant being generated; function and function_name are
 *                 filled in here
 */
2903 static void
draw_tcs_llvm_generate(struct draw_llvm * llvm,struct draw_tcs_llvm_variant * variant)2904 draw_tcs_llvm_generate(struct draw_llvm *llvm,
2905                        struct draw_tcs_llvm_variant *variant)
2906 {
2907    struct gallivm_state *gallivm = variant->gallivm;
2908    LLVMContextRef context = gallivm->context;
2909    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
2910    LLVMTypeRef arg_types[7];
2911    LLVMTypeRef func_type, coro_func_type;
2912    LLVMValueRef variant_func, variant_coro;
2913    LLVMValueRef resources_ptr;
2914    LLVMValueRef view_index;
2915    LLVMValueRef input_array, output_array, prim_id, patch_vertices_in;
2916    LLVMValueRef mask_val;
2917    LLVMBasicBlockRef block;
2918    LLVMBuilderRef builder;
2919    struct lp_build_context bld, bldvec;
2920    struct lp_build_sampler_soa *sampler = 0;
2921    struct lp_build_image_soa *image = NULL;
2922    struct lp_bld_tgsi_system_values system_values;
2923    char func_name[64], func_name_coro[64];
2924    struct draw_tcs_llvm_iface tcs_iface;
2925    struct lp_build_mask_context mask;
2926    LLVMValueRef consts_ptr;
2927    LLVMValueRef ssbos_ptr;
2928    struct lp_type tcs_type;
2929    unsigned vector_length = variant->shader->base.vector_length;
2930 
2931    memset(&system_values, 0, sizeof(system_values));
2932 
2933    snprintf(func_name, sizeof(func_name), "draw_llvm_tcs_variant");
2934 
2935    snprintf(func_name_coro, sizeof(func_name_coro), "draw_llvm_tcs_coro_variant");
2936 
2937    arg_types[0] = get_tcs_resources_ptr_type(variant);    /* context */
2938    arg_types[1] = variant->input_array_type;           /* input */
2939    arg_types[2] = variant->output_array_type;
2940    arg_types[3] = int32_type;
2941    arg_types[4] = int32_type;
2942    arg_types[5] = int32_type;
2943    arg_types[6] = int32_type; /* coroutine only */
2944 
   /* The plain entry point omits the last argument; only the coroutine
    * receives all seven.
    */
2945    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types) - 1, 0);
2946 
2947    coro_func_type = LLVMFunctionType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), arg_types, ARRAY_SIZE(arg_types), 0);
2948 
2949    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
2950 
2951    variant_coro = LLVMAddFunction(gallivm->module, func_name_coro, coro_func_type);
2952 
2953    variant->function = variant_func;
2954    variant->function_name = MALLOC(strlen(func_name)+1);
2955    strcpy(variant->function_name, func_name);
2956    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
2957 
2958    LLVMSetFunctionCallConv(variant_coro, LLVMCCallConv);
2959 
2960    lp_build_coro_add_presplit(variant_coro);
2961 
   /* Tag every pointer-typed argument with NOALIAS on both functions. */
2962    for (unsigned i = 0; i < ARRAY_SIZE(arg_types); ++i) {
2963       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
2964          lp_add_function_attr(variant_coro, i + 1, LP_FUNC_ATTR_NOALIAS);
2965          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
2966       }
2967    }
2968 
   /* The cache already holds data: hand both functions to
    * gallivm_stub_func() and return without emitting any body.
    */
2969    if (gallivm->cache && gallivm->cache->data_size) {
2970       gallivm_stub_func(gallivm, variant_func);
2971       gallivm_stub_func(gallivm, variant_coro);
2972       return;
2973    }
2974 
2975    resources_ptr               = LLVMGetParam(variant_func, 0);
2976    input_array               = LLVMGetParam(variant_func, 1);
2977    output_array              = LLVMGetParam(variant_func, 2);
2978    prim_id                   = LLVMGetParam(variant_func, 3);
2979    patch_vertices_in         = LLVMGetParam(variant_func, 4);
2980    view_index                = LLVMGetParam(variant_func, 5);
2981 
2982    lp_build_name(resources_ptr, "resources");
2983    lp_build_name(input_array, "input");
2984    lp_build_name(output_array, "output");
2985    lp_build_name(prim_id, "prim_id");
2986    lp_build_name(patch_vertices_in, "patch_vertices_in");
2987    lp_build_name(view_index, "view_index");
2988 
2989    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
2990    builder = gallivm->builder;
2991    LLVMPositionBuilderAtEnd(builder, block);
2992 
2993    lp_build_context_init(&bld, gallivm, lp_type_int(32));
2994 
2995    memset(&tcs_type, 0, sizeof tcs_type);
2996    tcs_type.floating = true; /* floating point values */
2997    tcs_type.sign = true;     /* values are signed */
2998    tcs_type.norm = false;    /* values are not limited to [0,1] or [-1,1] */
2999    tcs_type.width = 32;      /* 32-bit float */
3000    tcs_type.length = vector_length;
3001 
3002    lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tcs_type));
3003 
3004    LLVMValueRef count = lp_build_const_int32(gallivm, variant->shader->base.vertices_out);
3005    LLVMValueRef step = lp_build_const_int32(gallivm, vector_length);
3006 
3007    struct lp_build_loop_state loop_state[2];
3008    LLVMValueRef num_inner_loop;
   /* Number of vector-wide passes: vertices_out aligned to the vector length
    * (util_align_npot, presumably rounding up), divided by that length.
    */
3009    unsigned count_align = util_align_npot(variant->shader->base.vertices_out, tcs_type.length);
3010    num_inner_loop = lp_build_const_int32(gallivm, count_align / tcs_type.length);
3011    LLVMTypeRef hdl_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
   /* Array with one coroutine-handle slot per inner-loop iteration. */
3012    LLVMValueRef coro_hdls = LLVMBuildArrayAlloca(gallivm->builder, hdl_ptr_type, num_inner_loop, "coro_hdls");
3013    unsigned end_coroutine = INT_MAX;
3014    lp_build_loop_begin(&loop_state[1], gallivm,
3015                        lp_build_const_int32(gallivm, 0)); /* coroutine reentry loop */
3016    lp_build_loop_begin(&loop_state[0], gallivm,
3017                        lp_build_const_int32(gallivm, 0)); /* inner loop */
3018    {
3019       LLVMValueRef args[7];
3020       args[0] = resources_ptr;
3021       args[1] = input_array;
3022       args[2] = output_array;
3023       args[3] = prim_id;
3024       args[4] = patch_vertices_in;
3025       args[5] = view_index;
3026       args[6] = loop_state[0].counter;
3027       LLVMValueRef coro_entry = LLVMBuildGEP2(builder, hdl_ptr_type, coro_hdls, &loop_state[0].counter, 1, "");
3028       LLVMValueRef coro_hdl = LLVMBuildLoad2(builder, hdl_ptr_type, coro_entry, "coro_hdl");
3029 
3030       struct lp_build_if_state ifstate;
3031       LLVMValueRef cmp = LLVMBuildICmp(builder, LLVMIntEQ, loop_state[1].counter,
3032                                        lp_build_const_int32(gallivm, 0), "");
3033       /* first time here - call the coroutine function entry point */
3034       lp_build_if(&ifstate, gallivm, cmp);
3035       LLVMValueRef coro_ret = LLVMBuildCall2(builder, coro_func_type, variant_coro, args, 7, "");
3036       LLVMBuildStore(builder, coro_ret, coro_entry);
3037       lp_build_else(&ifstate);
3038       /* subsequent calls for this invocation - check if done. */
3039       LLVMValueRef coro_done = lp_build_coro_done(gallivm, coro_hdl);
3040       struct lp_build_if_state ifstate2;
3041       lp_build_if(&ifstate2, gallivm, coro_done);
3042       /* if done destroy and force loop exit */
3043       lp_build_coro_destroy(gallivm, coro_hdl);
3044       lp_build_loop_force_set_counter(&loop_state[1], lp_build_const_int32(gallivm, end_coroutine - 1));
3045       lp_build_else(&ifstate2);
3046       /* otherwise resume the coroutine */
3047       lp_build_coro_resume(gallivm, coro_hdl);
3048       lp_build_endif(&ifstate2);
3049       lp_build_endif(&ifstate);
3050       lp_build_loop_force_reload_counter(&loop_state[1]);
3051    }
3052    lp_build_loop_end_cond(&loop_state[0],
3053                           num_inner_loop,
3054                           NULL,  LLVMIntUGE);
3055    lp_build_loop_end_cond(&loop_state[1],
3056                           lp_build_const_int32(gallivm, end_coroutine),
3057                           NULL, LLVMIntEQ);
3058    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
3059 
   /* From here on IR is emitted into the coroutine function. */
3060    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "entry");
3061    LLVMPositionBuilderAtEnd(builder, block);
3062 
3063    resources_ptr = LLVMGetParam(variant_coro, 0);
3064    input_array = LLVMGetParam(variant_coro, 1);
3065    output_array = LLVMGetParam(variant_coro, 2);
3066    prim_id = LLVMGetParam(variant_coro, 3);
3067    patch_vertices_in = LLVMGetParam(variant_coro, 4);
3068    view_index = LLVMGetParam(variant_coro, 5);
3069 
3070    consts_ptr = lp_jit_resources_constants(gallivm, variant->resources_type, resources_ptr);
3071 
3072    ssbos_ptr = lp_jit_resources_ssbos(gallivm, variant->resources_type, resources_ptr);
3073    sampler = lp_bld_llvm_sampler_soa_create(variant->key.samplers,
3074                                           MAX2(variant->key.nr_samplers,
3075                                                variant->key.nr_sampler_views));
3076    image = lp_bld_llvm_image_soa_create(draw_tcs_llvm_variant_key_images(&variant->key),
3077                                       variant->key.nr_images);
3078 
   /* Parameter 6 is the inner-loop counter the entry function passes as
    * args[6].  Lane i of invocvec gets counter * step + i.
    */
3079    LLVMValueRef counter = LLVMGetParam(variant_coro, 6);
3080    LLVMValueRef invocvec = LLVMGetUndef(LLVMVectorType(int32_type, vector_length));
3081    for (unsigned i = 0; i < vector_length; i++) {
3082       LLVMValueRef loop_iter = lp_build_const_int32(gallivm, i);
3083       LLVMValueRef idx = LLVMBuildAdd(builder, LLVMBuildMul(builder, counter, step, ""), loop_iter, "");
3084       invocvec = LLVMBuildInsertElement(builder, invocvec, idx, loop_iter, "");
3085    }
3086 
3087    system_values.invocation_id = invocvec;
3088    system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);
3089    system_values.view_index = view_index;
3090    system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);
3091    tcs_iface.input = input_array;
3092    tcs_iface.output = output_array;
3093    tcs_iface.base.emit_fetch_input = draw_tcs_llvm_emit_fetch_input;
3094    tcs_iface.base.emit_fetch_output = draw_tcs_llvm_emit_fetch_output;
3095    tcs_iface.base.emit_store_output = draw_tcs_llvm_emit_store_output;
3096 
3097 
3098    {
3099       LLVMValueRef coro_id = lp_build_coro_id(gallivm);
3100       LLVMValueRef coro_hdl = lp_build_coro_begin_alloc_mem(gallivm, coro_id);
3101 
      /* Execution mask from count and the first invocation index of this
       * vector (counter * step).
       */
3102       mask_val = generate_tcs_mask_value(variant, tcs_type, count, LLVMBuildMul(builder, counter, step, ""));
3103       lp_build_mask_begin(&mask, gallivm, tcs_type, mask_val);
3104 
3105       struct lp_build_coro_suspend_info coro_info;
3106 
3107       LLVMBasicBlockRef sus_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "suspend");
3108       LLVMBasicBlockRef clean_block = LLVMAppendBasicBlockInContext(gallivm->context, variant_coro, "cleanup");
3109 
3110       coro_info.suspend = sus_block;
3111       coro_info.cleanup = clean_block;
3112 
3113       struct lp_build_tgsi_params params;
3114       memset(&params, 0, sizeof(params));
3115 
3116       params.type = tcs_type;
3117       params.mask = &mask;
3118       params.consts_ptr = consts_ptr;
3119       params.system_values = &system_values;
3120       params.resources_type = variant->resources_type;
3121       params.resources_ptr = resources_ptr;
3122       params.sampler = sampler;
3123       params.info = &llvm->draw->tcs.tess_ctrl_shader->info;
3124       params.ssbo_ptr = ssbos_ptr;
3125       params.image = image;
3126       params.coro = &coro_info;
3127       params.tcs_iface = &tcs_iface.base;
3128       params.aniso_filter_table = lp_jit_resources_aniso_filter_table(gallivm,
3129                                                                       variant->resources_type,
3130                                                                       resources_ptr);
3131 
3132       lp_build_nir_soa(variant->gallivm,
3133                        llvm->draw->tcs.tess_ctrl_shader->state.ir.nir,
3134                        &params, NULL);
3135 
3136       lp_build_mask_end(&mask);
3137 
3138       lp_build_coro_suspend_switch(gallivm, &coro_info, NULL, true);
      /* Cleanup block frees the coroutine memory, then branches to the
       * suspend block, which ends the coroutine and returns its handle.
       */
3139       LLVMPositionBuilderAtEnd(builder, clean_block);
3140 
3141       lp_build_coro_free_mem(gallivm, coro_id, coro_hdl);
3142 
3143       LLVMBuildBr(builder, sus_block);
3144       LLVMPositionBuilderAtEnd(builder, sus_block);
3145 
3146       lp_build_coro_end(gallivm, coro_hdl);
3147       LLVMBuildRet(builder, coro_hdl);
3148    }
3149 
3150    lp_bld_llvm_sampler_soa_destroy(sampler);
3151    lp_bld_llvm_image_soa_destroy(image);
3152    gallivm_verify_function(gallivm, variant_func);
3153    gallivm_verify_function(gallivm, variant_coro);
3154 }
3155 
3156 
3157 struct draw_tcs_llvm_variant *
draw_tcs_llvm_create_variant(struct draw_llvm * llvm,unsigned num_outputs,const struct draw_tcs_llvm_variant_key * key)3158 draw_tcs_llvm_create_variant(struct draw_llvm *llvm,
3159                              unsigned num_outputs,
3160                              const struct draw_tcs_llvm_variant_key *key)
3161 {
3162    struct draw_tcs_llvm_variant *variant;
3163    struct llvm_tess_ctrl_shader *shader = llvm_tess_ctrl_shader(llvm->draw->tcs.tess_ctrl_shader);
3164    char module_name[64];
3165    unsigned char ir_sha1_cache_key[20];
3166    struct lp_cached_code cached = { 0 };
3167    bool needs_caching = false;
3168 
3169    variant = MALLOC(sizeof *variant +
3170                     shader->variant_key_size - sizeof variant->key);
3171    if (!variant)
3172       return NULL;
3173 
3174    variant->llvm = llvm;
3175    variant->shader = shader;
3176 
3177    snprintf(module_name, sizeof(module_name), "draw_llvm_tcs_variant%u",
3178             variant->shader->variants_cached);
3179 
3180    memcpy(&variant->key, key, shader->variant_key_size);
3181 
3182    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
3183       draw_get_ir_cache_key(shader->base.state.ir.nir,
3184                             key,
3185                             shader->variant_key_size,
3186                             num_outputs,
3187                             ir_sha1_cache_key);
3188 
3189       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
3190                                          &cached,
3191                                          ir_sha1_cache_key);
3192       if (!cached.data_size)
3193          needs_caching = true;
3194    }
3195 
3196    variant->gallivm = gallivm_create(module_name, &llvm->context, &cached);
3197 
3198    create_tcs_jit_types(variant);
3199 
3200    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3201       nir_print_shader(llvm->draw->tcs.tess_ctrl_shader->state.ir.nir, stderr);
3202       draw_tcs_llvm_dump_variant_key(&variant->key);
3203    }
3204 
3205    draw_tcs_llvm_generate(llvm, variant);
3206 
3207    gallivm_compile_module(variant->gallivm);
3208 
3209    variant->jit_func = (draw_tcs_jit_func)
3210       gallivm_jit_function(variant->gallivm, variant->function, variant->function_name);
3211 
3212    if (needs_caching)
3213       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
3214                                            &cached,
3215                                            ir_sha1_cache_key);
3216    gallivm_free_ir(variant->gallivm);
3217 
3218    variant->list_item_global.base = variant;
3219    variant->list_item_local.base = variant;
3220    /*variant->no = */shader->variants_created++;
3221    variant->list_item_global.base = variant;
3222 
3223    return variant;
3224 }
3225 
3226 
3227 void
draw_tcs_llvm_destroy_variant(struct draw_tcs_llvm_variant * variant)3228 draw_tcs_llvm_destroy_variant(struct draw_tcs_llvm_variant *variant)
3229 {
3230    struct draw_llvm *llvm = variant->llvm;
3231 
3232    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3233       debug_printf("Deleting TCS variant: %u tcs variants,\t%u total variants\n",
3234                     variant->shader->variants_cached, llvm->nr_tcs_variants);
3235    }
3236 
3237    gallivm_destroy(variant->gallivm);
3238 
3239    list_del(&variant->list_item_local.list);
3240    variant->shader->variants_cached--;
3241    list_del(&variant->list_item_global.list);
3242    llvm->nr_tcs_variants--;
3243    if(variant->function_name)
3244       FREE(variant->function_name);
3245    FREE(variant);
3246 }
3247 
3248 
3249 struct draw_tcs_llvm_variant_key *
draw_tcs_llvm_make_variant_key(struct draw_llvm * llvm,char * store)3250 draw_tcs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
3251 {
3252    unsigned i;
3253    struct draw_tcs_llvm_variant_key *key;
3254    struct lp_sampler_static_state *draw_sampler;
3255    struct lp_image_static_state *draw_image;
3256 
3257    key = (struct draw_tcs_llvm_variant_key *)store;
3258 
3259    memset(key, 0, offsetof(struct draw_tcs_llvm_variant_key, samplers[0]));
3260 
3261    /* All variants of this shader will have the same value for
3262     * nr_samplers.  Not yet trying to compact away holes in the
3263     * sampler array.
3264     */
3265    key->nr_samplers = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
3266    if (llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
3267       key->nr_sampler_views =
3268          llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
3269    } else {
3270       key->nr_sampler_views = key->nr_samplers;
3271    }
3272 
3273    key->nr_images = llvm->draw->tcs.tess_ctrl_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
3274 
3275    draw_sampler = key->samplers;
3276 
3277    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
3278 
3279    for (i = 0 ; i < key->nr_samplers; i++) {
3280       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
3281                                       llvm->draw->samplers[PIPE_SHADER_TESS_CTRL][i]);
3282    }
3283    for (i = 0 ; i < key->nr_sampler_views; i++) {
3284       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
3285                                       llvm->draw->sampler_views[PIPE_SHADER_TESS_CTRL][i]);
3286    }
3287 
3288    draw_image = draw_tcs_llvm_variant_key_images(key);
3289    memset(draw_image, 0,
3290           key->nr_images * sizeof *draw_image);
3291    for (i = 0; i < key->nr_images; i++) {
3292       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
3293                                             llvm->draw->images[PIPE_SHADER_TESS_CTRL][i]);
3294    }
3295    return key;
3296 }
3297 
3298 
3299 void
draw_tcs_llvm_dump_variant_key(struct draw_tcs_llvm_variant_key * key)3300 draw_tcs_llvm_dump_variant_key(struct draw_tcs_llvm_variant_key *key)
3301 {
3302    struct lp_sampler_static_state *sampler = key->samplers;
3303    struct lp_image_static_state *image = draw_tcs_llvm_variant_key_images(key);
3304    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
3305       debug_printf("sampler[%i].src_format = %s\n", i,
3306                    util_format_name(sampler[i].texture_state.format));
3307    }
3308 
3309    for (unsigned i = 0 ; i < key->nr_images; i++)
3310       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
3311 }
3312 
3313 
3314 static void
create_tes_jit_types(struct draw_tes_llvm_variant * var)3315 create_tes_jit_types(struct draw_tes_llvm_variant *var)
3316 {
3317    struct gallivm_state *gallivm = var->gallivm;
3318 
3319    var->resources_type = lp_build_jit_resources_type(gallivm);
3320    var->resources_ptr_type = LLVMPointerType(var->resources_type, 0);
3321    var->input_array_deref_type = create_tes_jit_input_deref_type(gallivm);
3322    var->input_array_type = LLVMPointerType(var->input_array_deref_type, 0); /* num vertices per prim */
3323 }
3324 
3325 
3326 static LLVMTypeRef
get_tes_resources_ptr_type(struct draw_tes_llvm_variant * variant)3327 get_tes_resources_ptr_type(struct draw_tes_llvm_variant *variant)
3328 {
3329    if (!variant->resources_ptr_type)
3330       create_tes_jit_types(variant);
3331    return variant->resources_ptr_type;
3332 }
3333 
3334 
3335 static LLVMValueRef
generate_tes_mask_value(struct draw_tes_llvm_variant * variant,struct lp_type tes_type,LLVMValueRef limit,LLVMValueRef loop_counter)3336 generate_tes_mask_value(struct draw_tes_llvm_variant *variant,
3337                         struct lp_type tes_type, LLVMValueRef limit, LLVMValueRef loop_counter)
3338 {
3339    struct gallivm_state *gallivm = variant->gallivm;
3340    LLVMBuilderRef builder = gallivm->builder;
3341    struct lp_type mask_type = lp_int_type(tes_type);
3342    LLVMValueRef num_prims;
3343    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
3344    unsigned i;
3345 
3346    num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), limit);
3347    for (i = 0; i < tes_type.length; i++) {
3348       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3349       mask_val = LLVMBuildInsertElement(builder, mask_val, LLVMBuildAdd(builder, loop_counter, idx, ""), idx, "");
3350    }
3351    mask_val = lp_build_compare(gallivm, mask_type,
3352                                PIPE_FUNC_GREATER, num_prims, mask_val);
3353 
3354    return mask_val;
3355 }
3356 
3357 
3358 static LLVMValueRef
draw_tes_llvm_fetch_vertex_input(const struct lp_build_tes_iface * tes_iface,struct lp_build_context * bld,bool is_vindex_indirect,LLVMValueRef vertex_index,bool is_aindex_indirect,LLVMValueRef attrib_index,bool is_sindex_indirect,LLVMValueRef swizzle_index)3359 draw_tes_llvm_fetch_vertex_input(const struct lp_build_tes_iface *tes_iface,
3360                                  struct lp_build_context *bld,
3361                                  bool is_vindex_indirect,
3362                                  LLVMValueRef vertex_index,
3363                                  bool is_aindex_indirect,
3364                                  LLVMValueRef attrib_index,
3365                                  bool is_sindex_indirect,
3366                                  LLVMValueRef swizzle_index)
3367 {
3368    const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
3369    struct gallivm_state *gallivm = bld->gallivm;
3370    LLVMBuilderRef builder = gallivm->builder;
3371    LLVMValueRef indices[3];
3372    LLVMValueRef res;
3373    struct lp_type type = bld->type;
3374 
3375    if (is_vindex_indirect || is_aindex_indirect || is_sindex_indirect) {
3376       res = bld->zero;
3377 
3378       for (int i = 0; i < type.length; ++i) {
3379          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3380          LLVMValueRef vert_chan_index = vertex_index;
3381          LLVMValueRef attr_chan_index = attrib_index;
3382          LLVMValueRef swiz_chan_index = swizzle_index;
3383          LLVMValueRef channel_vec;
3384 
3385          if (is_vindex_indirect) {
3386             vert_chan_index = LLVMBuildExtractElement(builder,
3387                                                       vertex_index, idx, "");
3388          }
3389          if (is_aindex_indirect) {
3390             attr_chan_index = LLVMBuildExtractElement(builder,
3391                                                       attrib_index, idx, "");
3392          }
3393          if (is_sindex_indirect) {
3394             swiz_chan_index = LLVMBuildExtractElement(builder,
3395                                                       swizzle_index, idx, "");
3396          }
3397 
3398          indices[0] = vert_chan_index;
3399          indices[1] = attr_chan_index;
3400          indices[2] = swiz_chan_index;
3401 
3402          channel_vec = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3403          channel_vec = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), channel_vec, "");
3404 
3405          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3406       }
3407    } else {
3408       indices[0] = vertex_index;
3409       indices[1] = attrib_index;
3410       indices[2] = swizzle_index;
3411 
3412       res = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3413       res = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), res, "");
3414       res = lp_build_broadcast_scalar(bld, res);
3415    }
3416    return res;
3417 }
3418 
3419 
3420 static LLVMValueRef
draw_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface * tes_iface,struct lp_build_context * bld,bool is_aindex_indirect,LLVMValueRef attrib_index,LLVMValueRef swizzle_index)3421 draw_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
3422                                 struct lp_build_context *bld,
3423                                 bool is_aindex_indirect,
3424                                 LLVMValueRef attrib_index,
3425                                 LLVMValueRef swizzle_index)
3426 {
3427    const struct draw_tes_llvm_iface *tes = draw_tes_llvm_iface(tes_iface);
3428    struct gallivm_state *gallivm = bld->gallivm;
3429    LLVMBuilderRef builder = gallivm->builder;
3430    LLVMValueRef indices[3];
3431    LLVMValueRef res;
3432    struct lp_type type = bld->type;
3433 
3434    if (is_aindex_indirect) {
3435       res = bld->zero;
3436 
3437       for (int i = 0; i < type.length; ++i) {
3438          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
3439          LLVMValueRef attr_chan_index = attrib_index;
3440          LLVMValueRef channel_vec;
3441 
3442          if (is_aindex_indirect) {
3443             attr_chan_index = LLVMBuildExtractElement(builder,
3444                                                       attrib_index, idx, "");
3445          }
3446 
3447          indices[0] = lp_build_const_int32(gallivm, 0);
3448          indices[1] = attr_chan_index;
3449          indices[2] = swizzle_index;
3450 
3451          channel_vec = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3452          channel_vec = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), channel_vec, "");
3453 
3454          res = LLVMBuildInsertElement(builder, res, channel_vec, idx, "");
3455       }
3456    } else {
3457       indices[0] = lp_build_const_int32(gallivm, 0);
3458       indices[1] = attrib_index;
3459       indices[2] = swizzle_index;
3460 
3461       res = LLVMBuildGEP2(builder, tes->variant->input_array_deref_type, tes->input, indices, 3, "");
3462       res = LLVMBuildLoad2(builder, LLVMFloatTypeInContext(gallivm->context), res, "");
3463       res = lp_build_broadcast_scalar(bld, res);
3464    }
3465    return res;
3466 }
3467 
3468 
3469 static void
draw_tes_llvm_generate(struct draw_llvm * llvm,struct draw_tes_llvm_variant * variant)3470 draw_tes_llvm_generate(struct draw_llvm *llvm,
3471                        struct draw_tes_llvm_variant *variant)
3472 {
3473    struct gallivm_state *gallivm = variant->gallivm;
3474    LLVMContextRef context = gallivm->context;
3475    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
3476    LLVMTypeRef flt_type = LLVMFloatTypeInContext(context);
3477    LLVMTypeRef arg_types[11];
3478    LLVMTypeRef func_type;
3479    LLVMValueRef variant_func;
3480    LLVMValueRef resources_ptr;
3481    LLVMValueRef tess_coord[2], io_ptr, input_array, num_tess_coord;
3482    LLVMValueRef view_index;
3483    LLVMValueRef tess_inner, tess_outer, prim_id, patch_vertices_in;
3484    LLVMBasicBlockRef block;
3485    LLVMBuilderRef builder;
3486    LLVMValueRef mask_val;
3487    struct lp_build_context bld, bldvec;
3488    struct lp_build_sampler_soa *sampler = 0;
3489    struct lp_build_image_soa *image = NULL;
3490    struct lp_bld_tgsi_system_values system_values;
3491    char func_name[64];
3492    unsigned i;
3493    struct draw_tes_llvm_iface tes_iface;
3494    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
3495    struct lp_build_mask_context mask;
3496    LLVMValueRef consts_ptr;
3497    LLVMValueRef ssbos_ptr;
3498    LLVMValueRef step;
3499    struct lp_type tes_type;
3500    unsigned vector_length = variant->shader->base.vector_length;
3501    int primid_slot = -1;
3502 
3503    memset(&system_values, 0, sizeof(system_values));
3504    memset(&outputs, 0, sizeof(outputs));
3505 
3506    snprintf(func_name, sizeof(func_name), "draw_llvm_tes_variant");
3507 
3508    LLVMTypeRef tess_outer_deref_type = LLVMArrayType(flt_type, 4);
3509    LLVMTypeRef tess_inner_deref_type = LLVMArrayType(flt_type, 2);
3510 
3511    arg_types[0] = get_tes_resources_ptr_type(variant);    /* context */
3512    arg_types[1] = variant->input_array_type;           /* input */
3513    arg_types[2] = variant->vertex_header_ptr_type;
3514    arg_types[3] = int32_type;
3515    arg_types[4] = int32_type;
3516    arg_types[5] = LLVMPointerType(flt_type, 0);
3517    arg_types[6] = LLVMPointerType(flt_type, 0);
3518    arg_types[7] = LLVMPointerType(tess_outer_deref_type, 0);
3519    arg_types[8] = LLVMPointerType(tess_inner_deref_type, 0);
3520    arg_types[9] = int32_type;
3521    arg_types[10] = int32_type;
3522 
3523    func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0);
3524    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
3525 
3526    variant->function = variant_func;
3527    variant->function_name = MALLOC(strlen(func_name)+1);
3528    strcpy(variant->function_name, func_name);
3529    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
3530 
3531    for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
3532       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
3533          lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS);
3534 
3535    if (gallivm->cache && gallivm->cache->data_size) {
3536       gallivm_stub_func(gallivm, variant_func);
3537       return;
3538    }
3539 
3540    resources_ptr               = LLVMGetParam(variant_func, 0);
3541    input_array               = LLVMGetParam(variant_func, 1);
3542    io_ptr                    = LLVMGetParam(variant_func, 2);
3543    prim_id                   = LLVMGetParam(variant_func, 3);
3544    num_tess_coord            = LLVMGetParam(variant_func, 4);
3545    tess_coord[0]             = LLVMGetParam(variant_func, 5);
3546    tess_coord[1]             = LLVMGetParam(variant_func, 6);
3547    tess_outer                = LLVMGetParam(variant_func, 7);
3548    tess_inner                = LLVMGetParam(variant_func, 8);
3549    patch_vertices_in         = LLVMGetParam(variant_func, 9);
3550    view_index                = LLVMGetParam(variant_func, 10);
3551 
3552    lp_build_name(resources_ptr, "resources");
3553    lp_build_name(input_array, "input");
3554    lp_build_name(io_ptr, "io");
3555    lp_build_name(prim_id, "prim_id");
3556    lp_build_name(num_tess_coord, "num_tess_coord");
3557    lp_build_name(tess_coord[0], "tess_coord[0]");
3558    lp_build_name(tess_coord[1], "tess_coord[1]");
3559    lp_build_name(tess_outer, "tess_outer");
3560    lp_build_name(tess_inner, "tess_inner");
3561    lp_build_name(patch_vertices_in, "patch_vertices_in");
3562    lp_build_name(view_index, "view_index");
3563 
3564    tes_iface.base.fetch_vertex_input = draw_tes_llvm_fetch_vertex_input;
3565    tes_iface.base.fetch_patch_input = draw_tes_llvm_fetch_patch_input;
3566    tes_iface.input = input_array;
3567    tes_iface.variant = variant;
3568 
3569    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
3570    builder = gallivm->builder;
3571    LLVMPositionBuilderAtEnd(builder, block);
3572 
3573    lp_build_context_init(&bld, gallivm, lp_type_int(32));
3574 
3575    memset(&tes_type, 0, sizeof tes_type);
3576    tes_type.floating = true; /* floating point values */
3577    tes_type.sign = true;     /* values are signed */
3578    tes_type.norm = false;    /* values are not limited to [0,1] or [-1,1] */
3579    tes_type.width = 32;      /* 32-bit float */
3580    tes_type.length = vector_length;
3581 
3582    lp_build_context_init(&bldvec, variant->gallivm, lp_int_type(tes_type));
3583    consts_ptr = lp_jit_resources_constants(gallivm, variant->resources_type, resources_ptr);
3584 
3585    ssbos_ptr = lp_jit_resources_ssbos(gallivm, variant->resources_type, resources_ptr);
3586 
3587    sampler = lp_bld_llvm_sampler_soa_create(variant->key.samplers,
3588                                           MAX2(variant->key.nr_samplers,
3589                                                variant->key.nr_sampler_views));
3590    image = lp_bld_llvm_image_soa_create(draw_tes_llvm_variant_key_images(&variant->key),
3591                                       variant->key.nr_images);
3592    step = lp_build_const_int32(gallivm, vector_length);
3593 
3594    system_values.tess_outer = LLVMBuildLoad2(builder, tess_outer_deref_type, tess_outer, "");
3595    system_values.tess_inner = LLVMBuildLoad2(builder, tess_inner_deref_type, tess_inner, "");
3596 
3597    system_values.prim_id = lp_build_broadcast_scalar(&bldvec, prim_id);
3598 
3599    system_values.view_index = view_index;
3600 
3601    system_values.vertices_in = lp_build_broadcast_scalar(&bldvec, patch_vertices_in);
3602 
3603    if (variant->key.primid_needed) {
3604       int slot = variant->key.primid_output;
3605       for (unsigned i = 0; i < 4; i++) {
3606          outputs[slot][i] = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, tes_type), "primid");
3607          LLVMBuildStore(builder, system_values.prim_id, outputs[slot][i]);
3608       }
3609       primid_slot = slot;
3610    }
3611    struct lp_build_loop_state lp_loop;
3612    lp_build_loop_begin(&lp_loop, gallivm, bld.zero);
3613    {
3614       LLVMValueRef io;
3615 
3616       io = LLVMBuildGEP2(builder, variant->vertex_header_type, io_ptr, &lp_loop.counter, 1, "");
3617       mask_val = generate_tes_mask_value(variant, tes_type, num_tess_coord, lp_loop.counter);
3618       lp_build_mask_begin(&mask, gallivm, tes_type, mask_val);
3619 
3620       system_values.tess_coord = LLVMGetUndef(LLVMArrayType(LLVMVectorType(flt_type, vector_length), 3));
3621       for (i = 0; i < 3; i++) {
3622          LLVMValueRef tess_coord_chan = LLVMGetUndef(LLVMVectorType(flt_type, vector_length));
3623          for (unsigned j = 0; j < vector_length; j++) {
3624             LLVMValueRef idx = LLVMBuildAdd(builder, lp_loop.counter, lp_build_const_int32(gallivm, j), "");
3625             LLVMValueRef tc_val;
3626             if (i == 2) {
3627                if (variant->shader->base.prim_mode == MESA_PRIM_TRIANGLES) {
3628                   tc_val = lp_build_const_float(gallivm, 1.0);
3629                   tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get2(builder, flt_type, tess_coord[0], idx), "");
3630                   tc_val = LLVMBuildFSub(builder, tc_val, lp_build_pointer_get2(builder, flt_type, tess_coord[1], idx), "");
3631                } else
3632                   tc_val = lp_build_const_float(gallivm, 0.0);
3633             } else
3634                tc_val = lp_build_pointer_get2(builder, flt_type, tess_coord[i], idx);
3635 
3636             tess_coord_chan = LLVMBuildInsertElement(builder, tess_coord_chan, tc_val, lp_build_const_int32(gallivm, j), "");
3637          }
3638          system_values.tess_coord = LLVMBuildInsertValue(builder, system_values.tess_coord, tess_coord_chan, i, "");
3639       }
3640 
3641       struct lp_build_tgsi_params params;
3642       memset(&params, 0, sizeof(params));
3643 
3644       params.type = tes_type;
3645       params.mask = &mask;
3646       params.consts_ptr = consts_ptr;
3647       params.system_values = &system_values;
3648       params.resources_type = variant->resources_type;
3649       params.resources_ptr = resources_ptr;
3650       params.sampler = sampler;
3651       params.info = &llvm->draw->tes.tess_eval_shader->info;
3652       params.ssbo_ptr = ssbos_ptr;
3653       params.image = image;
3654       params.tes_iface = &tes_iface.base;
3655       params.aniso_filter_table = lp_jit_resources_aniso_filter_table(gallivm, variant->resources_type, resources_ptr);
3656 
3657       lp_build_nir_soa(variant->gallivm,
3658                        llvm->draw->tes.tess_eval_shader->state.ir.nir,
3659                        &params,
3660                        outputs);
3661 
3662       lp_build_mask_end(&mask);
3663 
3664       if (variant->key.clamp_vertex_color) {
3665          const struct tgsi_shader_info *info = &llvm->draw->tes.tess_eval_shader->info;
3666          do_clamp_vertex_color(variant->gallivm,
3667                                tes_type, info,
3668                                outputs);
3669       }
3670       LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
3671                                                      lp_int_type(tes_type), 0);
3672 
3673       convert_to_aos(gallivm, variant->vertex_header_type, io, NULL, outputs, clipmask,
3674                      draw_total_tes_outputs(llvm->draw), tes_type, primid_slot, false);
3675    }
3676    lp_build_loop_end_cond(&lp_loop, num_tess_coord, step, LLVMIntUGE);
3677    lp_bld_llvm_sampler_soa_destroy(sampler);
3678    lp_bld_llvm_image_soa_destroy(image);
3679 
3680    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
3681    gallivm_verify_function(gallivm, variant_func);
3682 }
3683 
3684 
3685 struct draw_tes_llvm_variant *
draw_tes_llvm_create_variant(struct draw_llvm * llvm,unsigned num_outputs,const struct draw_tes_llvm_variant_key * key)3686 draw_tes_llvm_create_variant(struct draw_llvm *llvm,
3687                              unsigned num_outputs,
3688                              const struct draw_tes_llvm_variant_key *key)
3689 {
3690    struct draw_tes_llvm_variant *variant;
3691    struct llvm_tess_eval_shader *shader = llvm_tess_eval_shader(llvm->draw->tes.tess_eval_shader);
3692    char module_name[64];
3693    unsigned char ir_sha1_cache_key[20];
3694    struct lp_cached_code cached = { 0 };
3695    bool needs_caching = false;
3696 
3697    variant = MALLOC(sizeof *variant +
3698                     shader->variant_key_size - sizeof variant->key);
3699    if (!variant)
3700       return NULL;
3701 
3702    variant->llvm = llvm;
3703    variant->shader = shader;
3704 
3705    snprintf(module_name, sizeof(module_name), "draw_llvm_tes_variant%u",
3706             variant->shader->variants_cached);
3707 
3708    memcpy(&variant->key, key, shader->variant_key_size);
3709    if (shader->base.state.ir.nir && llvm->draw->disk_cache_cookie) {
3710       draw_get_ir_cache_key(shader->base.state.ir.nir,
3711                             key,
3712                             shader->variant_key_size,
3713                             num_outputs,
3714                             ir_sha1_cache_key);
3715 
3716       llvm->draw->disk_cache_find_shader(llvm->draw->disk_cache_cookie,
3717                                          &cached,
3718                                          ir_sha1_cache_key);
3719       if (!cached.data_size)
3720          needs_caching = true;
3721    }
3722    variant->gallivm = gallivm_create(module_name, &llvm->context, &cached);
3723 
3724    create_tes_jit_types(variant);
3725 
3726    variant->vertex_header_type = lp_build_create_jit_vertex_header_type(variant->gallivm, num_outputs);
3727    variant->vertex_header_ptr_type = LLVMPointerType(variant->vertex_header_type, 0);
3728 
3729    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3730       nir_print_shader(llvm->draw->tes.tess_eval_shader->state.ir.nir, stderr);
3731       draw_tes_llvm_dump_variant_key(&variant->key);
3732    }
3733 
3734    draw_tes_llvm_generate(llvm, variant);
3735 
3736    gallivm_compile_module(variant->gallivm);
3737 
3738    variant->jit_func = (draw_tes_jit_func)
3739       gallivm_jit_function(variant->gallivm, variant->function, variant->function_name);
3740 
3741    if (needs_caching)
3742       llvm->draw->disk_cache_insert_shader(llvm->draw->disk_cache_cookie,
3743                                            &cached,
3744                                            ir_sha1_cache_key);
3745    gallivm_free_ir(variant->gallivm);
3746 
3747    variant->list_item_global.base = variant;
3748    variant->list_item_local.base = variant;
3749    /*variant->no = */shader->variants_created++;
3750    variant->list_item_global.base = variant;
3751 
3752    return variant;
3753 }
3754 
3755 
3756 void
draw_tes_llvm_destroy_variant(struct draw_tes_llvm_variant * variant)3757 draw_tes_llvm_destroy_variant(struct draw_tes_llvm_variant *variant)
3758 {
3759    struct draw_llvm *llvm = variant->llvm;
3760 
3761    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
3762       debug_printf("Deleting TES variant: %u tes variants,\t%u total variants\n",
3763                     variant->shader->variants_cached, llvm->nr_tes_variants);
3764    }
3765 
3766    gallivm_destroy(variant->gallivm);
3767 
3768    list_del(&variant->list_item_local.list);
3769    variant->shader->variants_cached--;
3770    list_del(&variant->list_item_global.list);
3771    llvm->nr_tes_variants--;
3772    if(variant->function_name)
3773       FREE(variant->function_name);
3774    FREE(variant);
3775 }
3776 
3777 
3778 struct draw_tes_llvm_variant_key *
draw_tes_llvm_make_variant_key(struct draw_llvm * llvm,char * store)3779 draw_tes_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
3780 {
3781    struct draw_tes_llvm_variant_key *key;
3782    struct lp_sampler_static_state *draw_sampler;
3783    struct lp_image_static_state *draw_image;
3784 
3785    key = (struct draw_tes_llvm_variant_key *)store;
3786 
3787    memset(key, 0, offsetof(struct draw_tes_llvm_variant_key, samplers[0]));
3788 
3789    int primid_output = draw_find_shader_output(llvm->draw, TGSI_SEMANTIC_PRIMID, 0);
3790    if (primid_output >= 0) {
3791       key->primid_output = primid_output;
3792       key->primid_needed = true;
3793    }
3794 
3795    key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color &&
3796       llvm->draw->gs.geometry_shader == NULL;
3797 
3798    /* All variants of this shader will have the same value for
3799     * nr_samplers.  Not yet trying to compact away holes in the
3800     * sampler array.
3801     */
3802    key->nr_samplers = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
3803    if (llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
3804       key->nr_sampler_views =
3805          llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
3806    } else {
3807       key->nr_sampler_views = key->nr_samplers;
3808    }
3809 
3810    key->nr_images = llvm->draw->tes.tess_eval_shader->info.file_max[TGSI_FILE_IMAGE] + 1;
3811 
3812    draw_sampler = key->samplers;
3813 
3814    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
3815 
3816    for (unsigned i = 0 ; i < key->nr_samplers; i++) {
3817       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
3818                                       llvm->draw->samplers[PIPE_SHADER_TESS_EVAL][i]);
3819    }
3820    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
3821       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
3822                                       llvm->draw->sampler_views[PIPE_SHADER_TESS_EVAL][i]);
3823    }
3824 
3825    draw_image = draw_tes_llvm_variant_key_images(key);
3826    memset(draw_image, 0,
3827           key->nr_images * sizeof *draw_image);
3828    for (unsigned i = 0; i < key->nr_images; i++) {
3829       lp_sampler_static_texture_state_image(&draw_image[i].image_state,
3830                                             llvm->draw->images[PIPE_SHADER_TESS_EVAL][i]);
3831    }
3832    return key;
3833 }
3834 
3835 
3836 void
draw_tes_llvm_dump_variant_key(struct draw_tes_llvm_variant_key * key)3837 draw_tes_llvm_dump_variant_key(struct draw_tes_llvm_variant_key *key)
3838 {
3839    struct lp_sampler_static_state *sampler = key->samplers;
3840    struct lp_image_static_state *image = draw_tes_llvm_variant_key_images(key);
3841 
3842    if (key->primid_needed)
3843       debug_printf("prim id output %d\n", key->primid_output);
3844    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
3845    for (unsigned i = 0 ; i < key->nr_sampler_views; i++) {
3846       debug_printf("sampler[%i].src_format = %s\n", i,
3847                    util_format_name(sampler[i].texture_state.format));
3848    }
3849 
3850    for (unsigned i = 0 ; i < key->nr_images; i++)
3851       debug_printf("images[%i].format = %s\n", i, util_format_name(image[i].image_state.format));
3852 
3853 }
3854