xref: /aosp_15_r20/external/mesa3d/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * Copyright 2007-2008 VMware, Inc.
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * @file
31  * TGSI to LLVM IR translation -- SoA.
32  *
33  * @author Jose Fonseca <[email protected]>
34  *
35  * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36  * Brian Paul, and others.
37  */
38 
39 #include "util/detect.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "tgsi/tgsi_strings.h"
51 #include "lp_bld_tgsi_action.h"
52 #include "lp_bld_type.h"
53 #include "lp_bld_const.h"
54 #include "lp_bld_arit.h"
55 #include "lp_bld_bitarit.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_init.h"
58 #include "lp_bld_logic.h"
59 #include "lp_bld_misc.h"
60 #include "lp_bld_swizzle.h"
61 #include "lp_bld_flow.h"
62 #include "lp_bld_coro.h"
63 #include "lp_bld_quad.h"
64 #include "lp_bld_tgsi.h"
65 #include "lp_bld_limits.h"
66 #include "lp_bld_debug.h"
67 #include "lp_bld_printf.h"
68 #include "lp_bld_sample.h"
69 #include "lp_bld_struct.h"
70 #include "lp_bld_jit_types.h"
71 
72 #define DUMP_GS_EMITS 0
73 
74 /*
75  * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
76  * instruction.
77  *
78  * TODO:
79  * - take execution masks in consideration
80  * - debug control-flow instructions
81  */
82 #define DEBUG_EXECUTION 0
83 
84 
85 /*
86  * Emit code to print a register value.
87  */
88 static void
emit_dump_reg(struct gallivm_state * gallivm,unsigned file,unsigned index,unsigned chan,LLVMValueRef value)89 emit_dump_reg(struct gallivm_state *gallivm,
90               unsigned file,
91               unsigned index,
92               unsigned chan,
93               LLVMValueRef value)
94 {
95    char buf[32];
96 
97    snprintf(buf, sizeof buf, "    %s[%u].%c = ",
98             tgsi_file_name(file),
99             index, "xyzw"[chan]);
100 
101    lp_build_print_value(gallivm, buf, value);
102 }
103 
104 static inline struct function_ctx *
func_ctx(struct lp_exec_mask * mask)105 func_ctx(struct lp_exec_mask *mask)
106 {
107    assert(mask->function_stack_size > 0);
108    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
109    return &mask->function_stack[mask->function_stack_size - 1];
110 }
111 
112 /*
113  * combine the execution mask if there is one with the current mask.
114  */
115 static LLVMValueRef
mask_vec(struct lp_build_tgsi_context * bld_base)116 mask_vec(struct lp_build_tgsi_context *bld_base)
117 {
118    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
119    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
120    struct lp_exec_mask *exec_mask = &bld->exec_mask;
121    LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
122    if (!exec_mask->has_mask) {
123       return bld_mask;
124    }
125    if (!bld_mask)
126       return exec_mask->exec_mask;
127    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
128                        exec_mask->exec_mask, "");
129 }
130 
lp_exec_tgsi_break(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base)131 static void lp_exec_tgsi_break(struct lp_exec_mask *mask,
132                           struct lp_build_tgsi_context * bld_base)
133 {
134    enum tgsi_opcode opcode =
135       bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
136    bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
137                         opcode == TGSI_OPCODE_CASE);
138    lp_exec_break(mask, &bld_base->pc, break_always);
139 }
140 
/*
 * Enter a switch statement.
 *
 * Saves the enclosing switch state (and the current break type) on the
 * per-function stacks, then starts a fresh switch state keyed on switchval
 * with empty switch/default masks.
 */
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);
   int slot;

   /*
    * Nesting limit reached: only bump the depth counter, without saving or
    * resetting any state.
    */
   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size++;
      return;
   }

   /* Breaks now target this switch; remember what they targeted before. */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   /* Push the enclosing switch state. */
   slot = ctx->switch_stack_size;
   ctx->switch_stack[slot].switch_mask = mask->switch_mask;
   ctx->switch_stack[slot].switch_val = ctx->switch_val;
   ctx->switch_stack[slot].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[slot].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[slot].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size = slot + 1;

   /* Start the new switch with nothing matched yet. */
   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}
171 
lp_exec_endswitch(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base)172 static void lp_exec_endswitch(struct lp_exec_mask *mask,
173                               struct lp_build_tgsi_context * bld_base)
174 {
175    LLVMBuilderRef builder = mask->bld->gallivm->builder;
176    struct function_ctx *ctx = func_ctx(mask);
177 
178    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
179       ctx->switch_stack_size--;
180       return;
181    }
182 
183    /* check if there's deferred default if so do it now */
184    if (ctx->switch_pc && !ctx->switch_in_default) {
185       LLVMValueRef prevmask, defaultmask;
186       unsigned tmp_pc;
187       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
188       defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
189       mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
190       ctx->switch_in_default = true;
191 
192       lp_exec_mask_update(mask);
193 
194       assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
195              TGSI_OPCODE_DEFAULT);
196 
197       tmp_pc = bld_base->pc;
198       bld_base->pc = ctx->switch_pc;
199       /*
200        * re-purpose switch_pc to point to here again, since we stop execution of
201        * the deferred default after next break.
202        */
203       ctx->switch_pc = tmp_pc - 1;
204 
205       return;
206    }
207 
208    else if (ctx->switch_pc && ctx->switch_in_default) {
209       assert(bld_base->pc == ctx->switch_pc + 1);
210    }
211 
212    ctx->switch_stack_size--;
213    mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
214    ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
215    ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
216    ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
217    ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
218 
219    ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
220 
221    lp_exec_mask_update(mask);
222 }
223 
/*
 * Handle a case label.
 *
 * Lanes whose switch value equals caseval are added to the switch mask
 * (restricted to the enclosing switch mask) and recorded in
 * switch_mask_default. Nothing is done once the default block is being
 * executed.
 */
static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef outer_mask, hit;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING)
      return;

   /*
    * Skipping the case mask evaluation while in default is NOT optional
    * (not in all cases anyway).
    */
   if (ctx->switch_in_default)
      return;

   outer_mask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
   hit = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);

   /* Remember every lane that matched some case, for the default block. */
   ctx->switch_mask_default = LLVMBuildOr(builder, hit,
                                          ctx->switch_mask_default,
                                          "sw_default_mask");

   /* Already active lanes stay active (fallthrough). */
   hit = LLVMBuildOr(builder, hit, mask->switch_mask, "");
   mask->switch_mask = LLVMBuildAnd(builder, hit, outer_mask, "sw_mask");

   lp_exec_mask_update(mask);
}
248 
249 /*
250  * Analyse default statement in a switch.
251  * \return true if default is last statement, false otherwise
252  * \param default_pc_start contains pc of instruction to jump to
253  *                         if default wasn't last but there's no
254  *                         fallthrough into default.
255  */
default_analyse_is_last(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base,int * default_pc_start)256 static bool default_analyse_is_last(struct lp_exec_mask *mask,
257                                        struct lp_build_tgsi_context * bld_base,
258                                        int *default_pc_start)
259 {
260    unsigned pc = bld_base->pc;
261    struct function_ctx *ctx = func_ctx(mask);
262    int curr_switch_stack = ctx->switch_stack_size;
263 
264    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
265       return false;
266    }
267 
268    /* skip over case statements which are together with default */
269    while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
270       pc++;
271    }
272 
273    while (pc != ~0u && pc < bld_base->num_instructions) {
274       enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
275       switch (opcode) {
276       case TGSI_OPCODE_CASE:
277          if (curr_switch_stack == ctx->switch_stack_size) {
278             *default_pc_start = pc - 1;
279             return false;
280          }
281          break;
282       case TGSI_OPCODE_SWITCH:
283          curr_switch_stack++;
284          break;
285       case TGSI_OPCODE_ENDSWITCH:
286          if (curr_switch_stack == ctx->switch_stack_size) {
287             *default_pc_start = pc - 1;
288             return true;
289          }
290          curr_switch_stack--;
291          break;
292       default:
293          ; /* nothing */
294       }
295       pc++;
296    }
297    /* should never arrive here */
298    assert(0);
299    return true;
300 }
301 
lp_exec_default(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base)302 static void lp_exec_default(struct lp_exec_mask *mask,
303                             struct lp_build_tgsi_context * bld_base)
304 {
305    LLVMBuilderRef builder = mask->bld->gallivm->builder;
306    struct function_ctx *ctx = func_ctx(mask);
307 
308    int default_exec_pc = 0;
309    bool default_is_last;
310 
311    if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
312       return;
313    }
314 
315    /*
316     * This is a messy opcode, because it may not be always at the end and
317     * there can be fallthrough in and out of it.
318     */
319 
320    default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
321    /*
322     * If it is last statement in switch (note that case statements appearing
323     * "at the same time" as default don't change that) everything is just fine,
324     * update switch mask and go on. This means we can handle default with
325     * fallthrough INTO it without overhead, if it is last.
326     */
327    if (default_is_last) {
328       LLVMValueRef prevmask, defaultmask;
329       prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
330       defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
331       defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
332       mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
333       ctx->switch_in_default = true;
334 
335       lp_exec_mask_update(mask);
336    }
337    else {
338       /*
339        * Technically, "case" immediately before default isn't really a
340        * fallthrough, however we still have to count them as such as we
341        * already have updated the masks.
342        * If that happens in practice could add a switch optimizer pass
343        * which just gets rid of all case statements appearing together with
344        * default (or could do switch analysis at switch start time instead).
345        */
346       enum tgsi_opcode opcode =
347          bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
348       bool ft_into = (opcode != TGSI_OPCODE_BRK &&
349                       opcode != TGSI_OPCODE_SWITCH);
350       /*
351        * If it is not last statement and there was no fallthrough into it,
352        * we record the PC and continue execution at next case (again, those
353        * case encountered at the same time don't count). At endswitch
354        * time, we update switchmask, and go back executing the code we skipped
355        * until the next break (possibly re-executing some code with changed mask
356        * if there was a fallthrough out of default).
357        * Finally, if it is not last statement and there was a fallthrough into it,
358        * do the same as with the former case, except instead of skipping the code
359        * just execute it without updating the mask, then go back and re-execute.
360        */
361       ctx->switch_pc = bld_base->pc;
362       if (!ft_into) {
363          bld_base->pc = default_exec_pc;
364       }
365    }
366 }
367 
368 
lp_exec_mask_call(struct lp_exec_mask * mask,int func,int * pc)369 static void lp_exec_mask_call(struct lp_exec_mask *mask,
370                               int func,
371                               int *pc)
372 {
373    if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
374       return;
375    }
376 
377    lp_exec_mask_function_init(mask, mask->function_stack_size);
378    mask->function_stack[mask->function_stack_size].pc = *pc;
379    mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
380    mask->function_stack_size++;
381    *pc = func;
382 }
383 
lp_exec_mask_ret(struct lp_exec_mask * mask,int * pc)384 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
385 {
386    LLVMBuilderRef builder = mask->bld->gallivm->builder;
387    struct function_ctx *ctx = func_ctx(mask);
388    LLVMValueRef exec_mask;
389 
390    if (ctx->cond_stack_size == 0 &&
391        ctx->loop_stack_size == 0 &&
392        ctx->switch_stack_size == 0 &&
393        mask->function_stack_size == 1) {
394       /* returning from main() */
395       *pc = -1;
396       return;
397    }
398 
399    if (mask->function_stack_size == 1) {
400       /*
401        * This requires special handling since we need to ensure
402        * we don't drop the mask even if we have no call stack
403        * (e.g. after a ret in a if clause after the endif)
404        */
405       mask->ret_in_main = true;
406    }
407 
408    exec_mask = LLVMBuildNot(builder,
409                             mask->exec_mask,
410                             "ret");
411 
412    mask->ret_mask = LLVMBuildAnd(builder,
413                                  mask->ret_mask,
414                                  exec_mask, "ret_full");
415 
416    lp_exec_mask_update(mask);
417 }
418 
/*
 * Start of a subroutine body: intentionally a no-op, nothing is done to the
 * execution mask here.
 */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
   (void)mask;
}
422 
lp_exec_mask_endsub(struct lp_exec_mask * mask,int * pc)423 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
424 {
425    struct function_ctx *ctx;
426 
427    assert(mask->function_stack_size > 1);
428    assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
429 
430    ctx = func_ctx(mask);
431    mask->function_stack_size--;
432 
433    *pc = ctx->pc;
434    mask->ret_mask = ctx->ret_mask;
435 
436    lp_exec_mask_update(mask);
437 }
438 
439 
440 static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context * bld,unsigned file,int index,unsigned chan)441 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
442              unsigned file,
443              int index,
444              unsigned chan)
445 {
446    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
447    LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
448    LLVMValueRef var_of_array;
449    LLVMTypeRef type_of_array;
450 
451    switch (file) {
452    case TGSI_FILE_TEMPORARY:
453       array_of_vars = bld->temps;
454       var_of_array = bld->temps_array;
455       type_of_array = bld->temps_array_type;
456       break;
457    case TGSI_FILE_OUTPUT:
458       array_of_vars = bld->outputs;
459       var_of_array = bld->outputs_array;
460       type_of_array = bld->outputs_array_type;
461       break;
462    default:
463       assert(0);
464       return NULL;
465    }
466 
467    assert(chan < 4);
468 
469    if (bld->indirect_files & (1 << file)) {
470       LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
471       /* I'm not sure the other path ever gets hit, but leave until someone figures it out,
472          this check doesn't work with opaque pointers. */
473       if (1) {//LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
474          LLVMValueRef gep[2];
475          gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
476          gep[1] = lindex;
477          return LLVMBuildGEP2(builder, type_of_array, var_of_array, gep, 2, "");
478       } else {
479          return LLVMBuildGEP2(builder, type_of_array, var_of_array, &lindex, 1, "");
480       }
481    }
482    else {
483       assert(index <= bld->bld_base.info->file_max[file]);
484       return array_of_vars[index][chan];
485    }
486 }
487 
488 
489 /**
490  * Return pointer to a temporary register channel (src or dest).
491  * Note that indirect addressing cannot be handled here.
492  * \param index  which temporary register
493  * \param chan  which channel of the temp register.
494  */
495 LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context * bld,unsigned index,unsigned chan)496 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
497              unsigned index,
498              unsigned chan)
499 {
500    return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
501 }
502 
503 /**
504  * Return pointer to a output register channel (src or dest).
505  * Note that indirect addressing cannot be handled here.
506  * \param index  which output register
507  * \param chan  which channel of the output register.
508  */
509 LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context * bld,unsigned index,unsigned chan)510 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
511                unsigned index,
512                unsigned chan)
513 {
514    return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
515 }
516 
517 /*
518  * If we have indirect addressing in outputs copy our alloca array
519  * to the outputs slots specified by the caller to make sure
520  * our outputs are delivered consistently via the same interface.
521  */
522 static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)523 gather_outputs(struct lp_build_tgsi_soa_context * bld)
524 {
525    if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
526       unsigned index, chan;
527       assert(bld->bld_base.info->num_outputs <=
528              bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
529       for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
530          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
531             bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
532          }
533       }
534    }
535 }
536 
537 /**
538  * Gather vector.
539  * XXX the lp_build_gather() function should be capable of doing this
540  * with a little work.
541  */
542 static LLVMValueRef
build_gather(struct lp_build_tgsi_context * bld_base,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef overflow_mask,LLVMValueRef indexes2)543 build_gather(struct lp_build_tgsi_context *bld_base,
544              LLVMValueRef base_ptr,
545              LLVMValueRef indexes,
546              LLVMValueRef overflow_mask,
547              LLVMValueRef indexes2)
548 {
549    struct gallivm_state *gallivm = bld_base->base.gallivm;
550    LLVMBuilderRef builder = gallivm->builder;
551    struct lp_build_context *uint_bld = &bld_base->uint_bld;
552    struct lp_build_context *bld = &bld_base->base;
553    LLVMValueRef res;
554    unsigned i;
555 
556    if (indexes2)
557       res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
558    else
559       res = bld->undef;
560    /*
561     * overflow_mask is a vector telling us which channels
562     * in the vector overflowed. We use the overflow behavior for
563     * constant buffers which is defined as:
564     * Out of bounds access to constant buffer returns 0 in all
565     * components. Out of bounds behavior is always with respect
566     * to the size of the buffer bound at that slot.
567     */
568 
569    if (overflow_mask) {
570       /*
571        * We avoid per-element control flow here (also due to llvm going crazy,
572        * though I suspect it's better anyway since overflow is likely rare).
573        * Note that since we still fetch from buffers even if num_elements was
574        * zero (in this case we'll fetch from index zero) the jit func callers
575        * MUST provide valid fake constant buffers of size 4x32 (the values do
576        * not matter), otherwise we'd still need (not per element though)
577        * control flow.
578        */
579       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
580       if (indexes2)
581          indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
582    }
583 
584    /*
585     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
586     */
587    for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
588       LLVMValueRef si, di;
589       LLVMValueRef index;
590       LLVMValueRef scalar_ptr, scalar;
591 
592       di = lp_build_const_int32(bld->gallivm, i);
593       if (indexes2)
594          si = lp_build_const_int32(bld->gallivm, i >> 1);
595       else
596          si = di;
597 
598       if (indexes2 && (i & 1)) {
599          index = LLVMBuildExtractElement(builder,
600                                          indexes2, si, "");
601       } else {
602          index = LLVMBuildExtractElement(builder,
603                                          indexes, si, "");
604       }
605       scalar_ptr = LLVMBuildGEP2(builder, bld->elem_type, base_ptr,
606                                  &index, 1, "gather_ptr");
607       scalar = LLVMBuildLoad2(builder, bld->elem_type, scalar_ptr, "");
608 
609       res = LLVMBuildInsertElement(builder, res, scalar, di, "");
610    }
611 
612    if (overflow_mask) {
613       if (indexes2) {
614          res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
615          overflow_mask = LLVMBuildSExt(builder, overflow_mask,
616                                        bld_base->dbl_bld.int_vec_type, "");
617          res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
618                                bld_base->dbl_bld.zero, res);
619       } else
620          res = lp_build_select(bld, overflow_mask, bld->zero, res);
621    }
622 
623    return res;
624 }
625 
626 
627 /**
628  * Scatter/store vector.
629  */
630 static void
emit_mask_scatter(struct lp_build_tgsi_soa_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef values,struct lp_exec_mask * mask)631 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
632                   LLVMValueRef base_ptr,
633                   LLVMValueRef indexes,
634                   LLVMValueRef values,
635                   struct lp_exec_mask *mask)
636 {
637    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
638    LLVMBuilderRef builder = gallivm->builder;
639    unsigned i;
640    LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
641 
642    /*
643     * Loop over elements of index_vec, store scalar value.
644     */
645    for (i = 0; i < bld->bld_base.base.type.length; i++) {
646       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
647       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
648       LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, bld->bld_base.base.elem_type, base_ptr, &index, 1, "scatter_ptr");
649       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
650       LLVMValueRef scalar_pred = pred ?
651          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
652 
653       if (0)
654          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
655                          ii, val, index, scalar_ptr);
656 
657       if (scalar_pred) {
658          LLVMValueRef real_val, dst_val;
659          dst_val = LLVMBuildLoad2(builder, bld->bld_base.base.elem_type, scalar_ptr, "");
660          real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
661          LLVMBuildStore(builder, real_val, scalar_ptr);
662       }
663       else {
664          LLVMBuildStore(builder, val, scalar_ptr);
665       }
666    }
667 }
668 
669 
670 /**
671  * Read the current value of the ADDR register, convert the floats to
672  * ints, add the base index and return the vector of offsets.
673  * The offsets will be used to index into the constant buffer or
674  * temporary register file.
675  */
676 static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context * bld,unsigned reg_file,unsigned reg_index,const struct tgsi_ind_register * indirect_reg,int index_limit)677 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
678                    unsigned reg_file, unsigned reg_index,
679                    const struct tgsi_ind_register *indirect_reg,
680                    int index_limit)
681 {
682    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
683    struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
684    /* always use X component of address register */
685    unsigned swizzle = indirect_reg->Swizzle;
686    LLVMValueRef base;
687    LLVMValueRef rel;
688    LLVMValueRef max_index;
689    LLVMValueRef index;
690 
691    assert(bld->indirect_files & (1 << reg_file));
692 
693    base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
694 
695    assert(swizzle < 4);
696    switch (indirect_reg->File) {
697    case TGSI_FILE_ADDRESS:
698       rel = LLVMBuildLoad2(builder,
699                            bld->bld_base.base.int_vec_type,
700                            bld->addr[indirect_reg->Index][swizzle],
701                            "load addr reg");
702       /* ADDR LLVM values already have LLVM integer type. */
703       break;
704    case TGSI_FILE_TEMPORARY:
705       rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
706       rel = LLVMBuildLoad2(builder, bld->bld_base.base.vec_type, rel, "load temp reg");
707       /* TEMP LLVM values always have LLVM float type, but for indirection, the
708        * value actually stored is expected to be an integer */
709       rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
710       break;
711    default:
712       assert(0);
713       rel = uint_bld->zero;
714    }
715 
716    index = lp_build_add(uint_bld, base, rel);
717 
718    /*
719     * emit_fetch_constant handles constant buffer overflow so this code
720     * is pointless for them.
721     * Furthermore the D3D10 spec in section 6.5 says:
722     * If the constant buffer bound to a slot is larger than the size
723     * declared in the shader for that slot, implementations are allowed
724     * to return incorrect data (not necessarily 0) for indices that are
725     * larger than the declared size but smaller than the buffer size.
726     */
727    if (reg_file != TGSI_FILE_CONSTANT) {
728       assert(index_limit >= 0);
729       max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
730                                          uint_bld->type, index_limit);
731 
732       assert(!uint_bld->type.sign);
733       index = lp_build_min(uint_bld, index, max_index);
734    }
735 
736    return index;
737 }
738 
739 static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type stype)740 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
741 	       enum tgsi_opcode_type stype)
742 {
743    struct lp_build_context *bld_fetch;
744 
745    switch (stype) {
746    case TGSI_TYPE_FLOAT:
747    case TGSI_TYPE_UNTYPED:
748       bld_fetch = &bld_base->base;
749       break;
750    case TGSI_TYPE_UNSIGNED:
751       bld_fetch = &bld_base->uint_bld;
752       break;
753    case TGSI_TYPE_SIGNED:
754       bld_fetch = &bld_base->int_bld;
755       break;
756    case TGSI_TYPE_DOUBLE:
757       bld_fetch = &bld_base->dbl_bld;
758       break;
759    case TGSI_TYPE_UNSIGNED64:
760       bld_fetch = &bld_base->uint64_bld;
761       break;
762    case TGSI_TYPE_SIGNED64:
763       bld_fetch = &bld_base->int64_bld;
764       break;
765    case TGSI_TYPE_VOID:
766    default:
767       assert(0);
768       bld_fetch = NULL;
769       break;
770    }
771    return bld_fetch;
772 }
773 
774 static LLVMValueRef
get_soa_array_offsets(struct lp_build_context * uint_bld,LLVMValueRef indirect_index,unsigned chan_index,bool need_perelement_offset)775 get_soa_array_offsets(struct lp_build_context *uint_bld,
776                       LLVMValueRef indirect_index,
777                       unsigned chan_index,
778                       bool need_perelement_offset)
779 {
780    struct gallivm_state *gallivm = uint_bld->gallivm;
781    LLVMValueRef chan_vec =
782       lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
783    LLVMValueRef length_vec =
784       lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
785    LLVMValueRef index_vec;
786 
787    /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
788    index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
789    index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
790    index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
791 
792    if (need_perelement_offset) {
793       LLVMValueRef pixel_offsets;
794       unsigned i;
795      /* build pixel offset vector: {0, 1, 2, 3, ...} */
796       pixel_offsets = uint_bld->undef;
797       for (i = 0; i < uint_bld->type.length; i++) {
798          LLVMValueRef ii = lp_build_const_int32(gallivm, i);
799          pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
800                                                 ii, ii, "");
801       }
802       index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
803    }
804    return index_vec;
805 }
806 
807 static LLVMValueRef
emit_fetch_constant(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle_in)808 emit_fetch_constant(
809    struct lp_build_tgsi_context * bld_base,
810    const struct tgsi_full_src_register * reg,
811    enum tgsi_opcode_type stype,
812    unsigned swizzle_in)
813 {
814    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
815    struct gallivm_state *gallivm = bld_base->base.gallivm;
816    LLVMBuilderRef builder = gallivm->builder;
817    struct lp_build_context *uint_bld = &bld_base->uint_bld;
818    unsigned dimension = 0;
819    LLVMValueRef consts_ptr;
820    LLVMValueRef num_consts;
821    LLVMValueRef res;
822    unsigned swizzle = swizzle_in & 0xffff;
823 
824    /* XXX: Handle fetching xyzw components as a vector */
825    assert(swizzle != ~0u);
826 
827    if (reg->Register.Dimension) {
828       assert(!reg->Dimension.Indirect);
829       dimension = reg->Dimension.Index;
830       assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
831    }
832 
833    consts_ptr = bld->consts[dimension];
834    num_consts = bld->consts_sizes[dimension];
835 
836    if (reg->Register.Indirect) {
837       LLVMValueRef indirect_index;
838       LLVMValueRef swizzle_vec =
839          lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
840       LLVMValueRef index_vec;  /* index into the const buffer */
841       LLVMValueRef overflow_mask;
842       LLVMValueRef index_vec2 = NULL;
843 
844       indirect_index = get_indirect_index(bld,
845                                           reg->Register.File,
846                                           reg->Register.Index,
847                                           &reg->Indirect,
848                                           bld->bld_base.info->file_max[reg->Register.File]);
849 
850       /* All fetches are from the same constant buffer, so
851        * we need to propagate the size to a vector to do a
852        * vector comparison */
853       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
854       /* Construct a boolean vector telling us which channels
855        * overflow the bound constant buffer */
856       overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
857                                        indirect_index, num_consts);
858 
859       /* index_vec = indirect_index * 4 + swizzle */
860       index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
861       index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
862 
863       if (tgsi_type_is_64bit(stype)) {
864          LLVMValueRef swizzle_vec2;
865          swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
866          index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
867          index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
868       }
869       /* Gather values from the constant buffer */
870       res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
871    }
872    else {
873       LLVMValueRef index;  /* index into the const buffer */
874       LLVMValueRef scalar, scalar_ptr;
875       struct lp_build_context *bld_broad = &bld_base->base;
876       index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
877 
878       scalar_ptr = LLVMBuildGEP2(builder, bld_broad->elem_type, consts_ptr,
879                                  &index, 1, "");
880 
881       if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
882 
883          LLVMValueRef scalar2, scalar2_ptr;
884          LLVMValueRef shuffles[2];
885          index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
886 
887          scalar2_ptr = LLVMBuildGEP2(builder, bld_broad->elem_type, consts_ptr,
888                                     &index, 1, "");
889 
890          scalar = LLVMBuildLoad2(builder, bld_broad->elem_type, scalar_ptr, "");
891          scalar2 = LLVMBuildLoad2(builder, bld_broad->elem_type, scalar2_ptr, "");
892          shuffles[0] = lp_build_const_int32(gallivm, 0);
893          shuffles[1] = lp_build_const_int32(gallivm, 1);
894 
895          res = LLVMGetUndef(LLVMVectorType(bld_broad->elem_type, bld_base->base.type.length * 2));
896          res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
897          res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
898       } else {
899         if (stype == TGSI_TYPE_DOUBLE) {
900            LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
901            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
902            bld_broad = &bld_base->dbl_bld;
903         } else if (stype == TGSI_TYPE_UNSIGNED64) {
904            LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
905            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
906            bld_broad = &bld_base->uint64_bld;
907         } else if (stype == TGSI_TYPE_SIGNED64) {
908            LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
909            scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
910            bld_broad = &bld_base->int64_bld;
911         }
912         scalar = LLVMBuildLoad2(builder, bld_broad->elem_type, scalar_ptr, "");
913         res = lp_build_broadcast_scalar(bld_broad, scalar);
914       }
915 
916    }
917 
918    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
919       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
920       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
921    }
922 
923    return res;
924 }
925 
926 /**
927  * Fetch 64-bit values from two separate channels.
928  * 64-bit values are stored split across two channels, like xy and zw.
929  * This function creates a set of vec_length*2 floats,
930  * extracts the values from the two channels,
931  * puts them in the correct place, then casts to vec_length 64-bits.
932  */
933 static LLVMValueRef
emit_fetch_64bit(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type stype,LLVMValueRef input,LLVMValueRef input2)934 emit_fetch_64bit(
935    struct lp_build_tgsi_context * bld_base,
936    enum tgsi_opcode_type stype,
937    LLVMValueRef input,
938    LLVMValueRef input2)
939 {
940    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
941    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
942    LLVMBuilderRef builder = gallivm->builder;
943    LLVMValueRef res;
944    struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
945    int i;
946    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
947    int len = bld_base->base.type.length * 2;
948    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
949 
950    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
951       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
952       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
953    }
954    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
955 
956    return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
957 }
958 
959 static LLVMValueRef
emit_fetch_immediate(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle_in)960 emit_fetch_immediate(
961    struct lp_build_tgsi_context * bld_base,
962    const struct tgsi_full_src_register * reg,
963    enum tgsi_opcode_type stype,
964    unsigned swizzle_in)
965 {
966    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
967    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
968    LLVMBuilderRef builder = gallivm->builder;
969    LLVMValueRef res = NULL;
970    unsigned swizzle = swizzle_in & 0xffff;
971 
972    if (bld->use_immediates_array || reg->Register.Indirect) {
973       LLVMValueRef imms_array;
974       LLVMTypeRef fptr_type;
975 
976       /* cast imms_array pointer to float* */
977       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
978       imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
979 
980       if (reg->Register.Indirect) {
981          LLVMValueRef indirect_index;
982          LLVMValueRef index_vec;  /* index into the immediate register array */
983          LLVMValueRef index_vec2 = NULL;
984          indirect_index = get_indirect_index(bld,
985                                              reg->Register.File,
986                                              reg->Register.Index,
987                                              &reg->Indirect,
988                                              bld->bld_base.info->file_max[reg->Register.File]);
989          /*
990           * Unlike for other reg classes, adding pixel offsets is unnecessary -
991           * immediates are stored as full vectors (FIXME??? - might be better
992           * to store them the same as constants) but all elements are the same
993           * in any case.
994           */
995          index_vec = get_soa_array_offsets(&bld_base->uint_bld,
996                                            indirect_index,
997                                            swizzle,
998                                            false);
999          if (tgsi_type_is_64bit(stype))
1000             index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1001                                               indirect_index,
1002                                               swizzle_in >> 16,
1003                                               false);
1004          /* Gather values from the immediate register array */
1005          res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
1006       } else {
1007          LLVMValueRef gep[2];
1008          gep[0] = lp_build_const_int32(gallivm, 0);
1009          gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
1010          LLVMValueRef imms_ptr = LLVMBuildGEP2(builder,
1011                                                bld_base->base.vec_type,
1012                                                bld->imms_array, gep, 2, "");
1013          res = LLVMBuildLoad2(builder, bld_base->base.vec_type, imms_ptr, "");
1014 
1015          if (tgsi_type_is_64bit(stype)) {
1016             LLVMValueRef imms_ptr2;
1017             LLVMValueRef res2;
1018             gep[1] = lp_build_const_int32(gallivm,
1019                                           reg->Register.Index * 4 + (swizzle_in >> 16));
1020             imms_ptr2 = LLVMBuildGEP2(builder, bld_base->base.vec_type,
1021                                       bld->imms_array, gep, 2, "");
1022             res2 = LLVMBuildLoad2(builder, bld_base->base.vec_type, imms_ptr2, "");
1023             res = emit_fetch_64bit(bld_base, stype, res, res2);
1024          }
1025       }
1026    }
1027    else {
1028       res = bld->immediates[reg->Register.Index][swizzle];
1029       if (tgsi_type_is_64bit(stype))
1030          res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
1031    }
1032 
1033    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1034       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1035       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1036    }
1037    return res;
1038 }
1039 
1040 static LLVMValueRef
emit_fetch_input(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle_in)1041 emit_fetch_input(
1042    struct lp_build_tgsi_context * bld_base,
1043    const struct tgsi_full_src_register * reg,
1044    enum tgsi_opcode_type stype,
1045    unsigned swizzle_in)
1046 {
1047    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1048    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1049    LLVMBuilderRef builder = gallivm->builder;
1050    LLVMValueRef res;
1051    unsigned swizzle = swizzle_in & 0xffff;
1052 
1053    if (reg->Register.Indirect) {
1054       LLVMValueRef indirect_index;
1055       LLVMValueRef index_vec;  /* index into the input reg array */
1056       LLVMValueRef index_vec2 = NULL;
1057       LLVMValueRef inputs_array;
1058       LLVMTypeRef fptr_type;
1059 
1060       indirect_index = get_indirect_index(bld,
1061                                           reg->Register.File,
1062                                           reg->Register.Index,
1063                                           &reg->Indirect,
1064                                           bld->bld_base.info->file_max[reg->Register.File]);
1065 
1066       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1067                                         indirect_index,
1068                                         swizzle,
1069                                         true);
1070       if (tgsi_type_is_64bit(stype)) {
1071          index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1072                                            indirect_index,
1073                                            swizzle_in >> 16,
1074                                            true);
1075       }
1076       /* cast inputs_array pointer to float* */
1077       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1078       inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
1079 
1080       /* Gather values from the input register array */
1081       res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
1082    } else {
1083       if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
1084          LLVMValueRef lindex = lp_build_const_int32(gallivm,
1085                                         reg->Register.Index * 4 + swizzle);
1086          LLVMValueRef input_ptr = LLVMBuildGEP2(builder, bld_base->base.vec_type,
1087                                                 bld->inputs_array, &lindex, 1, "");
1088 
1089          res = LLVMBuildLoad2(builder, bld_base->base.vec_type, input_ptr, "");
1090          if (tgsi_type_is_64bit(stype)) {
1091             LLVMValueRef lindex1;
1092             LLVMValueRef input_ptr2;
1093             LLVMValueRef res2;
1094 
1095             lindex1 = lp_build_const_int32(gallivm,
1096                                            reg->Register.Index * 4 + (swizzle_in >> 16));
1097             input_ptr2 = LLVMBuildGEP2(builder, bld_base->base.vec_type,
1098                                        bld->inputs_array, &lindex1, 1, "");
1099             res2 = LLVMBuildLoad2(builder, bld_base->base.vec_type, input_ptr2, "");
1100             res = emit_fetch_64bit(bld_base, stype, res, res2);
1101          }
1102       }
1103       else {
1104          res = bld->inputs[reg->Register.Index][swizzle];
1105          if (tgsi_type_is_64bit(stype))
1106             res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
1107       }
1108    }
1109 
1110    assert(res);
1111 
1112    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
1113       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1114       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1115    }
1116 
1117    return res;
1118 }
1119 
1120 
1121 static LLVMValueRef
emit_fetch_gs_input(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle_in)1122 emit_fetch_gs_input(
1123    struct lp_build_tgsi_context * bld_base,
1124    const struct tgsi_full_src_register * reg,
1125    enum tgsi_opcode_type stype,
1126    unsigned swizzle_in)
1127 {
1128    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1129    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1130    const struct tgsi_shader_info *info = bld->bld_base.info;
1131    LLVMBuilderRef builder = gallivm->builder;
1132    LLVMValueRef attrib_index = NULL;
1133    LLVMValueRef vertex_index = NULL;
1134    unsigned swizzle = swizzle_in & 0xffff;
1135    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1136    LLVMValueRef res;
1137 
1138    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1139       /* This is really a system value not a regular input */
1140       assert(!reg->Register.Indirect);
1141       assert(!reg->Dimension.Indirect);
1142       res = bld->system_values.prim_id;
1143       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1144          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1145       }
1146       return res;
1147    }
1148 
1149    if (reg->Register.Indirect) {
1150       /*
1151        * XXX: this is possibly not quite the right value, since file_max may be
1152        * larger than the max attrib index, due to it being the max of declared
1153        * inputs AND the max vertices per prim (which is 6 for tri adj).
1154        * It should however be safe to use (since we always allocate
1155        * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1156        */
1157       int index_limit = info->file_max[reg->Register.File];
1158       attrib_index = get_indirect_index(bld,
1159                                         reg->Register.File,
1160                                         reg->Register.Index,
1161                                         &reg->Indirect,
1162                                         index_limit);
1163    } else {
1164       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1165    }
1166 
1167    if (reg->Dimension.Indirect) {
1168       /*
1169        * A fixed 6 should do as well (which is what we allocate).
1170        */
1171       int index_limit = mesa_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
1172       vertex_index = get_indirect_index(bld,
1173                                         reg->Register.File,
1174                                         reg->Dimension.Index,
1175                                         &reg->DimIndirect,
1176                                         index_limit);
1177    } else {
1178       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1179    }
1180 
1181    res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1182                                     reg->Dimension.Indirect,
1183                                     vertex_index,
1184                                     reg->Register.Indirect,
1185                                     attrib_index,
1186                                     swizzle_index);
1187 
1188    assert(res);
1189    if (tgsi_type_is_64bit(stype)) {
1190       LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1191       LLVMValueRef res2;
1192       res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
1193                                         reg->Dimension.Indirect,
1194                                         vertex_index,
1195                                         reg->Register.Indirect,
1196                                         attrib_index,
1197                                         swizzle_index);
1198       assert(res2);
1199       res = emit_fetch_64bit(bld_base, stype, res, res2);
1200    } else if (stype == TGSI_TYPE_UNSIGNED) {
1201       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1202    } else if (stype == TGSI_TYPE_SIGNED) {
1203       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1204    }
1205 
1206    return res;
1207 }
1208 
1209 static LLVMValueRef
emit_fetch_tcs_input(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle_in)1210 emit_fetch_tcs_input(
1211    struct lp_build_tgsi_context * bld_base,
1212    const struct tgsi_full_src_register * reg,
1213    enum tgsi_opcode_type stype,
1214    unsigned swizzle_in)
1215 {
1216    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1217    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1218    const struct tgsi_shader_info *info = bld->bld_base.info;
1219    LLVMBuilderRef builder = gallivm->builder;
1220    LLVMValueRef attrib_index = NULL;
1221    LLVMValueRef vertex_index = NULL;
1222    unsigned swizzle = swizzle_in & 0xffff;
1223    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1224    LLVMValueRef res;
1225 
1226    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1227       /* This is really a system value not a regular input */
1228       assert(!reg->Register.Indirect);
1229       assert(!reg->Dimension.Indirect);
1230       res = bld->system_values.prim_id;
1231       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1232          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1233       }
1234       return res;
1235    }
1236 
1237    if (reg->Register.Indirect) {
1238       int index_limit = info->file_max[reg->Register.File];
1239       attrib_index = get_indirect_index(bld,
1240                                         reg->Register.File,
1241                                         reg->Register.Index,
1242                                         &reg->Indirect,
1243                                         index_limit);
1244    } else {
1245       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1246    }
1247 
1248    if (reg->Dimension.Indirect) {
1249       vertex_index = get_indirect_index(bld,
1250                                         reg->Register.File,
1251                                         reg->Dimension.Index,
1252                                         &reg->DimIndirect,
1253                                         PIPE_MAX_SHADER_INPUTS);
1254    } else {
1255       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1256    }
1257 
1258    // TCS can read from its own outputs
1259    if (reg->Register.File == TGSI_FILE_OUTPUT) {
1260       res = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1261                                               reg->Dimension.Indirect,
1262                                               vertex_index,
1263                                               reg->Register.Indirect,
1264                                               attrib_index,
1265                                               false,
1266                                               swizzle_index,
1267                                               bld_base->info->output_semantic_name[reg->Register.Index]);
1268    } else {
1269       res = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1270                                              reg->Dimension.Indirect,
1271                                              vertex_index,
1272                                              reg->Register.Indirect,
1273                                              attrib_index,
1274                                              false,
1275                                              swizzle_index);
1276    }
1277 
1278 
1279    assert(res);
1280    if (tgsi_type_is_64bit(stype)) {
1281       LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1282       LLVMValueRef res2;
1283       if (reg->Register.File == TGSI_FILE_OUTPUT) {
1284          res2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1285                                                   reg->Dimension.Indirect,
1286                                                   vertex_index,
1287                                                   reg->Register.Indirect,
1288                                                   attrib_index,
1289                                                   false,
1290                                                   swizzle_index,
1291                                                   bld_base->info->output_semantic_name[reg->Register.Index]);
1292       } else {
1293          res2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
1294                                                  reg->Dimension.Indirect,
1295                                                  vertex_index,
1296                                                  reg->Register.Indirect,
1297                                                  attrib_index,
1298                                                  false,
1299                                                  swizzle_index);
1300       }
1301       assert(res2);
1302       res = emit_fetch_64bit(bld_base, stype, res, res2);
1303    } else if (stype == TGSI_TYPE_UNSIGNED) {
1304       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1305    } else if (stype == TGSI_TYPE_SIGNED) {
1306       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1307    }
1308 
1309    return res;
1310 }
1311 
1312 static LLVMValueRef
emit_fetch_tes_input(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle_in)1313 emit_fetch_tes_input(
1314    struct lp_build_tgsi_context * bld_base,
1315    const struct tgsi_full_src_register * reg,
1316    enum tgsi_opcode_type stype,
1317    unsigned swizzle_in)
1318 {
1319    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1320    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1321    const struct tgsi_shader_info *info = bld->bld_base.info;
1322    LLVMBuilderRef builder = gallivm->builder;
1323    LLVMValueRef attrib_index = NULL;
1324    LLVMValueRef vertex_index = NULL;
1325    unsigned swizzle = swizzle_in & 0xffff;
1326    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
1327    LLVMValueRef res;
1328 
1329    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
1330       /* This is really a system value not a regular input */
1331       assert(!reg->Register.Indirect);
1332       assert(!reg->Dimension.Indirect);
1333       res = bld->system_values.prim_id;
1334       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
1335          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1336       }
1337       return res;
1338    }
1339 
1340    if (reg->Register.Indirect) {
1341       int index_limit = info->file_max[reg->Register.File];
1342       attrib_index = get_indirect_index(bld,
1343                                         reg->Register.File,
1344                                         reg->Register.Index,
1345                                         &reg->Indirect,
1346                                         index_limit);
1347    } else {
1348       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1349    }
1350 
1351    if (reg->Dimension.Indirect) {
1352       vertex_index = get_indirect_index(bld,
1353                                         reg->Register.File,
1354                                         reg->Dimension.Index,
1355                                         &reg->DimIndirect,
1356                                         PIPE_MAX_SHADER_INPUTS);
1357    } else {
1358       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1359    }
1360 
1361    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1362       res = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1363                                      reg->Register.Indirect,
1364                                      attrib_index,
1365                                      swizzle_index);
1366    } else {
1367       res = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1368                                        reg->Dimension.Indirect,
1369                                        vertex_index,
1370                                        reg->Register.Indirect,
1371                                        attrib_index,
1372                                        false,
1373                                        swizzle_index);
1374    }
1375 
1376    assert(res);
1377    if (tgsi_type_is_64bit(stype)) {
1378       LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
1379       LLVMValueRef res2;
1380       if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
1381          res2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1382                                     reg->Register.Indirect,
1383                                     attrib_index,
1384                                     swizzle_index);
1385       }
1386       else {
1387          res2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
1388                                              reg->Dimension.Indirect,
1389                                              vertex_index,
1390                                              reg->Register.Indirect,
1391                                              attrib_index,
1392                                              false,
1393                                              swizzle_index);
1394       }
1395       assert(res2);
1396       res = emit_fetch_64bit(bld_base, stype, res, res2);
1397    } else if (stype == TGSI_TYPE_UNSIGNED) {
1398       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1399    } else if (stype == TGSI_TYPE_SIGNED) {
1400       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1401    }
1402 
1403    return res;
1404 }
1405 
1406 
1407 
1408 static LLVMValueRef
emit_fetch_temporary(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle_in)1409 emit_fetch_temporary(
1410    struct lp_build_tgsi_context * bld_base,
1411    const struct tgsi_full_src_register * reg,
1412    enum tgsi_opcode_type stype,
1413    unsigned swizzle_in)
1414 {
1415    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1416    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1417    LLVMBuilderRef builder = gallivm->builder;
1418    LLVMValueRef res;
1419    unsigned swizzle = swizzle_in & 0xffff;
1420 
1421    if (reg->Register.Indirect) {
1422       LLVMValueRef indirect_index;
1423       LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
1424       LLVMValueRef temps_array;
1425       LLVMTypeRef fptr_type;
1426 
1427       indirect_index = get_indirect_index(bld,
1428                                           reg->Register.File,
1429                                           reg->Register.Index,
1430                                           &reg->Indirect,
1431                                           bld->bld_base.info->file_max[reg->Register.File]);
1432 
1433       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1434                                         indirect_index,
1435                                         swizzle,
1436                                         true);
1437       if (tgsi_type_is_64bit(stype)) {
1438                index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
1439                                                   indirect_index,
1440                                                   swizzle_in >> 16,
1441                                                   true);
1442       }
1443 
1444       /* cast temps_array pointer to float* */
1445       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1446       temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1447 
1448       /* Gather values from the temporary register array */
1449       res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
1450    }
1451    else {
1452       LLVMValueRef temp_ptr;
1453       LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
1454       temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
1455       res = LLVMBuildLoad2(builder, vec_type, temp_ptr, "");
1456 
1457       if (tgsi_type_is_64bit(stype)) {
1458          LLVMValueRef temp_ptr2, res2;
1459 
1460          temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
1461          res2 = LLVMBuildLoad2(builder, vec_type, temp_ptr2, "");
1462          res = emit_fetch_64bit(bld_base, stype, res, res2);
1463       }
1464    }
1465 
1466    if (stype == TGSI_TYPE_SIGNED ||
1467        stype == TGSI_TYPE_UNSIGNED ||
1468        stype == TGSI_TYPE_DOUBLE ||
1469        stype == TGSI_TYPE_SIGNED64 ||
1470        stype == TGSI_TYPE_UNSIGNED64) {
1471       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
1472       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
1473    }
1474 
1475    return res;
1476 }
1477 
1478 static LLVMValueRef
emit_fetch_system_value(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle_in)1479 emit_fetch_system_value(
1480    struct lp_build_tgsi_context * bld_base,
1481    const struct tgsi_full_src_register * reg,
1482    enum tgsi_opcode_type stype,
1483    unsigned swizzle_in)
1484 {
1485    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1486    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1487    const struct tgsi_shader_info *info = bld->bld_base.info;
1488    LLVMBuilderRef builder = gallivm->builder;
1489    LLVMValueRef res;
1490    enum tgsi_opcode_type atype; // Actual type of the value
1491    unsigned swizzle = swizzle_in & 0xffff;
1492 
1493    assert(!reg->Register.Indirect);
1494 
1495    switch (info->system_value_semantic_name[reg->Register.Index]) {
1496    case TGSI_SEMANTIC_INSTANCEID:
1497       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1498       atype = TGSI_TYPE_UNSIGNED;
1499       break;
1500 
1501    case TGSI_SEMANTIC_VERTEXID:
1502       res = bld->system_values.vertex_id;
1503       atype = TGSI_TYPE_UNSIGNED;
1504       break;
1505 
1506    case TGSI_SEMANTIC_VERTEXID_NOBASE:
1507       res = bld->system_values.vertex_id_nobase;
1508       atype = TGSI_TYPE_UNSIGNED;
1509       break;
1510 
1511    case TGSI_SEMANTIC_BASEVERTEX:
1512       res = bld->system_values.basevertex;
1513       atype = TGSI_TYPE_UNSIGNED;
1514       break;
1515 
1516    case TGSI_SEMANTIC_BASEINSTANCE:
1517       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1518       atype = TGSI_TYPE_UNSIGNED;
1519       break;
1520 
1521    case TGSI_SEMANTIC_PRIMID:
1522       res = bld->system_values.prim_id;
1523       atype = TGSI_TYPE_UNSIGNED;
1524       break;
1525 
1526    case TGSI_SEMANTIC_INVOCATIONID:
1527       if (info->processor == PIPE_SHADER_TESS_CTRL)
1528          res = bld->system_values.invocation_id;
1529       else
1530          res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1531       atype = TGSI_TYPE_UNSIGNED;
1532       break;
1533 
1534    case TGSI_SEMANTIC_HELPER_INVOCATION:
1535       res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
1536       atype = TGSI_TYPE_UNSIGNED;
1537       break;
1538 
1539    case TGSI_SEMANTIC_THREAD_ID:
1540       res = bld->system_values.thread_id[swizzle];
1541       atype = TGSI_TYPE_UNSIGNED;
1542       break;
1543 
1544    case TGSI_SEMANTIC_BLOCK_ID:
1545       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_id[swizzle]);
1546       atype = TGSI_TYPE_UNSIGNED;
1547       break;
1548 
1549    case TGSI_SEMANTIC_GRID_SIZE:
1550       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.grid_size[swizzle]);
1551       atype = TGSI_TYPE_UNSIGNED;
1552       break;
1553 
1554    case TGSI_SEMANTIC_TESSCOORD:
1555       {
1556          LLVMValueRef index[] = { lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, swizzle_in) };
1557          LLVMValueRef array_indexed = LLVMBuildGEP2(gallivm->builder, bld->bld_base.base.vec_type,
1558                                                     bld->system_values.tess_coord, index, 2, "tess_coord_array_indexed");
1559          res = LLVMBuildLoad2(builder, bld->bld_base.base.vec_type, array_indexed, "tess_coord");
1560       }
1561       atype = TGSI_TYPE_FLOAT;
1562       break;
1563 
1564    case TGSI_SEMANTIC_FACE:
1565       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1566       atype = TGSI_TYPE_UNSIGNED;
1567       break;
1568 
1569   case TGSI_SEMANTIC_DRAWID:
1570       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1571       atype = TGSI_TYPE_UNSIGNED;
1572       break;
1573 
1574   case TGSI_SEMANTIC_SAMPLEID:
1575       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
1576       atype = TGSI_TYPE_UNSIGNED;
1577       break;
1578 
1579    case TGSI_SEMANTIC_TESSOUTER:
1580       res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1581                                        bld->system_values.tess_outer,
1582                                        lp_build_const_int32(gallivm, swizzle_in));
1583       atype = TGSI_TYPE_FLOAT;
1584       break;
1585 
1586    case TGSI_SEMANTIC_TESSINNER:
1587       res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
1588                                        bld->system_values.tess_inner,
1589                                        lp_build_const_int32(gallivm, swizzle_in));
1590       atype = TGSI_TYPE_FLOAT;
1591       break;
1592 
1593    case TGSI_SEMANTIC_VERTICESIN:
1594       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.vertices_in);
1595       atype = TGSI_TYPE_UNSIGNED;
1596       break;
1597 
1598    default:
1599       assert(!"unexpected semantic in emit_fetch_system_value");
1600       res = bld_base->base.zero;
1601       atype = TGSI_TYPE_FLOAT;
1602       break;
1603    }
1604 
1605    if (atype != stype) {
1606       if (stype == TGSI_TYPE_FLOAT) {
1607          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
1608       } else if (stype == TGSI_TYPE_UNSIGNED) {
1609          res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
1610       } else if (stype == TGSI_TYPE_SIGNED) {
1611          res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
1612       }
1613    }
1614 
1615    return res;
1616 }
1617 
1618 /**
1619  * Register fetch with derivatives.
1620  */
1621 static void
emit_fetch_deriv(struct lp_build_tgsi_soa_context * bld,LLVMValueRef src,LLVMValueRef * res,LLVMValueRef * ddx,LLVMValueRef * ddy)1622 emit_fetch_deriv(
1623    struct lp_build_tgsi_soa_context *bld,
1624    LLVMValueRef src,
1625    LLVMValueRef *res,
1626    LLVMValueRef *ddx,
1627    LLVMValueRef *ddy)
1628 {
1629    if (res)
1630       *res = src;
1631 
1632    /* TODO: use interpolation coeffs for inputs */
1633 
1634    if (ddx)
1635       *ddx = lp_build_ddx(&bld->bld_base.base, src);
1636 
1637    if (ddy)
1638       *ddy = lp_build_ddy(&bld->bld_base.base, src);
1639 }
1640 
1641 /**
1642  * store an array of vec-length 64-bit into two arrays of vec_length floats
1643  * i.e.
1644  * value is d0, d1, d2, d3 etc.
1645  * each 64-bit has high and low pieces x, y
1646  * so gets stored into the separate channels as:
1647  * chan_ptr = d0.x, d1.x, d2.x, d3.x
1648  * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
1649  */
1650 static void
emit_store_64bit_chan(struct lp_build_tgsi_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef value)1651 emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
1652                       LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
1653                       LLVMValueRef value)
1654 {
1655    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1656    struct gallivm_state *gallivm = bld_base->base.gallivm;
1657    LLVMBuilderRef builder = gallivm->builder;
1658    struct lp_build_context *float_bld = &bld_base->base;
1659    unsigned i;
1660    LLVMValueRef temp, temp2;
1661    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
1662    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
1663 
1664    for (i = 0; i < bld_base->base.type.length; i++) {
1665       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
1666       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
1667    }
1668 
1669    temp = LLVMBuildShuffleVector(builder, value,
1670                                  LLVMGetUndef(LLVMTypeOf(value)),
1671                                  LLVMConstVector(shuffles,
1672                                                  bld_base->base.type.length),
1673                                  "");
1674    temp2 = LLVMBuildShuffleVector(builder, value,
1675                                   LLVMGetUndef(LLVMTypeOf(value)),
1676                                   LLVMConstVector(shuffles2,
1677                                                   bld_base->base.type.length),
1678                                   "");
1679 
1680    lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
1681    lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
1682 }
1683 
1684 static void
emit_store_output(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type dtype,const struct tgsi_full_dst_register * reg,unsigned index,unsigned chan_index,LLVMValueRef indirect_index,LLVMValueRef value)1685 emit_store_output(struct lp_build_tgsi_context *bld_base,
1686                   enum tgsi_opcode_type dtype,
1687                   const struct tgsi_full_dst_register *reg,
1688                   unsigned index,
1689                   unsigned chan_index,
1690                   LLVMValueRef indirect_index,
1691                   LLVMValueRef value)
1692 {
1693    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1694    struct gallivm_state *gallivm = bld_base->base.gallivm;
1695    LLVMBuilderRef builder = gallivm->builder;
1696    struct lp_build_context *float_bld = &bld_base->base;
1697 
1698    /* Outputs are always stored as floats */
1699    value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1700 
1701    if (reg->Register.Indirect) {
1702       LLVMValueRef index_vec;  /* indexes into the output registers */
1703       LLVMValueRef outputs_array;
1704       LLVMTypeRef fptr_type;
1705 
1706       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1707                                           indirect_index,
1708                                           chan_index,
1709                                           true);
1710 
1711       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1712       outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
1713 
1714       /* Scatter store values into output registers */
1715       emit_mask_scatter(bld, outputs_array, index_vec, value,
1716                         &bld->exec_mask);
1717    }
1718    else {
1719       assert(LLVMTypeOf(value) == float_bld->vec_type);
1720       LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
1721                                                 chan_index);
1722 
1723       if (tgsi_type_is_64bit(dtype)) {
1724          LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
1725                                                    chan_index + 1);
1726          emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
1727                                  value);
1728       } else
1729          lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
1730    }
1731 }
1732 
1733 static void
emit_store_tcs_output(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type dtype,const struct tgsi_full_dst_register * reg,unsigned index,unsigned chan_index,LLVMValueRef indirect_index,LLVMValueRef value)1734 emit_store_tcs_output(struct lp_build_tgsi_context *bld_base,
1735                       enum tgsi_opcode_type dtype,
1736                       const struct tgsi_full_dst_register *reg,
1737                       unsigned index,
1738                       unsigned chan_index,
1739                       LLVMValueRef indirect_index,
1740                       LLVMValueRef value)
1741 {
1742    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1743    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
1744    const struct tgsi_shader_info *info = bld->bld_base.info;
1745    LLVMValueRef attrib_index = NULL;
1746    LLVMValueRef vertex_index = NULL;
1747    LLVMValueRef channel_index = NULL;
1748 
1749    if (reg->Register.Indirect) {
1750       /*
1751        * XXX: this is possibly not quite the right value, since file_max may be
1752        * larger than the max attrib index, due to it being the max of declared
1753        * inputs AND the max vertices per prim (which is 6 for tri adj).
1754        * It should however be safe to use (since we always allocate
1755        * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
1756        */
1757       int index_limit = info->file_max[reg->Register.File];
1758       attrib_index = get_indirect_index(bld,
1759                                         reg->Register.File,
1760                                         reg->Register.Index,
1761                                         &reg->Indirect,
1762                                         index_limit);
1763    } else {
1764       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
1765    }
1766 
1767    if (reg->Dimension.Indirect) {
1768       vertex_index = get_indirect_index(bld,
1769                                         reg->Register.File,
1770                                         reg->Dimension.Index,
1771                                         &reg->DimIndirect,
1772                                         PIPE_MAX_SHADER_OUTPUTS);
1773    } else {
1774       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
1775    }
1776 
1777    channel_index = lp_build_const_int32(gallivm, chan_index);
1778 
1779    assert(bld->tcs_iface->emit_store_output);
1780    bld->tcs_iface->emit_store_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
1781                                           bld_base->info->output_semantic_name[reg->Register.Index],
1782                                           reg->Dimension.Indirect,
1783                                           vertex_index,
1784                                           reg->Register.Indirect,
1785                                           attrib_index,
1786                                           false,
1787                                           channel_index,
1788                                           value,
1789                                           mask_vec(bld_base));
1790 }
1791 
1792 static void
emit_store_temp(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type dtype,const struct tgsi_full_dst_register * reg,unsigned index,unsigned chan_index,LLVMValueRef indirect_index,LLVMValueRef value)1793 emit_store_temp(struct lp_build_tgsi_context *bld_base,
1794                   enum tgsi_opcode_type dtype,
1795                   const struct tgsi_full_dst_register *reg,
1796                   unsigned index,
1797                   unsigned chan_index,
1798                   LLVMValueRef indirect_index,
1799                   LLVMValueRef value)
1800 {
1801    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1802    struct gallivm_state *gallivm = bld_base->base.gallivm;
1803    LLVMBuilderRef builder = gallivm->builder;
1804    struct lp_build_context *float_bld = &bld_base->base;
1805 
1806    /* Temporaries are always stored as floats */
1807    if (!tgsi_type_is_64bit(dtype))
1808       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1809    else
1810       value = LLVMBuildBitCast(builder, value,  LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1811 
1812    if (reg->Register.Indirect) {
1813       LLVMValueRef index_vec;  /* indexes into the temp registers */
1814       LLVMValueRef temps_array;
1815       LLVMTypeRef fptr_type;
1816 
1817       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1818                                           indirect_index,
1819                                           chan_index,
1820                                           true);
1821 
1822       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1823       temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1824 
1825       /* Scatter store values into temp registers */
1826       emit_mask_scatter(bld, temps_array, index_vec, value,
1827                         &bld->exec_mask);
1828    }
1829    else {
1830       LLVMValueRef temp_ptr;
1831       temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1832 
1833       if (tgsi_type_is_64bit(dtype)) {
1834          LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1835                                                       reg->Register.Index,
1836                                                       chan_index + 1);
1837          emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1838                                  value);
1839       }
1840       else
1841          lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1842    }
1843 }
1844 
1845 static void
emit_store_address(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type dtype,const struct tgsi_full_dst_register * reg,unsigned index,unsigned chan_index,LLVMValueRef indirect_index,LLVMValueRef value)1846 emit_store_address(struct lp_build_tgsi_context *bld_base,
1847                    enum tgsi_opcode_type dtype,
1848                    const struct tgsi_full_dst_register *reg,
1849                    unsigned index,
1850                    unsigned chan_index,
1851                    LLVMValueRef indirect_index,
1852                    LLVMValueRef value)
1853 {
1854    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1855    struct gallivm_state *gallivm = bld_base->base.gallivm;
1856    LLVMBuilderRef builder = gallivm->builder;
1857    struct lp_build_context *int_bld = &bld_base->int_bld;
1858 
1859    assert(dtype == TGSI_TYPE_SIGNED);
1860    assert(LLVMTypeOf(value) == int_bld->vec_type);
1861    value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1862    lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1863                         bld->addr[reg->Register.Index][chan_index]);
1864 }
1865 
1866 /**
1867  * Register store.
1868  */
1869 static void
emit_store_chan(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,unsigned index,unsigned chan_index,LLVMValueRef value)1870 emit_store_chan(
1871    struct lp_build_tgsi_context *bld_base,
1872    const struct tgsi_full_instruction *inst,
1873    unsigned index,
1874    unsigned chan_index,
1875    LLVMValueRef value)
1876 {
1877    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1878    struct gallivm_state *gallivm = bld_base->base.gallivm;
1879    LLVMBuilderRef builder = gallivm->builder;
1880    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1881    struct lp_build_context *float_bld = &bld_base->base;
1882    LLVMValueRef indirect_index = NULL;
1883    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1884 
1885    /*
1886     * Apply saturation.
1887     *
1888     * It is always assumed to be float.
1889     */
1890    if (inst->Instruction.Saturate) {
1891       assert(dtype == TGSI_TYPE_FLOAT ||
1892              dtype == TGSI_TYPE_UNTYPED);
1893       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1894       value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1895    }
1896 
1897    if (reg->Register.Indirect) {
1898       /*
1899        * Currently the mesa/st doesn't generate indirect stores
1900        * to 64-bit values, it normally uses MOV to do indirect stores.
1901        */
1902       assert(!tgsi_type_is_64bit(dtype));
1903       indirect_index = get_indirect_index(bld,
1904                                           reg->Register.File,
1905                                           reg->Register.Index,
1906                                           &reg->Indirect,
1907                                           bld->bld_base.info->file_max[reg->Register.File]);
1908    } else {
1909       assert(reg->Register.Index <=
1910                              bld_base->info->file_max[reg->Register.File]);
1911    }
1912 
1913    if (DEBUG_EXECUTION) {
1914       emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1915    }
1916 
1917    assert(bld_base->emit_store_reg_funcs[reg->Register.File]);
1918    bld_base->emit_store_reg_funcs[reg->Register.File](bld_base,
1919                                                       dtype,
1920                                                       reg,
1921                                                       index,
1922                                                       chan_index,
1923                                                       indirect_index,
1924                                                       value);
1925 
1926    (void)dtype;
1927 }
1928 
1929 /*
1930  * Called at the beginning of the translation of each TGSI instruction, to
1931  * emit some debug code.
1932  */
1933 static void
emit_debug(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info)1934 emit_debug(
1935    struct lp_build_tgsi_context * bld_base,
1936    const struct tgsi_full_instruction * inst,
1937    const struct tgsi_opcode_info * info)
1938 
1939 {
1940    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1941 
1942    if (DEBUG_EXECUTION) {
1943       /*
1944        * Dump the TGSI instruction.
1945        */
1946 
1947       struct gallivm_state *gallivm = bld_base->base.gallivm;
1948       char buf[512];
1949       buf[0] = '$';
1950       buf[1] = ' ';
1951       tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1952       lp_build_printf(gallivm, buf);
1953 
1954       /* Dump the execution mask.
1955        */
1956       if (bld->exec_mask.has_mask) {
1957          lp_build_print_value(gallivm, "    mask = ", bld->exec_mask.exec_mask);
1958       }
1959    }
1960 }
1961 
1962 static void
emit_store(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info,unsigned index,LLVMValueRef dst[4])1963 emit_store(
1964    struct lp_build_tgsi_context * bld_base,
1965    const struct tgsi_full_instruction * inst,
1966    const struct tgsi_opcode_info * info,
1967    unsigned index,
1968    LLVMValueRef dst[4])
1969 
1970 {
1971    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1972 
1973    unsigned writemask = inst->Dst[index].Register.WriteMask;
1974    while (writemask) {
1975       unsigned chan_index = u_bit_scan(&writemask);
1976       if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1977           continue;
1978       emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1979    }
1980 }
1981 
1982 static unsigned
tgsi_to_pipe_tex_target(enum tgsi_texture_type tgsi_target)1983 tgsi_to_pipe_tex_target(enum tgsi_texture_type tgsi_target)
1984 {
1985    switch (tgsi_target) {
1986    case TGSI_TEXTURE_BUFFER:
1987       return PIPE_BUFFER;
1988    case TGSI_TEXTURE_1D:
1989    case TGSI_TEXTURE_SHADOW1D:
1990       return PIPE_TEXTURE_1D;
1991    case TGSI_TEXTURE_2D:
1992    case TGSI_TEXTURE_SHADOW2D:
1993    case TGSI_TEXTURE_2D_MSAA:
1994       return PIPE_TEXTURE_2D;
1995    case TGSI_TEXTURE_3D:
1996       return PIPE_TEXTURE_3D;
1997    case TGSI_TEXTURE_CUBE:
1998    case TGSI_TEXTURE_SHADOWCUBE:
1999       return PIPE_TEXTURE_CUBE;
2000    case TGSI_TEXTURE_RECT:
2001    case TGSI_TEXTURE_SHADOWRECT:
2002       return PIPE_TEXTURE_RECT;
2003    case TGSI_TEXTURE_1D_ARRAY:
2004    case TGSI_TEXTURE_SHADOW1D_ARRAY:
2005       return PIPE_TEXTURE_1D_ARRAY;
2006    case TGSI_TEXTURE_2D_ARRAY:
2007    case TGSI_TEXTURE_SHADOW2D_ARRAY:
2008    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2009       return PIPE_TEXTURE_2D_ARRAY;
2010    case TGSI_TEXTURE_CUBE_ARRAY:
2011    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2012       return PIPE_TEXTURE_CUBE_ARRAY;
2013    default:
2014       assert(0);
2015       return PIPE_BUFFER;
2016    }
2017 }
2018 
2019 
2020 static enum lp_sampler_lod_property
lp_build_lod_property(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,unsigned src_op)2021 lp_build_lod_property(
2022    struct lp_build_tgsi_context *bld_base,
2023    const struct tgsi_full_instruction *inst,
2024    unsigned src_op)
2025 {
2026    const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2027    enum lp_sampler_lod_property lod_property;
2028 
2029    /*
2030     * Not much we can do here. We could try catching inputs declared
2031     * with constant interpolation but not sure it's worth it - since for
2032     * TEX opcodes as well as FETCH/LD the lod comes from same reg as
2033     * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
2034     * like the constant/immediate recognition below.
2035     * What seems to be of more value would be to recognize temps holding
2036     * broadcasted scalars but no way we can do it.
2037     * Tried asking llvm but without any success (using LLVMIsConstant
2038     * even though this isn't exactly what we'd need), even as simple as
2039     * IMM[0] UINT32 (0,-1,0,0)
2040     * MOV TEMP[0] IMM[0].yyyy
2041     * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2042     * doesn't work.
2043     * This means there's ZERO chance this will ever catch a scalar lod
2044     * with traditional tex opcodes as well as texel fetches, since the lod
2045     * comes from the same reg as coords (except some test shaders using
2046     * constant coords maybe).
2047     * There's at least hope for sample opcodes as well as size queries.
2048     */
2049    if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ ||
2050        reg->Register.File == TGSI_FILE_CONSTANT ||
2051        reg->Register.File == TGSI_FILE_IMMEDIATE) {
2052       lod_property = LP_SAMPLER_LOD_SCALAR;
2053    }
2054    else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2055       if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2056          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2057       }
2058       else {
2059          lod_property = LP_SAMPLER_LOD_PER_QUAD;
2060       }
2061    }
2062    else {
2063       /* never use scalar (per-quad) lod the results are just too wrong. */
2064       lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2065    }
2066    return lod_property;
2067 }
2068 
2069 
2070 /**
2071  * High-level instruction translators.
2072  */
2073 
2074 static void
emit_tex(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier,LLVMValueRef * texel,unsigned sampler_reg,enum lp_sampler_op_type sampler_op)2075 emit_tex( struct lp_build_tgsi_soa_context *bld,
2076           const struct tgsi_full_instruction *inst,
2077           enum lp_build_tex_modifier modifier,
2078           LLVMValueRef *texel,
2079           unsigned sampler_reg,
2080           enum lp_sampler_op_type sampler_op)
2081 {
2082    unsigned unit = inst->Src[sampler_reg].Register.Index;
2083    LLVMValueRef oow = NULL;
2084    LLVMValueRef lod = NULL;
2085    LLVMValueRef coords[5];
2086    LLVMValueRef offsets[3] = { NULL };
2087    struct lp_derivatives derivs;
2088    struct lp_sampler_params params = { 0 };
2089    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2090    unsigned num_derivs, num_offsets, i;
2091    unsigned shadow_coord = 0;
2092    unsigned layer_coord = 0;
2093    unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2094 
2095    if (!bld->sampler) {
2096       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2097       for (i = 0; i < 4; i++) {
2098          texel[i] = bld->bld_base.base.undef;
2099       }
2100       return;
2101    }
2102 
2103    switch (inst->Texture.Texture) {
2104    case TGSI_TEXTURE_1D_ARRAY:
2105       layer_coord = 1;
2106       FALLTHROUGH;
2107    case TGSI_TEXTURE_1D:
2108       num_offsets = 1;
2109       num_derivs = 1;
2110       break;
2111    case TGSI_TEXTURE_2D_ARRAY:
2112       layer_coord = 2;
2113       FALLTHROUGH;
2114    case TGSI_TEXTURE_2D:
2115    case TGSI_TEXTURE_RECT:
2116       num_offsets = 2;
2117       num_derivs = 2;
2118       break;
2119    case TGSI_TEXTURE_SHADOW1D_ARRAY:
2120       layer_coord = 1;
2121       FALLTHROUGH;
2122    case TGSI_TEXTURE_SHADOW1D:
2123       shadow_coord = 2;
2124       num_offsets = 1;
2125       num_derivs = 1;
2126       break;
2127    case TGSI_TEXTURE_SHADOW2D_ARRAY:
2128       layer_coord = 2;
2129       shadow_coord = 3;
2130       num_offsets = 2;
2131       num_derivs = 2;
2132       break;
2133    case TGSI_TEXTURE_SHADOW2D:
2134    case TGSI_TEXTURE_SHADOWRECT:
2135       shadow_coord = 2;
2136       num_offsets = 2;
2137       num_derivs = 2;
2138       break;
2139    case TGSI_TEXTURE_CUBE:
2140       num_offsets = 2;
2141       num_derivs = 3;
2142       break;
2143    case TGSI_TEXTURE_3D:
2144       num_offsets = 3;
2145       num_derivs = 3;
2146       break;
2147    case TGSI_TEXTURE_SHADOWCUBE:
2148       shadow_coord = 3;
2149       num_offsets = 2;
2150       num_derivs = 3;
2151       break;
2152    case TGSI_TEXTURE_CUBE_ARRAY:
2153       num_offsets = 2;
2154       num_derivs = 3;
2155       layer_coord = 3;
2156       break;
2157    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2158       num_offsets = 2;
2159       num_derivs = 3;
2160       layer_coord = 3;
2161       shadow_coord = 4; /* shadow coord special different reg */
2162       break;
2163    case TGSI_TEXTURE_2D_MSAA:
2164    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2165    default:
2166       assert(0);
2167       return;
2168    }
2169 
2170    /* Note lod and especially projected are illegal in a LOT of cases */
2171    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2172        modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2173       if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
2174          lod = bld->bld_base.base.zero;
2175       } else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2176                  inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2177          /* note that shadow cube array with bias/explicit lod does not exist */
2178          lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2179       }
2180       else {
2181          lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2182       }
2183       if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2184          sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2185       }
2186       else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2187          sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2188       }
2189       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2190    }
2191 
2192    if (sampler_op == LP_SAMPLER_OP_GATHER) {
2193       uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
2194       sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
2195    }
2196    if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2197       oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2198       oow = lp_build_rcp(&bld->bld_base.base, oow);
2199    }
2200 
2201    for (i = 0; i < num_derivs; i++) {
2202       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2203       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2204          coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2205    }
2206    for (i = num_derivs; i < 5; i++) {
2207       coords[i] = bld->bld_base.base.undef;
2208    }
2209 
2210    /* Layer coord always goes into 3rd slot, except for cube map arrays */
2211    if (layer_coord) {
2212       if (layer_coord == 3) {
2213          coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2214       }
2215       else {
2216          coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2217       }
2218       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2219          coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2220    }
2221    /* Shadow coord occupies always 5th slot. */
2222    if (shadow_coord) {
2223       sample_key |= LP_SAMPLER_SHADOW;
2224       if (shadow_coord == 4) {
2225          coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2226       }
2227       else {
2228          coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2229       }
2230       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2231          coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2232    }
2233 
2234    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2235       unsigned dim;
2236       sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2237       for (dim = 0; dim < num_derivs; ++dim) {
2238          derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2239          derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2240       }
2241       params.derivs = &derivs;
2242       /*
2243        * could also check all src regs if constant but I doubt such
2244        * cases exist in practice.
2245        */
2246       if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2247          if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2248             lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2249          }
2250          else {
2251             lod_property = LP_SAMPLER_LOD_PER_QUAD;
2252          }
2253       }
2254       else {
2255          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2256       }
2257    }
2258    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2259 
2260    /* we don't handle the 4 offset version of tg4 */
2261    if (inst->Texture.NumOffsets == 1) {
2262       unsigned dim;
2263       sample_key |= LP_SAMPLER_OFFSETS;
2264       for (dim = 0; dim < num_offsets; dim++) {
2265          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2266       }
2267    }
2268 
2269    params.type = bld->bld_base.base.type;
2270    params.sample_key = sample_key;
2271    params.texture_index = unit;
2272    params.sampler_index = unit;
2273    params.resources_type = bld->resources_type;
2274    params.resources_ptr = bld->resources_ptr;
2275    params.thread_data_type = bld->thread_data_type;
2276    params.thread_data_ptr = bld->thread_data_ptr;
2277    params.coords = coords;
2278    params.offsets = offsets;
2279    params.lod = lod;
2280    params.texel = texel;
2281 
2282    bld->sampler->emit_tex_sample(bld->sampler,
2283                                  bld->bld_base.base.gallivm,
2284                                  &params);
2285 }
2286 
2287 static void
emit_sample(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier,bool compare,enum lp_sampler_op_type sample_type,LLVMValueRef * texel)2288 emit_sample(struct lp_build_tgsi_soa_context *bld,
2289             const struct tgsi_full_instruction *inst,
2290             enum lp_build_tex_modifier modifier,
2291             bool compare,
2292             enum lp_sampler_op_type sample_type,
2293             LLVMValueRef *texel)
2294 {
2295    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2296    unsigned texture_unit, sampler_unit;
2297    LLVMValueRef lod = NULL;
2298    LLVMValueRef coords[5];
2299    LLVMValueRef offsets[3] = { NULL };
2300    struct lp_derivatives derivs;
2301    struct lp_sampler_params params = { 0 };
2302    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2303 
2304    unsigned num_offsets, num_derivs, i;
2305    unsigned layer_coord = 0;
2306    unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2307 
2308    if (!bld->sampler) {
2309       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2310       for (i = 0; i < 4; i++) {
2311          texel[i] = bld->bld_base.base.undef;
2312       }
2313       return;
2314    }
2315 
2316    /*
2317     * unlike old-style tex opcodes the texture/sampler indices
2318     * always come from src1 and src2 respectively.
2319     */
2320    texture_unit = inst->Src[1].Register.Index;
2321    sampler_unit = inst->Src[2].Register.Index;
2322 
2323    /*
2324     * Note inst->Texture.Texture will contain the number of offsets,
2325     * however the target information is NOT there and comes from the
2326     * declared sampler views instead.
2327     */
2328    switch (bld->sv[texture_unit].Resource) {
2329    case TGSI_TEXTURE_1D:
2330       num_offsets = 1;
2331       num_derivs = 1;
2332       break;
2333    case TGSI_TEXTURE_1D_ARRAY:
2334       layer_coord = 1;
2335       num_offsets = 1;
2336       num_derivs = 1;
2337       break;
2338    case TGSI_TEXTURE_2D:
2339    case TGSI_TEXTURE_RECT:
2340       num_offsets = 2;
2341       num_derivs = 2;
2342       break;
2343    case TGSI_TEXTURE_2D_ARRAY:
2344       layer_coord = 2;
2345       num_offsets = 2;
2346       num_derivs = 2;
2347       break;
2348    case TGSI_TEXTURE_CUBE:
2349       num_offsets = 2;
2350       num_derivs = 3;
2351       break;
2352    case TGSI_TEXTURE_3D:
2353       num_offsets = 3;
2354       num_derivs = 3;
2355       break;
2356    case TGSI_TEXTURE_CUBE_ARRAY:
2357       layer_coord = 3;
2358       num_offsets = 2;
2359       num_derivs = 3;
2360       break;
2361    default:
2362       assert(0);
2363       return;
2364    }
2365 
2366    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2367        modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2368       lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2369       if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2370          sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2371       }
2372       else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2373          sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2374       }
2375       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2376    }
2377    else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2378       /* XXX might be better to explicitly pass the level zero information */
2379       sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2380       lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2381    }
2382 
2383    for (i = 0; i < num_derivs; i++) {
2384       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2385    }
2386    for (i = num_derivs; i < 5; i++) {
2387       coords[i] = bld->bld_base.base.undef;
2388    }
2389 
2390    /* Layer coord always goes into 3rd slot, except for cube map arrays */
2391    if (layer_coord) {
2392       if (layer_coord == 3)
2393          coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2394       else
2395          coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2396    }
2397    /* Shadow coord occupies always 5th slot. */
2398    if (compare) {
2399       sample_key |= LP_SAMPLER_SHADOW;
2400       coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2401    }
2402 
2403    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2404       unsigned dim;
2405       sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2406       for (dim = 0; dim < num_derivs; ++dim) {
2407          derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2408          derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2409       }
2410       params.derivs = &derivs;
2411       /*
2412        * could also check all src regs if constant but I doubt such
2413        * cases exist in practice.
2414        */
2415       if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2416          if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2417             lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2418          }
2419          else {
2420             lod_property = LP_SAMPLER_LOD_PER_QUAD;
2421          }
2422       }
2423       else {
2424          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2425       }
2426    }
2427 
2428    /* some advanced gather instructions (txgo) would require 4 offsets */
2429    if (inst->Texture.NumOffsets == 1) {
2430       unsigned dim;
2431       sample_key |= LP_SAMPLER_OFFSETS;
2432       for (dim = 0; dim < num_offsets; dim++) {
2433          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2434       }
2435    }
2436    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2437 
2438    params.type = bld->bld_base.base.type;
2439    params.sample_key = sample_key;
2440    params.texture_index = texture_unit;
2441    params.sampler_index = sampler_unit;
2442    params.resources_type = bld->resources_type;
2443    params.resources_ptr = bld->resources_ptr;
2444    params.thread_data_type = bld->thread_data_type;
2445    params.thread_data_ptr = bld->thread_data_ptr;
2446    params.coords = coords;
2447    params.offsets = offsets;
2448    params.lod = lod;
2449    params.texel = texel;
2450 
2451    bld->sampler->emit_tex_sample(bld->sampler,
2452                                  bld->bld_base.base.gallivm,
2453                                  &params);
2454 
2455    if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2456        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2457        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2458        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2459       unsigned char swizzles[4];
2460       swizzles[0] = inst->Src[1].Register.SwizzleX;
2461       swizzles[1] = inst->Src[1].Register.SwizzleY;
2462       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2463       swizzles[3] = inst->Src[1].Register.SwizzleW;
2464 
2465       lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2466    }
2467 }
2468 
2469 static void
emit_fetch_texels(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,LLVMValueRef * texel,bool is_samplei)2470 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2471                    const struct tgsi_full_instruction *inst,
2472                    LLVMValueRef *texel,
2473                    bool is_samplei)
2474 {
2475    unsigned unit, target;
2476    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2477    LLVMValueRef explicit_lod = NULL;
2478    LLVMValueRef coords[5];
2479    LLVMValueRef offsets[3] = { NULL };
2480    LLVMValueRef ms_index = NULL;
2481    struct lp_sampler_params params = { 0 };
2482    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2483    unsigned dims, i;
2484    unsigned layer_coord = 0;
2485    unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2486 
2487    if (!bld->sampler) {
2488       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2489       for (i = 0; i < 4; i++) {
2490          texel[i] = coord_undef;
2491       }
2492       return;
2493    }
2494 
2495    unit = inst->Src[1].Register.Index;
2496 
2497    if (is_samplei) {
2498       target = bld->sv[unit].Resource;
2499    }
2500    else {
2501       target = inst->Texture.Texture;
2502    }
2503 
2504    switch (target) {
2505    case TGSI_TEXTURE_1D:
2506    case TGSI_TEXTURE_BUFFER:
2507       dims = 1;
2508       break;
2509    case TGSI_TEXTURE_1D_ARRAY:
2510       layer_coord = 1;
2511       dims = 1;
2512       break;
2513    case TGSI_TEXTURE_2D:
2514    case TGSI_TEXTURE_RECT:
2515    case TGSI_TEXTURE_2D_MSAA:
2516       dims = 2;
2517       break;
2518    case TGSI_TEXTURE_2D_ARRAY:
2519    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2520       layer_coord = 2;
2521       dims = 2;
2522       break;
2523    case TGSI_TEXTURE_3D:
2524       dims = 3;
2525       break;
2526    default:
2527       assert(0);
2528       return;
2529    }
2530 
2531    /* always have lod except for buffers and msaa targets ? */
2532    if (target != TGSI_TEXTURE_BUFFER &&
2533        target != TGSI_TEXTURE_2D_MSAA &&
2534        target != TGSI_TEXTURE_2D_ARRAY_MSAA &&
2535        inst->Instruction.Opcode != TGSI_OPCODE_TXF_LZ) {
2536       sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2537       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2538       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2539    }
2540 
2541    if (target == TGSI_TEXTURE_2D_MSAA ||
2542        target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
2543       sample_key |= LP_SAMPLER_FETCH_MS;
2544       ms_index = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2545    }
2546 
2547    /*
2548     * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2549     * would be the sample index.
2550     */
2551 
2552    for (i = 0; i < dims; i++) {
2553       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2554    }
2555    /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
2556    for (i = dims; i < 5; i++) {
2557       coords[i] = coord_undef;
2558    }
2559    if (layer_coord)
2560       coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2561 
2562    if (inst->Texture.NumOffsets == 1) {
2563       unsigned dim;
2564       sample_key |= LP_SAMPLER_OFFSETS;
2565       for (dim = 0; dim < dims; dim++) {
2566          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2567       }
2568    }
2569    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2570 
2571    params.type = bld->bld_base.base.type;
2572    params.sample_key = sample_key;
2573    params.texture_index = unit;
2574    /*
2575     * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
2576     * and trigger some assertions with d3d10 where the sampler view number
2577     * can exceed this.
2578     */
2579    params.sampler_index = 0;
2580    params.resources_type = bld->resources_type;
2581    params.resources_ptr = bld->resources_ptr;
2582    params.thread_data_type = bld->thread_data_type;
2583    params.thread_data_ptr = bld->thread_data_ptr;
2584    params.coords = coords;
2585    params.offsets = offsets;
2586    params.derivs = NULL;
2587    params.lod = explicit_lod;
2588    params.texel = texel;
2589    params.ms_index = ms_index;
2590 
2591    bld->sampler->emit_tex_sample(bld->sampler,
2592                                  bld->bld_base.base.gallivm,
2593                                  &params);
2594 
2595    if (is_samplei &&
2596        (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2597         inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2598         inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2599         inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2600       unsigned char swizzles[4];
2601       swizzles[0] = inst->Src[1].Register.SwizzleX;
2602       swizzles[1] = inst->Src[1].Register.SwizzleY;
2603       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2604       swizzles[3] = inst->Src[1].Register.SwizzleW;
2605 
2606       lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2607    }
2608 }
2609 
2610 static void
emit_size_query(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,LLVMValueRef * sizes_out,bool is_sviewinfo)2611 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2612                  const struct tgsi_full_instruction *inst,
2613                  LLVMValueRef *sizes_out,
2614                  bool is_sviewinfo)
2615 {
2616    LLVMValueRef explicit_lod;
2617    enum lp_sampler_lod_property lod_property;
2618    unsigned has_lod;
2619    unsigned i;
2620    unsigned unit = inst->Src[1].Register.Index;
2621    enum tgsi_texture_type target;
2622    enum pipe_texture_target pipe_target;
2623    struct lp_sampler_size_query_params params = { 0 };
2624 
2625    if (is_sviewinfo) {
2626       target = bld->sv[unit].Resource;
2627    }
2628    else {
2629       target = inst->Texture.Texture;
2630    }
2631    switch (target) {
2632    case TGSI_TEXTURE_BUFFER:
2633    case TGSI_TEXTURE_RECT:
2634    case TGSI_TEXTURE_SHADOWRECT:
2635    case TGSI_TEXTURE_2D_MSAA:
2636    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2637       has_lod = 0;
2638       break;
2639    default:
2640       has_lod = 1;
2641       break;
2642    }
2643 
2644    if (!bld->sampler) {
2645       _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2646       for (i = 0; i < 4; i++)
2647          sizes_out[i] = bld->bld_base.int_bld.undef;
2648       return;
2649    }
2650 
2651    if (has_lod) {
2652       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2653       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2654    }
2655    else {
2656       explicit_lod = NULL;
2657       lod_property = LP_SAMPLER_LOD_SCALAR;
2658    }
2659 
2660 
2661    pipe_target = tgsi_to_pipe_tex_target(target);
2662 
2663    params.int_type = bld->bld_base.int_bld.type;
2664    params.texture_unit = unit;
2665    params.texture_unit_offset = NULL;
2666    params.target = pipe_target;
2667    params.resources_type = bld->resources_type;
2668    params.resources_ptr = bld->resources_ptr;
2669    params.is_sviewinfo = true;
2670    params.lod_property = lod_property;
2671    params.explicit_lod = explicit_lod;
2672    params.sizes_out = sizes_out;
2673    params.samples_only = false;
2674 
2675    bld->sampler->emit_size_query(bld->sampler,
2676                                  bld->bld_base.base.gallivm,
2677                                  &params);
2678 }
2679 
2680 static bool
near_end_of_shader(struct lp_build_tgsi_soa_context * bld,int pc)2681 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2682                    int pc)
2683 {
2684    unsigned i;
2685 
2686    for (i = 0; i < 5; i++) {
2687       enum tgsi_opcode opcode;
2688 
2689       if (pc + i >= bld->bld_base.info->num_instructions)
2690          return true;
2691 
2692       opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2693 
2694       if (opcode == TGSI_OPCODE_END)
2695          return true;
2696 
2697       if (opcode == TGSI_OPCODE_TEX ||
2698          opcode == TGSI_OPCODE_TXP ||
2699          opcode == TGSI_OPCODE_TXD ||
2700          opcode == TGSI_OPCODE_TXB ||
2701          opcode == TGSI_OPCODE_TXL ||
2702          opcode == TGSI_OPCODE_TXF ||
2703          opcode == TGSI_OPCODE_TXQ ||
2704          opcode == TGSI_OPCODE_TEX2 ||
2705          opcode == TGSI_OPCODE_TXB2 ||
2706          opcode == TGSI_OPCODE_TXL2 ||
2707          opcode == TGSI_OPCODE_SAMPLE ||
2708          opcode == TGSI_OPCODE_SAMPLE_B ||
2709          opcode == TGSI_OPCODE_SAMPLE_C ||
2710          opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2711          opcode == TGSI_OPCODE_SAMPLE_D ||
2712          opcode == TGSI_OPCODE_SAMPLE_I ||
2713          opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2714          opcode == TGSI_OPCODE_SAMPLE_L ||
2715          opcode == TGSI_OPCODE_SVIEWINFO ||
2716          opcode == TGSI_OPCODE_CAL ||
2717          opcode == TGSI_OPCODE_IF ||
2718          opcode == TGSI_OPCODE_UIF ||
2719          opcode == TGSI_OPCODE_BGNLOOP ||
2720          opcode == TGSI_OPCODE_SWITCH)
2721          return false;
2722    }
2723 
2724    return true;
2725 }
2726 
2727 
2728 
2729 /**
2730  * Kill fragment if any of the src register values are negative.
2731  */
2732 static void
emit_kill_if(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,int pc)2733 emit_kill_if(
2734    struct lp_build_tgsi_soa_context *bld,
2735    const struct tgsi_full_instruction *inst,
2736    int pc)
2737 {
2738    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2739    const struct tgsi_full_src_register *reg = &inst->Src[0];
2740    LLVMValueRef terms[TGSI_NUM_CHANNELS];
2741    LLVMValueRef mask;
2742    unsigned chan_index;
2743 
2744    memset(&terms, 0, sizeof terms);
2745 
2746    TGSI_FOR_EACH_CHANNEL( chan_index ) {
2747       unsigned swizzle;
2748 
2749       /* Unswizzle channel */
2750       swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2751 
2752       /* Check if the component has not been already tested. */
2753       assert(swizzle < TGSI_NUM_CHANNELS);
2754       if( !terms[swizzle] )
2755          /* TODO: change the comparison operator instead of setting the sign */
2756          terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2757    }
2758 
2759    mask = NULL;
2760    TGSI_FOR_EACH_CHANNEL( chan_index ) {
2761       if(terms[chan_index]) {
2762          LLVMValueRef chan_mask;
2763 
2764          /*
2765           * If term < 0 then mask = 0 else mask = ~0.
2766           */
2767          chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2768 
2769          if(mask)
2770             mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2771          else
2772             mask = chan_mask;
2773       }
2774    }
2775 
2776    if (bld->exec_mask.has_mask) {
2777       LLVMValueRef invmask;
2778       invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2779       mask = LLVMBuildOr(builder, mask, invmask, "");
2780    }
2781 
2782    lp_build_mask_update(bld->mask, mask);
2783    if (!near_end_of_shader(bld, pc))
2784       lp_build_mask_check(bld->mask);
2785 }
2786 
2787 
2788 /**
2789  * Unconditional fragment kill.
2790  * The only predication is the execution mask which will apply if
2791  * we're inside a loop or conditional.
2792  */
2793 static void
emit_kill(struct lp_build_tgsi_soa_context * bld,int pc)2794 emit_kill(struct lp_build_tgsi_soa_context *bld,
2795           int pc)
2796 {
2797    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2798    LLVMValueRef mask;
2799 
2800    /* For those channels which are "alive", disable fragment shader
2801     * execution.
2802     */
2803    if (bld->exec_mask.has_mask) {
2804       mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2805    }
2806    else {
2807       LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2808       mask = zero;
2809    }
2810 
2811    lp_build_mask_update(bld->mask, mask);
2812 
2813    if (!near_end_of_shader(bld, pc))
2814       lp_build_mask_check(bld->mask);
2815 }
2816 
2817 
2818 /**
2819  * Emit code which will dump the value of all the temporary registers
2820  * to stdout.
2821  */
2822 static void
emit_dump_file(struct lp_build_tgsi_soa_context * bld,unsigned file)2823 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2824                unsigned file)
2825 {
2826    const struct tgsi_shader_info *info = bld->bld_base.info;
2827    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2828    LLVMBuilderRef builder = gallivm->builder;
2829    LLVMValueRef reg_ptr;
2830    int index;
2831    int max_index = info->file_max[file];
2832 
2833    /*
2834     * Some register files, particularly constants, can be very large,
2835     * and dumping everything could make this unusably slow.
2836     */
2837    max_index = MIN2(max_index, 32);
2838 
2839    for (index = 0; index <= max_index; index++) {
2840       LLVMValueRef res;
2841       unsigned mask;
2842       int chan;
2843 
2844       if (index < 8 * sizeof(unsigned) &&
2845           (info->file_mask[file] & (1u << index)) == 0)  {
2846          /* This was not declared.*/
2847          continue;
2848       }
2849 
2850       if (file == TGSI_FILE_INPUT) {
2851          mask = info->input_usage_mask[index];
2852       } else {
2853          mask = TGSI_WRITEMASK_XYZW;
2854       }
2855 
2856       for (chan = 0; chan < 4; chan++) {
2857          if ((mask & (1 << chan)) == 0) {
2858             /* This channel is not used.*/
2859             continue;
2860          }
2861 
2862          if (file == TGSI_FILE_CONSTANT) {
2863             struct tgsi_full_src_register reg;
2864             memset(&reg, 0, sizeof reg);
2865             reg.Register.File = file;
2866             reg.Register.Index = index;
2867             reg.Register.SwizzleX = 0;
2868             reg.Register.SwizzleY = 1;
2869             reg.Register.SwizzleZ = 2;
2870             reg.Register.SwizzleW = 3;
2871 
2872             res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
2873             if (!res) {
2874                continue;
2875             }
2876          } else if (file == TGSI_FILE_INPUT) {
2877             res = bld->inputs[index][chan];
2878             if (!res) {
2879                continue;
2880             }
2881          } else if (file == TGSI_FILE_TEMPORARY) {
2882             reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2883             assert(reg_ptr);
2884             res = LLVMBuildLoad2(builder, bld->bld_base.base.vec_type, reg_ptr, "");
2885          } else if (file == TGSI_FILE_OUTPUT) {
2886             reg_ptr = lp_get_output_ptr(bld, index, chan);
2887             assert(reg_ptr);
2888             res = LLVMBuildLoad2(builder, bld->bld_base.base.vec_type, reg_ptr, "");
2889          } else {
2890             assert(0);
2891             continue;
2892          }
2893 
2894          emit_dump_reg(gallivm, file, index, chan, res);
2895       }
2896    }
2897 }
2898 
2899 
2900 
2901 void
lp_emit_declaration_soa(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_declaration * decl)2902 lp_emit_declaration_soa(
2903    struct lp_build_tgsi_context *bld_base,
2904    const struct tgsi_full_declaration *decl)
2905 {
2906    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
2907    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2908    LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2909    const unsigned first = decl->Range.First;
2910    const unsigned last = decl->Range.Last;
2911    unsigned idx, i;
2912 
2913    assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
2914 
2915    switch (decl->Declaration.File) {
2916    case TGSI_FILE_TEMPORARY:
2917       if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
2918          assert(last < LP_MAX_INLINED_TEMPS);
2919          for (idx = first; idx <= last; ++idx) {
2920             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2921                bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
2922          }
2923       }
2924       break;
2925 
2926    case TGSI_FILE_OUTPUT:
2927       if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
2928          for (idx = first; idx <= last; ++idx) {
2929             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2930                bld->outputs[idx][i] = lp_build_alloca(gallivm,
2931                                                       vec_type, "output");
2932          }
2933       }
2934       break;
2935 
2936    case TGSI_FILE_ADDRESS:
2937       /* ADDR registers are only allocated with an integer LLVM IR type,
2938        * as they are guaranteed to always have integers.
2939        * XXX: Not sure if this exception is worthwhile (or the whole idea of
2940        * an ADDR register for that matter).
2941        */
2942       assert(last < LP_MAX_TGSI_ADDRS);
2943       for (idx = first; idx <= last; ++idx) {
2944          assert(idx < LP_MAX_TGSI_ADDRS);
2945          for (i = 0; i < TGSI_NUM_CHANNELS; i++)
2946             bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
2947       }
2948       break;
2949 
2950    case TGSI_FILE_SAMPLER_VIEW:
2951       /*
2952        * The target stored here MUST match whatever there actually
2953        * is in the set sampler views (what about return type?).
2954        */
2955       assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
2956       for (idx = first; idx <= last; ++idx) {
2957          bld->sv[idx] = decl->SamplerView;
2958       }
2959       break;
2960 
2961    case TGSI_FILE_CONSTANT:
2962    {
2963       /*
2964        * We could trivially fetch the per-buffer pointer when fetching the
2965        * constant, relying on llvm to figure out it's always the same pointer
2966        * anyway. However, doing so results in a huge (more than factor of 10)
2967        * slowdown in llvm compilation times for some (but not all) shaders
2968        * (more specifically, the IR optimization spends way more time in
2969        * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
2970        */
2971       unsigned idx2D = decl->Dim.Index2D;
2972       LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
2973       assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
2974       bld->consts[idx2D] = lp_llvm_buffer_base(gallivm, bld->consts_ptr,
2975                                                index2D, LP_MAX_TGSI_CONST_BUFFERS);
2976       bld->consts[idx2D] = LLVMBuildBitCast(gallivm->builder, bld->consts[idx2D], LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0), "");
2977       bld->consts_sizes[idx2D] = lp_llvm_buffer_num_elements(gallivm, bld->consts_ptr,
2978                                                              index2D, LP_MAX_TGSI_CONST_BUFFERS);
2979    }
2980    break;
2981    case TGSI_FILE_BUFFER:
2982    {
2983       unsigned idx = decl->Range.First;
2984       LLVMValueRef index = lp_build_const_int32(gallivm, idx);
2985       assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
2986       bld->ssbos[idx] =
2987          lp_llvm_buffer_base(gallivm, bld->ssbo_ptr,
2988                              index, LP_MAX_TGSI_SHADER_BUFFERS);
2989       bld->ssbo_sizes[idx] =
2990          lp_llvm_buffer_num_elements(gallivm, bld->ssbo_ptr,
2991                              index, LP_MAX_TGSI_SHADER_BUFFERS);
2992 
2993    }
2994    break;
2995    case TGSI_FILE_MEMORY:
2996       break;
2997    default:
2998       /* don't need to declare other vars */
2999       break;
3000    }
3001 }
3002 
3003 
lp_emit_immediate_soa(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_immediate * imm)3004 void lp_emit_immediate_soa(
3005    struct lp_build_tgsi_context *bld_base,
3006    const struct tgsi_full_immediate *imm)
3007 {
3008    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3009    struct gallivm_state * gallivm = bld_base->base.gallivm;
3010    LLVMValueRef imms[4];
3011    unsigned i;
3012    const unsigned size = imm->Immediate.NrTokens - 1;
3013    assert(size <= 4);
3014    switch (imm->Immediate.DataType) {
3015    case TGSI_IMM_FLOAT32:
3016       for( i = 0; i < size; ++i )
3017          imms[i] =
3018                lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
3019 
3020       break;
3021    case TGSI_IMM_FLOAT64:
3022    case TGSI_IMM_UINT64:
3023    case TGSI_IMM_INT64:
3024    case TGSI_IMM_UINT32:
3025       for( i = 0; i < size; ++i ) {
3026          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
3027          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3028       }
3029 
3030       break;
3031    case TGSI_IMM_INT32:
3032       for( i = 0; i < size; ++i ) {
3033          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3034          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3035       }
3036 
3037       break;
3038    }
3039    for( i = size; i < 4; ++i )
3040       imms[i] = bld_base->base.undef;
3041 
3042    if (bld->use_immediates_array) {
3043       unsigned index = bld->num_immediates;
3044       struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3045       LLVMBuilderRef builder = gallivm->builder;
3046       LLVMValueRef gep[2];
3047       gep[0] = lp_build_const_int32(gallivm, 0);
3048 
3049       assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3050       for (i = 0; i < 4; ++i ) {
3051          gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3052          LLVMValueRef imm_ptr = LLVMBuildGEP2(builder,
3053                                               bld->bld_base.base.vec_type,
3054                                               bld->imms_array, gep, 2, "");
3055          LLVMBuildStore(builder, imms[i], imm_ptr);
3056       }
3057    } else {
3058       /* simply copy the immediate values into the next immediates[] slot */
3059       unsigned i;
3060       assert(imm->Immediate.NrTokens - 1 <= 4);
3061       assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3062 
3063       for(i = 0; i < 4; ++i )
3064          bld->immediates[bld->num_immediates][i] = imms[i];
3065 
3066       if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3067          unsigned index = bld->num_immediates;
3068          struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3069          LLVMBuilderRef builder = gallivm->builder;
3070          LLVMValueRef gep[2];
3071          gep[0] = lp_build_const_int32(gallivm, 0);
3072          for (i = 0; i < 4; ++i ) {
3073             gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3074             LLVMValueRef imm_ptr = LLVMBuildGEP2(builder,
3075                                                  bld->bld_base.base.vec_type,
3076                                                  bld->imms_array, gep, 2, "");
3077             LLVMBuildStore(builder,
3078                            bld->immediates[index][i],
3079                            imm_ptr);
3080          }
3081       }
3082    }
3083 
3084    bld->num_immediates++;
3085 }
3086 
3087 static void
ddx_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3088 ddx_emit(
3089    const struct lp_build_tgsi_action * action,
3090    struct lp_build_tgsi_context * bld_base,
3091    struct lp_build_emit_data * emit_data)
3092 {
3093    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3094 
3095    emit_fetch_deriv(bld, emit_data->args[0], NULL,
3096                     &emit_data->output[emit_data->chan], NULL);
3097 }
3098 
3099 static void
ddy_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3100 ddy_emit(
3101    const struct lp_build_tgsi_action * action,
3102    struct lp_build_tgsi_context * bld_base,
3103    struct lp_build_emit_data * emit_data)
3104 {
3105    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3106 
3107    emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3108                     &emit_data->output[emit_data->chan]);
3109 }
3110 
3111 static void
kill_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3112 kill_emit(
3113    const struct lp_build_tgsi_action * action,
3114    struct lp_build_tgsi_context * bld_base,
3115    struct lp_build_emit_data * emit_data)
3116 {
3117    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3118 
3119    emit_kill(bld, bld_base->pc - 1);
3120 }
3121 
3122 static void
kill_if_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3123 kill_if_emit(
3124    const struct lp_build_tgsi_action * action,
3125    struct lp_build_tgsi_context * bld_base,
3126    struct lp_build_emit_data * emit_data)
3127 {
3128    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3129 
3130    emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3131 }
3132 
3133 static void
tex_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3134 tex_emit(
3135    const struct lp_build_tgsi_action * action,
3136    struct lp_build_tgsi_context * bld_base,
3137    struct lp_build_emit_data * emit_data)
3138 {
3139    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3140 
3141    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3142             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3143 }
3144 
3145 static void
tex2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3146 tex2_emit(
3147    const struct lp_build_tgsi_action * action,
3148    struct lp_build_tgsi_context * bld_base,
3149    struct lp_build_emit_data * emit_data)
3150 {
3151    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3152 
3153    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3154             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3155 }
3156 
3157 static void
txb_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3158 txb_emit(
3159    const struct lp_build_tgsi_action * action,
3160    struct lp_build_tgsi_context * bld_base,
3161    struct lp_build_emit_data * emit_data)
3162 {
3163    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3164 
3165    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3166             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3167 }
3168 
3169 static void
txb2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3170 txb2_emit(
3171    const struct lp_build_tgsi_action * action,
3172    struct lp_build_tgsi_context * bld_base,
3173    struct lp_build_emit_data * emit_data)
3174 {
3175    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3176 
3177    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3178             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3179 }
3180 
3181 static void
txd_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3182 txd_emit(
3183    const struct lp_build_tgsi_action * action,
3184    struct lp_build_tgsi_context * bld_base,
3185    struct lp_build_emit_data * emit_data)
3186 {
3187    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3188 
3189    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3190             emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3191 }
3192 
3193 static void
txl_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3194 txl_emit(
3195    const struct lp_build_tgsi_action * action,
3196    struct lp_build_tgsi_context * bld_base,
3197    struct lp_build_emit_data * emit_data)
3198 {
3199    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3200 
3201    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3202             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3203 }
3204 
3205 static void
txl2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3206 txl2_emit(
3207    const struct lp_build_tgsi_action * action,
3208    struct lp_build_tgsi_context * bld_base,
3209    struct lp_build_emit_data * emit_data)
3210 {
3211    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3212 
3213    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3214             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3215 }
3216 
3217 static void
txp_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3218 txp_emit(
3219    const struct lp_build_tgsi_action * action,
3220    struct lp_build_tgsi_context * bld_base,
3221    struct lp_build_emit_data * emit_data)
3222 {
3223    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3224 
3225    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3226             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3227 }
3228 
3229 static void
tg4_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3230 tg4_emit(
3231    const struct lp_build_tgsi_action * action,
3232    struct lp_build_tgsi_context * bld_base,
3233    struct lp_build_emit_data * emit_data)
3234 {
3235    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3236 
3237    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3238             emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3239 }
3240 
3241 static void
lodq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3242 lodq_emit(
3243    const struct lp_build_tgsi_action * action,
3244    struct lp_build_tgsi_context * bld_base,
3245    struct lp_build_emit_data * emit_data)
3246 {
3247    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3248 
3249    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3250             emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3251 }
3252 
3253 static void
txq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3254 txq_emit(
3255    const struct lp_build_tgsi_action * action,
3256    struct lp_build_tgsi_context * bld_base,
3257    struct lp_build_emit_data * emit_data)
3258 {
3259    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3260 
3261    emit_size_query(bld, emit_data->inst, emit_data->output, false);
3262 }
3263 
3264 static void
txf_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3265 txf_emit(
3266    const struct lp_build_tgsi_action * action,
3267    struct lp_build_tgsi_context * bld_base,
3268    struct lp_build_emit_data * emit_data)
3269 {
3270    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3271 
3272    emit_fetch_texels(bld, emit_data->inst, emit_data->output, false);
3273 }
3274 
3275 static void
sample_i_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3276 sample_i_emit(
3277    const struct lp_build_tgsi_action * action,
3278    struct lp_build_tgsi_context * bld_base,
3279    struct lp_build_emit_data * emit_data)
3280 {
3281    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3282 
3283    emit_fetch_texels(bld, emit_data->inst, emit_data->output, true);
3284 }
3285 
3286 static void
sample_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3287 sample_emit(
3288    const struct lp_build_tgsi_action * action,
3289    struct lp_build_tgsi_context * bld_base,
3290    struct lp_build_emit_data * emit_data)
3291 {
3292    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3293 
3294    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3295                false, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3296 }
3297 
3298 static void
sample_b_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3299 sample_b_emit(
3300    const struct lp_build_tgsi_action * action,
3301    struct lp_build_tgsi_context * bld_base,
3302    struct lp_build_emit_data * emit_data)
3303 {
3304    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3305 
3306    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3307                false, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3308 }
3309 
3310 static void
sample_c_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3311 sample_c_emit(
3312    const struct lp_build_tgsi_action * action,
3313    struct lp_build_tgsi_context * bld_base,
3314    struct lp_build_emit_data * emit_data)
3315 {
3316    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3317 
3318    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3319                true, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3320 }
3321 
3322 static void
sample_c_lz_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3323 sample_c_lz_emit(
3324    const struct lp_build_tgsi_action * action,
3325    struct lp_build_tgsi_context * bld_base,
3326    struct lp_build_emit_data * emit_data)
3327 {
3328    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3329 
3330    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3331                true, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3332 }
3333 
3334 static void
sample_d_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3335 sample_d_emit(
3336    const struct lp_build_tgsi_action * action,
3337    struct lp_build_tgsi_context * bld_base,
3338    struct lp_build_emit_data * emit_data)
3339 {
3340    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3341 
3342    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3343                false, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3344 }
3345 
3346 static void
sample_l_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3347 sample_l_emit(
3348    const struct lp_build_tgsi_action * action,
3349    struct lp_build_tgsi_context * bld_base,
3350    struct lp_build_emit_data * emit_data)
3351 {
3352    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3353 
3354    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3355                false, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3356 }
3357 
3358 static void
gather4_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3359 gather4_emit(
3360    const struct lp_build_tgsi_action * action,
3361    struct lp_build_tgsi_context * bld_base,
3362    struct lp_build_emit_data * emit_data)
3363 {
3364    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3365 
3366    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3367                false, LP_SAMPLER_OP_GATHER, emit_data->output);
3368 }
3369 
3370 static void
sviewinfo_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3371 sviewinfo_emit(
3372    const struct lp_build_tgsi_action * action,
3373    struct lp_build_tgsi_context * bld_base,
3374    struct lp_build_emit_data * emit_data)
3375 {
3376    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3377 
3378    emit_size_query(bld, emit_data->inst, emit_data->output, true);
3379 }
3380 
3381 static void
lod_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3382 lod_emit(
3383    const struct lp_build_tgsi_action * action,
3384    struct lp_build_tgsi_context * bld_base,
3385    struct lp_build_emit_data * emit_data)
3386 {
3387    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3388 
3389    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3390                false, LP_SAMPLER_OP_LODQ, emit_data->output);
3391 }
3392 
3393 static void
target_to_dims_layer(enum tgsi_texture_type target,unsigned * dims,unsigned * layer_coord)3394 target_to_dims_layer(enum tgsi_texture_type target,
3395                      unsigned *dims,
3396                      unsigned *layer_coord)
3397 {
3398    *layer_coord = 0;
3399    switch (target) {
3400    case TGSI_TEXTURE_1D:
3401    case TGSI_TEXTURE_BUFFER:
3402       *dims = 1;
3403       break;
3404    case TGSI_TEXTURE_1D_ARRAY:
3405       *layer_coord = 1;
3406       *dims = 1;
3407       break;
3408    case TGSI_TEXTURE_2D:
3409    case TGSI_TEXTURE_RECT:
3410       *dims = 2;
3411       break;
3412    case TGSI_TEXTURE_2D_ARRAY:
3413       *layer_coord = 2;
3414       *dims = 2;
3415       break;
3416    case TGSI_TEXTURE_3D:
3417    case TGSI_TEXTURE_CUBE:
3418    case TGSI_TEXTURE_CUBE_ARRAY:
3419       *dims = 3;
3420       break;
3421    default:
3422       assert(0);
3423       *dims = 0;
3424       return;
3425    }
3426 }
3427 
3428 static void
img_load_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3429 img_load_emit(
3430    const struct lp_build_tgsi_action * action,
3431    struct lp_build_tgsi_context * bld_base,
3432    struct lp_build_emit_data * emit_data)
3433 {
3434    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3435    struct lp_img_params params = { 0 };
3436    LLVMValueRef coords[5];
3437    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3438    unsigned dims;
3439    enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3440    unsigned layer_coord;
3441 
3442    target_to_dims_layer(target, &dims, &layer_coord);
3443 
3444    for (unsigned i = 0; i < dims; i++) {
3445       coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3446    }
3447    for (unsigned i = dims; i < 5; i++) {
3448       coords[i] = coord_undef;
3449    }
3450    if (layer_coord)
3451       coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3452 
3453    params.type = bld->bld_base.base.type;
3454    params.resources_type = bld->resources_type;
3455    params.resources_ptr = bld->resources_ptr;
3456    params.thread_data_type = bld->thread_data_type;
3457    params.thread_data_ptr = bld->thread_data_ptr;
3458    params.coords = coords;
3459    params.outdata = emit_data->output;
3460    params.target = tgsi_to_pipe_tex_target(target);
3461    params.image_index = emit_data->inst->Src[0].Register.Index;
3462    params.img_op = LP_IMG_LOAD;
3463    bld->image->emit_op(bld->image,
3464                          bld->bld_base.base.gallivm,
3465                          &params);
3466 }
3467 
3468 static void
load_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3469 load_emit(
3470    const struct lp_build_tgsi_action * action,
3471    struct lp_build_tgsi_context * bld_base,
3472    struct lp_build_emit_data * emit_data)
3473 {
3474    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3475    struct gallivm_state * gallivm = bld_base->base.gallivm;
3476    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3477    const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3478    unsigned buf = bufreg->Register.Index;
3479    assert(bufreg->Register.File == TGSI_FILE_BUFFER ||
3480           bufreg->Register.File == TGSI_FILE_IMAGE ||
3481           bufreg->Register.File == TGSI_FILE_MEMORY ||
3482           bufreg->Register.File == TGSI_FILE_CONSTBUF);
3483    bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3484    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3485 
3486    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3487       img_load_emit(action, bld_base, emit_data);
3488    } else if (bufreg->Register.File == TGSI_FILE_CONSTBUF) {
3489       LLVMValueRef consts_ptr = bld->consts[buf];
3490       LLVMValueRef num_consts = bld->consts_sizes[buf];
3491 
3492       LLVMValueRef indirect_index;
3493       LLVMValueRef overflow_mask;
3494 
3495       indirect_index = lp_build_emit_fetch(bld_base, emit_data->inst, 1, 0);
3496       indirect_index = lp_build_shr_imm(uint_bld, indirect_index, 4);
3497 
3498       /* All fetches are from the same constant buffer, so
3499        * we need to propagate the size to a vector to do a
3500        * vector comparison */
3501       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
3502 
3503       /* Gather values from the constant buffer */
3504       unsigned chan_index;
3505       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3506          /* Construct a boolean vector telling us which channels
3507           * overflow the bound constant buffer */
3508          overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
3509                                           indirect_index, num_consts);
3510 
3511          /* index_vec = indirect_index * 4 */
3512          LLVMValueRef index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
3513          index_vec = lp_build_add(uint_bld, index_vec,
3514                                   lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3515 
3516          emit_data->output[chan_index] = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, NULL);
3517       }
3518    } else if (0) {
3519       /* for indirect support with ARB_gpu_shader5 */
3520    } else {
3521       LLVMValueRef index;
3522       LLVMValueRef scalar, scalar_ptr;
3523       unsigned chan_index;
3524 
3525       index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3526       index = lp_build_shr_imm(uint_bld, index, 2);
3527 
3528       scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3529 
3530       LLVMValueRef ssbo_limit = NULL;
3531 
3532       if (!is_shared) {
3533          ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3534          ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3535       }
3536 
3537       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3538          LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3539 
3540          LLVMValueRef exec_mask = mask_vec(bld_base);
3541          if (!is_shared) {
3542             LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3543             exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3544          }
3545 
3546          LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
3547          struct lp_build_loop_state loop_state;
3548          lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3549 
3550          struct lp_build_if_state ifthen;
3551          LLVMValueRef cond, temp_res;
3552 
3553          loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3554                                               loop_state.counter, "");
3555 
3556          cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3557          cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3558 
3559          lp_build_if(&ifthen, gallivm, cond);
3560          scalar = lp_build_pointer_get2(builder, uint_bld->elem_type, scalar_ptr, loop_index);
3561 
3562          temp_res = LLVMBuildLoad2(builder, uint_bld->vec_type, result, "");
3563          temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3564          LLVMBuildStore(builder, temp_res, result);
3565          lp_build_else(&ifthen);
3566          temp_res = LLVMBuildLoad2(builder, uint_bld->vec_type, result, "");
3567          temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3568          LLVMBuildStore(builder, temp_res, result);
3569          lp_build_endif(&ifthen);
3570          lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3571                                 NULL, LLVMIntUGE);
3572          emit_data->output[chan_index] = LLVMBuildLoad2(gallivm->builder, uint_bld->vec_type,
3573                                                         result, "");
3574       }
3575    }
3576 }
3577 
3578 static void
img_store_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3579 img_store_emit(
3580    const struct lp_build_tgsi_action * action,
3581    struct lp_build_tgsi_context * bld_base,
3582    struct lp_build_emit_data * emit_data)
3583 {
3584    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3585    struct lp_img_params params = { 0 };
3586    LLVMValueRef coords[5];
3587    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3588    unsigned dims;
3589    enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3590    unsigned layer_coord;
3591 
3592    target_to_dims_layer(target, &dims, &layer_coord);
3593    for (unsigned i = 0; i < dims; i++) {
3594       coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
3595    }
3596    for (unsigned i = dims; i < 5; i++) {
3597       coords[i] = coord_undef;
3598    }
3599    if (layer_coord)
3600       coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
3601 
3602    params.type = bld->bld_base.base.type;
3603    params.resources_type = bld->resources_type;
3604    params.resources_ptr = bld->resources_ptr;
3605    params.thread_data_type = bld->thread_data_type;
3606    params.thread_data_ptr = bld->thread_data_ptr;
3607    params.coords = coords;
3608    params.outdata = NULL;
3609    params.exec_mask = mask_vec(bld_base);
3610    params.target = tgsi_to_pipe_tex_target(target);
3611    params.image_index = emit_data->inst->Dst[0].Register.Index;
3612    params.img_op = LP_IMG_STORE;
3613    for (unsigned i = 0; i < 4; i++)
3614       params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3615 
3616    bld->image->emit_op(bld->image,
3617                        bld->bld_base.base.gallivm,
3618                        &params);
3619 }
3620 
3621 static void
store_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3622 store_emit(
3623    const struct lp_build_tgsi_action * action,
3624    struct lp_build_tgsi_context * bld_base,
3625    struct lp_build_emit_data * emit_data)
3626 {
3627    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3628    struct gallivm_state * gallivm = bld_base->base.gallivm;
3629    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3630    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3631    const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
3632    unsigned buf = bufreg->Register.Index;
3633    assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3634    bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3635 
3636    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3637       img_store_emit(action, bld_base, emit_data);
3638    } else if (0) {
3639 
3640    } else {
3641       LLVMValueRef index;  /* index into the const buffer */
3642       LLVMValueRef scalar_ptr;
3643       LLVMValueRef value;
3644       unsigned chan_index;
3645 
3646       index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
3647       index = lp_build_shr_imm(uint_bld, index, 2);
3648 
3649       scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
3650 
3651       LLVMValueRef ssbo_limit = NULL;
3652 
3653       if (!is_shared) {
3654          ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3655          ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3656       }
3657 
3658       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
3659          LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
3660 
3661          value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
3662 
3663          LLVMValueRef exec_mask = mask_vec(bld_base);
3664          if (!is_shared) {
3665             LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
3666             exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3667          }
3668 
3669          struct lp_build_loop_state loop_state;
3670          lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3671 
3672          LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3673                                                           loop_state.counter, "");
3674          value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3675 
3676          struct lp_build_if_state ifthen;
3677          LLVMValueRef cond;
3678 
3679          loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
3680                                               loop_state.counter, "");
3681 
3682          cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3683          cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3684          lp_build_if(&ifthen, gallivm, cond);
3685 
3686          lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
3687 
3688          lp_build_endif(&ifthen);
3689          lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3690                                 NULL, LLVMIntUGE);
3691       }
3692    }
3693 }
3694 
3695 static void
resq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3696 resq_emit(
3697    const struct lp_build_tgsi_action * action,
3698    struct lp_build_tgsi_context * bld_base,
3699    struct lp_build_emit_data * emit_data)
3700 {
3701    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3702    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3703    const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3704 
3705    unsigned buf = bufreg->Register.Index;
3706    assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
3707 
3708    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3709       enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3710       struct lp_sampler_size_query_params params = { 0 };
3711       params.int_type = bld->bld_base.int_bld.type;
3712       params.texture_unit = buf;
3713       params.target = tgsi_to_pipe_tex_target(target);
3714       params.resources_type = bld->resources_type;
3715       params.resources_ptr = bld->resources_ptr;
3716       params.sizes_out = emit_data->output;
3717 
3718       bld->image->emit_size_query(bld->image,
3719                                   bld->bld_base.base.gallivm,
3720                                   &params);
3721    } else {
3722       LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
3723 
3724       emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
3725    }
3726 }
3727 
3728 static void
img_atomic_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data,LLVMAtomicRMWBinOp op)3729 img_atomic_emit(
3730    const struct lp_build_tgsi_action * action,
3731    struct lp_build_tgsi_context * bld_base,
3732    struct lp_build_emit_data * emit_data,
3733    LLVMAtomicRMWBinOp op)
3734 {
3735    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3736    struct lp_img_params params = { 0 };
3737    LLVMValueRef coords[5];
3738    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3739    unsigned dims;
3740    unsigned layer_coord;
3741    enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3742 
3743    target_to_dims_layer(target, &dims, &layer_coord);
3744 
3745    for (unsigned i = 0; i < dims; i++) {
3746       coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3747    }
3748    for (unsigned i = dims; i < 5; i++) {
3749       coords[i] = coord_undef;
3750    }
3751    if (layer_coord)
3752       coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3753 
3754    params.type = bld->bld_base.base.type;
3755    params.resources_type = bld->resources_type;
3756    params.resources_ptr = bld->resources_ptr;
3757    params.thread_data_ptr = bld->thread_data_ptr;
3758    params.exec_mask = mask_vec(bld_base);
3759    params.image_index = emit_data->inst->Src[0].Register.Index;
3760    params.coords = coords;
3761    params.target = tgsi_to_pipe_tex_target(target);
3762    params.op = op;
3763    params.outdata = emit_data->output;
3764    params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
3765 
3766    for (unsigned i = 0; i < 4; i++)
3767       params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
3768    if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3769       for (unsigned i = 0; i < 4; i++)
3770          params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
3771    }
3772    bld->image->emit_op(bld->image,
3773                        bld->bld_base.base.gallivm,
3774                        &params);
3775 }
3776 
3777 static void
atomic_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3778 atomic_emit(
3779    const struct lp_build_tgsi_action * action,
3780    struct lp_build_tgsi_context * bld_base,
3781    struct lp_build_emit_data * emit_data)
3782 {
3783    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3784    struct gallivm_state * gallivm = bld_base->base.gallivm;
3785    LLVMBuilderRef builder = gallivm->builder;
3786    struct lp_build_context *uint_bld = &bld_base->uint_bld;
3787    const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3788 
3789    assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
3790    unsigned buf = bufreg->Register.Index;
3791    bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
3792 
3793    LLVMAtomicRMWBinOp op = -1;
3794    switch (emit_data->inst->Instruction.Opcode) {
3795    case TGSI_OPCODE_ATOMUADD:
3796       op = LLVMAtomicRMWBinOpAdd;
3797       break;
3798    case TGSI_OPCODE_ATOMXCHG:
3799       op = LLVMAtomicRMWBinOpXchg;
3800       break;
3801    case TGSI_OPCODE_ATOMAND:
3802       op = LLVMAtomicRMWBinOpAnd;
3803       break;
3804    case TGSI_OPCODE_ATOMOR:
3805       op = LLVMAtomicRMWBinOpOr;
3806       break;
3807    case TGSI_OPCODE_ATOMXOR:
3808       op = LLVMAtomicRMWBinOpXor;
3809       break;
3810    case TGSI_OPCODE_ATOMUMIN:
3811       op = LLVMAtomicRMWBinOpUMin;
3812       break;
3813    case TGSI_OPCODE_ATOMUMAX:
3814       op = LLVMAtomicRMWBinOpUMax;
3815       break;
3816    case TGSI_OPCODE_ATOMIMIN:
3817       op = LLVMAtomicRMWBinOpMin;
3818       break;
3819    case TGSI_OPCODE_ATOMIMAX:
3820       op = LLVMAtomicRMWBinOpMax;
3821       break;
3822    case TGSI_OPCODE_ATOMCAS:
3823       break;
3824    default:
3825       assert(0);
3826       return;
3827    }
3828 
3829    if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3830       img_atomic_emit(action, bld_base, emit_data, op);
3831    } else if (0) {
3832    } else {
3833       LLVMValueRef index;  /* index into the const buffer */
3834       LLVMValueRef scalar, scalar_ptr;
3835       LLVMValueRef value;
3836 
3837       index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
3838       value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
3839 
3840       index = lp_build_shr_imm(uint_bld, index, 2);
3841 
3842       if (!is_shared) {
3843          index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
3844          scalar_ptr = bld->ssbos[buf];
3845       } else
3846          scalar_ptr = bld->shared_ptr;
3847 
3848       LLVMValueRef atom_res = lp_build_alloca(gallivm,
3849                                               uint_bld->vec_type, "");
3850 
3851       LLVMValueRef ssbo_limit = NULL;
3852       if (!is_shared) {
3853          ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
3854          ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
3855       }
3856 
3857       LLVMValueRef exec_mask = mask_vec(bld_base);
3858 
3859       if (!is_shared) {
3860          LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
3861          exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
3862       }
3863 
3864       struct lp_build_loop_state loop_state;
3865       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
3866 
3867       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
3868                                                        loop_state.counter, "");
3869       value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
3870 
3871       index = LLVMBuildExtractElement(gallivm->builder, index,
3872                                       loop_state.counter, "");
3873 
3874       scalar_ptr = LLVMBuildGEP2(builder, uint_bld->elem_type, scalar_ptr,
3875                                  &index, 1, "");
3876 
3877       struct lp_build_if_state ifthen;
3878       LLVMValueRef cond, temp_res;
3879 
3880       cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
3881       cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
3882       lp_build_if(&ifthen, gallivm, cond);
3883 
3884       if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3885          LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
3886          LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
3887                                                             loop_state.counter, "");
3888          cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
3889          scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
3890                                          cas_src_ptr,
3891                                          LLVMAtomicOrderingSequentiallyConsistent,
3892                                          LLVMAtomicOrderingSequentiallyConsistent,
3893                                          false);
3894          scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
3895       } else {
3896          scalar = LLVMBuildAtomicRMW(builder, op,
3897                                      scalar_ptr, value_ptr,
3898                                      LLVMAtomicOrderingSequentiallyConsistent,
3899                                      false);
3900       }
3901       temp_res = LLVMBuildLoad2(builder, uint_bld->vec_type, atom_res, "");
3902       temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
3903       LLVMBuildStore(builder, temp_res, atom_res);
3904       lp_build_else(&ifthen);
3905       temp_res = LLVMBuildLoad2(builder, uint_bld->vec_type, atom_res, "");
3906       temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
3907       LLVMBuildStore(builder, temp_res, atom_res);
3908       lp_build_endif(&ifthen);
3909 
3910       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
3911                              NULL, LLVMIntUGE);
3912       emit_data->output[emit_data->chan] = LLVMBuildLoad2(gallivm->builder, uint_bld->vec_type, atom_res, "");
3913    }
3914 }
3915 
3916 static void
barrier_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3917 barrier_emit(
3918    const struct lp_build_tgsi_action * action,
3919    struct lp_build_tgsi_context * bld_base,
3920    struct lp_build_emit_data * emit_data)
3921 {
3922    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3923    struct gallivm_state * gallivm = bld_base->base.gallivm;
3924 
3925    LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
3926 
3927    lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
3928    LLVMPositionBuilderAtEnd(gallivm->builder, resume);
3929 }
3930 
3931 static void
membar_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3932 membar_emit(
3933    const struct lp_build_tgsi_action * action,
3934    struct lp_build_tgsi_context * bld_base,
3935    struct lp_build_emit_data * emit_data)
3936 {
3937    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3938    LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
3939 }
3940 
3941 static void
increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)3942 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3943                           LLVMValueRef ptr,
3944                           LLVMValueRef mask)
3945 {
3946    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3947    LLVMValueRef current_vec = LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type, ptr, "");
3948 
3949    current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3950 
3951    LLVMBuildStore(builder, current_vec, ptr);
3952 }
3953 
3954 static void
clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)3955 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3956                              LLVMValueRef ptr,
3957                              LLVMValueRef mask)
3958 {
3959    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3960    LLVMValueRef current_vec = LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type, ptr, "");
3961 
3962    current_vec = lp_build_select(&bld_base->uint_bld,
3963                                  mask,
3964                                  bld_base->uint_bld.zero,
3965                                  current_vec);
3966 
3967    LLVMBuildStore(builder, current_vec, ptr);
3968 }
3969 
3970 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)3971 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3972                                   LLVMValueRef current_mask_vec,
3973                                   LLVMValueRef total_emitted_vertices_vec)
3974 {
3975    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3976    struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3977    LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3978                                         total_emitted_vertices_vec,
3979                                         bld->max_output_vertices_vec);
3980 
3981    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3982 }
3983 
3984 static void
emit_vertex(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3985 emit_vertex(
3986    const struct lp_build_tgsi_action * action,
3987    struct lp_build_tgsi_context * bld_base,
3988    struct lp_build_emit_data * emit_data)
3989 {
3990    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3991    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3992 
3993    if (bld->gs_iface->emit_vertex) {
3994       LLVMValueRef stream_id = emit_fetch_immediate(bld_base, &emit_data->inst->Src[0],
3995                                                     TGSI_TYPE_UNSIGNED,
3996                                                     emit_data->inst->Src[0].Register.SwizzleX);
3997       LLVMValueRef mask = mask_vec(bld_base);
3998       LLVMValueRef total_emitted_vertices_vec =
3999          LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr, "");
4000 
4001       mask = clamp_mask_to_max_output_vertices(bld, mask,
4002                                                total_emitted_vertices_vec);
4003       gather_outputs(bld);
4004       bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
4005                                  bld->outputs,
4006                                  total_emitted_vertices_vec,
4007                                  mask,
4008                                  stream_id);
4009       increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
4010                                 mask);
4011       increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
4012                                 mask);
4013 #if DUMP_GS_EMITS
4014       lp_build_print_value(bld->bld_base.base.gallivm,
4015                            " +++ emit vertex masked ones = ",
4016                            mask);
4017       lp_build_print_value(bld->bld_base.base.gallivm,
4018                            " +++ emit vertex emitted = ",
4019                            total_emitted_vertices_vec);
4020 #endif
4021    }
4022 }
4023 
4024 
4025 static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,LLVMValueRef mask)4026 end_primitive_masked(struct lp_build_tgsi_context * bld_base,
4027                      LLVMValueRef mask)
4028 {
4029    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4030    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
4031 
4032    if (bld->gs_iface->end_primitive) {
4033       struct lp_build_context *uint_bld = &bld_base->uint_bld;
4034       LLVMValueRef emitted_vertices_vec =
4035          LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type,
4036                         bld->emitted_vertices_vec_ptr, "");
4037       LLVMValueRef emitted_prims_vec =
4038          LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type,
4039                         bld->emitted_prims_vec_ptr, "");
4040       LLVMValueRef total_emitted_vertices_vec =
4041          LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type,
4042                         bld->total_emitted_vertices_vec_ptr, "");
4043       LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4044                                                emitted_vertices_vec,
4045                                                uint_bld->zero);
4046       /* We need to combine the current execution mask with the mask
4047          telling us which, if any, execution slots actually have
4048          unemitted primitives, this way we make sure that end_primitives
4049          executes only on the paths that have unflushed vertices */
4050       mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
4051 
4052       bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
4053                                    total_emitted_vertices_vec,
4054                                    emitted_vertices_vec,
4055                                    emitted_prims_vec,
4056                                    mask_vec(bld_base), 0);
4057 
4058 #if DUMP_GS_EMITS
4059       lp_build_print_value(bld->bld_base.base.gallivm,
4060                            " +++ end prim masked ones = ",
4061                            mask);
4062       lp_build_print_value(bld->bld_base.base.gallivm,
4063                            " +++ end prim emitted verts1 = ",
4064                            emitted_vertices_vec);
4065       lp_build_print_value(bld->bld_base.base.gallivm,
4066                            " +++ end prim emitted prims1 = ",
4067                            LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type,
4068                                           bld->emitted_prims_vec_ptr, ""));
4069 #endif
4070       increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
4071                                 mask);
4072       clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
4073                                    mask);
4074 #if DUMP_GS_EMITS
4075       lp_build_print_value(bld->bld_base.base.gallivm,
4076                            " +++ end prim emitted verts2 = ",
4077                            LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type,
4078                                           bld->emitted_vertices_vec_ptr, ""));
4079 #endif
4080    }
4081 
4082 }
4083 
4084 static void
end_primitive(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4085 end_primitive(
4086    const struct lp_build_tgsi_action * action,
4087    struct lp_build_tgsi_context * bld_base,
4088    struct lp_build_emit_data * emit_data)
4089 {
4090    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4091 
4092    if (bld->gs_iface->end_primitive) {
4093       LLVMValueRef mask = mask_vec(bld_base);
4094       end_primitive_masked(bld_base, mask);
4095    }
4096 }
4097 
4098 static void
barrier_emit_tcs(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4099 barrier_emit_tcs(
4100    const struct lp_build_tgsi_action * action,
4101    struct lp_build_tgsi_context * bld_base,
4102    struct lp_build_emit_data * emit_data)
4103 {
4104    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4105 
4106    if (bld->tcs_iface->emit_barrier) {
4107       bld->tcs_iface->emit_barrier((struct lp_build_context*)bld_base);
4108    }
4109 }
4110 
4111 
4112 static void
cal_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4113 cal_emit(
4114    const struct lp_build_tgsi_action * action,
4115    struct lp_build_tgsi_context * bld_base,
4116    struct lp_build_emit_data * emit_data)
4117 {
4118    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4119 
4120    lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
4121                      &bld_base->pc);
4122 }
4123 
4124 static void
ret_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4125 ret_emit(
4126    const struct lp_build_tgsi_action * action,
4127    struct lp_build_tgsi_context * bld_base,
4128    struct lp_build_emit_data * emit_data)
4129 {
4130    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4131 
4132    lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
4133 }
4134 
4135 static void
brk_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4136 brk_emit(
4137    const struct lp_build_tgsi_action * action,
4138    struct lp_build_tgsi_context * bld_base,
4139    struct lp_build_emit_data * emit_data)
4140 {
4141    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4142 
4143    lp_exec_tgsi_break(&bld->exec_mask, bld_base);
4144 }
4145 
4146 static void
if_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4147 if_emit(
4148    const struct lp_build_tgsi_action * action,
4149    struct lp_build_tgsi_context * bld_base,
4150    struct lp_build_emit_data * emit_data)
4151 {
4152    LLVMValueRef tmp;
4153    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4154 
4155    tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
4156                       emit_data->args[0], bld->bld_base.base.zero);
4157    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4158 }
4159 
4160 static void
uif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4161 uif_emit(
4162    const struct lp_build_tgsi_action * action,
4163    struct lp_build_tgsi_context * bld_base,
4164    struct lp_build_emit_data * emit_data)
4165 {
4166    LLVMValueRef tmp;
4167    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4168    struct lp_build_context *uint_bld = &bld_base->uint_bld;
4169 
4170    tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4171                       emit_data->args[0], uint_bld->zero);
4172    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4173 }
4174 
4175 static void
case_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4176 case_emit(
4177    const struct lp_build_tgsi_action * action,
4178    struct lp_build_tgsi_context * bld_base,
4179    struct lp_build_emit_data * emit_data)
4180 {
4181    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4182 
4183    lp_exec_case(&bld->exec_mask, emit_data->args[0]);
4184 }
4185 
4186 static void
default_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4187 default_emit(
4188    const struct lp_build_tgsi_action * action,
4189    struct lp_build_tgsi_context * bld_base,
4190    struct lp_build_emit_data * emit_data)
4191 {
4192    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4193 
4194    lp_exec_default(&bld->exec_mask, bld_base);
4195 }
4196 
4197 static void
switch_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4198 switch_emit(
4199    const struct lp_build_tgsi_action * action,
4200    struct lp_build_tgsi_context * bld_base,
4201    struct lp_build_emit_data * emit_data)
4202 {
4203    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4204 
4205    lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
4206 }
4207 
4208 static void
endswitch_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4209 endswitch_emit(
4210    const struct lp_build_tgsi_action * action,
4211    struct lp_build_tgsi_context * bld_base,
4212    struct lp_build_emit_data * emit_data)
4213 {
4214    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4215 
4216    lp_exec_endswitch(&bld->exec_mask, bld_base);
4217 }
4218 
4219 static void
bgnloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4220 bgnloop_emit(
4221    const struct lp_build_tgsi_action * action,
4222    struct lp_build_tgsi_context * bld_base,
4223    struct lp_build_emit_data * emit_data)
4224 {
4225    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4226 
4227    lp_exec_bgnloop(&bld->exec_mask, true);
4228 }
4229 
4230 static void
bgnsub_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4231 bgnsub_emit(
4232    const struct lp_build_tgsi_action * action,
4233    struct lp_build_tgsi_context * bld_base,
4234    struct lp_build_emit_data * emit_data)
4235 {
4236    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4237 
4238    lp_exec_mask_bgnsub(&bld->exec_mask);
4239 }
4240 
4241 static void
else_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4242 else_emit(
4243    const struct lp_build_tgsi_action * action,
4244    struct lp_build_tgsi_context * bld_base,
4245    struct lp_build_emit_data * emit_data)
4246 {
4247    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4248 
4249    lp_exec_mask_cond_invert(&bld->exec_mask);
4250 }
4251 
4252 static void
endif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4253 endif_emit(
4254    const struct lp_build_tgsi_action * action,
4255    struct lp_build_tgsi_context * bld_base,
4256    struct lp_build_emit_data * emit_data)
4257 {
4258    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4259 
4260    lp_exec_mask_cond_pop(&bld->exec_mask);
4261 }
4262 
4263 static void
endloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4264 endloop_emit(
4265    const struct lp_build_tgsi_action * action,
4266    struct lp_build_tgsi_context * bld_base,
4267    struct lp_build_emit_data * emit_data)
4268 {
4269    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4270 
4271    lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask, bld->mask);
4272 }
4273 
4274 static void
endsub_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4275 endsub_emit(
4276    const struct lp_build_tgsi_action * action,
4277    struct lp_build_tgsi_context * bld_base,
4278    struct lp_build_emit_data * emit_data)
4279 {
4280    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4281 
4282    lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
4283 }
4284 
4285 static void
cont_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4286 cont_emit(
4287    const struct lp_build_tgsi_action * action,
4288    struct lp_build_tgsi_context * bld_base,
4289    struct lp_build_emit_data * emit_data)
4290 {
4291    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4292 
4293    lp_exec_continue(&bld->exec_mask);
4294 }
4295 
emit_prologue(struct lp_build_tgsi_context * bld_base)4296 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
4297 {
4298    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4299    struct gallivm_state * gallivm = bld_base->base.gallivm;
4300 
4301    if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
4302       unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
4303       bld->temps_array_type = LLVMArrayType(bld_base->base.vec_type, array_size);
4304       bld->temps_array = lp_build_alloca_undef(gallivm,
4305                                                LLVMArrayType(bld_base->base.vec_type, array_size),
4306                                                "temp_array");
4307    }
4308 
4309    if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
4310       LLVMValueRef array_size =
4311          lp_build_const_int32(gallivm,
4312                             bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
4313       bld->outputs_array_type = bld_base->base.vec_type;
4314       bld->outputs_array = lp_build_array_alloca(gallivm,
4315                                                 bld_base->base.vec_type, array_size,
4316                                                 "output_array");
4317    }
4318 
4319    if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
4320       unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
4321       bld->imms_array = lp_build_alloca_undef(gallivm,
4322                                               LLVMArrayType(bld_base->base.vec_type, array_size),
4323                                               "imms_array");
4324    }
4325 
4326    /* If we have indirect addressing in inputs we need to copy them into
4327     * our alloca array to be able to iterate over them */
4328    if (bld->indirect_files & (1 << TGSI_FILE_INPUT) &&
4329        !bld->gs_iface && !bld->tes_iface && !bld->tcs_iface) {
4330       unsigned index, chan;
4331       LLVMTypeRef vec_type = bld_base->base.vec_type;
4332       LLVMValueRef array_size = lp_build_const_int32(gallivm,
4333             bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
4334       bld->inputs_array = lp_build_array_alloca(gallivm,
4335                                                vec_type, array_size,
4336                                                "input_array");
4337 
4338       assert(bld_base->info->num_inputs
4339                         <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
4340 
4341       for (index = 0; index < bld_base->info->num_inputs; ++index) {
4342          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
4343             LLVMValueRef lindex =
4344                lp_build_const_int32(gallivm, index * 4 + chan);
4345             LLVMValueRef input_ptr =
4346                LLVMBuildGEP2(gallivm->builder,
4347                              bld->bld_base.base.vec_type,
4348                              bld->inputs_array,
4349                              &lindex, 1, "");
4350             LLVMValueRef value = bld->inputs[index][chan];
4351             if (value)
4352                LLVMBuildStore(gallivm->builder, value, input_ptr);
4353          }
4354       }
4355    }
4356 
4357    if (bld->gs_iface) {
4358       struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
4359       bld->emitted_prims_vec_ptr =
4360          lp_build_alloca(gallivm,
4361                          uint_bld->vec_type,
4362                          "emitted_prims_ptr");
4363       bld->emitted_vertices_vec_ptr =
4364          lp_build_alloca(gallivm,
4365                          uint_bld->vec_type,
4366                          "emitted_vertices_ptr");
4367       bld->total_emitted_vertices_vec_ptr =
4368          lp_build_alloca(gallivm,
4369                          uint_bld->vec_type,
4370                          "total_emitted_vertices_ptr");
4371 
4372       LLVMBuildStore(gallivm->builder, uint_bld->zero,
4373                      bld->emitted_prims_vec_ptr);
4374       LLVMBuildStore(gallivm->builder, uint_bld->zero,
4375                      bld->emitted_vertices_vec_ptr);
4376       LLVMBuildStore(gallivm->builder, uint_bld->zero,
4377                      bld->total_emitted_vertices_vec_ptr);
4378    }
4379 
4380    if (DEBUG_EXECUTION) {
4381       lp_build_printf(gallivm, "\n");
4382       emit_dump_file(bld, TGSI_FILE_CONSTANT);
4383       if (!bld->gs_iface)
4384          emit_dump_file(bld, TGSI_FILE_INPUT);
4385    }
4386 }
4387 
emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)4388 static void emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)
4389 {
4390    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4391 
4392    if (bld->tcs_iface && bld->tcs_iface->emit_prologue) {
4393       bld->tcs_iface->emit_prologue((struct lp_build_context*)bld_base);
4394    }
4395 }
4396 
emit_epilogue(struct lp_build_tgsi_context * bld_base)4397 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
4398 {
4399    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4400    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
4401 
4402    if (DEBUG_EXECUTION) {
4403       /* for debugging */
4404       if (0) {
4405          emit_dump_file(bld, TGSI_FILE_TEMPORARY);
4406       }
4407       emit_dump_file(bld, TGSI_FILE_OUTPUT);
4408       lp_build_printf(bld_base->base.gallivm, "\n");
4409    }
4410 
4411    if (bld->tcs_iface && bld->tcs_iface->emit_epilogue) {
4412       bld->tcs_iface->emit_epilogue((struct lp_build_context*)bld_base);
4413    }
4414 
4415    /* If we have indirect addressing in outputs we need to copy our alloca array
4416     * to the outputs slots specified by the caller */
4417    if (bld->gs_iface) {
4418       LLVMValueRef total_emitted_vertices_vec;
4419       LLVMValueRef emitted_prims_vec;
4420       /* implicit end_primitives, needed in case there are any unflushed
4421          vertices in the cache. Note must not call end_primitive here
4422          since the exec_mask is not valid at this point. */
4423       end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
4424 
4425       total_emitted_vertices_vec =
4426          LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type,
4427                         bld->total_emitted_vertices_vec_ptr, "");
4428       emitted_prims_vec =
4429          LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type,
4430                         bld->emitted_prims_vec_ptr, "");
4431 
4432       bld->gs_iface->gs_epilogue(bld->gs_iface,
4433                                  total_emitted_vertices_vec,
4434                                  emitted_prims_vec, 0);
4435    } else {
4436       gather_outputs(bld);
4437    }
4438 }
4439 
4440 void
lp_build_tgsi_soa(struct gallivm_state * gallivm,const struct tgsi_token * tokens,const struct lp_build_tgsi_params * params,LLVMValueRef (* outputs)[TGSI_NUM_CHANNELS])4441 lp_build_tgsi_soa(struct gallivm_state *gallivm,
4442                   const struct tgsi_token *tokens,
4443                   const struct lp_build_tgsi_params *params,
4444                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
4445 {
4446    struct lp_build_tgsi_soa_context bld;
4447    struct lp_type type = params->type;
4448    struct lp_type res_type;
4449 
4450    assert(type.length <= LP_MAX_VECTOR_LENGTH);
4451    memset(&res_type, 0, sizeof res_type);
4452    res_type.width = type.width;
4453    res_type.length = type.length;
4454    res_type.sign = 1;
4455 
4456    /* Setup build context */
4457    memset(&bld, 0, sizeof bld);
4458    lp_build_context_init(&bld.bld_base.base, gallivm, type);
4459    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
4460    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
4461    lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
4462    {
4463       struct lp_type dbl_type;
4464       dbl_type = type;
4465       dbl_type.width *= 2;
4466       lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
4467    }
4468    {
4469       struct lp_type uint64_type;
4470       uint64_type = lp_uint_type(type);
4471       uint64_type.width *= 2;
4472       lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
4473    }
4474    {
4475       struct lp_type int64_type;
4476       int64_type = lp_int_type(type);
4477       int64_type.width *= 2;
4478       lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
4479    }
4480    bld.mask = params->mask;
4481    bld.inputs = params->inputs;
4482    bld.outputs = outputs;
4483    bld.consts_ptr = params->consts_ptr;
4484    bld.ssbo_ptr = params->ssbo_ptr;
4485    bld.sampler = params->sampler;
4486    bld.bld_base.info = params->info;
4487    bld.indirect_files = params->info->indirect_files;
4488    bld.context_type = params->context_type;
4489    bld.context_ptr = params->context_ptr;
4490    bld.resources_type = params->resources_type;
4491    bld.resources_ptr = params->resources_ptr;
4492    bld.thread_data_type =  params->thread_data_type;
4493    bld.thread_data_ptr = params->thread_data_ptr;
4494    bld.image = params->image;
4495    bld.shared_ptr = params->shared_ptr;
4496    bld.coro = params->coro;
4497 
4498    /*
4499     * If the number of temporaries is rather large then we just
4500     * allocate them as an array right from the start and treat
4501     * like indirect temporaries.
4502     */
4503    if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
4504       bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
4505    }
4506    /*
4507     * For performance reason immediates are always backed in a static
4508     * array, but if their number is too great, we have to use just
4509     * a dynamically allocated array.
4510     */
4511    bld.use_immediates_array =
4512          (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
4513    if (bld.use_immediates_array) {
4514       bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
4515    }
4516 
4517 
4518    bld.bld_base.soa = true;
4519    bld.bld_base.emit_debug = emit_debug;
4520    bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
4521    bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
4522    bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
4523    bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
4524    bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
4525 
4526    bld.bld_base.emit_store = emit_store;
4527    bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_output;
4528    bld.bld_base.emit_store_reg_funcs[TGSI_FILE_TEMPORARY] = emit_store_temp;
4529    bld.bld_base.emit_store_reg_funcs[TGSI_FILE_ADDRESS] = emit_store_address;
4530 
4531    bld.bld_base.emit_declaration = lp_emit_declaration_soa;
4532    bld.bld_base.emit_immediate = lp_emit_immediate_soa;
4533 
4534    bld.bld_base.emit_prologue = emit_prologue;
4535    bld.bld_base.emit_prologue_post_decl = emit_prologue_post_decl;
4536    bld.bld_base.emit_epilogue = emit_epilogue;
4537 
4538    /* Set opcode actions */
4539    lp_set_default_actions_cpu(&bld.bld_base);
4540 
4541    bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
4542    bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
4543    bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
4544    bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
4545    bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
4546    bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
4547    bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
4548    bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
4549    bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
4550    bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
4551    bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
4552    bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
4553    bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
4554    bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
4555    bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
4556    bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
4557    bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
4558    bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
4559    bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
4560    bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
4561    bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
4562    bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
4563    bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
4564    bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
4565    bld.bld_base.op_actions[TGSI_OPCODE_TEX_LZ].emit = txl_emit;
4566    bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
4567    bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
4568    bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
4569    bld.bld_base.op_actions[TGSI_OPCODE_TXF_LZ].emit = txf_emit;
4570    bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
4571    bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
4572    bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
4573    bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
4574    bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
4575    /* DX10 sampling ops */
4576    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
4577    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
4578    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
4579    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
4580    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
4581    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
4582    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
4583    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
4584    bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
4585    bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
4586    bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
4587 
4588    bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
4589    bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
4590    bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
4591 
4592    bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
4593    bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
4594    bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
4595    bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
4596    bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
4597    bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
4598    bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
4599    bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
4600    bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
4601    bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
4602 
4603    bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
4604    bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;
4605 
4606    if (params->gs_iface) {
4607       /* There's no specific value for this because it should always
4608        * be set, but apps using ext_geometry_shader4 quite often
4609        * were forgetting so we're using MAX_VERTEX_VARYING from
4610        * that spec even though we could assert if it's not
4611        * set, but that's a lot uglier. */
4612       unsigned max_output_vertices;
4613 
4614       /* inputs are always indirect with gs */
4615       bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4616       bld.gs_iface = params->gs_iface;
4617       bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
4618       bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
4619       bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
4620 
4621       max_output_vertices =
4622          params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
4623       if (!max_output_vertices)
4624          max_output_vertices = 32;
4625 
4626       bld.max_output_vertices_vec =
4627          lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
4628                                 max_output_vertices);
4629    }
4630 
4631    if (params->tes_iface) {
4632       /* inputs are always indirect with tes */
4633       bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4634       bld.tes_iface = params->tes_iface;
4635       bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tes_input;
4636    }
4637 
4638    if (params->tcs_iface) {
4639       bld.tcs_iface = params->tcs_iface;
4640       /* outputs and inputs are always indirect with tcs */
4641       bld.indirect_files |= (1 << TGSI_FILE_OUTPUT);
4642       bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_tcs_output;
4643       bld.indirect_files |= (1 << TGSI_FILE_INPUT);
4644       bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tcs_input;
4645       bld.bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_tcs_input;
4646       bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit_tcs;
4647    }
4648 
4649    lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
4650 
4651    bld.system_values = *params->system_values;
4652 
4653    lp_build_tgsi_llvm(&bld.bld_base, tokens);
4654 
4655    if (0) {
4656       LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
4657       LLVMValueRef function = LLVMGetBasicBlockParent(block);
4658       debug_printf("11111111111111111111111111111 \n");
4659       tgsi_dump(tokens, 0);
4660       lp_debug_dump_value(function);
4661       debug_printf("2222222222222222222222222222 \n");
4662    }
4663 
4664    if (0) {
4665       LLVMModuleRef module = LLVMGetGlobalParent(
4666          LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
4667       LLVMDumpModule(module);
4668 
4669    }
4670    lp_exec_mask_fini(&bld.exec_mask);
4671 }
4672